# project 4
---
## Predicting The 2024 Presidential Election outcome with Machine Learning algorithm
---
### Data collection
---

**We will need a set `Historical Presidential Election Data` composed of.**

- `Elections results` from 'kaggle' 
- `Demographic` data from the 'US Census Bureau'
- `Econimic Indicators` data from the 'Federal Reserve Economic Data(FRED)'
- `Social Media and Sentiment Analysis` data from 'Google Trends'.
---
**The data will covert a period of 15 years, covering the last 5 presidential elections and leading up to the next election.**


#### 1-Importing elections_data file 

In [136]:
# Importing dependencies
from pathlib import Path
import pandas as pd

# Defining the path to the csv file
file_path = Path('Resources/elections_data.csv')

# Reading the csv file into a pandas dataframe
elections_data = pd.read_csv(file_path)

# Displaying the dataframe
elections_data.head()

Unnamed: 0,year,county_fips,inctot,mortamt1,avrg_age,ftotinc,foodstmp_1_freq,foodstmp_2_freq,sex_2_freq,sex_1_freq,...,empstat_3.0_freq,empstat_2.0_freq,state_po,county_name,democrat,green,liberitarian,other,republican,winner
0,2000,1,24566.4,634.6,46.0,46912.7,93.6,6.4,54.0,46.0,...,38.4,3.2,,,,,,,,
1,2000,2,33842.9,1080.1,42.4,65021.9,95.3,4.7,50.3,49.7,...,25.2,4.8,,,,,,,,
2,2000,4,28331.7,814.7,45.3,52826.7,95.8,4.2,52.1,47.9,...,38.3,3.2,,,,,,,,
3,2000,5,22782.6,557.5,46.2,43941.3,92.5,7.5,53.0,47.0,...,37.8,3.2,,,,,,,,
4,2000,6,32245.0,1216.3,43.8,61455.3,95.7,4.3,52.0,48.0,...,35.1,3.6,,,,,,,,


In [138]:
# import re
import re

# extracting the state fips codes from the county fips code with regex
elections_data['county_fips'] = elections_data['county_fips'].astype(str)
elections_data['code'] = elections_data['county_fips'].str.extract(r'(\d{1,2})')[0]
elections_data.head()

Unnamed: 0,year,county_fips,inctot,mortamt1,avrg_age,ftotinc,foodstmp_1_freq,foodstmp_2_freq,sex_2_freq,sex_1_freq,...,empstat_2.0_freq,state_po,county_name,democrat,green,liberitarian,other,republican,winner,code
0,2000,1,24566.4,634.6,46.0,46912.7,93.6,6.4,54.0,46.0,...,3.2,,,,,,,,,1
1,2000,2,33842.9,1080.1,42.4,65021.9,95.3,4.7,50.3,49.7,...,4.8,,,,,,,,,2
2,2000,4,28331.7,814.7,45.3,52826.7,95.8,4.2,52.1,47.9,...,3.2,,,,,,,,,4
3,2000,5,22782.6,557.5,46.2,43941.3,92.5,7.5,53.0,47.0,...,3.2,,,,,,,,,5
4,2000,6,32245.0,1216.3,43.8,61455.3,95.7,4.3,52.0,48.0,...,3.6,,,,,,,,,6


In [140]:
# Convert 'code' column to numeric values
elections_data['code'] = pd.to_numeric(elections_data['code'], errors='coerce')

# Filter the dataset based on the specified conditions
elections_data = elections_data.loc[(elections_data['year'] >= 2008) & 
                                    (elections_data['year'] <= 2020) & 
                                    (elections_data['code'] < 56)]

In [142]:
# Mapping the 'county_fips'column values to their corresponding codes
state_dict = {
            1:'AL' , 2:'AK', 4:'AZ',5:'AR', 6:'CA' , 8:'CO' , 9:'CT' , 10:'DE', 11:'DC', 12:'FL',13:'GA',
            15:'HI', 16: 'ID',17: 'IL',18: 'IN',19: 'IA',20:'KS' , 21:'KY' , 22:'LA', 23:'ME' , 24:'MD' ,
            25:'MA' , 26:'MI' , 27:'MN' , 28:'MS', 29:'MO',30:'MT' , 31:'NE',32:'NV',33:'NH',34: 'NJ',35:'NM',
            36:'NY' , 37:'NC' ,38:'ND' , 39:'OH' , 40:'OK' ,41:'OR' , 42:'PA',44:'RI', 45:'SC',46:'SD' ,
            47:'TN' , 48:'TX' , 49:'UT',50: 'VT',51:'VA' , 53:'WA' , 54:'WV' , 55:'WI', 56:'WY'
            }
# Mapping the sate_dict to the code column
elections_data['state'] = elections_data['code'].map(state_dict)

#Showing dataframe
elections_data.head()

Unnamed: 0,year,county_fips,inctot,mortamt1,avrg_age,ftotinc,foodstmp_1_freq,foodstmp_2_freq,sex_2_freq,sex_1_freq,...,state_po,county_name,democrat,green,liberitarian,other,republican,winner,code,state
1528,2008,1000,22523.9,591.8,47.0,44879.5,90.6,9.4,53.4,46.6,...,,,,,,,,,10,DE
1529,2008,1003,27222.4,902.2,48.6,53308.6,94.2,5.8,52.5,47.5,...,AL,BALDWIN,19386.0,0.0,0.0,756.0,61271.0,1.0,10,DE
1530,2008,1015,20951.6,551.7,47.1,39712.5,89.1,10.9,54.8,45.2,...,AL,CALHOUN,16334.0,0.0,0.0,560.0,32348.0,1.0,10,DE
1531,2008,1055,24017.6,658.6,48.2,47650.0,94.2,5.8,54.6,45.4,...,AL,ETOWAH,13497.0,0.0,0.0,645.0,30595.0,1.0,10,DE
1532,2008,1073,28109.5,812.4,47.2,54993.2,93.5,6.5,55.0,45.0,...,AL,JEFFERSON,166121.0,0.0,0.0,2482.0,149921.0,0.0,10,DE


In [144]:
# renaming the columns
elections_data = elections_data.rename(columns = {
                                              'year':'Cycle','county_fips':'County FIP Code','inctot':'Avg Annual Income',
                                              'mortamt1':'Avg Mortgage','avrg_age':'Avg Age','ftotinc':'Avg Household Income',
                                              'foodstmp_1_freq':'% of With Foodstamps','foodstmp_2_freq':'% Without Foodstamps',	
                                              'sex_2_freq':'%Females','sex_1_freq':'%Males','marst_5_freq':'% Widowed',
                                              'marst_6_freq':'% Never Married','marst_1_freq':'% Married Spouse Present',
                                              'marst_4_freq':'% Divorced','marst_3_freq':'% Seperated','marst_2_freq':'% Married Spouse Absent',
                                              'race_1_freq':'% White','race_2_freq':'% Black','race_7_freq':'% Other Race',
                                              'race_8_freq':'% Two major Races','race_5_freq':'% Japanese','race_6_freq':'% Other Asian',
                                              'race_3_freq':'% Native Indian or Alaska','race_4_freq':'% Chinese','race_9_freq':'% Three or More Races',
                                              'ctz_stat_1_freq':'% Citizen','ctz_stat_3_freq':'% Non-Citizen','ctz_stat_2_freq':'% Naturalized Citizen',
                                              'lang_1_freq':'% Speak_English ','lang_2_freq':'% Other_Language','educ_attain_2.0_freq':'% Bachelor Degree',
                                              'educ_attain_1.0_freq':'% Lower Education','educ_attain_3.0_freq':'% Masters or Professional Certificate',
                                              'educ_attain_4.0_freq':'% Doctoral Degree','empstat_1.0_freq':'% Employed',
                                              'empstat_3.0_freq':'% Self_Employed','empstat_2.0_freq':'% Unemployed',
                                              'state_po':'State_county','county_name':'County_Name','democrat':'Democrat',
                                              'green':'Green','liberitarian':'Independant','other':'Other',
                                              'republican':'Republican','winner':'Winner', 'state':'State'
                                            })
#Displaying the dataframe
elections_data.head()

Unnamed: 0,Cycle,County FIP Code,Avg Annual Income,Avg Mortgage,Avg Age,Avg Household Income,% of With Foodstamps,% Without Foodstamps,%Females,%Males,...,State_county,County_Name,Democrat,Green,Independant,Other,Republican,Winner,code,State
1528,2008,1000,22523.9,591.8,47.0,44879.5,90.6,9.4,53.4,46.6,...,,,,,,,,,10,DE
1529,2008,1003,27222.4,902.2,48.6,53308.6,94.2,5.8,52.5,47.5,...,AL,BALDWIN,19386.0,0.0,0.0,756.0,61271.0,1.0,10,DE
1530,2008,1015,20951.6,551.7,47.1,39712.5,89.1,10.9,54.8,45.2,...,AL,CALHOUN,16334.0,0.0,0.0,560.0,32348.0,1.0,10,DE
1531,2008,1055,24017.6,658.6,48.2,47650.0,94.2,5.8,54.6,45.4,...,AL,ETOWAH,13497.0,0.0,0.0,645.0,30595.0,1.0,10,DE
1532,2008,1073,28109.5,812.4,47.2,54993.2,93.5,6.5,55.0,45.0,...,AL,JEFFERSON,166121.0,0.0,0.0,2482.0,149921.0,0.0,10,DE


In [146]:
#Dropping unwanted columns and all nulls
elections_data = elections_data.drop(columns = ['County FIP Code','State_county','County_Name','Democrat','Green','Independant',
                                                	'Other','Republican','Winner','code'])

In [148]:
# Reorganizing the columns
elections_data = elections_data[['Cycle','State','Avg Annual Income', 'Avg Mortgage', 'Avg Age',
       'Avg Household Income', '% of With Foodstamps', '% Without Foodstamps',
       '%Females', '%Males', '% Widowed', '% Never Married',
       '% Married Spouse Present', '% Divorced', '% Seperated',
       '% Married Spouse Absent', '% White', '% Black', '% Other Race',
       '% Two major Races', '% Japanese', '% Other Asian',
       '% Native Indian or Alaska', '% Chinese', '% Three or More Races',
       '% Citizen', '% Non-Citizen', '% Naturalized Citizen',
       '% Speak_English ', '% Other_Language', '% Bachelor Degree',
       '% Lower Education', '% Masters or Professional Certificate',
       '% Doctoral Degree', '% Employed', '% Self_Employed', '% Unemployed']]

#Displaying the dataframe
elections_data.head()

Unnamed: 0,Cycle,State,Avg Annual Income,Avg Mortgage,Avg Age,Avg Household Income,% of With Foodstamps,% Without Foodstamps,%Females,%Males,...,% Naturalized Citizen,% Speak_English,% Other_Language,% Bachelor Degree,% Lower Education,% Masters or Professional Certificate,% Doctoral Degree,% Employed,% Self_Employed,% Unemployed
1528,2008,DE,22523.9,591.8,47.0,44879.5,90.6,9.4,53.4,46.6,...,1.0,96.4,3.6,40.4,53.3,5.9,0.5,54.9,41.6,3.6
1529,2008,DE,27222.4,902.2,48.6,53308.6,94.2,5.8,52.5,47.5,...,1.6,97.1,2.9,51.8,39.3,8.4,0.5,54.1,43.5,2.4
1530,2008,DE,20951.6,551.7,47.1,39712.5,89.1,10.9,54.8,45.2,...,1.2,95.4,4.6,41.1,52.6,5.0,1.2,54.3,41.1,4.5
1531,2008,DE,24017.6,658.6,48.2,47650.0,94.2,5.8,54.6,45.4,...,0.5,98.0,2.0,45.3,46.6,7.9,0.2,50.3,46.8,2.9
1532,2008,DE,28109.5,812.4,47.2,54993.2,93.5,6.5,55.0,45.0,...,1.2,96.6,3.4,47.8,42.0,9.3,0.9,58.5,38.1,3.4


#### 2-Importing Campaign finance data

In [151]:
# Importing dependencies
from pathlib import Path
import numpy as np
import pandas as pd

# Reading the csv files into a pandas dataframe
df1 =pd.read_csv(Path('Resources/file1.csv'))
df2 =pd.read_csv(Path('Resources/file2.csv'))
df3 =pd.read_csv(Path('Resources/file3.csv'))
df4 =pd.read_csv(Path('Resources/file4.csv'))
df5 =pd.read_csv(Path('Resources/file5.csv'))
df6 =pd.read_csv(Path('Resources/file6.csv'))
df7 =pd.read_csv(Path('Resources/file7.csv'))
df8 =pd.read_csv(Path('Resources/file8.csv'))
df9 =pd.read_csv(Path('Resources/file9.csv'))

#dRoping unwanted columns
df1 = df1.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df2 = df2.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df3 = df3.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df4 = df4.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df5 = df5.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df6 = df6.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df7 = df7.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df8 = df8.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])
df9 = df9.drop(columns = ['Cand_Office_St','Cand_Office_Dist','Cand_Incumbent_Challenger_Open_Seat',
'Coverage_End_Date','Cand_Street_1','Cand_Street_2','Cand_City','Cand_Zip','Coverage_Start_Date'])

# Storing the dataframes into a list
df = [df1,df2,df3,df4,df5,df6,df7,df8,df9]

#concatenating the dataframes into a single dataframe
finances_data = pd.concat(df,ignore_index=False)

#extracting the year from link column using regular expression
finances_data['Cycle'] = finances_data['Link_Image'].str.extract(r'cycle=(\d{4})')[0]

# Displaying the dataframe
finances_data.head()


Unnamed: 0,Link_Image,Cand_Name,Cand_Id,Cand_Office,Cand_Party_Affiliation,Total_Receipt,Total_Disbursement,Cash_On_Hand_COP,Debt_Owed_By_Committee,Cand_State,...,Individual_Refund,Party_Committee_Refund,Other_Committee_Refund,Total_Contribution_Refund,Other_Disbursements,Net_Contribution,Net_Operating_Expenditure,Cash_On_Hand_BOP,Debt_Owe_To_Committee,Cycle
0,https://www.fec.gov/data/candidate/P80003338/?...,"OBAMA, BARACK",P80003338,P,DEM,778643000.0,760370200.0,18272367.39,434954.4,IL,...,5744310.2,300.0,11345.0,5755955.2,47945662.98,4306975000.0,3720067000.0,0.0,0.0,2008
1,https://www.fec.gov/data/candidate/H8FL05116/?...,"CASTAGNERO, CAROL",H8FL05116,H,DEM,0.0,276.0,0.0,0.0,FL,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2008
2,https://www.fec.gov/data/candidate/H8FL06056/?...,"STEARNS, CLIFFORD B",H8FL06056,H,REP,806449.2,789781.2,2272965.45,0.0,FL,...,300.0,0.0,0.0,300.0,267040.0,584573.4,510824.9,2258097.63,0.0,2008
3,https://www.fec.gov/data/candidate/H8FL06080/?...,"CUNHA, TIMOTHY M",H8FL06080,H,DEM,249756.9,249756.9,0.0,0.0,FL,...,25.0,0.0,0.0,25.0,0.0,128731.9,149716.0,0.0,0.0,2008
4,https://www.fec.gov/data/candidate/H8FL07039/?...,"MALLOY, JAMES CLYDE",H8FL07039,H,DEM,30553.0,29670.0,0.0,16245.0,FL,...,0.0,0.0,0.0,0.0,1013.0,13398.0,28647.0,0.0,0.0,2008


In [153]:
#Filtering 'Cand_Office' == P
finances_data = finances_data.loc[finances_data['Cand_Office'] == 'P']

#Filtering dataframe by  'Cycle'<=2020
finances_data['Cycle'] = finances_data['Cycle'].astype(int)
finances_data = finances_data.loc[(finances_data['Cycle'] <= 2020)]

In [155]:
# Aggregating the contributions for each candidate
finances_data['contributions'] = finances_data[['Individual_Itemized_Contribution',
                                           'Individual_Unitemized_Contribution',
                                           'Individual_Contribution',
                                           'Other_Committee_Contribution',
                                           'Party_Committee_Contribution',
                                           'Cand_Contribution','Total_Contribution',
                                           'Net_Contribution']].sum(axis=1)
# Aggregating the loan for each candidate
finances_data['Loans'] = finances_data[['Cand_Loan', 'Other_Loan','Total_Loan']].sum(axis = 1)

# Aggregating the refunds for each candidate
finances_data['Refunds'] = finances_data[['Individual_Refund','Party_Committee_Refund',
                                          'Other_Committee_Refund', 'Total_Contribution_Refund']].sum(axis = 1)

# Aggregating the Disbursements for each candidate
finances_data['Disbursements'] = finances_data[['Total_Disbursement','Exempt_Legal_Accounting_Disbursement',
                                                'Fundraising_Disbursement',
                                                'Other_Disbursements']].sum(axis = 1)

# SAggregating the Expenditures for each candidate
finances_data['Expenditures'] = finances_data[['Net_Operating_Expenditure','Operating_Expenditure',
                                               'Offsets_To_Operating_Expenditure']].sum(axis = 1)

# Aggregating the Repayments for each candidate
finances_data['Repayments'] = finances_data[['Cand_Loan_Repayment','Other_Loan_Repayment',
                                             'Total_Loan_Repayment']].sum(axis = 1)

# Aggregating the Receipts for each candidate
finances_data['Receipts'] = finances_data[['Total_Receipt','Other_Receipts']].sum(axis = 1)

# Aggregating the Cash_on_Hand for each candidate
finances_data['Cash_on_Hand'] = finances_data[['Cash_On_Hand_BOP','Cash_On_Hand_COP']].sum(axis = 1)

# Displaying dataframe
finances_data.head()

Unnamed: 0,Link_Image,Cand_Name,Cand_Id,Cand_Office,Cand_Party_Affiliation,Total_Receipt,Total_Disbursement,Cash_On_Hand_COP,Debt_Owed_By_Committee,Cand_State,...,Debt_Owe_To_Committee,Cycle,contributions,Loans,Refunds,Disbursements,Expenditures,Repayments,Receipts,Cash_on_Hand
0,https://www.fec.gov/data/candidate/P80003338/?...,"OBAMA, BARACK",P80003338,P,DEM,778642962.3,760370195.4,18272367.39,434954.4,IL,...,0.0,2008,6066909000.0,0.0,11511910.4,808315900.0,4451866000.0,0.0,780318300.0,18272367.39
155,https://www.fec.gov/data/candidate/P00000133/?...,"MCDANIELS, EDISON PENROW",P00000133,P,NNE,0.0,0.0,0.0,0.0,CA,...,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
156,https://www.fec.gov/data/candidate/P00001792/?...,"ENGLERIUS, MAX",P00001792,P,OTH,0.0,0.0,0.0,0.0,WA,...,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
181,https://www.fec.gov/data/candidate/P00003236/?...,"PRATTAS, JAMES JOHN",P00003236,P,REP,0.0,0.0,0.0,0.0,HI,...,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182,https://www.fec.gov/data/candidate/P00003277/?...,"HAMBURG, AL",P00003277,P,IND,0.0,0.0,0.0,0.0,WY,...,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [157]:
#Dropping unwantted columns
finances_data = finances_data.drop(columns = ['Individual_Itemized_Contribution','Individual_Unitemized_Contribution',
                                               'Individual_Contribution','Other_Committee_Contribution','Party_Committee_Contribution',
                                               'Cand_Contribution','Total_Contribution','Net_Contribution','Cand_Loan', 'Other_Loan',
                                               'Total_Loan','Individual_Refund','Party_Committee_Refund','Other_Committee_Refund',
                                               'Total_Contribution_Refund','Total_Disbursement','Exempt_Legal_Accounting_Disbursement',
                                               'Fundraising_Disbursement','Other_Disbursements','Net_Operating_Expenditure',
                                               'Operating_Expenditure','Offsets_To_Operating_Expenditure','Cand_Loan_Repayment',
                                               'Other_Loan_Repayment','Total_Loan_Repayment','Total_Receipt','Other_Receipts',
                                               'Link_Image','Cand_Party_Affiliation','Cand_Office','Cash_On_Hand_BOP','Cash_On_Hand_COP']).dropna()
# Displaying dataframe
finances_data.head()

Unnamed: 0,Cand_Name,Cand_Id,Debt_Owed_By_Committee,Cand_State,Transfer_From_Other_Auth_Committee,Offsets_To_Fundraising,Offsets_To_Leagal_Accounting,Transfer_To_Other_Auth_Committee,Debt_Owe_To_Committee,Cycle,contributions,Loans,Refunds,Disbursements,Expenditures,Repayments,Receipts,Cash_on_Hand
0,"OBAMA, BARACK",P80003338,434954.4,IL,86950000.0,0.0,0.0,0.0,0.0,2008,6066909000.0,0.0,11511910.4,808315900.0,4451866000.0,0.0,780318300.0,18272367.39
155,"MCDANIELS, EDISON PENROW",P00000133,0.0,CA,0.0,0.0,0.0,0.0,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
156,"ENGLERIUS, MAX",P00001792,0.0,WA,0.0,0.0,0.0,0.0,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
181,"PRATTAS, JAMES JOHN",P00003236,0.0,HI,0.0,0.0,0.0,0.0,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182,"HAMBURG, AL",P00003277,0.0,WY,0.0,0.0,0.0,0.0,0.0,2008,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [159]:
# Renaming the columns
finances_data = finances_data.rename(columns = {'Cand_State':'State' , 'contributions':'Contributions'})

In [161]:
# Reorganizing the columns
finances_data = finances_data[['Cycle','State', 'Cand_Name', 'Cand_Id' , 
                               'Debt_Owed_By_Committee', 'Transfer_From_Other_Auth_Committee',
                               'Offsets_To_Fundraising', 'Offsets_To_Leagal_Accounting',
                               'Transfer_To_Other_Auth_Committee', 'Debt_Owe_To_Committee', 
                               'Contributions', 'Loans', 'Refunds','Disbursements', 
                               'Expenditures', 'Repayments', 'Receipts']]

finances_data.head() 

Unnamed: 0,Cycle,State,Cand_Name,Cand_Id,Debt_Owed_By_Committee,Transfer_From_Other_Auth_Committee,Offsets_To_Fundraising,Offsets_To_Leagal_Accounting,Transfer_To_Other_Auth_Committee,Debt_Owe_To_Committee,Contributions,Loans,Refunds,Disbursements,Expenditures,Repayments,Receipts
0,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,6066909000.0,0.0,11511910.4,808315900.0,4451866000.0,0.0,780318300.0
155,2008,CA,"MCDANIELS, EDISON PENROW",P00000133,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
156,2008,WA,"ENGLERIUS, MAX",P00001792,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
181,2008,HI,"PRATTAS, JAMES JOHN",P00003236,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182,2008,WY,"HAMBURG, AL",P00003277,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


---
#### 3-Merging the datasets

In [179]:
#Adjusting length of elections_data to match finances_data
elections_data = elections_data.iloc[:len(finances_data)]

In [181]:
# merging the dataframes
campaign_df = pd.merge(finances_data, elections_data ,  on = ['Cycle','State']).dropna()
campaign_df

Unnamed: 0,Cycle,State,Cand_Name,Cand_Id,Debt_Owed_By_Committee,Transfer_From_Other_Auth_Committee,Offsets_To_Fundraising,Offsets_To_Leagal_Accounting,Transfer_To_Other_Auth_Committee,Debt_Owe_To_Committee,...,% Naturalized Citizen,% Speak_English,% Other_Language,% Bachelor Degree,% Lower Education,% Masters or Professional Certificate,% Doctoral Degree,% Employed,% Self_Employed,% Unemployed
0,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,2.5,93.0,7.0,46.8,46.3,6.3,0.6,61.6,34.8,3.6
1,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,3.3,87.4,12.6,54.4,30.7,11.5,3.4,64.2,33.3,2.6
2,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,11.9,70.6,29.4,47.7,39.0,12.0,1.3,62.3,33.1,4.6
3,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,12.4,77.4,22.6,56.3,26.5,15.5,1.7,67.7,28.7,3.6
4,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,1.4,95.1,4.9,46.6,48.1,4.9,0.4,62.4,32.7,4.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33914,2016,HI,"YAMAMOTO, BRANT",P60013257,0.0,0.0,0.0,0.0,0.0,0.0,...,6.8,80.9,19.1,52.0,40.1,6.6,1.4,52.2,45.0,2.8
33915,2016,HI,"YAMAMOTO, BRANT",P60013257,0.0,0.0,0.0,0.0,0.0,0.0,...,13.2,73.4,26.6,53.4,34.8,10.2,1.6,64.0,33.8,2.2
33916,2016,HI,"ELLIOTT, CLAIRE ELISABETH",P80003643,0.0,0.0,0.0,0.0,0.0,0.0,...,10.5,79.0,21.0,52.4,39.6,7.0,1.0,61.6,36.1,2.3
33917,2016,HI,"ELLIOTT, CLAIRE ELISABETH",P80003643,0.0,0.0,0.0,0.0,0.0,0.0,...,6.8,80.9,19.1,52.0,40.1,6.6,1.4,52.2,45.0,2.8


In [183]:
campaign_df.drop_duplicates()


Unnamed: 0,Cycle,State,Cand_Name,Cand_Id,Debt_Owed_By_Committee,Transfer_From_Other_Auth_Committee,Offsets_To_Fundraising,Offsets_To_Leagal_Accounting,Transfer_To_Other_Auth_Committee,Debt_Owe_To_Committee,...,% Naturalized Citizen,% Speak_English,% Other_Language,% Bachelor Degree,% Lower Education,% Masters or Professional Certificate,% Doctoral Degree,% Employed,% Self_Employed,% Unemployed
0,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,2.5,93.0,7.0,46.8,46.3,6.3,0.6,61.6,34.8,3.6
1,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,3.3,87.4,12.6,54.4,30.7,11.5,3.4,64.2,33.3,2.6
2,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,11.9,70.6,29.4,47.7,39.0,12.0,1.3,62.3,33.1,4.6
3,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,12.4,77.4,22.6,56.3,26.5,15.5,1.7,67.7,28.7,3.6
4,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,1.4,95.1,4.9,46.6,48.1,4.9,0.4,62.4,32.7,4.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33914,2016,HI,"YAMAMOTO, BRANT",P60013257,0.0,0.0,0.0,0.0,0.0,0.0,...,6.8,80.9,19.1,52.0,40.1,6.6,1.4,52.2,45.0,2.8
33915,2016,HI,"YAMAMOTO, BRANT",P60013257,0.0,0.0,0.0,0.0,0.0,0.0,...,13.2,73.4,26.6,53.4,34.8,10.2,1.6,64.0,33.8,2.2
33916,2016,HI,"ELLIOTT, CLAIRE ELISABETH",P80003643,0.0,0.0,0.0,0.0,0.0,0.0,...,10.5,79.0,21.0,52.4,39.6,7.0,1.0,61.6,36.1,2.3
33917,2016,HI,"ELLIOTT, CLAIRE ELISABETH",P80003643,0.0,0.0,0.0,0.0,0.0,0.0,...,6.8,80.9,19.1,52.0,40.1,6.6,1.4,52.2,45.0,2.8


#### 4-Creating an Outcome column

In [39]:
campaign_df .columns

Index(['Cycle', 'State', 'Cand_Name', 'Cand_Id', 'Debt_Owed_By_Committee',
       'Transfer_From_Other_Auth_Committee', 'Offsets_To_Fundraising',
       'Offsets_To_Leagal_Accounting', 'Transfer_To_Other_Auth_Committee',
       'Debt_Owe_To_Committee', 'Contributions', 'Loans', 'Refunds',
       'Disbursements', 'Expenditures', 'Repayments', 'Receipts',
       'Avg Annual Income', 'Avg Mortgage', 'Avg Age', 'Avg Household Income',
       '% of With Foodstamps', '% Without Foodstamps', '%Females', '%Males',
       '% Widowed', '% Never Married', '% Married Spouse Present',
       '% Divorced', '% Seperated', '% Married Spouse Absent', '% White',
       '% Black', '% Other Race', '% Two major Races', '% Japanese',
       '% Other Asian', '% Native Indian or Alaska', '% Chinese',
       '% Three or More Races', '% Citizen', '% Non-Citizen',
       '% Naturalized Citizen', '% Speak_English ', '% Other_Language',
       '% Bachelor Degree', '% Lower Education',
       '% Masters or Professi

In [41]:
# Initializing a list to hold our column values
winner = []

#Looping through each rows , searching for the winner
for i in range(len(campaign_df)):
    if campaign_df.iloc[i]['Cand_Name'] == 'Barack Obama' and campaign_df.iloc[i]['cycle'] == 2008:
        winner.append(1)
    elif campaign_df.iloc[i]['Cand_Name'] == 'Barack Obama' and campaign_df.iloc[i]['cycle'] == 2012:
        winner.append(1)
    elif campaign_df.iloc[i]['Cand_Name'] == 'Joseph R. Biden Jr.' and campaign_df.iloc[i]['cycle'] == 2020:
        winner.append(1)
    elif campaign_df.iloc[i]['Cand_Name'] == 'Donald Trump' and campaign_df.iloc[i]['cycle'] == 2016:
        winner.append(1)
    else:
        winner.append(0)

# Appending the new column to the dataframe
campaign_df['Winner'] = winner

#Displaying the dataframe
campaign_df.head()

Unnamed: 0,Cycle,State,Cand_Name,Cand_Id,Debt_Owed_By_Committee,Transfer_From_Other_Auth_Committee,Offsets_To_Fundraising,Offsets_To_Leagal_Accounting,Transfer_To_Other_Auth_Committee,Debt_Owe_To_Committee,...,% Speak_English,% Other_Language,% Bachelor Degree,% Lower Education,% Masters or Professional Certificate,% Doctoral Degree,% Employed,% Self_Employed,% Unemployed,Winner
0,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,93.0,7.0,46.8,46.3,6.3,0.6,61.6,34.8,3.6,0
1,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,87.4,12.6,54.4,30.7,11.5,3.4,64.2,33.3,2.6,0
2,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,70.6,29.4,47.7,39.0,12.0,1.3,62.3,33.1,4.6,0
3,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,77.4,22.6,56.3,26.5,15.5,1.7,67.7,28.7,3.6,0
4,2008,IL,"OBAMA, BARACK",P80003338,434954.4,86950000.0,0.0,0.0,0.0,0.0,...,95.1,4.9,46.6,48.1,4.9,0.4,62.4,32.7,4.9,0


## Models Creation
---

**Given the nature of our analysis which is a binary classification,we'll proceed as follow:**

- Preprocessing our dataset
- creating a Logistic regression
- creating a Neural network model

### 1-Preprocessing
---


In [45]:
# Import our dependencies
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [46]:
#Dropping unwanted columns
campaign_df = campaign_df.drop(columns = ['Cycle','State','Cand_Name','Cand_Id'])

In [49]:
# Split our preprocessed data into our features and target arrays
y = campaign_df['Winner'].values
X = campaign_df.drop('Winner' , axis = 1)


# Split the preprocessed data into a training and testing dataset
X_train , X_test , y_train , y_test = train_test_split(X,y , random_state = 78)

### 2-Creating a Logistic Regression Model with the Original Data

**Step 1: Fit a logistic regression model by using the training data (`X_train` and `y_train`).**

In [53]:

# Import the LogisticRegression module from SKLearn
from sklearn.linear_model import LogisticRegression

# Instantiate the Logistic Regression model
# Assign a random_state parameter of 1 to the model
classifier = LogisticRegression(solver = 'lbfgs' , random_state = 1)
classifier

# Fit the model using training data
classifier.fit(X_train , y_train)

ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0

**Step 2: Making predictions on the testing data labels by using the testing data (`X_test`) and the fitted model.**

---

In [57]:
# Make a prediction using the testing data
predictions = classifier.predict(X_test)

AttributeError: 'LogisticRegression' object has no attribute 'coef_'

**Step 3: Evaluate the model’s performance by doing the following:**

- Generate a confusion matrix.
- Print the classification report.

In [60]:
# Generate a confusion matrix for the model
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, predictions)


NameError: name 'predictions' is not defined

In [62]:
# Plotting the Confusion Matrix
metrics.plot_confusion_matrix(models[0], X_val, Y_val)
plt.show()


NameError: name 'metrics' is not defined

In [64]:
# Print the classification report for the model
from sklearn.metrics import classification_report
target_names = ["democrat", "republican"]
print(classification_report(y_test, predictions, target_names=target_names))

NameError: name 'predictions' is not defined

In [66]:
# Making a heatmap to visualize the correlation matrix
plt.figure(figsize=(10,10))
sb.heatmap(df.corr() > 0.8, annot=True, cbar=False)
plt.show()

NameError: name 'plt' is not defined

---

### 3-Creating a Neural Network Model
---

**Step 1: Initialize a StandardScaler instance**

In [71]:
# Creating a StandardScaler instances
scaler = StandardScaler()

# Fitting/training the model
X_scaler = scaler.fit(X_train)

# Scaling the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

---
**Step 2: Compile, Train and Evaluate the Model**

In [74]:
X_train_scaled.shape

(25439, 48)

In [76]:
len(X_train_scaled)

25439

In [78]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 5


psychic = tf.keras.models.Sequential()

# First hidden layer
psychic.add(tf.keras.layers.Dense(units = hidden_nodes_layer1, activation="relu", input_dim =48))

# Second hidden layer
psychic.add(tf.keras.layers.Dense(units = hidden_nodes_layer2 , activation = "sigmoid" ))

# Output layer
psychic.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
psychic.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [80]:
# Compile the model
psychic.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [82]:
# Train the model
fit_model = psychic.fit(X_train_scaled , y_train , epochs = 100)

Epoch 1/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 243us/step - accuracy: 0.9715 - loss: 0.3488
Epoch 2/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236us/step - accuracy: 1.0000 - loss: 0.0402
Epoch 3/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237us/step - accuracy: 1.0000 - loss: 0.0154
Epoch 4/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236us/step - accuracy: 1.0000 - loss: 0.0079
Epoch 5/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233us/step - accuracy: 1.0000 - loss: 0.0045
Epoch 6/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238us/step - accuracy: 1.0000 - loss: 0.0027
Epoch 7/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235us/step - accuracy: 1.0000 - loss: 0.0017
Epoch 8/100
[1m795/795[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233us/step - accuracy: 1.0000 - loss: 9.5418e-04
Epoch 9/100


In [83]:
# Evaluate the model using the test data
model_loss, model_accuracy = psychic.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

265/265 - 0s - 328us/step - accuracy: 1.0000 - loss: 3.9596e-10
Loss: 3.9596473322411896e-10, Accuracy: 1.0


---
**Step 3: Optimize the model with the `keras_tuner`**

In [87]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    psychic_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])

    # Allow kerastuner to decide number of neurons in first layer
    psychic_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=len(campaign_df)))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        psychic_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))

    psychic_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    psychic_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])

    return psychic_model

In [89]:
# Import the kerastuner library
import keras_tuner as kt

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [91]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 2 Complete [00h 00m 00s]

Best val_accuracy So Far: None
Total elapsed time: 00h 00m 01s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
relu              |tanh              |activation
3                 |9                 |first_units
5                 |6                 |num_layers
7                 |9                 |units_0
7                 |1                 |units_1
5                 |1                 |units_2
5                 |1                 |units_3
9                 |1                 |units_4
1                 |1                 |units_5
3                 |3                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/3


Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/tuners/hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3

RuntimeError: Number of consecutive failures exceeded the limit of 3.
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/tuners/hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/keras_tuner/src/engine/hypermodel.py", line 149, in fit
    return model.fit(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/opt/anaconda3/lib/python3.11/site-packages/keras/src/layers/input_spec.py", line 227, in assert_input_compatibility
    raise ValueError(
ValueError: Exception encountered when calling Sequential.call().

[1mInput 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 33919, but received input with shape (None, 48)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(None, 48), dtype=float32)
  • training=True
  • mask=None


In [95]:
 # Get best model hyperparameters
hyper_psychic = tuner.get_best_hyperparameters(1)[0]
hyper_psychic.values

{'activation': 'tanh',
 'first_units': 9,
 'num_layers': 6,
 'units_0': 9,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0,
 'units_1': 1,
 'units_2': 1,
 'units_3': 1,
 'units_4': 1,
 'units_5': 1}

In [97]:
best_psychic = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_psychic.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = './untitled_project/trial_0000/checkpoint.weights.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [99]:
# Export our model to HDF5 file
best_psychic.save("Project4.h5")

NameError: name 'best_psychic' is not defined

#### Fetching the FEC database

In [74]:
# Importing our dependencies
import requests
import time
import json

# Storing our request endpoint into a variable
#url = f'https://api.open.fec.gov/v1/presidential/financial_summary/?page=[i]&per_page=100&election_year=2004&election_year=2008&election_year=2012&election_year=2016&election_year=2020&election_year=2024&candidate_id=P00003335&candidate_id=P80002801&candidate_id=P80003353&candidate_id=P80001571&candidate_id=P80002856&candidate_id=P80000271&candidate_id=P80003338&candidate_id=P00003392&candidate_id=P80000722&candidate_id=P00011422&sort=net_receipts&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key=x30Ch1dltBLFNifcgTMbjLIedcehzGUJTi5le2I6'

# Defining a list to store our results
results = []

#Looping through the request to get 1000 calls
for i in range(1, 500):

    # Storing our request endpoint into a variable
    url = f'https://api.open.fec.gov/v1/presidential/financial_summary/?page={i}&per_page=100&election_year=2004&election_year=2008&election_year=2012&election_year=2016&election_year=2020&election_year=2024&candidate_id=P00003335&candidate_id=P80002801&candidate_id=P80003353&candidate_id=P80001571&candidate_id=P80002856&candidate_id=P80000271&candidate_id=P80003338&candidate_id=P00003392&candidate_id=P80000722&candidate_id=P00011422&sort=net_receipts&sort_hide_null=false&sort_null_only=false&sort_nulls_last=false&api_key=x30Ch1dltBLFNifcgTMbjLIedcehzGUJTi5le2I6'

    # Making a API request 
    response = requests.get(url)

    # Extracting the query results
    data = response.json()

    # appending the results to list
    results.append(data['results'])

    # Sleep to handle rate limit
    time.sleep(1)

#Saving the results to a file
with open('results.json' , 'w') as csv_file:
    json.dump(results , csv_file)
