In [179]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import glob

pd.options.mode.chained_assignment = None #remove warning

## Get all elections in one table

In [180]:
#President files have a different number of columns than the Senate/Governor
#files, so each folder is read separately; the columns of interest are picked
#out and concatenated in the next cell.

def _readCsvFolder(folder):
    '''Read every *.csv file in `folder` and stack them into one DataFrame.

    Files are read in sorted name order so the row order of the result is the
    same on every run (glob.glob returns matches in arbitrary order).

    Raises FileNotFoundError when the folder contains no CSV files, instead of
    letting pd.concat fail on an empty list.
    '''
    csvFiles = sorted(glob.glob(folder + "/*.csv"))
    if not csvFiles:
        raise FileNotFoundError("No CSV files found in " + folder)
    return pd.concat([pd.read_csv(csvFile) for csvFile in csvFiles],
                     axis = 0, ignore_index = True)

pathPres = r'C:\Users\Roy\Desktop\Project\ElectionData\RawFiles\President'
president = _readCsvFolder(pathPres)

pathSenGov = r'C:\Users\Roy\Desktop\Project\ElectionData\RawFiles\SenateGovernor'
senGov = _readCsvFolder(pathSenGov)


In [181]:
#Columns kept from both the presidential and the senate/governor tables
cols = ['Office','State','RaceDate','Area','RepVotes','RepCandidate',
        'RepStatus','DemVotes','DemCandidate','DemStatus',
        'PluralityVotes','PluralityParty','RepVotesMajorPercent',
        'DemVotesMajorPercent']

president, senGov = president[cols], senGov[cols]

elections = pd.concat([president, senGov], axis = 0, ignore_index = True)

#Keep only the first four characters of RaceDate. Missing dates turn into the
#string 'nan' after the str cast, so they are filtered out before the int cast.
elections['RaceDate'] = elections['RaceDate'].astype("str").str[0:4]
elections = elections[elections['RaceDate'] != 'nan']
elections = elections.astype({"RaceDate":"int"})

#Lower-case every column name
elections = elections.rename(columns = str.lower)

In [182]:
#Some races have only one major-party candidate. Their missing Dem/Rep fields
#are filled with the string "0" rather than dropping the row here: the rows are
#still needed (non-null) to look up vote %s of previous elections, and are
#dropped later.
elections = elections[elections['demvotes'] != 'Unopposed'] # 1 case

for party in ('rep', 'dem'):
    for field in ('votes', 'candidate', 'status'):
        elections[party + field] = elections[party + field].fillna("0")

In [183]:
#Vote counts are strings with thousands separators: strip the commas, then
#cast all three columns to int in one go
voteCols = ['repvotes', 'demvotes', 'pluralityvotes']

for voteCol in voteCols:
    elections[voteCol] = elections[voteCol].str.replace(",","")

elections = elections.astype({name: "int" for name in voteCols})

In [184]:
#Normalise county names to Title Case (lower-case first, then capitalise each
#word, e.g. "los angeles" --> "Los Angeles") so they match the names used in
#the other tables that will be joined
elections['area'] = elections['area'].str.lower().str.title()

In [185]:
#Give three columns the names used in the rest of the notebook
newNames = {"area":"county", "racedate":"year", "pluralityparty":"winningParty"}
elections = elections.rename(columns = newNames)

## Add previous election data in county / state

In [186]:
#One frame per office type
pres, sen, gov = [elections[elections['office'] == officeName]
                  for officeName in ('President', 'Senate', 'Governor')]

In [187]:
#LO = last outcome, LRP = Last Republican vote %, LDP = Last Democract vote %
#Each array has one row per election row and three columns (the three previous
#elections); entries stay 0 until filled in.
prevDtypes = ('object', 'float', 'float')

presLO, presLRP, presLDP = [np.zeros(shape = (len(pres),3), dtype = kind) for kind in prevDtypes]

senLO, senLRP, senLDP = [np.zeros(shape = (len(sen),3), dtype = kind) for kind in prevDtypes]

govLO, govLRP, govLDP = [np.zeros(shape = (len(gov),3), dtype = kind) for kind in prevDtypes]

In [188]:
#Newest election first; the previous-election lookup reads rows in this order
pres, sen, gov = [frame.sort_values(by=['year'], ascending = False)
                  for frame in (pres, sen, gov)]

#Only the columns needed for the previous-election lookup
prevCols = ['year','state','county','winningParty','demvotesmajorpercent','repvotesmajorpercent']
presE, senE, govE = pres[prevCols], sen[prevCols], gov[prevCols]

#Plain Python lists of rows, in the same order as the frames above
presL, senL, govL = [frame.values.tolist() for frame in (presE, senE, govE)]

In [189]:
#Election history is grouped once per (state, county) up front, so each row
#only looks at its own county instead of filtering the whole table again.

def findPrevData(dataList, df, prevOutcome, prevDemVotes, prevRepVotes, senFlag):
    '''Find the 3 previous county election data(Outcome, rep/dem votes%)

    Parameters
    dataList     -- rows as lists whose first three items are year, state, county.
    df           -- DataFrame with columns year, state, county, winningParty,
                    demvotesmajorpercent and repvotesmajorpercent. Its rows must
                    already be ordered newest election first: "previous" elections
                    are taken in row order among the rows with a smaller year.
    prevOutcome, prevDemVotes, prevRepVotes
                 -- arrays with one row per dataList entry and three columns;
                    they are filled in place. Slots with no matching earlier
                    election keep their existing value.
    senFlag      -- 0 uses the 1st, 2nd and 3rd earlier rows (President/Governor).
                    Any other value uses the 2nd, 4th and 6th earlier rows, so a
                    Senate race is matched with the SAME seat (each state has 2
                    seats).

    Rows with a year before 1990 are skipped.

    Returns the tuple (prevOutcome, prevDemVotes, prevRepVotes).
    '''
    #Index the history by (state, county). sort=False and groupby itself keep
    #the original row order inside every group.
    history = {}
    for key, grp in df.groupby(['state', 'county'], sort = False):
        history[key] = (grp['year'].to_numpy(),
                        grp['winningParty'].to_numpy(),
                        grp['demvotesmajorpercent'].to_numpy(),
                        grp['repvotesmajorpercent'].to_numpy())

    #Position of the first earlier row to use, and the stride between slots
    if senFlag == 0: #President/Governor election
        first, step = 0, 1
    else: #Senate, every second earlier election is the same seat
        first, step = 1, 2

    for i, row in enumerate(dataList):

        year, state, county = row[0], row[1], row[2]

        if not year >= 1990: #years of interest
            continue

        past = history.get((state, county))
        if past is None:
            continue

        years, outcomes, demPct, repPct = past
        earlier = years < year
        pastOutcomes, pastDem, pastRep = outcomes[earlier], demPct[earlier], repPct[earlier]

        #slot 0 = last election, 1 = 2nd last, 2 = 3rd last
        for slot in range(3):
            pos = first + slot * step
            if pos >= len(pastOutcomes):
                break
            prevOutcome[i][slot] = pastOutcomes[pos]
            prevDemVotes[i][slot] = pastDem[pos]
            prevRepVotes[i][slot] = pastRep[pos]

    return (prevOutcome, prevDemVotes, prevRepVotes)
    
#Fill the previous-election arrays for each office. Senate uses senFlag=1 so
#that only elections for the same seat are picked.
presLO, presLDP, presLRP = findPrevData(presL, presE, presLO, presLDP, presLRP, senFlag = 0)
senLO, senLDP, senLRP = findPrevData(senL, senE, senLO, senLDP, senLRP, senFlag = 1)
govLO, govLDP, govLRP = findPrevData(govL, govE, govLO, govLDP, govLRP, senFlag = 0)

In [190]:
#Copy the three slots of every result array into columns of the matching
#frame: no suffix = last election, _2 = 2nd last, _3 = 3rd last
prevResults = ((pres, presLO, presLDP, presLRP),
               (sen, senLO, senLDP, senLRP),
               (gov, govLO, govLDP, govLRP))

for frame, lastOutcome, lastDem, lastRep in prevResults:
    for slot, suffix in enumerate(('', '_2', '_3')):
        frame['LastPartyWon' + suffix] = lastOutcome[:,slot].tolist()
        frame['LastDemVotePercent' + suffix] = lastDem[:,slot].tolist()
        frame['LastRepVotePercent' + suffix] = lastRep[:,slot].tolist()


In [191]:
#The current-election vote percentages are removed from all three frames
for frame in (pres, sen, gov):
    for percentCol in ('demvotesmajorpercent', 'repvotesmajorpercent'):
        del frame[percentCol]

In [192]:
elections = pd.concat([pres,sen,gov])

In [193]:
#CHECK
elections = elections[elections['year'] >= 1990]

In [194]:
#elections[elections['LastPartyWon'] == 0]
#Remove rows that had data inconsistencies
elections = elections[elections['LastPartyWon'] != 0]
elections = elections[elections['LastRepVotePercent_3'] != 0]

In [195]:
#Keep races won by one of the two major parties
majorPartyWin = elections['winningParty'].isin(['D', 'R'])
elections = elections[majorPartyWin]

#Remove elections that were 1 party races i.e. 2014 Alabama senate election
#(these are the rows whose missing fields were filled with 0 / "0" earlier)
for party in ('dem', 'rep'):
    contested = ((elections[party + 'votes'] != 0)
                 & (elections[party + 'status'] != '0')
                 & (elections[party + 'candidate'] != '0'))
    elections = elections[contested]

In [196]:
# Add results of each election at state level each county row
testFrame = elections

# Sum the county votes per (state, office, year).
# The two vote columns are selected with a list: selecting GroupBy columns with
# a bare tuple ("repvotes", "demvotes") is deprecated and raises on pandas 2.x.
df_new = testFrame.groupby(['state', 'office', 'year'])[["repvotes", "demvotes"]].sum()

In [197]:
#State winner: 'R' when the Republican total is strictly larger, otherwise 'D'
repAhead = df_new['repvotes'] > df_new['demvotes']
df_new['StateResult'] = np.where(repAhead, 'R', 'D')

#Only the result is needed for the join, not the totals
df_new = df_new.drop(columns = ['repvotes', 'demvotes'])

elections = elections.merge(df_new, how = "inner",
                            on = ["state", "office", "year"])

In [198]:
# Add CountyStateRatio - Ratio of how many times county election outcome matches state election outcome
testFrame = elections[['state','county','winningParty','StateResult']]

# Set binary outcome column to 1 if county outcome matches state outcome, 0 if no match and take mean to get ratio 
testFrame['matchingOutcomes'] = np.where(
    testFrame['winningParty'] == testFrame['StateResult'], 1, 0)

In [199]:
del testFrame['winningParty']
del testFrame['StateResult']

df = testFrame.groupby(['state','county'], as_index = False)['matchingOutcomes'].mean()

In [200]:
elections = pd.merge(elections, df, how = "inner",
                    on = ["state","county"])

In [201]:
#Now add matching outcomes ratio for specific office type between county/state
pres = elections[elections['office'] == 'President']
sen = elections[elections['office'] == 'Senate']
gov = elections[elections['office'] == 'Governor']

presT = pres[['state','county','winningParty','StateResult']]
senT = sen[['state','county','winningParty','StateResult']]
govT = gov[['state','county','winningParty','StateResult']]

presT['officeMatchingOutcome'] = np.where(
    presT['winningParty'] == presT['StateResult'], 1, 0)

senT['officeMatchingOutcome'] = np.where(
    senT['winningParty'] == senT['StateResult'], 1, 0)

govT['officeMatchingOutcome'] = np.where(
    govT['winningParty'] == govT['StateResult'], 1, 0)

del presT['winningParty']
del presT['StateResult']
del senT['winningParty']
del senT['StateResult']
del govT['winningParty']
del govT['StateResult']

In [202]:
presDF = presT.groupby(['state','county'], as_index = False)['officeMatchingOutcome'].mean()
pres = pd.merge(pres, presDF, how = "inner",
               on = ['state','county'])

senDF = senT.groupby(['state','county'], as_index = False)['officeMatchingOutcome'].mean()
sen = pd.merge(sen, senDF, how = "inner",
               on = ['state','county'])

govDF = govT.groupby(['state','county'], as_index = False)['officeMatchingOutcome'].mean()
gov = pd.merge(gov, govDF, how = "inner",
               on = ['state','county'])

elections = pd.concat([pres,sen,gov])

## Add 2020 Presidential Polling Data (State level)

In [203]:
#2020 presidential polling averages.
#The read of combinedData.csv into `df` that used to sit here was removed: `df`
#was never used before being reassigned, so the read only made the notebook
#depend on an extra file.
pres2020 = pd.read_csv("C:\\Users\\Roy\\Desktop\\Project\\StatePollingData\\2020Pres538PollAvgs.csv")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [204]:
#Only the latest poll values since polls closer to election are better than polls
#months before elections due to undecided voters making their mind

pres2020 = pres2020.loc[pres2020['modeldate'] == '11/3/2020']
del pres2020['pct_estimate']

In [205]:
pres2020D = pres2020[pres2020['candidate_name'] == 'Joseph R. Biden Jr.']
pres2020R = pres2020[pres2020['candidate_name'] == 'Donald Trump']

In [206]:
del pres2020D['candidate_name']
del pres2020D['modeldate']

del pres2020R['candidate_name']
del pres2020R['modeldate']

In [207]:
pres2020D = pres2020D.rename(columns={"pct_trend_adjusted": "DemPoll", "cycle": "year"})
pres2020R = pres2020R.rename(columns={"pct_trend_adjusted": "RepPoll", "cycle": "year"})

In [208]:
poll2020Pres = pd.merge(pres2020D, pres2020R, how = "inner",
                        on = ["state", "year"])

poll2020Pres = poll2020Pres[['DemPoll','RepPoll','state','year']]

#For joining purposes
poll2020Pres['office'] = 'President'

## Add 1992 - 2016 Presidential Poll Data (State level) (FiveThirtyEight)

In [209]:
#pres contains FiveThirtyEight's weighted polling averages of it's collected polls for presidential
#elections from 1968-2016

pres = pd.read_csv("C:\\Users\\Roy\\Desktop\\Project\\StatePollingData\\1968to2016Pres538PollAvgs.csv",
                  usecols = [0,1,2,3,6])

electionDates = ["11/8/2016","11/6/2012","11/4/2008","11/2/2004",
                 "11/7/2000","11/5/1996","11/3/1992"]

In [210]:
pres = pres.loc[pres['modeldate'].isin(electionDates)]

In [211]:
demCandidates = ['Hillary Rodham Clinton','Barack Obama',
                 'John Kerry','Al Gore','Bill Clinton']

presD = pres.loc[pres['candidate_name'].isin(demCandidates)]

repCandidates = ['Donald Trump','Mitt Romney', 'John McCain', 
                 'George W. Bush', 'George Bush', 'Bob Dole']

presR = pres.loc[pres['candidate_name'].isin(repCandidates)]

In [212]:
del presR["modeldate"]
del presR["candidate_name"]
del presD["modeldate"]
del presD["candidate_name"]

In [213]:
presD = presD.rename(columns={"pct_trend_adjusted": "DemPoll", "cycle": "year"})
presR = presR.rename(columns={"pct_trend_adjusted": "RepPoll", "cycle": "year"})

In [214]:
pollPres = pd.merge(presD, presR, how = "inner",
                        on = ["state", "year"])

pollPres = pollPres[['DemPoll','RepPoll','state','year']]
pollPres['office'] = 'President'

In [215]:
allPresPolls = pd.concat([poll2020Pres, pollPres], ignore_index = True)

## Add Senate/Governor polling (Roper Center)

In [216]:
#Needed because some years use state number instead of state name.
#Numbers run from 1 in the order listed below; Washington DC is number 51.
stateOrder = dict(enumerate([
    "Alabama", "Alaska", "Arizona", "Arkansas", "California",
    "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
    "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland",
    "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri",
    "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey",
    "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
    "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina",
    "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming",
    "Washington DC",
], start = 1))

In [217]:
path = r'C:\Users\Roy\Desktop\Project\StatePollingData\SenGovPolls'
all_files = glob.glob(path +  "/*.csv")

#NOTE(review): cols is assigned here but not used by the reads below
cols = ["year", "STATE", "SEN", "GOV"]

#Read every poll file and stack them into one table
dfStore = [pd.read_csv(fileName) for fileName in all_files]

senGovPolls = pd.concat(dfStore, axis = 0, ignore_index = True)

In [218]:
#Turn state numbers into names, then strip leading whitespace and Title-Case
#the names
senGovPolls = senGovPolls.replace({'STATE':stateOrder})
senGovPolls['STATE'] = senGovPolls['STATE'].str.lstrip().str.lower().str.title()

In [219]:
#Weird issue with 2010 data file: a U.S. House answer shows up in the GOV and
#SEN columns, so rows carrying it in either column are removed
houseAnswer = 'Did not vote for U.S. House'
notHouseAnswer = (senGovPolls['GOV'] != houseAnswer) & (senGovPolls['SEN'] != houseAnswer)
senGovPolls = senGovPolls[notHouseAnswer]

In [220]:
#Raw SEN answers grouped by the code they are recoded to
senDict = {answer: code
           for code, answers in (
               ('D', ['DEMOCRAT', 'The Democratic candidate']),
               ('R', ['REPUBLICAN', "REPUBLICAN (ONLY LOUSIANA)_duplicated_7",
                      'REPUBLICAN (ONLY LOUSIANA)', 'The Republican candidate']),
               ('Other', ['OTHER', 'OTHER PARTY', "DID NOT VOTE", 'INDEPENDENT',
                          'INDEPENDENT_duplicated_4', 'Did not vote for U.S. senator',
                          "Did not vote"]),
           )
           for answer in answers}

#Raw GOV answers grouped by the code they are recoded to
govDict = {answer: code
           for code, answers in (
               ('D', ['DEMOCRAT', 'The Democratic candidate']),
               ('R', ['REPUBLICAN', "The Republican candidate"]),
               ('Other', ["OTHER", 'OTHER PARTY', "DID NOT VOTE", "INDEPENDENT",
                          'INDEPENDENT_duplicated_4', 'Did not vote for governor',
                          "Did not vote"]),
           )
           for answer in answers}

In [221]:
senGovPolls = senGovPolls.replace({"SEN":senDict})
senGovPolls = senGovPolls.replace({"GOV":govDict})

In [222]:
senPolls = senGovPolls[['year','STATE','SEN']]
govPolls = senGovPolls[['year','STATE','GOV']]

In [223]:
senPolls = senPolls.dropna()
govPolls = govPolls.dropna()

In [224]:
# FIX SENATE POLLS
senPollsD = senPolls[senPolls['SEN'] == 'D']
senPollsR = senPolls[senPolls['SEN'] == 'R']
senPollsOther = senPolls[senPolls['SEN'] == 'Other']

senPollsD = senPollsD.groupby(['year','STATE'], as_index = False)['SEN'].count()
senPollsD = senPollsD.rename(columns = {'SEN' : 'DemCount'})

senPollsR = senPollsR.groupby(['year','STATE'], as_index = False)['SEN'].count()
senPollsR = senPollsR.rename(columns = {'SEN' : 'RepCount'})

senPollsOther = senPollsOther.groupby(['year','STATE'], as_index = False)['SEN'].count()
senPollsOther = senPollsOther.rename(columns = {'SEN' : 'OtherCount'})

In [225]:
senPolls = pd.merge(senPollsD, senPollsR, how = "inner",
             on = ['year','STATE'])

senPolls = pd.merge(senPolls, senPollsOther, how = "left",
             on = ['year','STATE'])

In [226]:
senPolls['OtherCount'] = senPolls['OtherCount'].fillna(0)

senPolls['DemPoll'] = (senPolls['DemCount'] / (senPolls['DemCount'] + senPolls['RepCount'] + senPolls['OtherCount']))*100
senPolls['RepPoll'] = (senPolls['RepCount'] / (senPolls['DemCount'] + senPolls['RepCount'] + senPolls['OtherCount']))*100

In [227]:
senPolls = senPolls.rename(columns = {'STATE':"state"})
senPolls = senPolls[['year','state','DemPoll','RepPoll']]
senPolls['office'] = 'Senate'

In [228]:
# FIX GOVERNOR POLLS
govPollsD = govPolls[govPolls['GOV'] == 'D']
govPollsR = govPolls[govPolls['GOV'] == 'R']
govPollsOther = govPolls[govPolls['GOV'] == 'Other']

govPollsD = govPollsD.groupby(['year','STATE'], as_index = False)['GOV'].count()
govPollsD = govPollsD.rename(columns = {'GOV':'DemCount'})

govPollsR = govPollsR.groupby(['year','STATE'], as_index = False)['GOV'].count()
govPollsR = govPollsR.rename(columns = {'GOV':'RepCount'})

govPollsOther = govPollsOther.groupby(['year','STATE'], as_index = False)['GOV'].count()
govPollsOther = govPollsOther.rename(columns = {'GOV':'OtherCount'})

In [229]:
govPolls = pd.merge(govPollsD, govPollsR, how = "inner",
             on = ['year','STATE'])

govPolls = pd.merge(govPolls, govPollsOther, how = "left",
             on = ['year','STATE'])

In [230]:
govPolls['OtherCount'] = govPolls['OtherCount'].fillna(0)

govPolls['DemPoll'] = (govPolls['DemCount'] / (govPolls['DemCount'] + govPolls['RepCount'] + govPolls['OtherCount']))*100
govPolls['RepPoll'] = (govPolls['RepCount'] / (govPolls['DemCount'] + govPolls['RepCount'] + govPolls['OtherCount']))*100

In [231]:
govPolls = govPolls.rename(columns = {"STATE":"state"})
govPolls = govPolls[['year','state','DemPoll','RepPoll']]
govPolls['office'] = 'Governor'

In [232]:
senPolls = senPolls[['DemPoll','RepPoll','state','year','office']]
govPolls = govPolls[['DemPoll','RepPoll','state','year','office']]

In [233]:
senGovPolls = pd.concat([senPolls, govPolls], ignore_index = True)
polls = pd.concat([allPresPolls, senGovPolls], ignore_index = True)

In [234]:
elections = pd.merge(elections, polls, how = "left",
             on = ['state','year','office'])

## Substitute Missing Poll Data

In [235]:
# Drop rows with no poll data at all (also not included in the 538 presidential data):
# Wyoming 2004&2012, Delaware 2012, Mississippi 2012, Alaska 2012
unpolled = [('Mississippi', 2012), ('Alaska', 2012), ('Delaware', 2012),
            ('Wyoming', 2012), ('Wyoming', 2004)]

for dropState, dropYear in unpolled:
    isUnpolled = (elections['state'] == dropState) & (elections['year'] == dropYear)
    elections = elections.drop(elections[isUnpolled].index)

In [236]:
noPolls = elections[elections['DemPoll'].isna()]
#noPolls = noPolls[(noPolls['year'] != 2012) & (noPolls['year'] != 2008) & (noPolls['year'] != 2004)]

np = noPolls[['DemPoll','RepPoll','state','year','office']]

In [237]:
np = np.drop_duplicates()
poll2 = allPresPolls.copy()
del poll2['office']
poll2 = poll2.drop_duplicates()

In [238]:
np = np.values.tolist()
poll2 = poll2.values.tolist()

In [239]:
for i in range(0,len(np)):
    
    state = np[i][2]
    year = np[i][3]
    
    if year == 1990 or year == 1991: #Edge case
        desYr = 1992
        DemPoll, RepPoll = 0,0
        
        for j in range(0,len(poll2)):
            
            if state == poll2[j][2] and desYr == poll2[j][3]: #If no 1992 data
                
                DemPoll = poll2[j][0]
                RepPoll = poll2[j][1]
                
        np[i][0] = DemPoll
        np[i][1] = RepPoll
        
    else:
        rem = year % 4
    
        lowerYr = year - rem
        upperYr = year - rem + 4
    
        lowerWeight = 1 - (0.25*rem)
        upperWeight = 1 - lowerWeight
    
        DemPollLow,DemPollHigh,RepPollLow,RepPollHigh = 0,0,0,0
    
        for j in range(0,len(poll2)):
        
            if state == poll2[j][2] and lowerYr == poll2[j][3]:
            
                DemPollLow = lowerWeight * poll2[j][0]
                RepPollLow = lowerWeight * poll2[j][1]
        
            if state == poll2[j][2] and upperYr == poll2[j][3]:
            
                DemPollHigh = upperWeight * poll2[j][0]
                RepPollHigh = upperWeight * poll2[j][1]
    
        np[i][0] = DemPollLow + DemPollHigh
        np[i][1] = RepPollLow + RepPollHigh
    

In [240]:
fixedPolls = pd.DataFrame(np, columns = ['DemPoll','RepPoll','state','year','office'])

In [241]:
del noPolls['DemPoll']
del noPolls['RepPoll']

In [242]:
fixNoPolls = noPolls.merge(fixedPolls, how = "inner", on = ['state','year','office'])

In [243]:
elections = elections.dropna()

In [244]:
#elections

In [245]:
elections = pd.concat([elections, fixNoPolls], ignore_index = True)

In [246]:
del elections['repvotes']
del elections['demvotes']
del elections['pluralityvotes']

In [247]:
elections = elections.sort_values(by=['year'], ascending = False)
elections.to_csv('electionsFixed.csv', encoding = 'utf-8', index = False)