In [292]:
import pandas as pd
import numpy as np
import datetime
import glob

UR_PATH = '../raw_data/unemployment'
SAVE_PATH = '../processed_data/unemp.csv'

In [293]:
# Read all unemployment data for all states and concatenate
dfs = []
df = ''
for filename in glob.glob(UR_PATH + '/*.csv'):
    state_po = filename.split('/')[-1][:2]
    df = pd.read_csv(filename)
    df.rename(columns={state_po + 'UR': 'unemp_rate'}, inplace=True)
    df['state_po'] = state_po
    dfs.append(df)

all_unemp = pd.concat(dfs)
df

Unnamed: 0,DATE,unemp_rate,state_po
0,1976-01-01,4.1,WY
1,1976-02-01,4.1,WY
2,1976-03-01,4.2,WY
3,1976-04-01,4.1,WY
4,1976-05-01,4.1,WY
...,...,...,...
530,2020-03-01,3.8,WY
531,2020-04-01,9.6,WY
532,2020-05-01,8.8,WY
533,2020-06-01,7.6,WY


In [294]:
all_unemp

Unnamed: 0,DATE,unemp_rate,state_po
0,1976-01-01,7.1,AK
1,1976-02-01,7.1,AK
2,1976-03-01,7.0,AK
3,1976-04-01,7.0,AK
4,1976-05-01,7.0,AK
...,...,...,...
530,2020-03-01,3.8,WY
531,2020-04-01,9.6,WY
532,2020-05-01,8.8,WY
533,2020-06-01,7.6,WY


In [295]:
# Only include data from October of each year (1 mo before election)
dti = pd.date_range(start='1976-10-01', end='2016-10-01', freq='48MS')
dti = [str(x).split(' ')[0] for x in dti]
all_unemp[all_unemp['DATE'].isin(dti)]

Unnamed: 0,DATE,unemp_rate,state_po
9,1976-10-01,8.1,AK
57,1980-10-01,9.8,AK
105,1984-10-01,9.5,AK
153,1988-10-01,8.5,AK
201,1992-10-01,8.6,AK
...,...,...,...
297,2000-10-01,3.9,WY
345,2004-10-01,3.8,WY
393,2008-10-01,3.3,WY
441,2012-10-01,5.3,WY


In [296]:
# Strip date to year
years = [str(x).split('-')[0] for x in all_unemp['DATE']]
# np.array(years)

In [297]:
all_unemp['DATE'] = years
all_unemp

Unnamed: 0,DATE,unemp_rate,state_po
0,1976,7.1,AK
1,1976,7.1,AK
2,1976,7.0,AK
3,1976,7.0,AK
4,1976,7.0,AK
...,...,...,...
530,2020,3.8,WY
531,2020,9.6,WY
532,2020,8.8,WY
533,2020,7.6,WY


In [298]:
all_unemp.rename(columns={'DATE': 'year'})

Unnamed: 0,year,unemp_rate,state_po
0,1976,7.1,AK
1,1976,7.1,AK
2,1976,7.0,AK
3,1976,7.0,AK
4,1976,7.0,AK
...,...,...,...
530,2020,3.8,WY
531,2020,9.6,WY
532,2020,8.8,WY
533,2020,7.6,WY


In [299]:
all_unemp.reset_index(drop=True)

Unnamed: 0,DATE,unemp_rate,state_po
0,1976,7.1,AK
1,1976,7.1,AK
2,1976,7.0,AK
3,1976,7.0,AK
4,1976,7.0,AK
...,...,...,...
27280,2020,3.8,WY
27281,2020,9.6,WY
27282,2020,8.8,WY
27283,2020,7.6,WY


In [300]:
all_unemp.rename(columns={'UR': 'unemp_rate', 'DATE':'year'}, inplace=True)
all_unemp = all_unemp[['state_po', 'year', 'unemp_rate']]
all_unemp.to_csv(SAVE_PATH, index=False)

In [301]:
pd.read_csv(SAVE_PATH)

Unnamed: 0,state_po,year,unemp_rate
0,AK,1976,7.1
1,AK,1976,7.1
2,AK,1976,7.0
3,AK,1976,7.0
4,AK,1976,7.0
...,...,...,...
27280,WY,2020,3.8
27281,WY,2020,9.6
27282,WY,2020,8.8
27283,WY,2020,7.6
