In [1]:
import pandas as pd

In [2]:
# Load the state GDP table, discard the columns that are not needed
# ('Unnamed: 0', 'Description', 'GDP_2015') and relabel the remaining
# columns as `state` plus bare year strings.
# NOTE(review): the file name reads "gbp", not "gdp" — confirm this is the intended file.
gdp_df = (
    pd.read_csv("cleaned_gbp.csv")
    .drop(columns=['Unnamed: 0', 'Description', 'GDP_2015'])
    .rename(
        columns={
            'State_Name': 'state',
            'GDP_2016': '2016',
            'GDP_2017': '2017',
            'GDP_2018': '2018',
            'GDP_2019': '2019',
        }
    )
)
gdp_df.head()

Unnamed: 0,state,2016,2017,2018,2019
0,Alabama,191523,193693,198053,200829
1,Alaska,53289,52825,52928,53255
2,Arizona,291259,302117,314016,323597
3,Arkansas,113490,114950,116698,117447
4,California,2519133,2628314,2708966,2800505


In [3]:
# Reshape the wide GDP table (one column per year) into long form: one row
# per (state, year) pair, indexed by `state`, with columns `year` and `gdp`.
# `var_name` / `value_name` label the melted columns directly, so no
# follow-up rename or in-place mutation is required.
# `year` holds the string column labels ("2016" ... "2019"); no datetime
# conversion is applied.
gdp_df_melted = pd.melt(
    gdp_df,
    id_vars=["state"],
    value_vars=["2016", "2017", "2018", "2019"],
    var_name="year",
    value_name="gdp",
).set_index("state")
print(gdp_df_melted)

               year      gdp
state                       
Alabama        2016   191523
Alaska         2016    53289
Arizona        2016   291259
Arkansas       2016   113490
California     2016  2519133
...             ...      ...
Virginia       2019   489168
Washington     2019   548686
West Virginia  2019    72340
Wisconsin      2019   308044
Wyoming        2019    39214

[200 rows x 2 columns]


In [4]:
# Read the flu-percentage table and shorten the `<year>_cases_percent`
# column names to bare year strings.
flu_percentages_df = pd.read_csv("flu_percentages.csv").rename(
    columns={
        '2016_cases_percent': '2016',
        '2017_cases_percent': '2017',
        '2018_cases_percent': '2018',
        '2019_cases_percent': '2019',
    }
)
flu_percentages_df.head()

Unnamed: 0,state,2016,2017,2018,2019
0,Alabama,0.2374,0.551402,1.309394,1.40162
1,Alaska,0.143367,0.446938,1.136112,1.127613
2,Arizona,0.322227,0.320031,0.343307,0.281698
3,Arkansas,0.096424,0.174888,0.193738,0.196799
4,California,0.112745,0.117121,0.132161,0.132136


In [5]:
# Reshape the wide flu-percentage table into long form: one row per
# (state, year) pair, indexed by `state`, with columns `year` and `flu_per`.
# `var_name` / `value_name` label the melted columns directly, so no
# follow-up rename or in-place mutation is required.
# `year` holds the string column labels ("2016" ... "2019"); no datetime
# conversion is applied.
flu_percentages_melted = pd.melt(
    flu_percentages_df,
    id_vars=["state"],
    value_vars=["2016", "2017", "2018", "2019"],
    var_name="year",
    value_name="flu_per",
).set_index("state")
print(flu_percentages_melted)

               year   flu_per
state                        
Alabama        2016  0.237400
Alaska         2016  0.143367
Arizona        2016  0.322227
Arkansas       2016  0.096424
California     2016  0.112745
...             ...       ...
Virginia       2019  1.670068
Washington     2019  0.142418
West Virginia  2019  0.647213
Wisconsin      2019  0.305250
Wyoming        2019  0.542540

[200 rows x 2 columns]


In [6]:
# Inspect the column dtypes of the long-form flu table.
flu_percentages_melted.dtypes

year        object
flu_per    float64
dtype: object

In [7]:
# Load the population-density table, drop the land-area column, and relabel
# the remaining columns as `state` plus bare year strings.
population_density_df = (
    pd.read_csv("Final_Population_Density_Cleaned.csv")
    .drop(columns=['Land Area (sq mi)'])
    .rename(
        columns={
            'State': 'state',
            '2016 Population Density (persons/square mile)': '2016',
            '2017 Population Density (persons/square mile)': '2017',
            '2018 Population Density (persons/square mile)': '2018',
            '2019 Population Density (persons/square mile)': '2019',
        }
    )
)
population_density_df.head()

Unnamed: 0,state,2016,2017,2018,2019
0,Alabama,96.03,96.25,96.51,96.81
1,Alaska,1.3,1.3,1.29,1.28
2,Arizona,61.1,62.01,63.01,64.08
3,Arkansas,57.46,57.68,57.84,58.0
4,California,251.43,252.66,253.32,253.64


In [8]:
# State names in the row order of the population-density table, as a plain
# Python list. Taken from the wide table, i.e. before any
# "District Of Columbia" rows are filtered out of the melted version.
state_list = population_density_df["state"].tolist()

In [9]:
# Reshape the wide population-density table into long form with columns
# `year` and `pop_dens`, labelled directly via `var_name` / `value_name`
# rather than a follow-up rename.
population_density_melted = pd.melt(
    population_density_df,
    id_vars=["state"],
    value_vars=["2016", "2017", "2018", "2019"],
    var_name="year",
    value_name="pop_dens",
)
# Exclude the "District Of Columbia" rows with a boolean mask and index the
# result by `state`; each step returns a new frame, so nothing is mutated
# in place. `year` holds the string column labels; no datetime conversion
# is applied.
population_density_melted = (
    population_density_melted
    .loc[population_density_melted["state"] != "District Of Columbia"]
    .set_index("state")
)
print(population_density_melted)

               year  pop_dens
state                        
Alabama        2016     96.03
Alaska         2016      1.30
Arizona        2016     61.10
Arkansas       2016     57.46
California     2016    251.43
...             ...       ...
Virginia       2019    216.14
Washington     2019    114.59
West Virginia  2019     74.55
Wisconsin      2019    107.51
Wyoming        2019      5.96

[200 rows x 2 columns]


In [10]:
# Inspect the column dtypes of the long-form population-density table.
population_density_melted.dtypes

year         object
pop_dens    float64
dtype: object

In [11]:
# Load the `pci` table, attach a `state` column from `state_list`, and keep
# only `state` plus the four year columns (any other columns are discarded
# by the selection).
# NOTE(review): `state_list` is built from the population-density table and
# is attached here by row position, so this presumes both files list the
# states in the same order and with the same row count — confirm.
pci_df = (
    pd.read_csv("pci_by_state_cleaned.csv")
    .assign(state=state_list)
    .loc[:, ['state', '2016', '2017', '2018', '2019']]
)
pci_df.head()

Unnamed: 0,state,2016,2017,2018,2019
0,Alabama,39536.0,41030.0,42710.0,44145.0
1,Alaska,56302.0,57394.0,60355.0,62806.0
2,Arizona,40801.0,42590.0,44597.0,46058.0
3,Arkansas,40385.0,41657.0,43325.0,44629.0
4,California,58048.0,60549.0,63720.0,66619.0


In [12]:
# Reshape the wide `pci` table into long form with columns `year` and
# `pci`, labelled directly via `var_name` / `value_name` rather than a
# follow-up rename.
pci_melted = pd.melt(
    pci_df,
    id_vars=['state'],
    value_vars=["2016", "2017", "2018", "2019"],
    var_name="year",
    value_name="pci",
)
# Exclude the "District Of Columbia" rows with a boolean mask and index the
# result by `state`; each step returns a new frame, so nothing is mutated
# in place. `year` holds the string column labels; no datetime conversion
# is applied.
pci_melted = (
    pci_melted
    .loc[pci_melted['state'] != "District Of Columbia"]
    .set_index("state")
)
print(pci_melted)

               year      pci
state                       
Alabama        2016  39536.0
Alaska         2016  56302.0
Arizona        2016  40801.0
Arkansas       2016  40385.0
California     2016  58048.0
...             ...      ...
Virginia       2019  59657.0
Washington     2019  64758.0
West Virginia  2019  42315.0
Wisconsin      2019  53227.0
Wyoming        2019  62189.0

[200 rows x 2 columns]


In [13]:
# Inspect the column dtypes of the long-form pci table.
pci_melted.dtypes

year     object
pci     float64
dtype: object

In [14]:
# Read the uninsured-rate table and relabel its columns as `state` plus
# bare year strings.
uninsured_rate_df = pd.read_csv("States_Uninsured_Rates.csv").rename(
    columns={
        'State': 'state',
        'Uninsured Rate 2016': '2016',
        'Uninsured Rate 2017': '2017',
        'Uninsured Rate 2018': '2018',
        'Uninsured Rate 2019': '2019',
    }
)
uninsured_rate_df.head()

Unnamed: 0,state,2016,2017,2018,2019
0,Alabama,9.1,9.4,10.0,9.7
1,Alaska,14.0,13.7,12.6,12.2
2,Arizona,10.0,10.1,10.6,11.3
3,Arkansas,7.9,7.9,8.2,9.1
4,California,7.3,7.2,7.2,7.7


In [15]:
# Reshape the wide uninsured-rate table into long form with columns `year`
# and `uni_per`, labelled directly via `var_name` / `value_name` rather than
# a follow-up rename.
uninsured_rate_melted = pd.melt(
    uninsured_rate_df,
    id_vars=['state'],
    value_vars=["2016", "2017", "2018", "2019"],
    var_name="year",
    value_name="uni_per",
)
# Exclude the "District Of Columbia" rows with a boolean mask and index the
# result by `state`; each step returns a new frame, so nothing is mutated
# in place. `year` holds the string column labels; no datetime conversion
# is applied.
uninsured_rate_melted = (
    uninsured_rate_melted
    .loc[uninsured_rate_melted['state'] != "District Of Columbia"]
    .set_index("state")
)
print(uninsured_rate_melted)

               year  uni_per
state                       
Alabama        2016      9.1
Alaska         2016     14.0
Arizona        2016     10.0
Arkansas       2016      7.9
California     2016      7.3
...             ...      ...
Virginia       2019      7.9
Washington     2019      6.6
West Virginia  2019      6.7
Wisconsin      2019      5.7
Wyoming        2019     12.3

[200 rows x 2 columns]


In [16]:
# Inspect the column dtypes of the long-form uninsured-rate table.
uninsured_rate_melted.dtypes

year        object
uni_per    float64
dtype: object

In [17]:
# Write every long-form table to its own CSV in the working directory.
# Each frame is indexed by `state`, and `to_csv` writes the index by default,
# so the state names are included in the output files.
for melted_frame, csv_name in (
    (gdp_df_melted, 'gdp_melted.csv'),
    (flu_percentages_melted, 'flu_percentages_melted.csv'),
    (population_density_melted, 'population_density_melted.csv'),
    (pci_melted, 'pci_melted.csv'),
    (uninsured_rate_melted, 'uninsured_rate_melted.csv'),
):
    melted_frame.to_csv(csv_name)