In [1]:
# Initial imports

import os
import requests
import numpy as np
import pandas as pd
from pathlib import Path


In [2]:
# Read in Corporate Tax data

corporate_tax_path = Path("../Resources/Corporate_Tax_State.csv")

corporate_tax_df = pd.read_csv(corporate_tax_path)

corporate_tax_df.replace(np.nan, 0, inplace=True)

corporate_tax_df.head()

Unnamed: 0,State,corpTaxRate
0,Iowa,0.12
1,New Jersey,0.1005
2,Pennsylvania,0.0999
3,Minnesota,0.098
4,Illinois,0.095


In [3]:
# Assign Rank Weight to tax rates.  Lower is better (better rank), higher is worse (lower rank)

corporate_tax_df['default_rank'] = corporate_tax_df['corpTaxRate'].rank(ascending=False)

corporate_tax_df.set_axis(['State','Tax Rate','Tax Rank Weight'],axis=1, inplace=True)

corporate_tax_df.tail()

Unnamed: 0,State,Tax Rate,Tax Rank Weight
45,Ohio,0.0,47.5
46,South Dakota,0.0,47.5
47,Texas,0.0,47.5
48,Washington,0.0,47.5
49,Wyoming,0.0,47.5


In [4]:
# Read in Census Data (.csv)

census_csv_path = Path("../Resources/2019_Census_US_Population_Data_By_State_Lat_Long.csv")

census_df = pd.read_csv(census_csv_path)

census_df.set_index('STATE', inplace=True)

census_df.drop(columns=['lat','long'],axis=1,inplace=True)

census_df.drop('District of Columbia', inplace=True)

census_df['default_rank'] = census_df['POPESTIMATE2019'].rank(ascending=True)

census_df.set_axis(['Population','Population Rank Weight'],axis=1, inplace=True)

census_df.head()

Unnamed: 0_level_0,Population,Population Rank Weight
STATE,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,4903185,27.0
Alaska,731545,3.0
Arizona,7278717,37.0
Arkansas,3017804,18.0
California,39512223,50.0


In [5]:
# Read in number of business data (.csv)

number_csv_path = Path("../Resources/Businesses_by_State - Sheet1.csv")

number_of_total_businesses_df = pd.read_csv(number_csv_path)

number_of_total_businesses_df.replace(',','', regex=True, inplace=True)

number_of_total_businesses_df = number_of_total_businesses_df.astype({ 'Number of Businesses':'int'})

number_of_total_businesses_df.set_index('State', inplace=True)

number_of_total_businesses_df.drop(['District of Columbia','Puerto Rico','Virgin Islands'], inplace=True)

number_of_total_businesses_df['default_rank'] = number_of_total_businesses_df['Number of Businesses'].rank(ascending=True)

number_of_total_businesses_df.set_axis(['Number of Businesses','Business Total Rank Weight'],axis=1, inplace=True)

number_of_total_businesses_df.head(50)

Unnamed: 0_level_0,Number of Businesses,Business Total Rank Weight
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,220365,24.0
Alaska,46179,3.0
Arizona,380459,37.0
Arkansas,131901,17.0
California,2045079,50.0
Colorado,375152,36.0
Connecticut,226801,25.0
Delaware,60444,8.0
Florida,1672716,49.0
Georgia,607808,45.0


In [6]:
# Read in average income per state data (.csv)

income_csv_path = Path("../Resources/Average Income Per State/average_income_by_state.csv")

income_df_2021 = pd.read_csv(income_csv_path)

income_df_2021.drop([0,9,52,53,54,55,56,57,58,59], inplace=True)

income_df_2021= pd.pivot( income_df_2021,  columns = 'Name',  index = ["2017",'2018','2019','2020','2021'])

income_df_2021 = income_df_2021.T

income_df_2021_change = income_df_2021.pct_change().dropna()

income_df_2021_std = income_df_2021_change.std()

income_df_2021_std


2017   2018   2019   2020   2021 
36907  37927  39184  42270  45438   NaN
39169  41300  42378  44868  47817   NaN
39437  41145  42907  46092  49320   NaN
40252  41607  43157  45524  48608   NaN
40770  41977  43629  47058  50699   NaN
41560  43356  45261  48208  51379   NaN
42056  43661  45037  47522  51148   NaN
42819  44326  46173  48838  52074   NaN
43301  45299  47242  51332  54301   NaN
43400  46002  48670  51698  55392   NaN
44074  46062  47606  50810  54435   NaN
44444  46474  48524  50114  53156   NaN
44529  46272  48261  50996  55043   NaN
44969  47006  48781  50801  54873   NaN
44996  46824  48697  51704  55289   NaN
45116  47100  48757  51691  56153   NaN
45376  47230  48922  51673  55159   NaN
45648  47314  48820  52724  55551   NaN
46101  47808  49851  53085  56672   NaN
46421  48220  49986  53728  57159   NaN
46711  48438  49809  53198  56483   NaN
47286  49212  50149  53057  56973   NaN
47542  49737  51785  54137  58233   NaN
47983  50536  52464  56311  60676   NaN
48741 

In [7]:
# Read in new business formation data (.csv); also parse dates and use dates as index

alabama_csv_path = Path("../Resources/New Business Formation by State/Alabama_NAICS.csv")

alabama_df = pd.read_csv(alabama_csv_path, parse_dates=['Period'], index_col=['Period'])

alabama_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,3043
2017-02-01,3060
2017-03-01,2990
2017-04-01,2983
2017-05-01,3021


In [8]:
# Read in new business formation data (.csv); also parse dates and use dates as index

alaska_csv_path = Path("../Resources/New Business Formation by State/Alaska_NAICS.csv")

alaska_df = pd.read_csv(alaska_csv_path, parse_dates=['Period'], index_col=['Period'])

alaska_df.dropna(inplace=True)

alaska_df = alaska_df.astype({'Value':'object'})

alaska_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,535.0
2017-02-01,527.0
2017-03-01,530.0
2017-04-01,584.0
2017-05-01,634.0


In [9]:
# Read in new business formation data (.csv); also parse dates and use dates as index

arizona_csv_path = Path("../Resources/New Business Formation by State/Arizona_NAICS.csv")

arizona_df = pd.read_csv(arizona_csv_path, parse_dates=['Period'], index_col=['Period'])

arizona_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,5542
2017-02-01,5431
2017-03-01,5667
2017-04-01,5575
2017-05-01,5722


In [10]:
# Read in new business formation data (.csv); also parse dates and use dates as index

arkansas_csv_path = Path("../Resources/New Business Formation by State/Arkansas_NAICS.csv")

arkansas_df = pd.read_csv(arkansas_csv_path, parse_dates=['Period'], index_col=['Period'])

arkansas_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1870
2017-02-01,1836
2017-03-01,1895
2017-04-01,1852
2017-05-01,1909


In [11]:
# Read in new business formation data (.csv); also parse dates and use dates as index

california_csv_path = Path("../Resources/New Business Formation by State/California_NAICS.csv")

california_df = pd.read_csv(california_csv_path, parse_dates=['Period'], index_col=['Period'])

california_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,26866
2017-02-01,26933
2017-03-01,27153
2017-04-01,27456
2017-05-01,26278


In [12]:
# Read in new business formation data (.csv); also parse dates and use dates as index

colorado_csv_path = Path("../Resources/New Business Formation by State/Colorado_NAICS.csv")

colorado_df = pd.read_csv(colorado_csv_path, parse_dates=['Period'], index_col=['Period'])

colorado_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,6448
2017-02-01,6504
2017-03-01,6538
2017-04-01,6614
2017-05-01,6693


In [13]:
# Read in new business formation data (.csv); also parse dates and use dates as index

connecticut_csv_path = Path("../Resources/New Business Formation by State/Connecticut_NAICS.csv")

connecticut_df = pd.read_csv(connecticut_csv_path, parse_dates=['Period'], index_col=['Period'])

connecticut_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,2525
2017-02-01,2421
2017-03-01,2413
2017-04-01,2431
2017-05-01,2458


In [14]:
# Read in new business formation data (.csv); also parse dates and use dates as index

delaware_csv_path = Path("../Resources/New Business Formation by State/Delaware_NAICS.csv")

delaware_df = pd.read_csv(delaware_csv_path, parse_dates=['Period'], index_col=['Period'])

delaware_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1892
2017-02-01,1799
2017-03-01,1916
2017-04-01,1806
2017-05-01,1873


In [15]:
# Read in new business formation data (.csv); also parse dates and use dates as index

district_of_columbia_csv_path = Path("../Resources/New Business Formation by State/District_of_Columbia_NAICS.csv")

district_of_columbia_df = pd.read_csv(district_of_columbia_csv_path, parse_dates=['Period'], index_col=['Period'])

district_of_columbia_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,860
2017-02-01,864
2017-03-01,898
2017-04-01,841
2017-05-01,898


In [16]:
# Read in new business formation data (.csv); also parse dates and use dates as index

florida_csv_path = Path("../Resources/New Business Formation by State/Florida_NAICS.csv")

florida_df = pd.read_csv(florida_csv_path, parse_dates=['Period'], index_col=['Period'])

florida_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,32748
2017-02-01,29343
2017-03-01,30842
2017-04-01,30589
2017-05-01,29080


In [17]:
# Read in new business formation data (.csv); also parse dates and use dates as index

georgia_csv_path = Path("../Resources/New Business Formation by State/Georgia_NAICS.csv")

georgia_df = pd.read_csv(georgia_csv_path, parse_dates=['Period'], index_col=['Period'])

georgia_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,11584
2017-02-01,11935
2017-03-01,11700
2017-04-01,11929
2017-05-01,11711


In [18]:
# Read in new business formation data (.csv); also parse dates and use dates as index

hawaii_csv_path = Path("../Resources/New Business Formation by State/Hawaii_NAICS.csv")

hawaii_df = pd.read_csv(hawaii_csv_path, parse_dates=['Period'], index_col=['Period'])

hawaii_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1020
2017-02-01,1046
2017-03-01,1003
2017-04-01,1029
2017-05-01,1093


In [19]:
# Read in new business formation data (.csv); also parse dates and use dates as index

idaho_csv_path = Path("../Resources/New Business Formation by State/Idaho_NAICS.csv")

idaho_df = pd.read_csv(idaho_csv_path, parse_dates=['Period'], index_col=['Period'])

idaho_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1376
2017-02-01,1369
2017-03-01,1470
2017-04-01,1500
2017-05-01,1497


In [20]:
# Read in new business formation data (.csv); also parse dates and use dates as index

illinois_csv_path = Path("../Resources/New Business Formation by State/Illinois_NAICS.csv")

illinois_df = pd.read_csv(illinois_csv_path, parse_dates=['Period'], index_col=['Period'])

illinois_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,8909
2017-02-01,8829
2017-03-01,8474
2017-04-01,8478
2017-05-01,8581


In [21]:
# Read in new business formation data (.csv); also parse dates and use dates as index

indiana_csv_path = Path("../Resources/New Business Formation by State/Indiana_NAICS.csv")

indiana_df = pd.read_csv(indiana_csv_path, parse_dates=['Period'], index_col=['Period'])

indiana_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,3990
2017-02-01,3821
2017-03-01,3882
2017-04-01,3860
2017-05-01,4069


In [22]:
# Read in new business formation data (.csv); also parse dates and use dates as index

iowa_csv_path = Path("../Resources/New Business Formation by State/Iowa_NAICS.csv")

iowa_df = pd.read_csv(iowa_csv_path, parse_dates=['Period'], index_col=['Period'])

iowa_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1616
2017-02-01,1767
2017-03-01,1831
2017-04-01,1884
2017-05-01,1887


In [23]:
# Read in new business formation data (.csv); also parse dates and use dates as index

kansas_csv_path = Path("../Resources/New Business Formation by State/Kansas_NAICS.csv")

kansas_df = pd.read_csv(kansas_csv_path, parse_dates=['Period'], index_col=['Period'])

kansas_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1890
2017-02-01,1841
2017-03-01,1828
2017-04-01,1773
2017-05-01,1874


In [24]:
# Read in new business formation data (.csv); also parse dates and use dates as index

kentucky_csv_path = Path("../Resources/New Business Formation by State/Kentucky_NAICS.csv")

kentucky_df = pd.read_csv(kentucky_csv_path, parse_dates=['Period'], index_col=['Period'])

kentucky_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,2423
2017-02-01,2393
2017-03-01,2538
2017-04-01,2453
2017-05-01,2479


In [25]:
# Read in new business formation data (.csv); also parse dates and use dates as index

louisiana_csv_path = Path("../Resources/New Business Formation by State/Louisiana_NAICS.csv")

louisiana_df = pd.read_csv(louisiana_csv_path, parse_dates=['Period'], index_col=['Period'])

louisiana_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,4430
2017-02-01,4472
2017-03-01,4403
2017-04-01,4337
2017-05-01,4453


In [26]:
# Read in new business formation data (.csv); also parse dates and use dates as index

maine_csv_path = Path("../Resources/New Business Formation by State/Maine_NAICS.csv")

maine_df = pd.read_csv(maine_csv_path, parse_dates=['Period'], index_col=['Period'])

maine_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,705
2017-02-01,659
2017-03-01,739
2017-04-01,728
2017-05-01,721


In [27]:
# Read in new business formation data (.csv); also parse dates and use dates as index

maryland_csv_path = Path("../Resources/New Business Formation by State/Maryland_NAICS.csv")

maryland_df = pd.read_csv(maryland_csv_path, parse_dates=['Period'], index_col=['Period'])

maryland_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,5696
2017-02-01,5531
2017-03-01,5404
2017-04-01,5417
2017-05-01,5619


In [28]:
# Read in new business formation data (.csv); also parse dates and use dates as index

massachusetts_csv_path = Path("../Resources/New Business Formation by State/Massachusetts_NAICS.csv")

massachusetts_df = pd.read_csv(massachusetts_csv_path, parse_dates=['Period'], index_col=['Period'])

massachusetts_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,4645
2017-02-01,4371
2017-03-01,4437
2017-04-01,4434
2017-05-01,4572


In [29]:
# Read in new business formation data (.csv); also parse dates and use dates as index

michigan_csv_path = Path("../Resources/New Business Formation by State/Michigan_NAICS.csv")

michigan_df = pd.read_csv(michigan_csv_path, parse_dates=['Period'], index_col=['Period'])

michigan_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,7020
2017-02-01,7114
2017-03-01,6487
2017-04-01,6911
2017-05-01,6907


In [30]:
# Read in new business formation data (.csv); also parse dates and use dates as index

minnesota_csv_path = Path("../Resources/New Business Formation by State/Minnesota_NAICS.csv")

minnesota_df = pd.read_csv(minnesota_csv_path, parse_dates=['Period'], index_col=['Period'])

minnesota_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,3619
2017-02-01,3532
2017-03-01,3462
2017-04-01,3593
2017-05-01,3551


In [31]:
# Read in new business formation data (.csv); also parse dates and use dates as index

mississippi_csv_path = Path("../Resources/New Business Formation by State/Mississippi_NAICS.csv")

mississippi_df = pd.read_csv(mississippi_csv_path, parse_dates=['Period'], index_col=['Period'])

mississippi_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,2467
2017-02-01,2307
2017-03-01,2222
2017-04-01,2287
2017-05-01,2246


In [32]:
# Read in new business formation data (.csv); also parse dates and use dates as index

missouri_csv_path = Path("../Resources/New Business Formation by State/Missouri_NAICS.csv")

missouri_df = pd.read_csv(missouri_csv_path, parse_dates=['Period'], index_col=['Period'])

missouri_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,4468
2017-02-01,4527
2017-03-01,4586
2017-04-01,4453
2017-05-01,4538


In [33]:
# Read in new business formation data (.csv); also parse dates and use dates as index

montana_csv_path = Path("../Resources/New Business Formation by State/Montana_NAICS.csv")

montana_df = pd.read_csv(montana_csv_path, parse_dates=['Period'], index_col=['Period'])

montana_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,935
2017-02-01,940
2017-03-01,955
2017-04-01,996
2017-05-01,991


In [34]:
# Read in new business formation data (.csv); also parse dates and use dates as index

nebraska_csv_path = Path("../Resources/New Business Formation by State/Nebraska_NAICS.csv")

nebraska_df = pd.read_csv(nebraska_csv_path, parse_dates=['Period'], index_col=['Period'])

nebraska_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1124
2017-02-01,1105
2017-03-01,1100
2017-04-01,1127
2017-05-01,1203


In [35]:
# Read in new business formation data (.csv); also parse dates and use dates as index

nevada_csv_path = Path("../Resources/New Business Formation by State/Nevada_NAICS.csv")

nevada_df = pd.read_csv(nevada_csv_path, parse_dates=['Period'], index_col=['Period'])

nevada_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,3574
2017-02-01,3466
2017-03-01,3541
2017-04-01,3660
2017-05-01,3537


In [36]:
# Read in new business formation data (.csv); also parse dates and use dates as index

new_hampshire_csv_path = Path("../Resources/New Business Formation by State/New_Hampshire_NAICS.csv")

new_hampshire_df = pd.read_csv(new_hampshire_csv_path, parse_dates=['Period'], index_col=['Period'])

new_hampshire_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,894
2017-02-01,768
2017-03-01,863
2017-04-01,838
2017-05-01,897


In [37]:
# Read in new business formation data (.csv); also parse dates and use dates as index

new_jersey_csv_path = Path("../Resources/New Business Formation by State/New_Jersey_NAICS.csv")

new_jersey_df = pd.read_csv(new_jersey_csv_path, parse_dates=['Period'], index_col=['Period'])

new_jersey_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,8340
2017-02-01,8411
2017-03-01,8370
2017-04-01,8299
2017-05-01,8441


In [38]:
# Read in new business formation data (.csv); also parse dates and use dates as index

new_mexico_csv_path = Path("../Resources/New Business Formation by State/New_Mexico_NAICS.csv")

new_mexico_df = pd.read_csv(new_mexico_csv_path, parse_dates=['Period'], index_col=['Period'])

new_mexico_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1225
2017-02-01,1271
2017-03-01,1338
2017-04-01,1342
2017-05-01,1374


In [39]:
# Read in new business formation data (.csv); also parse dates and use dates as index

new_york_csv_path = Path("../Resources/New Business Formation by State/New_York_NAICS.csv")

new_york_df = pd.read_csv(new_york_csv_path, parse_dates=['Period'], index_col=['Period'])

new_york_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,17477
2017-02-01,17423
2017-03-01,17730
2017-04-01,17727
2017-05-01,17627


In [40]:
# Read in new business formation data (.csv); also parse dates and use dates as index

north_carolina_csv_path = Path("../Resources/New Business Formation by State/North_Carolina_NAICS.csv")

north_carolina_df = pd.read_csv(north_carolina_csv_path, parse_dates=['Period'], index_col=['Period'])

north_carolina_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,7382
2017-02-01,7480
2017-03-01,7456
2017-04-01,7399
2017-05-01,7380


In [41]:
# Read in new business formation data (.csv); also parse dates and use dates as index

north_dakota_csv_path = Path("../Resources/New Business Formation by State/North_Dakota_NAICS.csv")

north_dakota_df = pd.read_csv(north_dakota_csv_path, parse_dates=['Period'], index_col=['Period'])

north_dakota_df.dropna(inplace=True)

north_dakota_df = alaska_df.astype({'Value':'object'})

north_dakota_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,535.0
2017-02-01,527.0
2017-03-01,530.0
2017-04-01,584.0
2017-05-01,634.0


In [42]:
# Read in new business formation data (.csv); also parse dates and use dates as index

ohio_csv_path = Path("../Resources/New Business Formation by State/Ohio_NAICS.csv")

ohio_df = pd.read_csv(ohio_csv_path, parse_dates=['Period'], index_col=['Period'])

ohio_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,7114
2017-02-01,7173
2017-03-01,7080
2017-04-01,6940
2017-05-01,6930


In [43]:
# Read in new business formation data (.csv); also parse dates and use dates as index

oklahoma_csv_path = Path("../Resources/New Business Formation by State/Oklahoma_NAICS.csv")

oklahoma_df = pd.read_csv(oklahoma_csv_path, parse_dates=['Period'], index_col=['Period'])

oklahoma_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,2945
2017-02-01,3002
2017-03-01,3087
2017-04-01,2916
2017-05-01,2991


In [44]:
# Read in new business formation data (.csv); also parse dates and use dates as index

oregon_csv_path = Path("../Resources/New Business Formation by State/Oregon_NAICS.csv")

oregon_df = pd.read_csv(oregon_csv_path, parse_dates=['Period'], index_col=['Period'])

oregon_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,2998
2017-02-01,3117
2017-03-01,3244
2017-04-01,3199
2017-05-01,3153


In [45]:
# Read in new business formation data (.csv); also parse dates and use dates as index

pennsylvania_csv_path = Path("../Resources/New Business Formation by State/Pennsylvania_NAICS.csv")

pennsylvania_df = pd.read_csv(pennsylvania_csv_path, parse_dates=['Period'], index_col=['Period'])

pennsylvania_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,7527
2017-02-01,7876
2017-03-01,7808
2017-04-01,7477
2017-05-01,7483


In [46]:
# Read in new business formation data (.csv); also parse dates and use dates as index

rhode_island_csv_path = Path("../Resources/New Business Formation by State/Rhode_Island_NAICS.csv")

rhode_island_df = pd.read_csv(rhode_island_csv_path, parse_dates=['Period'], index_col=['Period'])

rhode_island_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,602
2017-02-01,606
2017-03-01,621
2017-04-01,630
2017-05-01,662


In [47]:
# Read in new business formation data (.csv); also parse dates and use dates as index

south_carolina_csv_path = Path("../Resources/New Business Formation by State/South_Carolina_NAICS.csv")

south_carolina_df = pd.read_csv(south_carolina_csv_path, parse_dates=['Period'], index_col=['Period'])

south_carolina_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,3780
2017-02-01,3836
2017-03-01,3722
2017-04-01,3610
2017-05-01,3794


In [48]:
# Read in new business formation data (.csv); also parse dates and use dates as index

south_dakota_csv_path = Path("../Resources/New Business Formation by State/South_Dakota_NAICS.csv")

south_dakota_df = pd.read_csv(south_dakota_csv_path, parse_dates=['Period'], index_col=['Period'])

south_dakota_df.dropna(inplace=True)

south_dakota_df = alaska_df.astype({'Value':'object'})

south_dakota_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,535.0
2017-02-01,527.0
2017-03-01,530.0
2017-04-01,584.0
2017-05-01,634.0


In [49]:
# Read in new business formation data (.csv); also parse dates and use dates as index

tennessee_csv_path = Path("../Resources/New Business Formation by State/Tennessee_NAICS.csv")

tennessee_df = pd.read_csv(tennessee_csv_path, parse_dates=['Period'], index_col=['Period'])

tennessee_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,4354
2017-02-01,4237
2017-03-01,4233
2017-04-01,4127
2017-05-01,4193


In [50]:
# Read in new business formation data (.csv); also parse dates and use dates as index

texas_csv_path = Path("../Resources/New Business Formation by State/Texas_NAICS.csv")

texas_df = pd.read_csv(texas_csv_path, parse_dates=['Period'], index_col=['Period'])

texas_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,23366
2017-02-01,22623
2017-03-01,22944
2017-04-01,22572
2017-05-01,22597


In [51]:
# Read in new business formation data (.csv); also parse dates and use dates as index

utah_csv_path = Path("../Resources/New Business Formation by State/Utah_NAICS.csv")

utah_df = pd.read_csv(utah_csv_path, parse_dates=['Period'], index_col=['Period'])

utah_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,3819
2017-02-01,3794
2017-03-01,3810
2017-04-01,3819
2017-05-01,3862


In [52]:
# Read in new business formation data (.csv); also parse dates and use dates as index

vermont_csv_path = Path("../Resources/New Business Formation by State/Vermont_NAICS.csv")

vermont_df = pd.read_csv(vermont_csv_path, parse_dates=['Period'], index_col=['Period'])

vermont_df.dropna(inplace=True)

vermont_df = vermont_df.astype({'Value':'object'})

vermont_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,385.0
2017-02-01,391.0
2017-03-01,359.0
2017-04-01,398.0
2017-05-01,480.0


In [53]:
# Read in new business formation data (.csv); also parse dates and use dates as index

virginia_csv_path = Path("../Resources/New Business Formation by State/Virginia_NAICS.csv")

virginia_df = pd.read_csv(virginia_csv_path, parse_dates=['Period'], index_col=['Period'])

virginia_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,6663
2017-02-01,6641
2017-03-01,6601
2017-04-01,6664
2017-05-01,6659


In [54]:
# Read in new business formation data (.csv); also parse dates and use dates as index

washington_csv_path = Path("../Resources/New Business Formation by State/Washington_NAICS.csv")

washington_df = pd.read_csv(washington_csv_path, parse_dates=['Period'], index_col=['Period'])

washington_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,4938
2017-02-01,4956
2017-03-01,5058
2017-04-01,5236
2017-05-01,5253


In [55]:
# Read in new business formation data (.csv); also parse dates and use dates as index

west_virginia_csv_path = Path("../Resources/New Business Formation by State/West_Virginia_NAICS.csv")

west_virginia_df = pd.read_csv(west_virginia_csv_path, parse_dates=['Period'], index_col=['Period'])

west_virginia_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,810
2017-02-01,825
2017-03-01,819
2017-04-01,803
2017-05-01,865


In [56]:
# Read in new business formation data (.csv); also parse dates and use dates as index

wisconsin_csv_path = Path("../Resources/New Business Formation by State/Wisconsin_NAICS.csv")

wisconsin_df = pd.read_csv(wisconsin_csv_path, parse_dates=['Period'], index_col=['Period'])

wisconsin_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,3340
2017-02-01,3386
2017-03-01,3421
2017-04-01,3244
2017-05-01,3445


In [57]:
# Read in new business formation data (.csv); also parse dates and use dates as index

wyoming_csv_path = Path("../Resources/New Business Formation by State/Wyoming_NAICS.csv")

wyoming_df = pd.read_csv(wyoming_csv_path, parse_dates=['Period'], index_col=['Period'])

wyoming_df.head()

Unnamed: 0_level_0,Value
Period,Unnamed: 1_level_1
2017-01-01,1433
2017-02-01,1336
2017-03-01,1413
2017-04-01,1391
2017-05-01,1365


In [58]:
# combine all state df's into a single df

combined_df = pd.concat([alabama_df,
                         alaska_df,
                         arizona_df,
                         arkansas_df,
                         california_df,
                         colorado_df,
                         connecticut_df,
                         delaware_df,
                         florida_df,
                         georgia_df,
                         hawaii_df,
                         idaho_df,
                         illinois_df,
                         indiana_df,
                         iowa_df,
                         kansas_df,
                         kentucky_df,
                         louisiana_df,
                         maine_df,
                         maryland_df,
                         massachusetts_df,
                         michigan_df,
                         minnesota_df,
                         mississippi_df,
                         missouri_df,
                         montana_df,
                         nebraska_df,
                         nevada_df,
                         new_hampshire_df,
                         new_jersey_df,
                         new_mexico_df,
                         new_york_df,
                         north_carolina_df,
                         north_dakota_df,
                         ohio_df,
                         oklahoma_df,
                         oregon_df,
                         pennsylvania_df,
                         rhode_island_df,
                         south_carolina_df,
                         south_dakota_df,
                         tennessee_df,
                         texas_df,
                         utah_df,
                         vermont_df,
                         virginia_df,
                         washington_df,
                         west_virginia_df,
                         wisconsin_df,
                         wyoming_df
                        ], axis=1)



combined_df.head()

Unnamed: 0_level_0,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value,...,Value,Value,Value,Value,Value,Value,Value,Value,Value,Value
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,3043,535.0,5542,1870,26866,6448,2525,1892,32748,11584,...,535.0,4354,23366,3819,385.0,6663,4938,810,3340,1433
2017-02-01,3060,527.0,5431,1836,26933,6504,2421,1799,29343,11935,...,527.0,4237,22623,3794,391.0,6641,4956,825,3386,1336
2017-03-01,2990,530.0,5667,1895,27153,6538,2413,1916,30842,11700,...,530.0,4233,22944,3810,359.0,6601,5058,819,3421,1413
2017-04-01,2983,584.0,5575,1852,27456,6614,2431,1806,30589,11929,...,584.0,4127,22572,3819,398.0,6664,5236,803,3244,1391
2017-05-01,3021,634.0,5722,1909,26278,6693,2458,1873,29080,11711,...,634.0,4193,22597,3862,480.0,6659,5253,865,3445,1365


In [59]:
# rename columns to states

combined_df = combined_df.set_axis(['Alabama',
                                    'Alaska',
                                    'Arizona',
                                    'Arkansas',
                                    'California',
                                    'Colorado',
                                    'Connecticut',
                                    'Delaware',
                                    'Florida',
                                    'Georgia',
                                    'Hawaii',
                                    'Idaho',
                                    'Illinois',
                                    'Indiana',
                                    'Iowa',
                                    'Kansas',
                                    'Kentucky',
                                    'Louisiana',
                                    'Maine',
                                    'Maryland',
                                    'Massachusetts',
                                    'Michigan',
                                    'Minnesota',
                                    'Mississippi',
                                    'Missouri',
                                    'Montana',
                                    'Nebraska',
                                    'Nevada',
                                    'New Hampshire',
                                    'New Jersey',
                                    'New Mexico',
                                    'New York',
                                    'North Carolina',
                                    'North Dakota',
                                    'Ohio',
                                    'Oklahoma',
                                    'Oregon',
                                    'Pennsylvania',
                                    'Rhode Island',
                                    'South Carolina',
                                    'South Dakota',
                                    'Tennessee',
                                    'Texas',
                                    'Utah',
                                    'Vermont',
                                    'Virgina',
                                    'Washington',
                                    'West Virgina',
                                    'Wisconsin',
                                    'Wyoming'
                                   ], axis=1, inplace=False)
# drop null values

combined_df.dropna(inplace=True)

In [60]:
combined_df.head(100)

Unnamed: 0_level_0,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,Florida,Georgia,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virgina,Washington,West Virgina,Wisconsin,Wyoming
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,3043,535.0,5542,1870,26866,6448,2525,1892,32748,11584,...,535.0,4354,23366,3819,385.0,6663,4938,810,3340,1433
2017-02-01,3060,527.0,5431,1836,26933,6504,2421,1799,29343,11935,...,527.0,4237,22623,3794,391.0,6641,4956,825,3386,1336
2017-03-01,2990,530.0,5667,1895,27153,6538,2413,1916,30842,11700,...,530.0,4233,22944,3810,359.0,6601,5058,819,3421,1413
2017-04-01,2983,584.0,5575,1852,27456,6614,2431,1806,30589,11929,...,584.0,4127,22572,3819,398.0,6664,5236,803,3244,1391
2017-05-01,3021,634.0,5722,1909,26278,6693,2458,1873,29080,11711,...,634.0,4193,22597,3862,480.0,6659,5253,865,3445,1365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-01,5739,702.0,9702,2975,38461,9351,3869,3800,47506,22112,...,702.0,7531,38556,5428,614.0,10591,7501,1157,5242,3342
2022-04-01,6851,711.0,9500,3627,28902,9435,3897,3460,49363,24208,...,711.0,8711,41814,5436,722.0,11625,7582,1167,5734,3191
2022-05-01,6042,711.0,9356,3169,39736,9045,3681,3699,51044,22490,...,711.0,7813,39973,5453,683.0,10340,7682,1107,5347,3149
2022-06-01,5542,645.0,8986,3111,42333,9017,3552,3568,54829,21570,...,645.0,7507,35742,5274,591.0,9207,5981,1138,5045,3180


In [61]:
combined_df.replace(',','', regex=True, inplace=True)

combined_df = combined_df.astype(float).pct_change()

combined_df.head(100)

Unnamed: 0_level_0,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,Florida,Georgia,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virgina,Washington,West Virgina,Wisconsin,Wyoming
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01,,,,,,,,,,,...,,,,,,,,,,
2017-02-01,0.005587,-0.014953,-0.020029,-0.018182,0.002494,0.008685,-0.041188,-0.049154,-0.103976,0.030300,...,-0.014953,-0.026872,-0.031798,-0.006546,0.015584,-0.003302,0.003645,0.018519,0.013772,-0.067690
2017-03-01,-0.022876,0.005693,0.043454,0.032135,0.008168,0.005228,-0.003304,0.065036,0.051085,-0.019690,...,0.005693,-0.000944,0.014189,0.004217,-0.081841,-0.006023,0.020581,-0.007273,0.010337,0.057635
2017-04-01,-0.002341,0.101887,-0.016234,-0.022691,0.011159,0.011624,0.007460,-0.057411,-0.008203,0.019573,...,0.101887,-0.025041,-0.016213,0.002362,0.108635,0.009544,0.035192,-0.019536,-0.051739,-0.015570
2017-05-01,0.012739,0.085616,0.026368,0.030778,-0.042905,0.011944,0.011107,0.037099,-0.049331,-0.018275,...,0.085616,0.015992,0.001108,0.011259,0.206030,-0.000750,0.003247,0.077210,0.061961,-0.018692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-03-01,0.012526,-0.005666,-0.020495,-0.034091,-0.058113,-0.016823,-0.010739,-0.088073,0.019289,-0.000226,...,-0.005666,0.024905,0.019838,-0.035879,0.031933,0.004553,-0.073951,-0.021151,0.001337,0.078760
2022-04-01,0.193762,0.012821,-0.020820,0.219160,-0.248537,0.008983,0.007237,-0.089474,0.039090,0.094790,...,0.012821,0.156686,0.084500,0.001474,0.175896,0.097630,0.010799,0.008643,0.093857,-0.045183
2022-05-01,-0.118085,0.000000,-0.015158,-0.126275,0.374853,-0.041335,-0.055427,0.069075,0.034054,-0.070968,...,0.000000,-0.103088,-0.044028,0.003127,-0.054017,-0.110538,0.013189,-0.051414,-0.067492,-0.013162
2022-06-01,-0.082754,-0.092827,-0.039547,-0.018302,0.065356,-0.003096,-0.035045,-0.035415,0.074152,-0.040907,...,-0.092827,-0.039165,-0.105846,-0.032826,-0.134700,-0.109574,-0.221427,0.028004,-0.056480,0.009844


In [62]:
combined_std_df = combined_df.std()

combined_std_df.head(50)

Alabama           0.124869
Alaska            0.131396
Arizona           0.089353
Arkansas          0.123338
California        0.121776
Colorado          0.076507
Connecticut       0.116014
Delaware          0.117350
Florida           0.121735
Georgia           0.137431
Hawaii            0.084313
Idaho             0.108956
Illinois          0.179607
Indiana           0.123602
Iowa              0.106818
Kansas            0.085106
Kentucky          0.091655
Louisiana         0.165655
Maine             0.111800
Maryland          0.101873
Massachusetts     0.086404
Michigan          0.148357
Minnesota         0.094368
Mississippi       0.187511
Missouri          0.107202
Montana           0.104462
Nebraska          0.092817
Nevada            0.129604
New Hampshire     0.102913
New Jersey        0.107340
New Mexico        0.075055
New York          0.109631
North Carolina    0.084747
North Dakota      0.131396
Ohio              0.116834
Oklahoma          0.096666
Oregon            0.073016
P