In [1]:
import numpy as np
import pandas as pd
import json
from urllib.request import urlopen
import os

# State Income Data

Constructs a data set of real income per capita for the continental United States from 1840 to the present.

Nominal income per capita for 1840, 1880, and 1900 was found in Appendix A of "Interregional Differences in Per Capita Income, Population, and Total Income, 1840-1950" by Richard Easterlin in <ins>Trends in the American Economy in the Nineteenth Century</ins> (https://www.nber.org/books-and-chapters/trends-american-economy-nineteenth-century).

The CPI for 1840, 1880, and 1900 was taken from <ins>Bicentennial Edition: Historical Statistics of the United States, Colonial Times to 1970</ins> (https://www.census.gov/library/publications/1975/compendia/hist_stats_colonial-1970.html).


Income data from 1929 onward are obtained from the BEA.

## Preliminaries

In [2]:
# Import BEA API key or set manually to variable api_key.
# The key is read from the first line of bea_api_key.txt, looked up in the directory
# formed by the first three '/'-separated pieces of the current working directory
# (e.g. /home/<user> when the notebook runs somewhere under /home/<user>/...).
try:
    items = os.getcwd().split('/')[:3]
    items.append('bea_api_key.txt')
    path = '/'.join(items)
    with open(path,'r') as api_key_file:
        # Strip the trailing newline/whitespace: the key is concatenated into request
        # URLs below, where a stray control character makes the URL invalid.
        api_key = api_key_file.readline().strip()

except (OSError, UnicodeError):
    # Key file missing or unreadable: leave api_key unset so it can be assigned manually.
    api_key = None

In [3]:
# Mapping from state name (as used for the GeoName replacement below) to postal abbreviation.
# Note that the Alaska and Hawaii keys carry a trailing ' *'.
stateAbbr = {
    'Alabama': 'AL',
    'Alaska *': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii *': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

# State names, in the same order as the dictionary keys
stateList = list(stateAbbr)

## Deflator data

In [4]:
# Obtain data from BEA (NIPA table T10109, annual frequency, all years).
# api_key must be a string at this point. The response is opened in a `with` block so
# the underlying HTTP connection is closed as soon as the payload has been read.
with urlopen('http://apps.bea.gov/api/data/?UserID='+api_key+'&method=GetData&datasetname=NIPA&TableName=T10109&TableID=13&Frequency=A&Year=X&ResultFormat=JSON&') as gdp_deflator:
    result = gdp_deflator.read().decode('utf-8')

# Parse result
json_response = json.loads(result)

# Import to DataFrame and organize: index by (line description, year)
df = pd.DataFrame(json_response['BEAAPI']['Results']['Data'])
df['DataValue'] = df['DataValue'].astype(float)
df = df.set_index(['LineDescription',pd.to_datetime(df['TimePeriod'])])
df.index.names = ['line description','Year']

# Extract price level data: the 'Gross domestic product' line, divided by 100
data_p = df['DataValue'].loc['Gross domestic product']/100
data_p.name = 'price level'
data_p = data_p.sort_index()
data_p

Year
1929-01-01    0.09418
1930-01-01    0.09073
1931-01-01    0.08141
1932-01-01    0.07188
1933-01-01    0.06989
               ...   
2018-01-01    1.10339
2019-01-01    1.12318
2020-01-01    1.13784
2021-01-01    1.18895
2022-01-01    1.27224
Name: price level, Length: 94, dtype: float64

In [5]:
# Pull the base year out of the first note attached to the most recent BEA response:
# the text after 'Index numbers, ' and before the '=' sign.
note_text = json_response['BEAAPI']['Results']['Notes'][0]['NoteText']
base_year = note_text.split('Index numbers, ')[-1].split('=')[0]

# Record the base year in a small two-line metadata csv (header row, then base_year row)
with open('../csv/state_income_metadata.csv','w') as newfile:
    newfile.write(',Values\n')
    newfile.write('base_year,'+base_year)

## Per capita income data

In [6]:
# Obtain data from BEA (Regional dataset, table SAINC1, line code 3, all years, all states).
# api_key must be a string at this point. The response is opened in a `with` block so
# the underlying HTTP connection is closed as soon as the payload has been read.
with urlopen('http://apps.bea.gov/api/data/?UserID='+api_key+'&method=GetData&DataSetName=Regional&TableName=SAINC1&LineCode=3&Year=ALL&GeoFips=STATE&ResultFormat=JSON') as state_y_pc:
    result = state_y_pc.read().decode('utf-8')

# Parse result
json_response = json.loads(result)

# Import to DataFrame and organize: state names become abbreviations where a mapping
# exists in stateAbbr (other GeoName values are left unchanged), index by (state, year)
df = pd.DataFrame(json_response['BEAAPI']['Results']['Data'])
df.GeoName = df.GeoName.replace(stateAbbr)
df = df.set_index(['GeoName',pd.DatetimeIndex(df['TimePeriod'])])
df.index.names = ['State','Year']
# Missing observations are reported as the string '(NA)'
df['DataValue'] = df['DataValue'].replace('(NA)',np.nan)


# Extract income data: remove thousands separators, convert to float, and reshape to
# one column per state with years as rows
data_y = df['DataValue'].str.replace(',','').astype(float)
data_y.name = 'income'
data_y = data_y.unstack('State')
data_y = data_y.sort_index()
# Convert nominal to real income by dividing each year's row by that year's price level
data_y = data_y.divide(data_p,axis=0)
data_y

State,AK,AL,AR,AZ,CA,CO,CT,DC,DE,FL,...,TN,TX,UT,United States,VA,VT,WA,WI,WV,WY
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-01-01,,3387.131026,3217.243576,6349.543427,10554.257804,6699.936292,10925.886600,14058.186451,10947.122531,5510.724145,...,3960.501168,5032.915693,5818.645148,7421.957953,4618.815035,6689.318327,7857.294542,7114.036951,4873.646209,7135.272882
1930-01-01,,2898.710460,2468.863661,5709.247217,9798.302656,6326.463132,10217.127742,14482.530585,9456.629560,5158.161578,...,3537.969801,4485.837099,5444.726110,6844.483633,4265.402844,6304.419707,7241.265293,6447.701973,4485.837099,6414.636835
1931-01-01,,2702.370716,2542.685174,5281.906400,9249.477951,5797.813536,9912.787127,15391.229579,9531.998526,4876.550792,...,3353.396389,4237.808623,4569.463211,6485.689719,4557.179708,5785.530033,6559.390738,5760.963027,4348.360152,5859.231053
1932-01-01,,2212.020033,2114.635504,4479.688370,8124.652198,4966.611018,8722.871452,15400.667780,8222.036728,4410.127991,...,2698.942682,3644.963829,4229.271007,5592.654424,3978.853645,5050.083472,5592.654424,5036.171397,3575.403450,5217.028381
1933-01-01,,2346.544570,2189.154385,4421.233367,7869.509229,5050.794105,8456.145371,13521.247675,8098.440406,4135.069395,...,2875.947918,3634.282444,4249.534984,5365.574474,4092.144799,4821.862927,5379.882673,4778.938332,3705.823437,5322.649878
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-01,54031.666047,37646.706967,39318.826526,41205.738678,55744.569010,52975.829036,66092.678020,74958.083724,47776.398191,46692.465946,...,42478.180879,47132.020410,41386.091953,48746.136905,51313.678754,47498.164747,54578.163659,46137.811653,37411.069522,54805.644423
2019-01-01,54591.427910,38540.572304,39462.953400,42846.204526,57799.284175,55310.813939,67249.238768,75385.067398,48270.980609,48576.363539,...,43931.515875,48145.444185,43252.194662,50081.019961,52594.419416,49361.633932,57149.343827,47092.184690,38240.531348,57085.240122
2020-01-01,55117.591225,40532.060747,41414.434367,45975.708360,62085.178936,57435.140266,69013.218027,81124.762708,49489.383393,50305.842649,...,45973.071785,48840.786051,45868.487661,52523.201856,54627.188357,52081.136188,60029.529635,49131.688111,39723.511214,57779.652675
2021-01-01,55239.497035,41777.198368,42548.467135,46742.083351,64594.810547,59476.849321,69712.771773,81297.783759,50335.169688,52261.238908,...,47618.486900,50298.162244,47100.382691,53927.414946,55670.970184,51934.900542,62033.727238,50285.546070,40723.327306,58525.589806


## Load Easterlin's data

In [7]:
# Import Easterlin's income data
easterlin_data = pd.read_csv('../historic_data/Historical Statistics of the US - Easterlin State Income Data.csv',index_col=0)

# Import historic CPI data and rescale it so that its 1929 value equals the 1929 value
# of data_p, putting the historic CPI on the same basis as the BEA price level.
# .item() extracts the single 1929 observation as a scalar (and raises if there is not
# exactly one); calling float() on a one-element Series is deprecated in recent pandas.
historic_cpi_data=pd.read_csv('../historic_data/Historical Statistics of the US - cpi.csv',index_col=0)
historic_cpi_data = historic_cpi_data/historic_cpi_data.loc[1929]*data_p.loc['1929'].item()

In [8]:
# Construct series for real incomes in 1840, 1880, and 1900: each year's nominal income
# is divided by the CPI value for that same year. (The 1880 series was previously
# deflated with the 1890 CPI value.)
# .item() extracts the single CPI value of a row as a scalar; it requires the CPI table
# to have exactly one value per year, as float() on the row did before.
df_1840 = easterlin_data['Income per capita - 1840 - A [cur dollars]']/historic_cpi_data.loc[1840].item()
df_1880 = easterlin_data['Income per capita - 1880 [cur dollars]']/historic_cpi_data.loc[1880].item()
df_1900 = easterlin_data['Income per capita - 1900 [cur dollars]']/historic_cpi_data.loc[1900].item()

# Put into a DataFrame (one row per year, one column per entry of Easterlin's index)
# and concatenate with previous data beginning in 1929
df = pd.DataFrame({pd.to_datetime('1840'):df_1840,pd.to_datetime('1880'):df_1880,pd.to_datetime('1900'):df_1900}).transpose()
df = pd.concat([data_y,df]).sort_index()

In [9]:
# Export data to csv
# Alaska, Hawaii, and the region-level columns are removed before writing; every label
# listed here must be present in df or the drop raises a KeyError.
dropCols = [u'AK', u'HI', u'New England', u'Mideast', u'Great Lakes', u'Plains', u'Southeast', u'Southwest', u'Rocky Mountain', u'Far West']
series = df.sort_index().drop(columns=dropCols)

# Missing values are written out as the literal text NaN
series.to_csv('../csv/state_income_data.csv',na_rep='NaN')