In [1]:
import numpy as np
import pandas as pd
import json
import runProcs
from urllib.request import urlopen
import os

# State Income Data

Constructs a data set of real income per capita for the continental United States from 1840 to the present.

Nominal income per capita for 1840, 1880, and 1900 were found in Appendix A in "Interregional Differences in Per Capita Income, Population, and Total Income, 1840-1950" by Richard Easterlin in <ins>Trends in the American Economy in the Nineteenth Century</ins> (https://www.nber.org/books-and-chapters/trends-american-economy-nineteenth-century).

The CPI for 1840, 1880, and 1900 was taken from <ins>Bicentennial Edition: Historical Statistics of the United States, Colonial Times to 1970</ins> (https://www.census.gov/library/publications/1975/compendia/hist_stats_colonial-1970.html).


Income data from 1929 onward are obtained from the BEA.

## Preliminaries

In [2]:
# Import BEA API key or set manually to variable api_key.
# The key file is looked up at '<first two components of the working directory>/bea_api_key.txt',
# e.g. '/Users/<name>/bea_api_key.txt' when the notebook runs somewhere below '/Users/<name>'.
try:
    items = os.getcwd().split('/')[:3]
    items.append('bea_api_key.txt')
    path = '/'.join(items)
    with open(path,'r') as api_key_file:
        # Strip the trailing newline / stray whitespace so the key can be concatenated
        # into a request URL; an empty first line is treated as "no key available".
        api_key = api_key_file.readline().strip() or None

except (OSError, UnicodeError):
    # Key file missing or unreadable: leave api_key unset so it can be assigned manually.
    api_key = None

In [3]:
# Mapping from the state names used as keys below to two-letter postal abbreviations.
# Note that the Alaska and Hawaii keys carry a trailing ' *'.
stateAbbr = {
    'Alabama': 'AL',
    'Alaska *': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii *': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

# State names (the dictionary keys), in the order they are listed above
stateList = list(stateAbbr)

## Deflator data

In [4]:
# Obtain data from BEA (NIPA dataset, table T10109, annual frequency, all years)
if api_key is None:
    # Fail with a clear message instead of a TypeError from string concatenation
    raise ValueError('BEA API key not found: assign your key to the variable api_key')

# Use HTTPS so the API key is not sent in clear text; the with-block closes the connection
with urlopen('https://apps.bea.gov/api/data/?UserID='+api_key+'&method=GetData&datasetname=NIPA&TableName=T10109&TableID=13&Frequency=A&Year=X&ResultFormat=JSON&') as gdp_deflator:
    result = gdp_deflator.read().decode('utf-8')

# Parse result
json_response = json.loads(result)

# Import to DataFrame and organize: index rows by (line description, year)
df = pd.DataFrame(json_response['BEAAPI']['Results']['Data'])
df['DataValue'] = df['DataValue'].astype(float)
df = df.set_index(['LineDescription',pd.to_datetime(df['TimePeriod'])])
df.index.names = ['line description','Year']

# Extract price level data: keep the 'Gross domestic product' line and divide the
# reported values by 100 (presumably rescaling an index with base 100 to base 1)
data_p = df['DataValue'].loc['Gross domestic product']/100
data_p.name = 'price level'
data_p = data_p.sort_index()
data_p

Year
1929-01-01    0.09424
1930-01-01    0.09079
1931-01-01    0.08146
1932-01-01    0.07193
1933-01-01    0.06993
               ...   
2016-01-01    1.05722
2017-01-01    1.07710
2018-01-01    1.10296
2019-01-01    1.12265
2020-01-01    1.13625
Name: price level, Length: 92, dtype: float64

## Per capita income data

In [5]:
# Obtain data from BEA (Regional dataset, table SAINC1, line code 3, all years, all states)
if api_key is None:
    # Fail with a clear message instead of a TypeError from string concatenation
    raise ValueError('BEA API key not found: assign your key to the variable api_key')

# Use HTTPS so the API key is not sent in clear text; the with-block closes the connection
with urlopen('https://apps.bea.gov/api/data/?UserID='+api_key+'&method=GetData&DataSetName=Regional&TableName=SAINC1&LineCode=3&Year=ALL&GeoFips=STATE&ResultFormat=JSON') as state_y_pc:
    result = state_y_pc.read().decode('utf-8')

# Parse result
json_response = json.loads(result)

# Import to DataFrame and organize: replace state names with abbreviations
# (names not found in stateAbbr are left unchanged) and index rows by (state, year)
df = pd.DataFrame(json_response['BEAAPI']['Results']['Data'])
df.GeoName = df.GeoName.replace(stateAbbr)
df = df.set_index(['GeoName',pd.DatetimeIndex(df['TimePeriod'])])
df.index.names = ['State','Year']
# Missing observations are reported as the string '(NA)'
df['DataValue'] = df['DataValue'].replace('(NA)',np.nan)


# Extract income data: strip thousands separators, convert to float, and reshape to
# one row per year and one column per state
data_y = df['DataValue'].str.replace(',','').astype(float)
data_y.name = 'income'
data_y = data_y.unstack('State')
data_y = data_y.sort_index()
# Deflate nominal income by the price level, aligning on the Year index
data_y = data_y.divide(data_p,axis=0)
data_y

State,AK,AL,AR,AZ,CA,CO,CT,DC,DE,FL,...,TN,TX,UT,United States,VA,VT,WA,WI,WV,WY
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1929-01-01,,3384.974533,3215.195246,6345.500849,10547.538200,6695.670628,10918.930390,14049.235993,10940.152801,5507.215620,...,3957.979626,5029.711375,5814.940577,7417.232598,4615.874363,6685.059423,7852.292020,7109.507640,4870.543294,7130.730051
1930-01-01,,2896.794801,2467.232074,5705.474171,9791.827294,6322.282190,10210.375592,14472.959577,9450.379998,5154.752726,...,3535.631677,4482.872563,5441.127878,6839.960348,4262.583985,6300.253332,7236.479789,6443.440908,4482.872563,6410.397621
1931-01-01,,2700.712006,2541.124478,5278.664375,9243.800638,5794.254849,9906.702676,15381.782470,9526.147803,4873.557574,...,3351.338080,4235.207464,4566.658483,6481.708814,4554.382519,5781.978885,6555.364596,5757.426958,4345.691137,5855.634667
1932-01-01,,2210.482413,2113.165578,4476.574447,8119.004588,4963.158626,8716.808008,15389.962464,8216.321424,4407.062422,...,2697.066593,3642.430140,4226.331155,5588.766857,3976.087863,5046.573057,5588.766857,5032.670652,3572.918115,5213.401919
1933-01-01,,2345.202345,2187.902188,4418.704419,7865.007865,5047.905048,8451.308451,13513.513514,8093.808094,4132.704133,...,2874.302874,3632.203632,4247.104247,5362.505363,4089.804090,4819.104819,5376.805377,4776.204776,3703.703704,5319.605320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2016-01-01,53174.362952,37370.651331,38175.592592,38571.915022,54930.856397,49593.272923,66103.554605,73427.479616,46140.822156,43550.065266,...,41231.720928,43324.000681,40050.320652,47289.116740,50835.209323,47637.199448,52743.988952,45001.986342,35063.657517,51146.402830
2017-01-01,53193.761025,38065.175007,38642.651564,39519.079008,56244.545539,51573.670040,66566.706898,73320.954415,47106.118281,45003.249466,...,41958.035466,44937.331724,40982.267199,48366.911150,51603.379445,47936.124780,54219.663912,45714.418346,36107.139541,52382.322904
2018-01-01,54611.227968,38692.246319,39248.930152,40398.563865,57807.173424,53343.729600,67809.349387,73387.067527,47914.702256,46183.905128,...,42762.203525,46369.768623,42598.099659,49485.928773,52493.290781,48499.492275,56380.104446,46793.174730,37312.323203,54885.943280
2019-01-01,55786.754554,39283.837349,39711.397141,40952.211286,59453.079767,54477.352692,68830.891195,74031.087160,48388.188661,46667.260500,...,43358.125863,47057.408810,43627.132232,50304.190977,53007.615909,49247.761992,57690.286376,47394.112145,37627.043157,55265.666058


# Load Easterlin's data

In [6]:
# Import Easterlin's income data
easterlin_data = pd.read_csv('../historic_data/Historical Statistics of the US - Easterlin State Income Data.csv',index_col=0)

# Import historic CPI data
historic_cpi_data = pd.read_csv('../historic_data/Historical Statistics of the US - cpi.csv',index_col=0)

# Rescale the historic CPI so that its 1929 value equals the price level in data_p for 1929,
# putting both price series on a common base.
# .item() extracts the single 1929 observation as a scalar and raises if there is not exactly
# one; calling float() on a one-element Series is deprecated in recent pandas.
price_level_1929 = data_p.loc['1929'].item()
historic_cpi_data = historic_cpi_data/historic_cpi_data.loc[1929]*price_level_1929

In [7]:
# Construct series for real incomes in 1840, 1880, and 1900 by dividing each year's
# nominal income by the rescaled CPI of the SAME year.
# (The 1880 series was previously deflated by the CPI for 1890.)
# .item() returns the single CPI value in the row as a scalar and raises if the row
# does not hold exactly one value; float() on a one-element Series is deprecated.
df_1840 = easterlin_data['Income per capita - 1840 - A [cur dollars]']/historic_cpi_data.loc[1840].item()
df_1880 = easterlin_data['Income per capita - 1880 [cur dollars]']/historic_cpi_data.loc[1880].item()
df_1900 = easterlin_data['Income per capita - 1900 [cur dollars]']/historic_cpi_data.loc[1900].item()

# Put into a DataFrame (one row per year after transposing) and concatenate with previous data beginning in 1929
df = pd.DataFrame({pd.to_datetime('1840'):df_1840,pd.to_datetime('1880'):df_1880,pd.to_datetime('1900'):df_1900}).transpose()
df = pd.concat([data_y,df]).sort_index()

In [8]:
# Export data to csv
# Columns left out of the exported file: AK, HI and the region-named columns
dropCols = [u'AK', u'HI', u'New England', u'Mideast', u'Great Lakes', u'Plains', u'Southeast', u'Southwest', u'Rocky Mountain', u'Far West']
series = df.sort_index().drop(columns=dropCols)

# Missing values are written out as the literal string 'NaN'
series.to_csv('../csv/state_income_data.csv',na_rep='NaN')

In [9]:
# Export notebook to .py
# NOTE(review): runProcs is a project-local module; exportNb presumably writes this
# notebook ('state_income_data') out as a Python script — confirm against runProcs.
runProcs.exportNb('state_income_data')