In [14]:
import pandas as pd
from sqlalchemy import create_engine
import pprint
import psycopg2

In [15]:
# Import CSV files
csvfile = "DataFiles/SAEXP1.csv"
csvfile2 = "DataFiles/csvData.csv"
us_expenditures = pd.read_csv(csvfile)
state_abbr = pd.read_csv(csvfile2)

In [16]:
us_expenditures.columns

Index(['GeoFIPS', 'GeoName', 'Region', 'TableName', 'LineCode',
       'IndustryClassification', 'Description', 'Unit', '1997', '1998', '1999',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'],
      dtype='object')

In [17]:
# change year columns to row and keep only columns needed
us_expenditures_df = us_expenditures.melt(id_vars=['GeoFIPS', 'GeoName','Description'],value_vars=['1997', '1998', '1999',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'], var_name="Year")
us_expenditures_df

Unnamed: 0,GeoFIPS,GeoName,Description,Year,value
0,"""00000""",United States,Personal consumption expenditures,1997,5534091.8
1,"""00000""",United States,Goods,1997,2003807.2
2,"""00000""",United States,Durable goods,1997,715529.6
3,"""00000""",United States,Motor vehicles and parts,1997,293082.5
4,"""00000""",United States,Furnishings and durable household equipment,1997,160490.2
...,...,...,...,...,...
33207,"""98000""",Far West,Less: Receipts from sales of goods and serv...,2019,236897.2
33208,Note: See the included footnote file.,,,2019,
33209,SAEXP1: Total personal consumption expenditure...,,,2019,
33210,"Last updated: October 8, 2020-- new PCE by sta...",,,2019,


In [18]:
#remove quotes from GeoGIPS
us_expenditures_df['GeoFIPS']=us_expenditures_df['GeoFIPS'].str.replace('"','')

In [19]:
#remove NAN rows
us_expend = us_expenditures_df[us_expenditures_df['GeoName'].notna()]
us_expend

Unnamed: 0,GeoFIPS,GeoName,Description,Year,value
0,00000,United States,Personal consumption expenditures,1997,5534091.8
1,00000,United States,Goods,1997,2003807.2
2,00000,United States,Durable goods,1997,715529.6
3,00000,United States,Motor vehicles and parts,1997,293082.5
4,00000,United States,Furnishings and durable household equipment,1997,160490.2
...,...,...,...,...,...
33203,98000,Far West,Financial services and insurance,2019,210726.1
33204,98000,Far West,Other services,2019,220767.1
33205,98000,Far West,Final consumption expenditures of nonprofit ...,2019,88265.6
33206,98000,Far West,Gross output of nonprofit institutions,2019,325162.7


In [20]:
# rename columns
us_expenditures_final = us_expend.rename(columns ={'value':'Dollars'})
us_expenditures_final

Unnamed: 0,GeoFIPS,GeoName,Description,Year,Dollars
0,00000,United States,Personal consumption expenditures,1997,5534091.8
1,00000,United States,Goods,1997,2003807.2
2,00000,United States,Durable goods,1997,715529.6
3,00000,United States,Motor vehicles and parts,1997,293082.5
4,00000,United States,Furnishings and durable household equipment,1997,160490.2
...,...,...,...,...,...
33203,98000,Far West,Financial services and insurance,2019,210726.1
33204,98000,Far West,Other services,2019,220767.1
33205,98000,Far West,Final consumption expenditures of nonprofit ...,2019,88265.6
33206,98000,Far West,Gross output of nonprofit institutions,2019,325162.7


In [21]:
state_abbr.columns
#rename columns
state_df = state_abbr.rename(columns = {'State':'GeoName'})
# keep only columns needed
state_df2 = state_df[['GeoName', 'Code']]
state_df2.head()

Unnamed: 0,GeoName,Code
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [22]:
# merge dataframes
us_spend_df = pd.merge(state_df2, us_expenditures_final, on="GeoName")
us_spend_df

Unnamed: 0,GeoName,Code,GeoFIPS,Description,Year,Dollars
0,Alabama,AL,01000,Personal consumption expenditures,1997,74340.3
1,Alabama,AL,01000,Goods,1997,30295.9
2,Alabama,AL,01000,Durable goods,1997,10102.1
3,Alabama,AL,01000,Motor vehicles and parts,1997,4891.9
4,Alabama,AL,01000,Furnishings and durable household equipment,1997,2221.4
...,...,...,...,...,...,...
28147,Wyoming,WY,56000,Financial services and insurance,2019,2863.3
28148,Wyoming,WY,56000,Other services,2019,1497.5
28149,Wyoming,WY,56000,Final consumption expenditures of nonprofit ...,2019,588.8
28150,Wyoming,WY,56000,Gross output of nonprofit institutions,2019,2920.4


In [23]:
us_spend_df.replace()

Unnamed: 0,GeoName,Code,GeoFIPS,Description,Year,Dollars
0,Alabama,AL,01000,Personal consumption expenditures,1997,74340.3
1,Alabama,AL,01000,Goods,1997,30295.9
2,Alabama,AL,01000,Durable goods,1997,10102.1
3,Alabama,AL,01000,Motor vehicles and parts,1997,4891.9
4,Alabama,AL,01000,Furnishings and durable household equipment,1997,2221.4
...,...,...,...,...,...,...
28147,Wyoming,WY,56000,Financial services and insurance,2019,2863.3
28148,Wyoming,WY,56000,Other services,2019,1497.5
28149,Wyoming,WY,56000,Final consumption expenditures of nonprofit ...,2019,588.8
28150,Wyoming,WY,56000,Gross output of nonprofit institutions,2019,2920.4


In [24]:
#show nan values
nan_values = us_spend_df.isna()
nan_columns = nan_values.any()
columns_wnan = us_spend_df.columns[nan_columns].tolist()
print(columns_wnan)

[]


In [25]:
# List of categories - 25 categories
description = us_expenditures_df['Description'].unique()
description

array(['Personal consumption expenditures', ' Goods', '  Durable goods',
       '   Motor vehicles and parts',
       '   Furnishings and durable household equipment',
       '   Recreational goods and vehicles', '   Other durable goods',
       '  Nondurable goods',
       '   Food and beverages purchased for off-premises consumption',
       '   Clothing and footwear', '   Gasoline and other energy goods',
       '   Other nondurable goods', ' Services',
       '  Household consumption expenditures (for services)',
       '   Housing and utilities', '   Health care',
       '   Transportation services', '   Recreation services',
       '   Food services and accommodations',
       '   Financial services and insurance', '   Other services',
       '  Final consumption expenditures of nonprofit institutions serving households (NPISHs)',
       '   Gross output of nonprofit institutions',
       '   Less: Receipts from sales of goods and services by nonprofit institutions',
       nan],

In [26]:
#connection = engine.connect()
engine = create_engine('sqlite:///DataFiles/us_spend.sqlite', echo=False)

In [27]:
# save the data into sqlite
us_spend_df.to_sql('us_spend_df', con=engine, if_exists='replace')


In [28]:
engine.execute("SELECT * FROM us_spend_df").fetchall()

[(0, 'Alabama', 'AL', ' 01000', 'Personal consumption expenditures', '1997', 74340.3),
 (1, 'Alabama', 'AL', ' 01000', ' Goods', '1997', 30295.9),
 (2, 'Alabama', 'AL', ' 01000', '  Durable goods', '1997', 10102.1),
 (3, 'Alabama', 'AL', ' 01000', '   Motor vehicles and parts', '1997', 4891.9),
 (4, 'Alabama', 'AL', ' 01000', '   Furnishings and durable household equipment', '1997', 2221.4),
 (5, 'Alabama', 'AL', ' 01000', '   Recreational goods and vehicles', '1997', 1907.8),
 (6, 'Alabama', 'AL', ' 01000', '   Other durable goods', '1997', 1081.0),
 (7, 'Alabama', 'AL', ' 01000', '  Nondurable goods', '1997', 20193.8),
 (8, 'Alabama', 'AL', ' 01000', '   Food and beverages purchased for off-premises consumption', '1997', 7742.9),
 (9, 'Alabama', 'AL', ' 01000', '   Clothing and footwear', '1997', 3549.5),
 (10, 'Alabama', 'AL', ' 01000', '   Gasoline and other energy goods', '1997', 2437.2),
 (11, 'Alabama', 'AL', ' 01000', '   Other nondurable goods', '1997', 6464.2),
 (12, 'Alabama