In [1]:
import pandas as pd
from sqlalchemy import create_engine
import pprint
import psycopg2

In [2]:
# Import CSV files
csvfile = "DataFiles/SAEXP1.csv"
us_expenditures = pd.read_csv(csvfile)

In [3]:
us_expenditures.columns

Index(['GeoFIPS', 'GeoName', 'Region', 'TableName', 'LineCode',
       'IndustryClassification', 'Description', 'Unit', '1997', '1998', '1999',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'],
      dtype='object')

In [4]:
us_expenditures_df = us_expenditures.melt(id_vars=['GeoName', 'Region','Description'],value_vars=['1997', '1998', '1999',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'], var_name="Year")
us_expenditures_df

Unnamed: 0,GeoName,Region,Description,Year,value
0,United States,,Personal consumption expenditures,1997,5534091.8
1,United States,,Goods,1997,2003807.2
2,United States,,Durable goods,1997,715529.6
3,United States,,Motor vehicles and parts,1997,293082.5
4,United States,,Furnishings and durable household equipment,1997,160490.2
...,...,...,...,...,...
33207,Far West,8,Less: Receipts from sales of goods and serv...,2019,236897.2
33208,,,,2019,
33209,,,,2019,
33210,,,,2019,


In [5]:
#remove NAN rows
us_expend = us_expenditures_df[us_expenditures_df['GeoName'].notna()]
us_expend

Unnamed: 0,GeoName,Region,Description,Year,value
0,United States,,Personal consumption expenditures,1997,5534091.8
1,United States,,Goods,1997,2003807.2
2,United States,,Durable goods,1997,715529.6
3,United States,,Motor vehicles and parts,1997,293082.5
4,United States,,Furnishings and durable household equipment,1997,160490.2
...,...,...,...,...,...
33203,Far West,8,Financial services and insurance,2019,210726.1
33204,Far West,8,Other services,2019,220767.1
33205,Far West,8,Final consumption expenditures of nonprofit ...,2019,88265.6
33206,Far West,8,Gross output of nonprofit institutions,2019,325162.7


In [6]:
# rename columns
us_expenditures_final = us_expend.rename(columns ={'value':'Dollars'})
us_expenditures_final

Unnamed: 0,GeoName,Region,Description,Year,Dollars
0,United States,,Personal consumption expenditures,1997,5534091.8
1,United States,,Goods,1997,2003807.2
2,United States,,Durable goods,1997,715529.6
3,United States,,Motor vehicles and parts,1997,293082.5
4,United States,,Furnishings and durable household equipment,1997,160490.2
...,...,...,...,...,...
33203,Far West,8,Financial services and insurance,2019,210726.1
33204,Far West,8,Other services,2019,220767.1
33205,Far West,8,Final consumption expenditures of nonprofit ...,2019,88265.6
33206,Far West,8,Gross output of nonprofit institutions,2019,325162.7


In [7]:
# List of categories - 25 categories
description = us_expenditures_df['Description'].unique()
description

array(['Personal consumption expenditures', ' Goods', '  Durable goods',
       '   Motor vehicles and parts',
       '   Furnishings and durable household equipment',
       '   Recreational goods and vehicles', '   Other durable goods',
       '  Nondurable goods',
       '   Food and beverages purchased for off-premises consumption',
       '   Clothing and footwear', '   Gasoline and other energy goods',
       '   Other nondurable goods', ' Services',
       '  Household consumption expenditures (for services)',
       '   Housing and utilities', '   Health care',
       '   Transportation services', '   Recreation services',
       '   Food services and accommodations',
       '   Financial services and insurance', '   Other services',
       '  Final consumption expenditures of nonprofit institutions serving households (NPISHs)',
       '   Gross output of nonprofit institutions',
       '   Less: Receipts from sales of goods and services by nonprofit institutions',
       nan],

In [8]:
#connection = engine.connect()
engine = create_engine('sqlite:///DataFiles/SAEXP1', echo=False)

In [11]:
# save data into sqlite
us_expenditures_final.to_sql('us_expenditures_final', con=engine, if_exists='replace')

In [12]:
engine.execute("SELECT * FROM us_expenditures_final").fetchall()

[(0, 'United States', ' ', 'Personal consumption expenditures', '1997', 5534091.8),
 (1, 'United States', ' ', ' Goods', '1997', 2003807.2),
 (2, 'United States', ' ', '  Durable goods', '1997', 715529.6),
 (3, 'United States', ' ', '   Motor vehicles and parts', '1997', 293082.5),
 (4, 'United States', ' ', '   Furnishings and durable household equipment', '1997', 160490.2),
 (5, 'United States', ' ', '   Recreational goods and vehicles', '1997', 174632.0),
 (6, 'United States', ' ', '   Other durable goods', '1997', 87324.8),
 (7, 'United States', ' ', '  Nondurable goods', '1997', 1288277.7),
 (8, 'United States', ' ', '   Food and beverages purchased for off-premises consumption', '1997', 474777.2),
 (9, 'United States', ' ', '   Clothing and footwear', '1997', 247496.1),
 (10, 'United States', ' ', '   Gasoline and other energy goods', '1997', 147661.2),
 (11, 'United States', ' ', '   Other nondurable goods', '1997', 418343.1),
 (12, 'United States', ' ', ' Services', '1997', 353