In [33]:
import os
import numpy as np
import pandas as pd
from atusfunclib import load_data
from sqlalchemy import create_engine

In [34]:
# Absolute path of the data directory, resolved from the current working
# directory instead of a hard-coded, user-specific location. The COPY
# statements further down need an absolute path, while the rest of the
# notebook addresses the same directory through the relative name "data".
PATH = os.path.abspath("data")

In [35]:
# Read everything from the relative "data" location in a single call;
# the result is unpacked into individual dataframes afterwards.
data_import = load_data(loc="data")

In [36]:
# Split the loaded sequence into its ten dataframes: the main frame `df`
# followed by nine `df*codes` frames (order must match what load_data returns).
(df,
 dfactcodes, dfeducodes, dfinccodes,
 dfagecodes, dfempcodes, dfindcodes,
 dfraccodes, dfloccodes, dfwhocodes) = data_import

In [37]:
# Dtype conversion: pandas dtype name -> PostgreSQL column type used when
# building the CREATE TABLE statements.
# float64 maps to DOUBLE PRECISION (8 bytes); REAL is a 4-byte type and would
# silently drop precision from 64-bit float columns.
dtypedict = {'int64': 'BIGINT', 'float64': 'DOUBLE PRECISION', 'category': 'TEXT'}

In [38]:
# Code tables: target table name -> code dataframe
codedfs = {
    'actcodes': dfactcodes,
    'educodes': dfeducodes,
    'inccodes': dfinccodes,
    'agecodes': dfagecodes,
    'empcodes': dfempcodes,
    'indcodes': dfindcodes,
    'raccodes': dfraccodes,
    'loccodes': dfloccodes,
    'whocodes': dfwhocodes,
}
# Data tables: target table name -> column subset of the main frame.
# DataFrame.filter(regex=...) keeps every column whose name matches the
# pattern anywhere (re.search semantics), so each subset keeps TUCASEID plus:
#   actimes{1,2,3}  - columns ending in "t" + 2/4/6 digits
#   actimesw{1,2,3} - columns containing "t" + 2/4/6 digits + "_W"
#   demow           - columns starting with uppercase letters followed by "_W"
#   demo            - uppercase-letter columns without a "_W" suffix
# The 'demo' pattern ends in [^_] rather than [^_W]: a character class of
# "_" and "W" rejects any name whose last character is either one, so
# all-uppercase names that merely end in "W" were dropped from every table.
# "_W"-suffixed names are still excluded because [A-Z]+ cannot match across
# the underscore.
datadfs = {'actimesw3': df.filter(regex=r'TUCASEID|t\d{6}_W'),
           'actimes3':  df.filter(regex=r'TUCASEID|t\d{6}$'),
           'actimesw2': df.filter(regex=r'TUCASEID|t\d{4}_W'),
           'actimes2':  df.filter(regex=r'TUCASEID|t\d{4}$'),
           'actimesw1': df.filter(regex=r'TUCASEID|t\d{2}_W'),
           'actimes1':  df.filter(regex=r'TUCASEID|t\d{2}$'),
           'demow':     df.filter(regex=r'TUCASEID|^[A-Z]+_W'),
           'demo':      df.filter(regex=r'TUCASEID|^[A-Z]+[^_]$')}

# Connection settings: target Postgres database and the role used to log in
databasename, username = 'atusdata', 'jeremysmith'

In [39]:
# SQLAlchemy engine for the local Postgres server (port 5432); the URL
# carries the user name and database name but no password.
db_url = "postgresql://{}@localhost:5432/{}".format(username, databasename)
engine = create_engine(db_url)

In [40]:
# Create code tables (small tables OK to use pandas).
# to_sql is called with its defaults, under which pandas raises ValueError
# when the table is already present; that case is reported and skipped so
# the cell can be re-run against an existing database.
for k, codedf in codedfs.items():
    try:
        codedf.to_sql(k, engine)
    except ValueError:
        # Call form of print: same output on Python 2, valid on Python 3.
        print("Table '{}' already exists in '{}'".format(k, databasename))

Table 'raccodes' already exists in 'atusdata'
Table 'loccodes' already exists in 'atusdata'
Table 'actcodes' already exists in 'atusdata'
Table 'indcodes' already exists in 'atusdata'
Table 'empcodes' already exists in 'atusdata'
Table 'whocodes' already exists in 'atusdata'
Table 'agecodes' already exists in 'atusdata'
Table 'inccodes' already exists in 'atusdata'
Table 'educodes' already exists in 'atusdata'


In [29]:
# Create csv subfiles from data dfs.
# Files are written under PATH, the same directory the COPY statements read
# from, so the export and the bulk load cannot point at different places
# when the working directory changes.
for k in datadfs.keys():
    datadfs[k].to_csv(os.path.join(PATH, "{}.csv".format(k)))

In [41]:
# SQL templates, filled in with str.format:
#   sql_create_statement    - (table name, column definitions)
#   sql_copy_statement      - (table name, path of the csv file to load)
#   sql_droptable_statement - (table name)
sql_create_statement = """
    CREATE TABLE IF NOT EXISTS {}
        ({});
    """
sql_copy_statement = """
    COPY {} FROM '{}'
        WITH (FORMAT csv, HEADER);
    """
# IF EXISTS keeps the drop step from failing on a database where the tables
# have not been created yet, mirroring IF NOT EXISTS in the create template.
sql_droptable_statement = """
    DROP TABLE IF EXISTS {};
"""

In [42]:
# Drop every data table so that it can be rebuilt from scratch
for table_name in datadfs:
    drop_sql = sql_droptable_statement.format(table_name)
    engine.execute(drop_sql)

In [43]:
# Create one table per data frame, deriving the column list from the frame
for table_name in datadfs:
    frame = datadfs[table_name]

    # Pair each column name with the SQL type mapped from its pandas dtype;
    # a dtype missing from dtypedict raises KeyError.
    column_defs = []
    for column, dtype in zip(frame.keys().tolist(), frame.dtypes.tolist()):
        column_defs.append(" ".join((column, dtypedict[dtype.name])))

    # A BIGINT "index" column is declared ahead of the frame's own columns.
    schemestring = "index BIGINT, " + ", ".join(column_defs)

    engine.execute(sql_create_statement.format(table_name, schemestring))

In [44]:
%%time
# Copy data from csv files into tables.
# COPY ... FROM '<file>' is executed by the Postgres server, so each csv is
# addressed by its path under PATH. The path is printed before each load so
# progress is visible.
for k in datadfs.keys():

    filenamestring = os.path.join(PATH, "{}.csv".format(k))
    # Call form of print: same output on Python 2, valid on Python 3.
    print(filenamestring)

    # autocommit so that each COPY is committed as soon as it finishes
    with engine.connect().execution_options(autocommit=True) as con:
        con.execute(sql_copy_statement.format(k, filenamestring))

/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/actimesw2.csv
/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/actimesw3.csv
/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/actimesw1.csv
/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/actimes2.csv
/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/actimes3.csv
/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/actimes1.csv
/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/demo.csv
/Users/jeremysmith/Documents/python-scripts/TDICapstoneProject/data/demow.csv
CPU times: user 18.5 ms, sys: 7.2 ms, total: 25.7 ms
Wall time: 1min 5s
