In [28]:
import pandas as pd
from sqlite3 import dbapi2 as sqlite
from sqlalchemy import *
# Engine for the full SQLite database; the sqlite3 DBAPI module imported above
# is passed to SQLAlchemy explicitly.
engine = create_engine(
    'sqlite+pysqlite:///data/interview.db',
    module=sqlite,
)

In [33]:
# Load the universe frame from its pickled archive.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
universe_df = pd.read_pickle('data/universe.zip')

In [34]:
# Row count of the universe frame.
universe_df.shape[0]

11449

In [None]:
# Load the fundamental frame from its pickled archive.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
fundamental_df = pd.read_pickle('data/fundamental.zip')

In [36]:
# Row count of the fundamental frame.
fundamental_df.shape[0]

634700

In [None]:
# Load the market frame from its pickled archive.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
market_df = pd.read_pickle('data/market.zip')

In [37]:
# Row count of the market frame.
market_df.shape[0]

11342118

In [39]:
# Declare the three tables of the interview database and create whichever of
# them do not exist yet.
metadata = MetaData()

# Universe membership: composite primary key over name, id and the date range.
universe = Table("universe", metadata,
    Column('universe_name', Text, nullable=False, primary_key=True),
    Column('primary_id', Text, nullable=False, primary_key=True),
    Column('company_name', Text, nullable=True),
    Column('start_date', Date, nullable=False, primary_key=True),
    Column('end_date', Date, nullable=False, primary_key=True)
)

# Fundamental data: three explicit key columns plus one nullable Float column
# per remaining column of fundamental_df.
fundamental_key_columns = [
    Column('date', Date, nullable=False, primary_key=True),
    Column('primary_id', Text, nullable=False, primary_key=True),
    Column('fpe_offset', Integer, nullable=False, primary_key=True),
]
fundamental_key_names = {key_column.name for key_column in fundamental_key_columns}
# Skip frame columns that are already declared as keys. fundamental_df is
# rebound later in this notebook to the SQL read-back, which carries the key
# fields as ordinary columns; without this filter, re-running the cell would
# declare each key a second time as a nullable Float.
fundamental_value_columns = [
    Column(column_name, Float, nullable=True)
    for column_name in fundamental_df.columns
    if column_name not in fundamental_key_names
]
fundamental = Table(
    "fundamental", metadata,
    *(fundamental_key_columns + fundamental_value_columns)
)

# Market data: keyed by (date, primary_id); value columns are nullable by default.
market = Table("market", metadata,
    Column('date', Date, nullable=False, primary_key=True),
    Column('primary_id', Text, nullable=False, primary_key=True),
    Column('close', Float),
    Column('shares', Float),
    Column('return_idx', Float),
    Column('split_factor', Float)
)

# checkfirst=True leaves tables that already exist untouched.
metadata.create_all(engine, checkfirst=True)

In [None]:
# Append the universe rows to the pre-created `universe` table (frame index is not written).
universe_df.to_sql(
    name='universe',
    con=engine,
    if_exists='append',
    index=False,
)

In [41]:
# Move the index levels into columns and rename `build_date` to `date` so the
# frame matches the `fundamental` table, then append the rows.
(
    fundamental_df
    .reset_index()
    .rename({'build_date': 'date'}, axis=1)
    .to_sql(name='fundamental', con=engine, if_exists='append', index=False)
)

In [32]:
# Move the index levels into columns and append the rows to the `market` table.
# The frame holds roughly 11.3M rows, so write it in batches: by default
# to_sql writes every row in a single batch, which needs a full in-memory copy
# of the data. The rows stored are the same either way.
market_df.reset_index().to_sql(
    'market',
    engine,
    if_exists='append',
    index=False,
    chunksize=100_000,
)

In [19]:
# Read the whole `fundamental` table back from SQLite, parsing `date` as datetime.
# Note: this rebinds fundamental_df, replacing the frame loaded from the pickle.
fundamental_df = pd.read_sql(
    sql="select * from fundamental",
    con=engine,
    parse_dates=['date'],
)

In [47]:
# Keep rows dated 2017-01-01 or later, restricted to the key columns and six
# fundamental items, give those items readable names, and order by the keys.
fundamental_keys = ['date', 'primary_id', 'fpe_offset']
fundamental_renames = {
    'RTLR': 'total_revenue',
    'ETOE': 'total_operating_expense',
    'NINC': 'net_income',
    'ATOT': 'total_assets',
    'LTLL': 'total_liabilities',
    'QTLE': 'total_equity',
}
is_recent_fundamental = fundamental_df['date'] >= pd.Timestamp(2017, 1, 1)
new_fundamental_df = (
    fundamental_df.loc[is_recent_fundamental, fundamental_keys + list(fundamental_renames)]
    .rename(columns=fundamental_renames)
    .sort_values(by=fundamental_keys)
)

In [32]:
# Preview the first five rows of the filtered fundamental frame.
new_fundamental_df.head(n=5)

Unnamed: 0,date,primary_id,fpe_offset,total_revenue,total_operating_expense,net_income,total_assets,total_liabilities,total_equity
508069,2015-03-31,30710,-3,30.083,28.231,1.019,89.557,75.594,13.963
508070,2015-03-31,30710,-2,29.12,28.325,0.717,93.752,67.344,26.408
508071,2015-03-31,30710,-1,36.599,32.153,2.458,102.141,73.117,29.024
508072,2015-03-31,30710,0,37.166,33.559,3.354,145.952,48.478,97.474
504722,2015-03-31,174410,-3,240.881,225.412,7.63,594.436,369.555,224.881


In [35]:
# Read the whole `market` table back from SQLite, parsing `date` as datetime.
# Note: this rebinds market_df, replacing the frame loaded from the pickle.
market_df = pd.read_sql(
    sql="select * from market",
    con=engine,
    parse_dates=['date'],
)

In [40]:
# Keep market rows dated 2017-01-01 or later, ordered by date then primary_id.
is_recent_market = market_df['date'] >= pd.Timestamp(2017, 1, 1)
new_market_df = market_df[is_recent_market].sort_values(by=['date', 'primary_id'])

In [44]:
# Read the whole `universe` table back from SQLite, parsing both date columns.
# Note: this rebinds universe_df, replacing the frame loaded from the pickle.
universe_df = pd.read_sql(
    sql="select * from universe",
    con=engine,
    parse_dates=['start_date', 'end_date'],
)

In [45]:
# Preview the first five rows of the universe frame.
universe_df.head(n=5)

Unnamed: 0,universe_name,primary_id,company_name,start_date,end_date
0,BacktestUniverse,00079410,A C C CORP,1995-12-31,1998-06-29
1,BacktestUniverse,00107310,AG ASSOCS INC,1995-12-31,1996-03-30
2,BacktestUniverse,00190710,A S T RESEARCH INC,1995-12-31,1997-09-29
3,BacktestUniverse,00192010,ARCO CHEMICAL CO,1995-12-31,1998-09-29
4,BacktestUniverse,00202M10,AMERICAN PRESIDENT COS LTD,1995-12-31,1997-12-30


In [48]:
# Engine for the second SQLite database, which receives the reduced tables
# written by the following cells.
engine2 = create_engine(
    'sqlite+pysqlite:///data/interview2.db',
    module=sqlite,
)

In [49]:
# Write the filtered fundamental frame to the second database, replacing any
# existing `fundamental` table there.
new_fundamental_df.to_sql(
    name='fundamental',
    con=engine2,
    if_exists='replace',
    index=False,
)

In [50]:
# Write the filtered market frame to the second database, replacing any
# existing `market` table there.
new_market_df.to_sql(
    name='market',
    con=engine2,
    if_exists='replace',
    index=False,
)

In [51]:
# Write the universe frame to the second database, replacing any existing
# `universe` table there.
universe_df.to_sql(
    name='universe',
    con=engine2,
    if_exists='replace',
    index=False,
)