In [68]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import or_
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float

import numpy as np
import pandas as pd

In [2]:
# Create a SQLAlchemy engine for the SQLite database file `sec13f.sqlite`
# (the path in the URL is relative to the current working directory).
engine = create_engine("sqlite:///sec13f.sqlite")


In [3]:
# Open a connection on the engine called `conn`; later cells use it to run
# the delete / insert / select statements directly.
conn = engine.connect()

In [4]:
# Create an ORM session on the engine; it is used below to query `Positions`.
session = Session(engine)

In [5]:
# Declare a Base using `automap_base()` so mapped classes can be generated
# from tables that already exist in the database.
Base = automap_base()

In [6]:
# Use the Base class to reflect the database tables through the engine and
# generate mapped classes for them on `Base.classes`.
# NOTE(review): `reflect=True` is the pre-1.4 SQLAlchemy spelling; newer
# releases expect `Base.prepare(autoload_with=engine)` — confirm against the
# installed version before upgrading.
Base.prepare(engine, reflect=True)

In [7]:
# Show the names of all classes that automap generated on the Base.
Base.classes.keys()

['latest_positions', 'positions', 'securitiesex']

In [10]:
# Keep a handle on the mapped class generated for the `positions` table.
Positions = Base.classes.positions

In [14]:
# Load the position columns used in this analysis into a DataFrame,
# one row per record returned by the query.
df = pd.DataFrame(
    session.query(
        Positions.file_date,
        Positions.cusip,
        Positions.name,
        Positions.mval,
        Positions.shares,
    ).all()
)
df.head()

Unnamed: 0,file_date,cusip,name,mval,shares
0,2013-06-30,25816109,AMERICAN EXPRESS CO,145942,1952142
1,2013-06-30,25816109,AMERICAN EXPRESS CO,1287771,17225400
2,2013-06-30,25816109,AMERICAN EXPRESS CO,62786,839832
3,2013-06-30,25816109,AMERICAN EXPRESS CO,145266,1943100
4,2013-06-30,25816109,AMERICAN EXPRESS CO,597679,7994634


In [26]:
# Total the remaining columns (mval, shares) for each
# (file_date, cusip, name) combination, keeping the keys as ordinary columns.
sumdf = (
    df.groupby(by=["file_date", "cusip", "name"], as_index=False)
    .sum()
)
sumdf.head()


Unnamed: 0,file_date,cusip,name,mval,shares
0,2013-06-30,025816109,AMERICAN EXPRESS CO,11334417,151610700
1,2013-06-30,064058100,BANK OF NEW YORK MELLON CORP,691264,24644029
2,2013-06-30,167250109,CHICAGO BRIDGE & IRON CO N V,569798,9550755
3,2013-06-30,191216100,COCA COLA CO,16044001,400000000
4,2013-06-30,20825C104,CONOCOPHILLIPS,1459497,24123911


In [51]:
def get_prior_quarter(date):
    """Return the quarter-end date immediately preceding a quarter-end date.

    Parameters
    ----------
    date : str
        Date text laid out as ``YYYY-MM-DD``; only characters 0-3 (year) and
        5-9 (month-day) are inspected, so trailing text is ignored.

    Returns
    -------
    str
        The previous quarter end as ``YYYY-MM-DD``, or an empty string when
        the month-day part is not one of 03-31, 06-30, 09-30 or 12-31.
    """
    yr, quarter_end = date[:4], date[5:10]

    # A first-quarter date rolls back to the end of the previous year; this is
    # the only case in which the year text has to be parsed as a number.
    if quarter_end == "03-31":
        return str(int(yr) - 1) + "-12-31"

    # Every other quarter end maps to an earlier quarter end in the same year.
    same_year_previous = {
        "06-30": "03-31",
        "09-30": "06-30",
        "12-31": "09-30",
    }
    previous_end = same_year_previous.get(quarter_end)
    if previous_end is None:
        return ""
    return yr + "-" + previous_end

In [52]:
# Spot-check the helper on a second-quarter end date.
get_prior_quarter('2013-06-30')

'2013-03-31'

In [53]:
# Add a `prior_qtr` column holding the quarter-end date that precedes each
# row's `file_date`; the self-merge in the following cell joins on it.
sumdf["prior_qtr"] = sumdf["file_date"].apply(get_prior_quarter)
sumdf.head()

Unnamed: 0,file_date,cusip,name,mval,shares,prior_qtr
0,2013-06-30,025816109,AMERICAN EXPRESS CO,11334417,151610700,2013-03-31
1,2013-06-30,064058100,BANK OF NEW YORK MELLON CORP,691264,24644029,2013-03-31
2,2013-06-30,167250109,CHICAGO BRIDGE & IRON CO N V,569798,9550755,2013-03-31
3,2013-06-30,191216100,COCA COLA CO,16044001,400000000,2013-03-31
4,2013-06-30,20825C104,CONOCOPHILLIPS,1459497,24123911,2013-03-31


In [60]:
# Self-join: pair every row with the row for the same cusip/name whose
# file_date equals this row's prior_qtr. Overlapping columns from the current
# row get the `_c` suffix and those from the prior-quarter row get `_p`;
# rows without a prior-quarter match keep NaN in the `_p` columns (left join).
joindf = sumdf.merge(
    sumdf,
    how="left",
    left_on=["cusip", "name", "prior_qtr"],
    right_on=["cusip", "name", "file_date"],
    suffixes=("_c", "_p"),
)
joindf.head(50)

Unnamed: 0,file_date_c,cusip,name,mval_c,shares_c,prior_qtr_c,file_date_p,mval_p,shares_p,prior_qtr_p
0,2013-06-30,025816109,AMERICAN EXPRESS CO,11334417,151610700,2013-03-31,,,,
1,2013-06-30,064058100,BANK OF NEW YORK MELLON CORP,691264,24644029,2013-03-31,,,,
2,2013-06-30,167250109,CHICAGO BRIDGE & IRON CO N V,569798,9550755,2013-03-31,,,,
3,2013-06-30,191216100,COCA COLA CO,16044001,400000000,2013-03-31,,,,
4,2013-06-30,20825C104,CONOCOPHILLIPS,1459497,24123911,2013-03-31,,,,
5,2013-06-30,22160K105,COSTCO WHSL CORP NEW,479140,4333363,2013-03-31,,,,
6,2013-06-30,23918K108,DAVITA HEALTHCARE PARTNERS I,1808848,14973906,2013-03-31,,,,
7,2013-06-30,244199105,DEERE & CO,323275,3978767,2013-03-31,,,,
8,2013-06-30,25470M109,DISH NETWORK CORP,23272,547312,2013-03-31,,,,
9,2013-06-30,25490A309,DIRECTV,2297655,37275400,2013-03-31,,,,


In [64]:
# Change in market value: current quarter minus prior quarter
# (NaN wherever the merge found no prior-quarter row).
joindf["cmval"] = joindf["mval_c"].sub(joindf["mval_p"])

In [65]:
# Change in share count: current quarter minus prior quarter
# (NaN wherever the merge found no prior-quarter row).
joindf["cshares"] = joindf["shares_c"].sub(joindf["shares_p"])

In [66]:
# Inspect the first 50 rows now that `cmval` and `cshares` have been added.
joindf.head(50)

Unnamed: 0,file_date_c,cusip,name,mval_c,shares_c,prior_qtr_c,file_date_p,mval_p,shares_p,prior_qtr_p,cmval,cshares
0,2013-06-30,025816109,AMERICAN EXPRESS CO,11334417,151610700,2013-03-31,,,,,,
1,2013-06-30,064058100,BANK OF NEW YORK MELLON CORP,691264,24644029,2013-03-31,,,,,,
2,2013-06-30,167250109,CHICAGO BRIDGE & IRON CO N V,569798,9550755,2013-03-31,,,,,,
3,2013-06-30,191216100,COCA COLA CO,16044001,400000000,2013-03-31,,,,,,
4,2013-06-30,20825C104,CONOCOPHILLIPS,1459497,24123911,2013-03-31,,,,,,
5,2013-06-30,22160K105,COSTCO WHSL CORP NEW,479140,4333363,2013-03-31,,,,,,
6,2013-06-30,23918K108,DAVITA HEALTHCARE PARTNERS I,1808848,14973906,2013-03-31,,,,,,
7,2013-06-30,244199105,DEERE & CO,323275,3978767,2013-03-31,,,,,,
8,2013-06-30,25470M109,DISH NETWORK CORP,23272,547312,2013-03-31,,,,,,
9,2013-06-30,25490A309,DIRECTV,2297655,37275400,2013-03-31,,,,,,


In [84]:
# Keep the current-quarter columns plus the computed changes, and drop the
# `_c` suffix so the column names line up with the target table.
finaldf = joindf[
    ["file_date_c", "cusip", "name", "mval_c", "shares_c", "cmval", "cshares"]
].rename(
    columns={"file_date_c": "file_date", "mval_c": "mval", "shares_c": "shares"}
)
finaldf.head()

Unnamed: 0,file_date,cusip,name,mval,shares,cmval,cshares
0,2013-06-30,025816109,AMERICAN EXPRESS CO,11334417,151610700,,
1,2013-06-30,064058100,BANK OF NEW YORK MELLON CORP,691264,24644029,,
2,2013-06-30,167250109,CHICAGO BRIDGE & IRON CO N V,569798,9550755,,
3,2013-06-30,191216100,COCA COLA CO,16044001,400000000,,
4,2013-06-30,20825C104,CONOCOPHILLIPS,1459497,24123911,,


In [86]:
class Processed_Positions(Base):
    """Declarative model for the `processed_positions` table.

    One row per (file_date, cusip, name) holding the market value and share
    count together with their change versus the prior quarter.
    """

    __tablename__ = "processed_positions"
    # Allow this definition to extend a table of the same name that is
    # already registered on the metadata instead of raising.
    __table_args__ = {"extend_existing": True}

    # Surrogate key
    id = Column(Integer, primary_key=True)

    # Identifying columns
    file_date = Column(Text)
    name = Column(Text)
    cusip = Column(Text)

    # Market value and its change
    mval = Column(Integer)
    cmval = Column(Integer)

    # Share count and its change
    shares = Column(Integer)
    cshares = Column(Integer)

    def __repr__(self):
        """Return a comma-separated `field=value` summary of the row."""
        return f"file_date={self.file_date}, name={self.name}, cusip={self.cusip}, mval={self.mval}, shares={self.shares}, cmval={self.cmval}, cshares={self.cshares}"
    

  item.__name__


In [70]:
# Use `create_all` to create the tables declared on `Base` in the database
# (here, the `processed_positions` table defined above).
Base.metadata.create_all(engine)

In [71]:
# Use MetaData from SQLAlchemy, bound to the engine, to reflect the tables
# that now exist in the database.
metadata = MetaData(bind=engine)
metadata.reflect()

In [87]:
# Save the reference to the `processed_positions` table as a variable called `table`,
# looked up on the engine-bound `metadata`.
table = sqlalchemy.Table('processed_positions', metadata, autoload=True)

In [88]:
# Use `table.delete()` (no WHERE clause, so every row) to remove any existing
# data before the fresh insert.
conn.execute(table.delete())

<sqlalchemy.engine.result.ResultProxy at 0x20c56573a20>

In [89]:
# Convert the frame to a list of per-row dicts and insert them in one call.
# NOTE(review): rows without a prior quarter carry NaN in cmval/cshares; the
# query output below shows them read back as None — confirm this is the
# intended representation if a backend other than SQLite is ever used.
data = finaldf.to_dict(orient='records')
conn.execute(table.insert(), data)

<sqlalchemy.engine.result.ResultProxy at 0x20c56586908>

In [90]:
# Read back the first 50 stored rows to check the insert.
conn.execute("select * from processed_positions limit 50").fetchall()

[(1, '2013-06-30', 'AMERICAN EXPRESS CO', '025816109', 11334417, None, 151610700, None),
 (2, '2013-06-30', 'BANK OF NEW YORK MELLON CORP', '064058100', 691264, None, 24644029, None),
 (3, '2013-06-30', 'CHICAGO BRIDGE & IRON CO N V', '167250109', 569798, None, 9550755, None),
 (4, '2013-06-30', 'COCA COLA CO', '191216100', 16044001, None, 400000000, None),
 (5, '2013-06-30', 'CONOCOPHILLIPS', '20825C104', 1459497, None, 24123911, None),
 (6, '2013-06-30', 'COSTCO WHSL CORP NEW', '22160K105', 479140, None, 4333363, None),
 (7, '2013-06-30', 'DAVITA HEALTHCARE PARTNERS I', '23918K108', 1808848, None, 14973906, None),
 (8, '2013-06-30', 'DEERE & CO', '244199105', 323275, None, 3978767, None),
 (9, '2013-06-30', 'DISH NETWORK CORP', '25470M109', 23272, None, 547312, None),
 (10, '2013-06-30', 'DIRECTV', '25490A309', 2297655, None, 37275400, None),
 (11, '2013-06-30', 'GENERAL ELECTRIC CO', '369604103', 13657, None, 588900, None),
 (12, '2013-06-30', 'GENERAL MTRS CO', '37045V100', 1332400