<a target="_blank" href="https://colab.research.google.com/github/kerryback/mgmt638/blob/main/notebooks/05_fundamentals.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

### Factors

- Value 
- Momentum
- Quality
  - Profitability
  - Low accruals
  - Low asset growth
  - Low default probability
- Volatility (low vol and/or low idiosyncratic vol)
- Liquidity (high volume)

### Data

- closeadj, closeunadj, volume from sep_weekly
- marketcap, pb from weekly
- netinc, equity, assets, ncfo from sf1 where dimension="ARQ"



### Financial Statement Variables

- Use trailing 4 quarters:
  - netinc, ncfo = sum of the most recent 4 quarters (current quarter included)
  - equity, assets = average of the most recent 4 quarters (current quarter included)
- Variables:
  - roe = netinc / equity
  - accruals = (netinc - ncfo) / equity
  - agr = % change from the prior quarter in the trailing 4-quarter average of assets

## Create connection

In [53]:
import os
from urllib.parse import quote_plus

import pandas as pd
import pymssql
from sqlalchemy import create_engine
# Connection settings for the stocks database. Each value can be overridden
# with an environment variable so credentials do not have to be edited into
# (or committed with) the notebook; the defaults are the values that were
# previously hard-coded here, so a plain Run All behaves as before.
server = os.environ.get("STOCKS_DB_SERVER", "fs.rice.edu")
database = os.environ.get("STOCKS_DB_NAME", "stocks")
username = os.environ.get("STOCKS_DB_USER", "stocks")
password = os.environ.get("STOCKS_DB_PASSWORD", "6LAZH1")

# URL-escape the credentials so characters such as "@", ":" or "/" in an
# overridden value cannot corrupt the connection URL. For the default values
# the resulting string is identical to the original concatenation.
string = (
    "mssql+pymssql://"
    + quote_plus(username)
    + ":"
    + quote_plus(password)
    + "@"
    + server
    + "/"
    + database
)

# Open one connection that every read_sql call in this notebook reuses.
conn = create_engine(string).connect()

## Calculate financial ratios and growth rates

Data from SF1

In [54]:
# Pull the dimension='ARQ' rows of sf1 from 2009 onward, restricted to positive
# equity and assets so the ratios computed later have positive denominators.
sf1 = pd.read_sql(
    """ 
    select ticker, datekey, lastupdated, netinc, ncfo, equity, assets 
    from sf1
    where dimension='ARQ' and datekey>='2009-01-01' and equity>0 and assets>0
    order by ticker, datekey
    """,
    conn,
    parse_dates=["datekey"]
)

# Collapse exact (ticker, datekey, lastupdated) duplicates. groupby sorts by its
# keys, so rows sharing a (ticker, datekey) come out ordered by lastupdated.
sf1 = sf1.groupby(["ticker", "datekey", "lastupdated"]).last()
sf1 = sf1.droplevel("lastupdated")

# Grouping on all three keys leaves one row per distinct lastupdated value, so
# a (ticker, datekey) that was updated more than once would still appear
# several times and be counted as several quarters by the rolling windows
# downstream. Keep only the most recently updated row for each pair.
sf1 = sf1[~sf1.index.duplicated(keep="last")]
sf1 = sf1.reset_index()

In [55]:
# Replace each raw quarterly item with its trailing four-row value per ticker:
# netinc and ncfo are summed, equity and assets are averaged. The window counts
# rows, so the first three rows of every ticker become NaN.
trailing_agg = {"netinc": "sum", "ncfo": "sum", "equity": "mean", "assets": "mean"}
for name, how in trailing_agg.items():
    per_ticker = sf1.groupby("ticker", group_keys=False)[name]
    sf1[name] = per_ticker.apply(lambda s, how=how: getattr(s.rolling(4), how)())

# Ratios built from the trailing values computed above.
trailing_equity = sf1["equity"]
sf1["roe"] = sf1["netinc"] / trailing_equity
sf1["accruals"] = (sf1["netinc"] - sf1["ncfo"]) / trailing_equity

# Asset growth: row-over-row percentage change in trailing-average assets,
# computed within each ticker.
sf1["agr"] = sf1.groupby("ticker", group_keys=False)["assets"].pct_change()

# Keep only the identifiers and finished ratios, dropping rows where any ratio
# is still undefined (the warm-up rows at the start of each ticker).
ratio_columns = ["ticker", "datekey", "roe", "accruals", "agr"]
sf1 = sf1.loc[:, ratio_columns].dropna()

In [56]:
# Preview the first rows of the ratio table (ticker, datekey, roe, accruals, agr).
sf1.head()

Unnamed: 0,ticker,datekey,roe,accruals,agr
4,A,2010-03-10,-0.006397,-0.17473,0.014755
5,A,2010-06-07,0.07562,-0.123812,0.042033
6,A,2010-09-07,0.158525,-0.053602,0.085586
7,A,2010-12-20,0.24333,-0.012095,0.065017
8,A,2011-03-09,0.266311,-0.003337,0.013768


## Returns, volume, momentum, volatility

Data from sep_weekly

In [57]:
# Pull weekly volume and adjusted/unadjusted closes from sep_weekly for 2010 onward.
sep_weekly = pd.read_sql(
    """ 
    select ticker, date, volume, closeadj, closeunadj, lastupdated 
    from sep_weekly 
    where date>='2010-01-01'
    order by ticker, date, lastupdated    
    """,
    conn,
    parse_dates=["date"]
)

# Collapse exact (ticker, date, lastupdated) duplicates. groupby sorts by its
# keys, so rows sharing a (ticker, date) come out ordered by lastupdated.
sep_weekly = sep_weekly.groupby(["ticker", "date", "lastupdated"]).last()
sep_weekly = sep_weekly.droplevel("lastupdated")

# Grouping on all three keys leaves one row per distinct lastupdated value, so
# a (ticker, date) that was updated more than once would still appear several
# times and distort the row-based returns and rolling windows computed next.
# Keep only the most recently updated row; the (ticker, date) index is kept.
sep_weekly = sep_weekly[~sep_weekly.index.duplicated(keep="last")]

In [58]:
# All price changes are computed within each ticker from the adjusted close.
# Periods count rows, so 52 and 4 mean 52 and 4 weekly observations back.
closeadj_by_ticker = sep_weekly.groupby("ticker", group_keys=False)["closeadj"]
annual = closeadj_by_ticker.pct_change(52)
monthly = closeadj_by_ticker.pct_change(4)
sep_weekly["ret"] = closeadj_by_ticker.pct_change()

# Momentum: (1 + 52-row change) / (1 + 4-row change) - 1, which equals the
# price change from 52 rows ago to 4 rows ago (the latest 4 rows are skipped).
# Both inputs are already per-ticker and aligned on the frame's index, so plain
# vectorized arithmetic gives the same values as a per-ticker groupby.apply
# without running a Python lambda for every ticker.
sep_weekly["mom"] = (1 + annual) / (1 + monthly) - 1

# Volatility: standard deviation of the weekly return over the trailing 26 rows.
sep_weekly["volatility"] = sep_weekly.groupby("ticker", group_keys=False)["ret"].apply(
    lambda x: x.rolling(26).std()
)

# Keep the finished features and turn the (ticker, date) index back into columns.
sep_weekly = sep_weekly[["ret", "mom", "volume", "volatility", "closeunadj"]]
sep_weekly = sep_weekly.reset_index()

## Get marketcap and pb

Data from weekly

In [59]:
# Pull weekly market cap and price-to-book from the weekly table for 2010
# onward, restricted to strictly positive values of both.
weekly = pd.read_sql(
    """ 
    select ticker, date, marketcap, pb, lastupdated
    from weekly
    where date>='2010-01-01' and marketcap>0 and pb>0
    order by ticker, date, lastupdated
    """,
    conn,
    parse_dates=["date"]
)

# Collapse exact (ticker, date, lastupdated) duplicates. groupby sorts by its
# keys, so rows sharing a (ticker, date) come out ordered by lastupdated.
weekly = weekly.groupby(["ticker", "date", "lastupdated"]).last()
weekly = weekly.droplevel("lastupdated")

# Grouping on all three keys leaves one row per distinct lastupdated value, so
# a (ticker, date) that was updated more than once would still appear several
# times and multiply rows in the merge that follows. Keep only the most
# recently updated row for each pair.
weekly = weekly[~weekly.index.duplicated(keep="last")]
weekly = weekly.reset_index()

## Merge 

In [60]:
# Combine valuation data with the price-based features for each (ticker, date);
# the inner join keeps only weeks present in both tables.
df = weekly.merge(sep_weekly, on=["ticker", "date"], how="inner")

# Tag both frames with ISO year and week so each sf1 row can be matched to the
# weekly row that falls in the same ISO week as its datekey. The vectorized
# .dt.isocalendar() accessor replaces a per-row Python lambda.
iso_weekly = df["date"].dt.isocalendar()
df["year"] = iso_weekly["year"]
df["week"] = iso_weekly["week"]
iso_filing = sf1["datekey"].dt.isocalendar()
sf1["year"] = iso_filing["year"]
sf1["week"] = iso_filing["week"]

# A ticker can have more than one datekey inside a single ISO week; a plain
# left merge would then duplicate that ticker's weekly row. Keep only the row
# with the latest datekey per (ticker, year, week) before merging.
sf1_by_week = sf1.sort_values(["ticker", "datekey"]).drop_duplicates(
    subset=["ticker", "year", "week"], keep="last"
)

# validate raises if the right-hand keys are ever non-unique, so row
# duplication cannot creep back in silently.
df = df.merge(
    sf1_by_week,
    on=["ticker", "year", "week"],
    how="left",
    validate="many_to_one",
)
df = df.drop(columns=["year", "week", "datekey"])

## Fill ratios and growth rates forward

In [61]:
# The ratios are only populated on the weekly row matched to an sf1 row. Carry
# each ticker's latest values forward into the following rows; rows before a
# ticker's first matched value stay NaN.
for ratio in ("roe", "accruals", "agr"):
    df[ratio] = df.groupby("ticker")[ratio].ffill()

## Shift weekly features forward

In [62]:
# Lag the price- and valuation-based columns by one row within each ticker so
# that every row pairs its return with the previous row's values. The first
# row of each ticker becomes NaN in these columns.
# NOTE(review): roe, accruals and agr are not lagged here - confirm that is intended.
lagged_columns = ["pb", "mom", "volume", "volatility", "marketcap", "closeunadj"]
df[lagged_columns] = df.groupby("ticker")[lagged_columns].shift(1)

## Filter to small caps and exclude penny stocks

In [63]:
# Keep rows whose (lagged) unadjusted close is above 5 and that have no
# missing values in any column.
eligible = df.loc[df["closeunadj"] > 5].dropna()

# Rank the remaining stocks by (lagged) market cap within each date, largest
# first; method="first" breaks ties by row order so ranks are unique per date.
cap_rank = eligible.groupby("date")["marketcap"].rank(ascending=False, method="first")

# Keep ranks 1001 through 3000 and drop the price column, which was only
# needed for the filter above.
in_universe = cap_rank.gt(1000) & cap_rank.le(3000)
df = eligible.loc[in_universe].drop(columns=["closeunadj"])

In [65]:
# Order the final panel by date, then by ticker within each date.
df = df.sort_values(by=["date", "ticker"])