# Size-Value-Momentum/Investment Sorts

In [86]:
import os

import jinja2
import pandas as pd

pd.__version__

'0.22.0'

In [101]:
# Directory that holds the Jinja2 table template and receives the rendered .tex files.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
path = "C:/Users/samth/Dropbox/Thesis/Tex/Tables/"

# Non-default delimiters: -% ... %- for blocks and =% ... %= for variables
# (presumably chosen so template syntax does not clash with LaTeX braces — confirm).
env = jinja2.Environment(
    block_start_string='-%', block_end_string='%-',
    variable_start_string='=%', variable_end_string='%=',
    loader=jinja2.FileSystemLoader(path)
)

# Template rendered by the table-writing cells at the end of the notebook.
template = env.get_template("_K_I_J_chars_template.txt")

In [2]:
def assign_bkts(df, var, brkpts, prefix=None, suffix="Bkt", open_top=False):
    """
    Assign each row of `df` to a bucket of `var` using per-row breakpoint columns.

    The breakpoint values must already be columns of `df`, named exactly as the
    entries of `brkpts` (given in ascending order). Bucket 1 holds rows with
    `var <= brkpts[0]`; bucket k (k > 1) holds rows with
    `brkpts[k-2] < var <= brkpts[k-1]`. Buckets are numbered 1..len(brkpts) and
    stored as floats so that unassigned rows can be NaN.

    Parameters
    ----------
    df : DataFrame
        Modified in place (the bucket column is added) and also returned.
    var : str
        Column to be bucketed.
    brkpts : sequence
        Names of the breakpoint columns, lowest first.
    prefix : str, optional
        Prefix of the output column name; defaults to `var`.
    suffix : str
        Suffix of the output column name (default "Bkt").
    open_top : bool
        If False (default, legacy behaviour) the top bucket is bounded above by
        the last breakpoint, so rows with `var` above it stay NaN. This matters
        when breakpoints come from a subsample (e.g. one exchange) whose maximum
        is below some rows of the full sample. If True, the top bucket takes
        every row above the second-to-last breakpoint, provided the last
        breakpoint itself is not missing.

    Returns
    -------
    DataFrame
        The same `df` object with the new column `prefix + suffix`.
    """

    if not prefix:
        prefix = var
    varbkt = prefix + suffix

    # float("nan") rather than pd.np.NaN: the pd.np alias is deprecated/removed
    # in newer pandas releases.
    df[varbkt] = float("nan")

    top = len(brkpts) - 1
    for i, brkpt in enumerate(brkpts):  # index begins at 0
        # Lower edge: the bottom bucket is open below (NaN values never match).
        if i == 0:
            lower_ok = df[var].notna()
        else:
            lower_ok = df[var] > df[brkpts[i - 1]]

        # Upper edge: closed at the breakpoint unless this is an open top bucket.
        if open_top and i == top:
            upper_ok = df[brkpt].notna()
        else:
            upper_ok = df[var] <= df[brkpt]

        df.loc[lower_ok & upper_ok, varbkt] = i + 1

    return df

In [3]:
%%time

# Load the combined stock-level panel; this is the slow step (see the wall time below).
# NOTE(review): absolute local path — consider a configurable data directory.
df = pd.read_csv("C:/Data/Thesis/Combined_Data.csv")



Wall time: 24 s


In [4]:
# Parse `date` into pandas datetimes; later cells merge on it and use the .dt accessor.
df["date"] = pd.to_datetime(df["date"])

In [5]:
# Pre-computed size buckets keyed by PERMNO and HP
# (the `ME_JunBkt` column used further down presumably comes from this file — confirm).
size_bkts = pd.read_csv("C:/Data/Thesis/Bkts_Jun_ME_2.csv")

# Left join keeps every row of the main panel, matched or not.
df = df.merge(size_bkts, on=["PERMNO", "HP"], how="left")

In [6]:
# ME breakpoints by date; read_csv yields the percentile column names as strings.
ME_brk = pd.read_csv("C:/Data/Thesis/Brks_Months_ME.csv")

pcts = [.5, 1.]

# Relabel the percentile columns with their float values so `pcts` can select them.
ME_brk = ME_brk.rename(columns={str(pct): pct for pct in pcts})
ME_brk["date"] = pd.to_datetime(ME_brk["date"], format="%m/%d/%Y")

In [7]:
# Sanity check: the two `date` columns must share a dtype before they are merged on.
df.date.dtype, ME_brk.date.dtype

(dtype('<M8[ns]'), dtype('<M8[ns]'))

In [8]:
# Attach each date's ME breakpoints to every row of the panel.
df = df.merge(ME_brk[["date",] + pcts], on="date", how="left")

# Bucket ME against those breakpoints (adds column `MEBkt`).
# NOTE(review): assign_bkts leaves rows whose ME exceeds the last breakpoint column
# unbucketed (NaN) — confirm the 1.0 breakpoint really is the full-sample maximum.
df = assign_bkts(df, "ME", pcts)

In [9]:
# Previous row's size bucket within each PERMNO.
# NOTE(review): shift(1) is positional — assumes rows are ordered by date within
# PERMNO with no gaps; confirm against how Combined_Data.csv is sorted.
df["L1_MEBkt"] = df.groupby("PERMNO")["MEBkt"].shift(1)
# The breakpoint columns are no longer needed once buckets are assigned.
df = df.drop(pcts, axis=1)

In [10]:
# Guard against a failed merge/bucketing: at least one row must have a bucket.
assert df.MEBkt.isna().sum() != df.shape[0]

In [11]:
# Quantile levels for the characteristic sorts below (rebinds the earlier two-way `pcts`).
pcts = [.25, .5, .75, 1.]

In [12]:
# Per-PERMNO, per-month Var_ret / N_good values (presumably return variances — confirm).
var = pd.read_stata("C:/Data/Thesis/Var_monthly.dta")

# Align the key name with the main panel.
var = var.rename(columns={"permno": "PERMNO"})

# Calendar year / month keys for the merge below.
df["yr"] = df.date.dt.year
df["mnth"] = df.date.dt.month

df = df.merge(var[["PERMNO", "yr", "mnth", "Var_ret", "N_good"]], on=["PERMNO", "yr", "mnth"], how="left")

# Previous row's Var_ret within each PERMNO.
# NOTE(review): positional shift — assumes rows are date-ordered within PERMNO; confirm.
df["L1_Var_ret"] = df.groupby("PERMNO")["Var_ret"].shift(1)

In [29]:
# Next row's Inv within each PERMNO.
# NOTE(review): shift(-1) moves one row, not one year — if Inv only changes annually
# this mostly repeats the current value; confirm the intended horizon.
df["F1_Inv"] = df.groupby("PERMNO")["Inv"].shift(-1)

In [31]:
# Inspect the columns available after all merges.
df.columns

Index(['Unnamed: 0', 'PERMNO', 'date', 'EXCHCD', 'SICCD', 'TICKER', 'DCLRDT',
       'DLSTCD', 'DISTCD', 'FACPR', 'FACSHR', 'DLRET', 'PRC', 'RET', 'SHROUT',
       'CFACPR', 'CFACSHR', 'HP', 'ME', 'ME_Jun', 'ME_Dec', 'Ri', 'L1_Ri',
       'RiFctr', 'Size', 'Prior', 'L1_ME', 'PriorOK', 'BE', 'BE_Growth_Future',
       'OP06', 'OP06OK', 'OP16', 'ACC', 'CP', 'Inv', 'BM', 'BMOK', 'BMm',
       'BMmOK', 'ME_JunBkt', 'MEBkt', 'L1_MEBkt', 'yr', 'mnth', 'N_good',
       'Var_ret', 'L1_Var_ret', 'F1_Inv'],
      dtype='object')

# Small

In [33]:
# Small stocks: rows whose lagged ME bucket is 1.
sdf = df[df.L1_MEBkt==1]

# Quick look at the subset size and confirmation that `date` kept its datetime dtype.
sdf.shape, sdf.date.dtype, df.date.dtype

((2495873, 49), dtype('<M8[ns]'), dtype('<M8[ns]'))

## $BM^m$ Buckets

In [34]:
# Per-date BMm breakpoints, estimated only on exchange-code-1 rows flagged BMmOK.
brk_rows = (sdf.EXCHCD == 1) & (sdf.BMmOK)
brk = sdf.loc[brk_rows, ["date", "BMm"]].groupby("date").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names
brk = brk.reset_index("date")
brk["date"] = pd.to_datetime(brk["date"])

# Attach the breakpoints, bucket BMm, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk, on="date", how="left"), "BMm", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["BMmBkt"].isna().sum() != sdf.shape[0]

## Prior Buckets

In [35]:
# Per-date Prior breakpoints, estimated only on exchange-code-1 rows flagged PriorOK.
brk_rows = (sdf.EXCHCD == 1) & (sdf.PriorOK)
brk = sdf.loc[brk_rows, ["date", "Prior"]].groupby("date").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names
brk = brk.reset_index("date")
brk["date"] = pd.to_datetime(brk["date"])

# Attach the breakpoints, bucket Prior, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk, on="date", how="left"), "Prior", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["PriorBkt"].isna().sum() != sdf.shape[0]

## Size-Value-Momentum

In [36]:
# Portfolio keys and eligibility mask: the weight (L1_ME) must be present and both
# sort variables must be flagged usable.
grp, ix = ["date", "BMmBkt", "PriorBkt"], ((~sdf.L1_ME.isna()) & (sdf.BMmOK) & (sdf.PriorOK))

In [37]:
# Total L1_ME of each (date, BMm bucket, Prior bucket) portfolio over eligible rows;
# rows outside `ix` receive NaN through index alignment.
sdf["BktSize"] = sdf[ix].groupby(grp)["L1_ME"].transform("sum")

# NOTE(review): BktSize sums the weights of every eligible row, while the weighted sum
# below skips rows where `col` is NaN, so the weights need not add up to 1 for
# characteristics with missing values — confirm this is intended.
for col in ["RET", "BMm", "BM", "Prior", "Var_ret", "L1_Var_ret", "Inv", "F1_Inv", "BE_Growth_Future"]:
    print(col, end=" ")
    # Each row's contribution to its portfolio's L1_ME-weighted average of `col`.
    sdf["Wt"+col] = sdf[col] * sdf.L1_ME / sdf.BktSize
    bkt = sdf[ix].groupby(grp)["Wt"+col].sum()
    bkt = bkt.reset_index(grp[1:])
    # Portfolio label: "S" followed by the two bucket numbers, e.g. "S14".
    bkt[grp[1]+grp[2]] = "S" + bkt[grp[1]].astype("int").astype("str") + bkt[grp[2]].astype("int").astype("str")
    # Wide layout: one row per date, one column per portfolio label.
    bkt = bkt.pivot(columns=grp[1]+grp[2], values="Wt"+col).reset_index("date")
    bkt.to_csv("C:/Data/Thesis/Size_BMm_Prior_S_{}.csv".format(col))
print()

RET BMm BM Prior Var_ret L1_Var_ret Inv F1_Inv BE_Growth_Future 


---

BM and Inv are annually rebalanced.

In [38]:
# Small stocks for the annually rebalanced sorts: rows whose ME_JunBkt is 1.
sdf = df[df.ME_JunBkt==1]

sdf.shape

(2370047, 49)

## $BM$ Buckets

In [39]:
# Per-HP BM breakpoints from July rows with exchange code 1 that are flagged BMOK.
brk_rows = (sdf.EXCHCD == 1) & (sdf.date.dt.month == 7) & (sdf.BMOK)
brk = sdf.loc[brk_rows, ["HP", "BM"]].groupby("HP").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names

# Attach the breakpoints by HP, bucket BM, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk.reset_index("HP"), on="HP", how="left"), "BM", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["BMBkt"].isna().sum() != sdf.shape[0]

## Inv Buckets

In [40]:
# Per-HP Inv breakpoints from July rows with exchange code 1 and a non-missing Inv.
brk_rows = (sdf.EXCHCD == 1) & (sdf.date.dt.month == 7) & (~sdf.Inv.isna())
brk = sdf.loc[brk_rows, ["HP", "Inv"]].groupby("HP").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names

# Attach the breakpoints by HP, bucket Inv, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk.reset_index("HP"), on="HP", how="left"), "Inv", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["InvBkt"].isna().sum() != sdf.shape[0]

## Size-Value-Investment

In [41]:
# Portfolio keys and eligibility mask: the weight (Size) and Inv must be present and
# BM must be flagged usable.
grp, ix = ["date", "BMBkt", "InvBkt"], ((~sdf.Size.isna()) & (sdf.BMOK) & (~sdf.Inv.isna()))

# Total Size of each (date, BM bucket, Inv bucket) portfolio over eligible rows.
sdf["BktSize"] = sdf[ix].groupby(grp)["Size"].transform("sum")

# NOTE(review): BktSize sums the weights of every eligible row, while the weighted sum
# below skips rows where `col` is NaN, so the weights need not add up to 1 for
# characteristics with missing values — confirm this is intended.
for col in ["RET", "BMm", "BM", "Prior", "Var_ret", "L1_Var_ret", "Inv", "F1_Inv", "BE_Growth_Future"]:
    print(col, end=" ")
    # Each row's contribution to its portfolio's Size-weighted average of `col`.
    sdf["Wt"+col] = sdf[col] * sdf.Size / sdf.BktSize
    bkt = sdf[ix].groupby(grp)["Wt"+col].sum()
    bkt = bkt.reset_index(grp[1:])
    # Portfolio label: "S" followed by the two bucket numbers, e.g. "S14".
    bkt[grp[1]+grp[2]] = "S" + bkt[grp[1]].astype("int").astype("str") + bkt[grp[2]].astype("int").astype("str")
    # Wide layout: one row per date, one column per portfolio label.
    bkt = bkt.pivot(columns=grp[1]+grp[2], values="Wt"+col).reset_index("date")
    bkt.to_csv("C:/Data/Thesis/Size_BM_Inv_S_{}.csv".format(col))
print()

RET BMm BM Prior Var_ret L1_Var_ret Inv F1_Inv BE_Growth_Future 


# Big

In [42]:
# Big stocks: rows whose lagged ME bucket is 2.
sdf = df[df.L1_MEBkt==2]

sdf.shape

(597052, 49)

## $BM^m$ Buckets

In [43]:
# Per-date BMm breakpoints, estimated only on exchange-code-1 rows flagged BMmOK.
brk_rows = (sdf.EXCHCD == 1) & (sdf.BMmOK)
brk = sdf.loc[brk_rows, ["date", "BMm"]].groupby("date").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names
brk = brk.reset_index("date")
brk["date"] = pd.to_datetime(brk["date"])

# Attach the breakpoints, bucket BMm, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk, on="date", how="left"), "BMm", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["BMmBkt"].isna().sum() != sdf.shape[0]

## Prior Buckets

In [44]:
# Per-date Prior breakpoints, estimated only on exchange-code-1 rows flagged PriorOK.
brk_rows = (sdf.EXCHCD == 1) & (sdf.PriorOK)
brk = sdf.loc[brk_rows, ["date", "Prior"]].groupby("date").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names
brk = brk.reset_index("date")
brk["date"] = pd.to_datetime(brk["date"])

# Attach the breakpoints, bucket Prior, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk, on="date", how="left"), "Prior", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["PriorBkt"].isna().sum() != sdf.shape[0]

## Size-Value-Momentum

In [45]:
# Portfolio keys and eligibility mask. The mask tests L1_ME — the weight actually used
# below, and the same condition as the Small portfolios — rather than L1_MEBkt, which
# is never missing here because `sdf` was filtered on L1_MEBkt == 2.
grp, ix = ["date", "BMmBkt", "PriorBkt"], ((~sdf.L1_ME.isna()) & (sdf.BMmOK) & (sdf.PriorOK))

# Total L1_ME of each (date, BMm bucket, Prior bucket) portfolio over eligible rows;
# rows outside `ix` receive NaN through index alignment.
sdf["BktSize"] = sdf[ix].groupby(grp)["L1_ME"].transform("sum")

# NOTE(review): BktSize sums the weights of every eligible row, while the weighted sum
# below skips rows where `col` is NaN, so the weights need not add up to 1 for
# characteristics with missing values — confirm this is intended.
for col in ["RET", "BMm", "BM", "Prior", "Var_ret", "L1_Var_ret", "Inv", "F1_Inv", "BE_Growth_Future"]:
    print(col, end=" ")
    # Each row's contribution to its portfolio's L1_ME-weighted average of `col`.
    sdf["Wt"+col] = sdf[col] * sdf.L1_ME / sdf.BktSize
    bkt = sdf[ix].groupby(grp)["Wt"+col].sum()
    bkt = bkt.reset_index(grp[1:])
    # Portfolio label: "B" followed by the two bucket numbers, e.g. "B14".
    bkt[grp[1]+grp[2]] = "B" + bkt[grp[1]].astype("int").astype("str") + bkt[grp[2]].astype("int").astype("str")
    # Wide layout: one row per date, one column per portfolio label.
    bkt = bkt.pivot(columns=grp[1]+grp[2], values="Wt"+col).reset_index("date")
    bkt.to_csv("C:/Data/Thesis/Size_BMm_Prior_B_{}.csv".format(col))
print()

RET BMm BM Prior Var_ret L1_Var_ret Inv F1_Inv BE_Growth_Future 


---

BM and Inv are annually rebalanced.

In [46]:
# Big stocks for the annually rebalanced sorts: rows whose ME_JunBkt is 2.
sdf = df[df.ME_JunBkt==2]

sdf.shape

(581661, 49)

## $BM$ Buckets

In [47]:
# Per-HP BM breakpoints from July rows with exchange code 1 that are flagged BMOK.
brk_rows = (sdf.EXCHCD == 1) & (sdf.date.dt.month == 7) & (sdf.BMOK)
brk = sdf.loc[brk_rows, ["HP", "BM"]].groupby("HP").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names

# Attach the breakpoints by HP, bucket BM, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk.reset_index("HP"), on="HP", how="left"), "BM", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["BMBkt"].isna().sum() != sdf.shape[0]

## Inv Buckets

In [48]:
# Per-HP Inv breakpoints from July rows with exchange code 1 and a non-missing Inv.
brk_rows = (sdf.EXCHCD == 1) & (sdf.date.dt.month == 7) & (~sdf.Inv.isna())
brk = sdf.loc[brk_rows, ["HP", "Inv"]].groupby("HP").quantile(pcts).unstack()
brk.columns = brk.columns.droplevel(0)  # keep only the quantile level as column names

# Attach the breakpoints by HP, bucket Inv, then discard the helper breakpoint columns.
sdf = assign_bkts(sdf.merge(brk.reset_index("HP"), on="HP", how="left"), "Inv", pcts)
sdf = sdf.drop(pcts, axis=1)

assert sdf["InvBkt"].isna().sum() != sdf.shape[0]

## Size-Value-Investment

In [49]:
# Portfolio keys and eligibility mask: the weight (Size) and Inv must be present and
# BM must be flagged usable.
grp, ix = ["date", "BMBkt", "InvBkt"], ((~sdf.Size.isna()) & (sdf.BMOK) & (~sdf.Inv.isna()))

# Total Size of each (date, BM bucket, Inv bucket) portfolio over eligible rows.
sdf["BktSize"] = sdf[ix].groupby(grp)["Size"].transform("sum")

# NOTE(review): BktSize sums the weights of every eligible row, while the weighted sum
# below skips rows where `col` is NaN, so the weights need not add up to 1 for
# characteristics with missing values — confirm this is intended.
for col in ["RET", "BMm", "BM", "Prior", "Var_ret", "L1_Var_ret", "Inv", "F1_Inv", "BE_Growth_Future"]:
    print(col, end=" ")
    # Each row's contribution to its portfolio's Size-weighted average of `col`.
    sdf["Wt"+col] = sdf[col] * sdf.Size / sdf.BktSize
    bkt = sdf[ix].groupby(grp)["Wt"+col].sum()
    bkt = bkt.reset_index(grp[1:])
    # Portfolio label: "B" followed by the two bucket numbers, e.g. "B14".
    bkt[grp[1]+grp[2]] = "B" + bkt[grp[1]].astype("int").astype("str") + bkt[grp[2]].astype("int").astype("str")
    # Wide layout: one row per date, one column per portfolio label.
    bkt = bkt.pivot(columns=grp[1]+grp[2], values="Wt"+col).reset_index("date")
    bkt.to_csv("C:/Data/Thesis/Size_BM_Inv_B_{}.csv".format(col))
print()

RET BMm BM Prior Var_ret L1_Var_ret Inv F1_Inv BE_Growth_Future 


# Combine Small and Big

In [50]:
from collections import OrderedDict

In [103]:
prefix = "Size_BMm_Prior"

# characteristic -> {"coef": (2, 4, 4) array of time-series means, "display": LaTeX label}
BMm_Prior = OrderedDict()

# NOTE(review): L1_Var_ret is built as a lag (shift(1)) yet is labelled "Future Var",
# while Var_ret is labelled "Var" — confirm these two labels are not swapped.
for col, display in zip(
    ["RET", "BMm", "BM", "Prior", "Var_ret", "L1_Var_ret", "Inv", "F1_Inv", "BE_Growth_Future"],
    [r"$\text{R}^i$", r"$\text{BM}^m$", r"BM", r"Prior", r"Var", r"Future Var", r"Inv",
     r"Future Inv", r"Future $\Delta\text{BE}$"]):
    BMm_Prior[col] = {"coef": None, "display": display}
    # Return and variance columns are scaled by 100 (presumably to percent).
    multiplier = 100 if col in ["RET", "Var_ret", "L1_Var_ret"] else 1

    # .iloc[:, 1:] drops the leading index column that to_csv wrote out.
    small = pd.read_csv("C:/Data/Thesis/{}_S_{}.csv".format(prefix, col)).iloc[:, 1:]
    small.date = pd.to_datetime(small.date)

    big = pd.read_csv("C:/Data/Thesis/{}_B_{}.csv".format(prefix, col)).iloc[:, 1:]
    big.date = pd.to_datetime(big.date)

    # `date` is the only column the S* and B* frames share; name the key explicitly
    # instead of relying on merge's implicit common-column detection.
    x = small.merge(big, on="date", how="left").set_index("date")["1963-07":"2017-12"]
    # Plain float inf/nan instead of the pd.np alias, which newer pandas no longer provides.
    x = x.replace([float("inf"), -float("inf")], float("nan")).mean() * multiplier
    # 32 portfolio means -> (size, first sort, second sort); the transpose puts the
    # second sort variable on rows and the first on columns.
    BMm_Prior[col]["coef"] = x.round(2).values.reshape([2, 4, 4]).transpose((0, 2, 1))

In [104]:
label = "{}_chars".format(prefix)
fname = "{}_tbl.tex".format(label)

# Template context for the BM^m x Prior table. "coefs" must be BMm_Prior, the dict built
# for this prefix; the previous BM_Inv reference belonged to the other table and is not
# even defined at this point on a fresh kernel.
# NOTE(review): the caption is still a placeholder.
context = {"coefs": BMm_Prior, "K": 2, "I":4, "J": 4,
           "caption": "Barnacles", "label": label, "font_size": r"\scriptsize",
           "K_hdrs": ["Small", "Big"], "col_name": r"$\text{BM}^m$",
           "col_names": ["Low", "2", "3", "High"],
           "row_names": ["Low Prior", "2", "3", "High Prior"]}

# Write the rendered LaTeX table next to the template.
with open(os.path.join(path, fname), "w") as table:
    table.write(template.render(context))
print(fname)

Size_BMm_Prior_chars_tbl.tex


In [51]:
# NOTE(review): repeats the `prefix` assignment from the BMm_Prior table cell above;
# this and the next few cells look like an earlier draft of that cell.
prefix = "Size_BMm_Prior"

In [58]:
# NOTE(review): rebinding BMm_Prior here discards the dict built for the table above.
BMm_Prior = OrderedDict()

In [59]:
# Inspection variant of the BMm_Prior build: stores the bare (2, 4, 4) array of means
# per characteristic (no "display" label and no row/column transpose).
for col in ["RET", "BMm", "BM", "Prior", "Var_ret", "L1_Var_ret", "Inv", "F1_Inv", "BE_Growth_Future"]:
    # .iloc[:, 1:] drops the leading index column that to_csv wrote out.
    small = pd.read_csv("C:/Data/Thesis/{}_S_{}.csv".format(prefix, col)).iloc[:, 1:]
    small.date = pd.to_datetime(small.date)
    big = pd.read_csv("C:/Data/Thesis/{}_B_{}.csv".format(prefix, col)).iloc[:, 1:]
    big.date = pd.to_datetime(big.date)
    # `date` is the only shared column; name the merge key explicitly.
    x = small.merge(big, on="date", how="left").set_index("date")["1963-07":"2017-12"]
    # Return and variance columns are scaled by 100 (presumably to percent).
    multiplier = 100 if col in ["RET", "Var_ret", "L1_Var_ret"] else 1
    # Plain float inf/nan instead of the pd.np alias, which newer pandas no longer provides.
    x = x.replace([float("inf"), -float("inf")], float("nan")).mean() * multiplier
    BMm_Prior[col] = x.round(2).values.reshape([2, 4, 4])

In [60]:
# Display the collected means for inspection.
BMm_Prior

OrderedDict([('RET', array([[[0.09, 0.69, 0.96, 1.47],
                      [0.76, 1.07, 1.26, 1.58],
                      [0.98, 1.24, 1.55, 1.78],
                      [1.01, 1.51, 1.71, 2.04]],
              
                     [[0.55, 0.64, 0.9 , 1.21],
                      [0.75, 0.82, 0.87, 1.11],
                      [0.95, 0.9 , 0.99, 1.14],
                      [0.96, 1.1 , 1.23, 1.42]]])),
             ('BMm', array([[[0.34, 0.35, 0.34, 0.29],
                      [0.69, 0.69, 0.69, 0.68],
                      [1.04, 1.03, 1.02, 1.01],
                      [2.29, 1.99, 2.05, 2.12]],
              
                     [[0.23, 0.22, 0.21, 0.19],
                      [0.45, 0.44, 0.44, 0.44],
                      [0.69, 0.69, 0.69, 0.68],
                      [1.26, 1.16, 1.14, 1.16]]])),
             ('BM', array([[[0.28, 0.36, 0.41, 0.49],
                      [0.55, 0.69, 0.8 , 1.02],
                      [0.8 , 1.  , 1.14, 1.46],
                      [1.48,

In [103]:
prefix = "Size_BM_Inv"

# characteristic -> {"coef": (2, 4, 4) array of time-series means, "display": LaTeX label}
BM_Inv = OrderedDict()

# NOTE(review): L1_Var_ret is built as a lag (shift(1)) yet is labelled "Future Var",
# while Var_ret is labelled "Var" — confirm these two labels are not swapped.
for col, display in zip(
    ["RET", "BMm", "BM", "Prior", "Var_ret", "L1_Var_ret", "Inv", "F1_Inv", "BE_Growth_Future"],
    [r"$\text{R}^i$", r"$\text{BM}^m$", r"BM", r"Prior", r"Var", r"Future Var", r"Inv",
     r"Future Inv", r"Future $\Delta\text{BE}$"]):
    BM_Inv[col] = {"coef": None, "display": display}
    # Return and variance columns are scaled by 100 (presumably to percent).
    multiplier = 100 if col in ["RET", "Var_ret", "L1_Var_ret"] else 1

    # .iloc[:, 1:] drops the leading index column that to_csv wrote out.
    small = pd.read_csv("C:/Data/Thesis/{}_S_{}.csv".format(prefix, col)).iloc[:, 1:]
    small.date = pd.to_datetime(small.date)

    big = pd.read_csv("C:/Data/Thesis/{}_B_{}.csv".format(prefix, col)).iloc[:, 1:]
    big.date = pd.to_datetime(big.date)

    # `date` is the only column the S* and B* frames share; name the key explicitly
    # instead of relying on merge's implicit common-column detection.
    x = small.merge(big, on="date", how="left").set_index("date")["1963-07":"2017-12"]
    # Plain float inf/nan instead of the pd.np alias, which newer pandas no longer provides.
    x = x.replace([float("inf"), -float("inf")], float("nan")).mean() * multiplier
    # 32 portfolio means -> (size, first sort, second sort); the transpose puts the
    # second sort variable on rows and the first on columns.
    BM_Inv[col]["coef"] = x.round(2).values.reshape([2, 4, 4]).transpose((0, 2, 1))

In [104]:
# Output names derive from the current prefix.
label = prefix + "_chars"
fname = label + "_tbl.tex"

# Template context for the BM x Inv characteristics table (2 size panels of 4 x 4).
context = dict(
    coefs=BM_Inv,
    K=2, I=4, J=4,
    caption="Barnacles",
    label=label,
    font_size=r"\scriptsize",
    K_hdrs=["Small", "Big"],
    col_name=r"BM",
    col_names=["Low", "2", "3", "High"],
    row_names=["Low Inv", "2", "3", "High Inv"],
)

# Render first, then write the LaTeX next to the template.
rendered = template.render(context)
with open(os.path.join(path, fname), "w") as table:
    table.write(rendered)
print(fname)

Size_BM_Inv_chars_tbl.tex
