# Size-Prior Sorts

In [1]:
import os
import pandas as pd

pd.__version__

'0.22.0'

In [16]:
def assign_bkts(df, var, brkpts, prefix=None, suffix="Bkt"):
    """
    Assign each row of `df` to a bucket of `var` using per-row breakpoint columns.

    Every name in `brkpts` must already be a column of `df` holding that row's
    breakpoint value. `brkpts` is expected to be ordered from the lowest
    breakpoint to the highest.

    Bucket 1 holds rows with ``var <= brkpts[0]``; bucket ``k`` (k >= 2) holds rows
    with ``brkpts[k-2] < var <= brkpts[k-1]``. Rows matching no interval (missing
    `var`, missing breakpoints, or `var` above the last breakpoint) stay NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `var` and all breakpoint columns. It is modified in
        place: the bucket column is added (or overwritten).
    var : str
        Name of the column to bucket.
    brkpts : sequence
        Column labels of the breakpoints, in ascending order.
    prefix : str, optional
        Prefix of the new column's name; defaults to `var` when empty or None.
    suffix : str, default "Bkt"
        Suffix of the new column's name.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, with column ``prefix + suffix`` holding bucket
        numbers 1..len(brkpts). The column is float-valued because unassigned
        rows are NaN.
    """

    if not prefix:
        prefix = var
    varbkt = prefix + suffix

    # Built-in float NaN instead of `pd.np.NaN`: the `pandas.np` alias was
    # deprecated in pandas 1.0 and removed in 2.0.
    df[varbkt] = float("nan")

    for i, brkpt in enumerate(brkpts):  # i is 0-based; bucket numbers are i + 1
        in_bkt = df[var] <= df[brkpt]
        if i > 0:
            # Exclude rows that already belong to a lower bucket.
            in_bkt = in_bkt & (df[var] > df[brkpts[i - 1]])
        df.loc[in_bkt, varbkt] = i + 1

    return df

In [2]:
%%time

df = pd.read_csv("C:/Data/Thesis/Combined_Data.csv")



Wall time: 50.3 s


In [3]:
# Parse the `date` column so it can be used as a datetime merge / grouping key.
df["date"] = pd.to_datetime(df["date"])

In [32]:
# Read the monthly ME breakpoints and attach them to every row of `df` by date.
ME_brk = pd.read_csv("C:/Data/Thesis/Brks_Months_ME.csv")

# Breakpoint percentiles; these floats double as the breakpoint column labels.
pcts = [.2, .4, .6, .8, 1.]

# The CSV headers are read as strings ("0.2", ..., "1.0"); relabel them with the
# float values so `pcts` can be used directly to select the columns.
ME_brk = ME_brk.rename(columns={str(pct): pct for pct in pcts})

# Keep the key as native datetime64. The previous `.astype(pd.Timestamp)` is not a
# valid dtype cast: at best it degrades the column to object dtype, and later
# pandas versions raise "dtype ... not understood".
ME_brk["date"] = pd.to_datetime(ME_brk["date"], format="%m/%d/%Y")

# Left merge: rows of `df` without a matching breakpoint date get NaN breakpoints.
df = df.merge(ME_brk[["date"] + pcts], on="date", how="left")

In [34]:
# Bucket each row's ME against its month's breakpoints; adds the column "MEBkt".
df = assign_bkts(df, var="ME", brkpts=pcts)

In [36]:
# Previous row's ME bucket within each PERMNO.
# NOTE(review): shift(1) is positional, so this is "last month's bucket" only if rows
# are sorted by date within PERMNO and no months are missing — confirm upstream.
me_bkt_by_permno = df.groupby("PERMNO")["MEBkt"]
df["L1_MEBkt"] = me_bkt_by_permno.shift(1)

# The per-row breakpoint columns are no longer needed once buckets are assigned.
df = df.drop(columns=pcts)

In [18]:
# Read the precomputed Prior buckets and attach them to `df` by firm and date.
# Presumably this file supplies the `PriorBkt` / `PriorOK` columns used further
# down — verify against the CSV.
Prior_bkts = pd.read_csv("C:/Data/Thesis/Bkts_Prior_5.csv")

# Keep the key as native datetime64. The previous `.astype(pd.Timestamp)` is not a
# valid dtype cast: at best it degrades the column to object dtype, and later
# pandas versions raise "dtype ... not understood".
Prior_bkts["date"] = pd.to_datetime(Prior_bkts["date"])

# Left merge: rows of `df` without a match get NaN in the merged-in columns.
df = df.merge(Prior_bkts, on=["PERMNO", "date"], how="left")

In [None]:
# Keys of the two-way sort: month x lagged ME bucket x Prior bucket.
grp = ["date", "L1_MEBkt", "PriorBkt"]

# Rows kept in the sort: flagged PriorOK and with a non-missing lagged ME.
# NOTE(review): assumes `PriorOK` is a clean boolean column after the left merge — confirm.
ix = (df.PriorOK) & (df.L1_ME.notna())

# Total lagged ME of each bucket-month; rows outside `ix` are left NaN.
df["BktSize"] = df[ix].groupby(grp)["L1_ME"].transform("sum")

In [67]:
col = "RET"
wt_col = "Wt" + col

# Each row's contribution to its bucket's value-weighted mean:
# the value times (lagged ME / total lagged ME of the bucket-month).
df[wt_col] = df[col] * df.L1_ME / df.BktSize

# Sum the contributions per (date, size bucket, prior bucket), keeping date as the index.
bkt = df[ix].groupby(grp)[wt_col].sum().reset_index(grp[1:])

# Label every bucket pair "B<size><prior>" and spread the labels into columns.
size_key, prior_key = grp[1], grp[2]
label = size_key + prior_key
bkt[label] = "B" + bkt[size_key].astype("int").astype("str") + bkt[prior_key].astype("int").astype("str")
bkt = bkt.pivot(columns=label, values=wt_col)

# Only RET is scaled by 100 (presumably decimal returns -> percent; confirm).
if col == "RET":
    multiplier = 100
else:
    multiplier = 1

# Time-series mean per bucket as a 5x5 grid: rows = size bucket, columns = prior bucket
# (relies on all 25 labels B11..B55 being present, in sorted column order).
(bkt["1963-07":"2017-12"].mean()*multiplier).round(2).values.reshape([5, 5])  # Gross returns, not excess

array([[0.52, 1.02, 1.24, 1.38, 1.61],
       [0.6 , 1.01, 1.22, 1.32, 1.55],
       [0.73, 1.01, 1.09, 1.13, 1.48],
       [0.68, 0.99, 1.04, 1.16, 1.36],
       [0.65, 0.86, 0.79, 0.98, 1.17]])

In [62]:
col = "Prior"
wt_col = "Wt" + col

# Each row's contribution to its bucket's value-weighted mean of `col`.
# NOTE(review): rows where `col` is NaN drop out of the sum while BktSize still counts
# their L1_ME, so the weights may not sum to one — confirm this is intended.
df[wt_col] = df[col] * df.L1_ME / df.BktSize

# Sum the contributions per (date, size bucket, prior bucket), keeping date as the index.
bkt = df[ix].groupby(grp)[wt_col].sum().reset_index(grp[1:])

# Label every bucket pair "B<size><prior>" and spread the labels into columns.
size_key, prior_key = grp[1], grp[2]
label = size_key + prior_key
bkt[label] = "B" + bkt[size_key].astype("int").astype("str") + bkt[prior_key].astype("int").astype("str")
bkt = bkt.pivot(columns=label, values=wt_col)

# Only RET is scaled by 100; every other column is reported as-is.
if col == "RET":
    multiplier = 100
else:
    multiplier = 1

# Time-series mean of the value-weighted `Prior` per bucket as a 5x5 grid:
# rows = size bucket, columns = prior bucket (needs all 25 labels B11..B55, sorted).
(bkt["1963-07":"2017-12"].mean()*multiplier).round(2).values.reshape([5, 5])

array([[-0.38, -0.01,  0.12,  0.26,  0.68],
       [-0.3 , -0.01,  0.12,  0.26,  0.64],
       [-0.27, -0.01,  0.12,  0.26,  0.61],
       [-0.24, -0.01,  0.12,  0.26,  0.57],
       [-0.2 , -0.01,  0.12,  0.26,  0.51]])

In [60]:
col = "BMm"
wt_col = "Wt" + col

# Each row's contribution to its bucket's value-weighted mean of `col`.
# NOTE(review): rows where `col` is NaN drop out of the sum while BktSize still counts
# their L1_ME, so the weights may not sum to one — confirm this is intended.
df[wt_col] = df[col] * df.L1_ME / df.BktSize

# Sum the contributions per (date, size bucket, prior bucket), keeping date as the index.
bkt = df[ix].groupby(grp)[wt_col].sum().reset_index(grp[1:])

# Label every bucket pair "B<size><prior>" and spread the labels into columns.
size_key, prior_key = grp[1], grp[2]
label = size_key + prior_key
bkt[label] = "B" + bkt[size_key].astype("int").astype("str") + bkt[prior_key].astype("int").astype("str")
bkt = bkt.pivot(columns=label, values=wt_col)

# Only RET is scaled by 100; every other column is reported as-is.
if col == "RET":
    multiplier = 100
else:
    multiplier = 1

# Time-series mean of the value-weighted `BMm` per bucket as a 5x5 grid:
# rows = size bucket, columns = prior bucket (needs all 25 labels B11..B55, sorted).
(bkt["1963-07":"2017-12"].mean()*multiplier).round(2).values.reshape([5, 5])

array([[0.97, 0.84, 0.78, 0.69, 0.49],
       [0.82, 0.72, 0.67, 0.6 , 0.41],
       [0.8 , 0.7 , 0.65, 0.57, 0.4 ],
       [0.8 , 0.68, 0.63, 0.55, 0.41],
       [0.66, 0.59, 0.53, 0.48, 0.4 ]])

In [61]:
col = "BM"
wt_col = "Wt" + col

# Each row's contribution to its bucket's value-weighted mean of `col`.
# NOTE(review): rows where `col` is NaN drop out of the sum while BktSize still counts
# their L1_ME, so the weights may not sum to one — confirm this is intended.
df[wt_col] = df[col] * df.L1_ME / df.BktSize

# Sum the contributions per (date, size bucket, prior bucket), keeping date as the index.
bkt = df[ix].groupby(grp)[wt_col].sum().reset_index(grp[1:])

# Label every bucket pair "B<size><prior>" and spread the labels into columns.
size_key, prior_key = grp[1], grp[2]
label = size_key + prior_key
bkt[label] = "B" + bkt[size_key].astype("int").astype("str") + bkt[prior_key].astype("int").astype("str")
bkt = bkt.pivot(columns=label, values=wt_col)

# Only RET is scaled by 100; every other column is reported as-is.
if col == "RET":
    multiplier = 100
else:
    multiplier = 1

# Time-series mean of the value-weighted `BM` per bucket as a 5x5 grid:
# rows = size bucket, columns = prior bucket (needs all 25 labels B11..B55, sorted).
(bkt["1963-07":"2017-12"].mean()*multiplier).round(2).values.reshape([5, 5])

array([[0.65, 0.77, 0.81, 0.82, 0.78],
       [0.6 , 0.69, 0.71, 0.72, 0.64],
       [0.6 , 0.68, 0.71, 0.69, 0.62],
       [0.62, 0.66, 0.68, 0.66, 0.62],
       [0.54, 0.57, 0.58, 0.58, 0.58]])

In [64]:
col = "Inv"

# Each row's contribution to its bucket's value-weighted mean of `col`:
# the value times (lagged ME / total lagged ME of the bucket-month).
# NOTE(review): rows where `col` is NaN drop out of the sum while BktSize still counts
# their L1_ME, so the weights may not sum to one — confirm this is intended.
df["Wt"+col] = df[col] * df.L1_ME / df.BktSize

# Sum the contributions per (date, size bucket, prior bucket), keeping date as the index.
bkt = df[ix].groupby(grp)["Wt"+col].sum()
bkt = bkt.reset_index(grp[1:])

# Label every bucket pair "B<size><prior>" and spread the labels into columns.
bkt[grp[1]+grp[2]] = "B" + bkt[grp[1]].astype("int").astype("str") + bkt[grp[2]].astype("int").astype("str")
bkt = bkt.pivot(columns=grp[1]+grp[2], values="Wt"+col)

# Only RET is scaled by 100; every other column is reported as-is.
multiplier = 100 if col=="RET" else 1

# Bucket-months whose sum is +/-inf are treated as missing before averaging.
# Built-in floats replace `pd.np.inf` / `pd.np.nan`: the `pandas.np` alias was
# deprecated in pandas 1.0 and removed in 2.0.
# Result is a 5x5 grid: rows = size bucket, columns = prior bucket
# (needs all 25 labels B11..B55, in sorted column order).
(bkt["1963-07":"2017-12"].replace([float("inf"), -float("inf")], float("nan")).mean()*multiplier).round(2).values.reshape([5, 5])

array([[0.22, 0.16, 0.13, 0.13, 0.13],
       [0.25, 0.17, 0.16, 0.15, 0.19],
       [0.23, 0.15, 0.14, 0.15, 0.2 ],
       [0.19, 0.14, 0.13, 0.14, 0.19],
       [0.2 , 0.14, 0.12, 0.13, 0.18]])

In [65]:
col = "CP"

# Each row's contribution to its bucket's value-weighted mean of `col`:
# the value times (lagged ME / total lagged ME of the bucket-month).
# NOTE(review): rows where `col` is NaN drop out of the sum while BktSize still counts
# their L1_ME, so the weights may not sum to one — confirm this is intended.
df["Wt"+col] = df[col] * df.L1_ME / df.BktSize

# Sum the contributions per (date, size bucket, prior bucket), keeping date as the index.
bkt = df[ix].groupby(grp)["Wt"+col].sum()
bkt = bkt.reset_index(grp[1:])

# Label every bucket pair "B<size><prior>" and spread the labels into columns.
bkt[grp[1]+grp[2]] = "B" + bkt[grp[1]].astype("int").astype("str") + bkt[grp[2]].astype("int").astype("str")
bkt = bkt.pivot(columns=grp[1]+grp[2], values="Wt"+col)

# Only RET is scaled by 100; every other column is reported as-is.
multiplier = 100 if col=="RET" else 1

# Bucket-months whose sum is +/-inf are treated as missing before averaging.
# Built-in floats replace `pd.np.inf` / `pd.np.nan`: the `pandas.np` alias was
# deprecated in pandas 1.0 and removed in 2.0.
# Result is a 5x5 grid: rows = size bucket, columns = prior bucket
# (needs all 25 labels B11..B55, in sorted column order).
(bkt["1963-07":"2017-12"].replace([float("inf"), -float("inf")], float("nan")).mean()*multiplier).round(2).values.reshape([5, 5])

array([[ 0.12,  0.22,  0.18,  0.19,  0.31],
       [ 0.21,  0.27,  0.23,  0.25,  0.23],
       [ 0.28,  0.28,  0.29,  0.27,  0.32],
       [-0.03,  0.25,  0.29,  0.27,  0.27],
       [ 0.35,  0.52,  0.21, -0.21,  0.37]])

In [66]:
col = "BE_Growth_Future"

# Each row's contribution to its bucket's value-weighted mean of `col`:
# the value times (lagged ME / total lagged ME of the bucket-month).
# NOTE(review): rows where `col` is NaN drop out of the sum while BktSize still counts
# their L1_ME, so the weights may not sum to one — confirm this is intended.
df["Wt"+col] = df[col] * df.L1_ME / df.BktSize

# Sum the contributions per (date, size bucket, prior bucket), keeping date as the index.
bkt = df[ix].groupby(grp)["Wt"+col].sum()
bkt = bkt.reset_index(grp[1:])

# Label every bucket pair "B<size><prior>" and spread the labels into columns.
bkt[grp[1]+grp[2]] = "B" + bkt[grp[1]].astype("int").astype("str") + bkt[grp[2]].astype("int").astype("str")
bkt = bkt.pivot(columns=grp[1]+grp[2], values="Wt"+col)

# Only RET is scaled by 100; every other column is reported as-is.
multiplier = 100 if col=="RET" else 1

# Bucket-months whose sum is +/-inf are treated as missing before averaging.
# Built-in floats replace `pd.np.inf` / `pd.np.nan`: the `pandas.np` alias was
# deprecated in pandas 1.0 and removed in 2.0.
# Result is a 5x5 grid: rows = size bucket, columns = prior bucket
# (needs all 25 labels B11..B55, in sorted column order).
(bkt["1963-07":"2017-12"].replace([float("inf"), -float("inf")], float("nan")).mean()*multiplier).round(2).values.reshape([5, 5])

array([[-0.05,  0.04,  0.09,  0.15,  0.28],
       [-0.13,  0.08,  0.08,  0.14,  0.32],
       [ 0.1 ,  0.09,  0.07,  0.14,  0.33],
       [ 0.13,  0.1 ,  0.11,  0.11,  0.27],
       [ 0.15,  0.18,  0.04, -0.1 ,  0.27]])