In [1]:
import pandas as pd
import numpy as np

In [138]:
import statsmodels.api as sm

In [102]:
def calculate_ret(x):
    """Compute month-over-month returns from one stock's price history.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows for a single stock, indexed by a ``DatetimeIndex`` and
        containing an ``"a_price"`` column.

    Returns
    -------
    pandas.Series
        Percentage change of the last available ``a_price`` in each
        business month, indexed by business-month-end date. The first
        entry is NaN because there is no earlier month to compare against.
    """
    # Pass the offset object instead of the "BM" string alias: the alias is
    # deprecated in recent pandas releases, whereas the object selects the
    # same business-month-end bins regardless of pandas version.
    month_end_price = x["a_price"].resample(pd.offsets.BMonthEnd()).last()

    # The original trailing ``.shift(0)`` was a no-op and has been dropped.
    return month_end_price.pct_change()

In [103]:
# Location of the input CSV.
data_directory = "data/"
file = "ab_stock_cross_section.csv"

# Read the file and discard its first column.
df = pd.read_csv(data_directory + file).iloc[:, 1:]

# Parse the date strings, then keep only rows dated 2002-01-01 or later.
df["date"] = pd.to_datetime(df["date"])
df = df.loc[df["date"] >= "2002-01-01"]

### Create Return data

In [104]:
# Price history per stock, keyed by date so it can be resampled later.
price_df = (
    df.loc[:, ["date", "unique_id", "a_price"]]
    .set_index("date")
)

In [105]:
# Run calculate_ret separately on each stock's rows (one group per unique_id).
ret_df = price_df.groupby("unique_id").apply(calculate_ret)

In [106]:
# Flatten the indexed return Series into a plain three-column table with a
# fresh 0..n-1 row index, then discard rows that contain missing values.
index_ret_df = ret_df.rename("ret").reset_index()
index_ret_df.columns = ["ticker","date","ret"]
index_ret_df = index_ret_df.dropna()

In [107]:
# NOTE(review): redundant — the previous cell already ends with this same
# dropna(), so re-running it here is a no-op.
index_ret_df = index_ret_df.dropna()

### Create Beta Value (lagged month-end ratio) for each observation

In [111]:
# Ratio history per stock, keyed by date so it can be resampled later.
ratio_df = (
    df.loc[:, ["date", "unique_id", "ratio"]]
    .set_index("date")
)

In [117]:
# Inspect the date-indexed ratio frame.
ratio_df

Unnamed: 0_level_0,unique_id,ratio
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2002-01-04,id_51,1.464487
2002-01-07,id_51,1.446449
2002-01-08,id_51,1.488863
2002-01-09,id_51,1.512635
2002-01-10,id_51,1.487059
...,...,...
2019-12-25,id_40,19.422111
2019-12-26,id_40,19.200000
2019-12-27,id_40,18.975000
2019-12-30,id_40,19.496222


In [118]:
def func(x):
    """Return one stock's month-end ratio, lagged by one month.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows for a single stock, indexed by a ``DatetimeIndex`` and
        containing a ``"ratio"`` column.

    Returns
    -------
    pandas.Series
        Indexed by business-month-end date. Each entry holds the last
        ``ratio`` observed in the *previous* business month; the first
        entry is NaN because nothing precedes it.
    """
    # Pass the offset object instead of the "BM" string alias: the alias is
    # deprecated in recent pandas releases, whereas the object selects the
    # same business-month-end bins regardless of pandas version.
    month_end_ratio = x["ratio"].resample(pd.offsets.BMonthEnd()).last()

    # Shift forward one period so each month's row carries the value from
    # the end of the month before it.
    return month_end_ratio.shift(1)

In [119]:
# Apply func to each stock's rows (one group per unique_id).
# NOTE(review): despite the "mean" in the name, func takes the last ratio of
# each month (lagged one month), not an average.
mean_ratio_df = ratio_df.groupby("unique_id").apply(func)

In [120]:
# Inspect the lagged month-end ratio Series.
mean_ratio_df

unique_id  date      
id_0       2002-01-31         NaN
           2002-02-28    2.307557
           2002-03-29    2.219224
           2002-04-30    2.315526
           2002-05-31    2.491039
                           ...   
id_96      2019-08-30    2.392749
           2019-09-30    2.402402
           2019-10-31    2.135447
           2019-11-29    2.216718
           2019-12-31    2.400000
Name: ratio, Length: 18264, dtype: float64

In [121]:
# Flatten the indexed ratio Series into a plain three-column table with a
# fresh 0..n-1 row index, then discard rows that contain missing values.
index_mean_ratio_df = mean_ratio_df.rename("ratio").reset_index()
index_mean_ratio_df.columns = ["ticker","date","ratio"]
index_mean_ratio_df = index_mean_ratio_df.dropna()

In [122]:
# Inspect the flattened (ticker, date, ratio) table.
index_mean_ratio_df

Unnamed: 0,ticker,date,ratio
1,id_0,2002-02-28,2.307557
2,id_0,2002-03-29,2.219224
3,id_0,2002-04-30,2.315526
4,id_0,2002-05-31,2.491039
5,id_0,2002-06-28,2.466000
...,...,...,...
18259,id_96,2019-08-30,2.392749
18260,id_96,2019-09-30,2.402402
18261,id_96,2019-10-31,2.135447
18262,id_96,2019-11-29,2.216718


In [123]:
# Inspect the flattened (ticker, date, ret) table.
index_ret_df

Unnamed: 0,ticker,date,ret
1,id_0,2002-02-28,0.002285
2,id_0,2002-03-29,0.053951
3,id_0,2002-04-30,0.002163
4,id_0,2002-05-31,-0.112950
5,id_0,2002-06-28,0.115977
...,...,...,...
18259,id_96,2019-08-30,0.010101
18260,id_96,2019-09-30,-0.073750
18261,id_96,2019-10-31,-0.033738
18262,id_96,2019-11-29,0.189944


### Merge the ratio and return data

In [128]:
# Pair each (ticker, date) return with the ratio recorded for the same key.
merge_df = pd.merge(
    index_ret_df,
    index_mean_ratio_df,
    on=["ticker","date"],
)

In [129]:
# NOTE(review): appears redundant — both merge inputs were already passed
# through dropna() before being joined; confirm and remove if so.
merge_df = merge_df.dropna()

In [132]:
# Distinct dates in chronological order. A bare set has no defined order, so
# sorting keeps the per-date results below aligned with the timeline and
# reproducible from run to run.
date_list = sorted(set(merge_df["date"]))

In [167]:
def get_risk_premium(df, date):
    """Estimate the cross-sectional slope of return on ratio for one date.

    Restricts ``df`` to the rows whose ``"date"`` equals ``date``, fits an
    ordinary-least-squares regression of ``"ret"`` on ``"ratio"`` (with a
    constant added through ``sm.add_constant``), and returns the fitted
    coefficient on ``"ratio"``.
    """
    cross_section = df.loc[df["date"] == date]

    response = cross_section[["ret"]]
    design = sm.add_constant(cross_section[["ratio"]])

    fitted = sm.OLS(response, design).fit()
    return fitted.params["ratio"]

In [168]:
# One ratio slope per date, in the same order as date_list.
parameter_list = [
    get_risk_premium(merge_df, date)
    for date in date_list
]
    
    

In [161]:
# Total of the per-date ratio slopes.
# NOTE(review): this cell's execution count (161) is lower than that of the
# loop that fills parameter_list (168), so the displayed value may predate the
# current list — re-run to confirm.
sum(parameter_list)

-0.03751504596461382

In [169]:
# Wrap the per-date slopes in a Series to use pandas' summary statistics.
parameter_series = pd.Series(parameter_list)

In [170]:
# Mean of the per-date slopes divided by their standard deviation.
# NOTE(review): this is not scaled by sqrt(number of dates), so it is not a
# t-statistic for the mean slope — confirm which quantity is intended.
parameter_series.mean()/parameter_series.std()

0.08219428765309329

In [173]:
# NOTE(review): `results` is only ever assigned inside get_risk_premium, so no
# notebook-level variable of that name is created by the cells shown here. On
# a fresh kernel this line raises NameError; the summary displayed below must
# have come from leftover kernel state. Fit a model at notebook scope (or have
# the helper return the fitted result) before calling summary().
results.summary()

0,1,2,3
Dep. Variable:,ret,R-squared:,0.005
Model:,OLS,Adj. R-squared:,-0.008
Method:,Least Squares,F-statistic:,0.3941
Date:,"Mon, 28 Dec 2020",Prob (F-statistic):,0.532
Time:,22:50:25,Log-Likelihood:,79.976
No. Observations:,79,AIC:,-156.0
Df Residuals:,77,BIC:,-151.2
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0399,0.016,2.441,0.017,0.007,0.072
ratio,-0.0010,0.002,-0.628,0.532,-0.004,0.002

0,1,2,3
Omnibus:,51.051,Durbin-Watson:,2.097
Prob(Omnibus):,0.0,Jarque-Bera (JB):,206.725
Skew:,2.009,Prob(JB):,1.2899999999999999e-45
Kurtosis:,9.83,Cond. No.,17.4
