# Cross-sectional Predictability

In [1]:
import getpass
import os
import urllib.parse

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sqlalchemy import create_engine
!pip install pymssql




[notice] A new release of pip available: 22.2.1 -> 22.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Names in `intchars` are removed from the characteristic list below
# (presumably the integer-/indicator-valued ones, going by the name - confirm).
intchars = [
    "age", "sin", "divi", "divo", "securedind", "convind",
    "rd", "ps", "nincr", "ms", "ipo",
]

# Full list of candidate firm characteristics before filtering.
chars = [
    "bm", "ep", "cashpr", "dy", "lev", "sp", "roic", "rd_sale", "rd_mve", "agr",
    "gma", "chcsho", "lgr", "acc", "pctacc", "cfp", "absacc", "age", "chinv", "hire",
    "sgr", "pchsale_pchinvt", "pchsale_pchrect", "pchgm_pchsale", "pchsale_pchxsga",
    "depr", "pchdepr", "invest", "egr", "grcapx", "tang", "sin", "currat", "pchcurrat",
    "quick", "pchquick", "salecash", "salerec", "saleinv", "pchsaleinv", "cashdebt",
    "realestate", "divi", "divo", "securedind", "secured", "convind", "grltnoa", "rd",
    "operprof", "ps", "chpmia", "chatoia", "chempia", "bm_ia", "pchcapx_ia", "tb",
    "cfp_ia", "mve_ia", "herf", "orgcap", "mve", "chtx", "roaq", "roeq", "rsup",
    "stdacc", "roavol", "stdcf", "cash", "cinvest", "nincr", "sue", "aeavol", "ear",
    "ms", "disp", "chfeps", "fgr5yr", "nanalyst", "sfe", "chnanalyst", "mom6m",
    "mom12m", "mom36m", "mom1m", "dolvol", "chmom", "turn", "ipo", "indmom", "maxret",
    "retvol", "baspread", "std_dolvol", "std_turn", "ill", "zerotrade", "beta",
    "betasq", "pricedelay", "idiovol",
]

# Drop the excluded names and sort alphabetically; np.sort returns a NumPy string array.
chars = np.sort([name for name in chars if name not in intchars])

# Bin labels for the quintile sort, ordered from lowest to highest bin.
labels = ["Lo 20", "Qnt 2", "Qnt 3", "Qnt 4", "Hi 20"]

# Database connection settings.
# Credentials must not be stored in the notebook: they are read from the environment
# (GHZ_DB_USER / GHZ_DB_PASSWORD) and, if unset, requested interactively.
# The host and database name can be overridden the same way but keep their defaults.
server = os.environ.get("GHZ_DB_SERVER", "eu-az-sql-serv1.database.windows.net:1433")
database = os.environ.get("GHZ_DB_NAME", "dgn022k6348dcyh")
username = os.environ.get("GHZ_DB_USER") or input("Database username: ")
password = os.environ.get("GHZ_DB_PASSWORD") or getpass.getpass("Database password: ")
# URL-escape the password so characters such as '@' or '$' do not break the connection URL
password = urllib.parse.quote_plus(password)

string = "mssql+pymssql://" + username + ":" + password + "@" + server + "/" + database
conn = create_engine(string).connect()


Pull the data from the SQL server:

In [61]:
char = 'mve, beta, mom12m, bm'        # example characteristics

# Build the query once, then load it and discard rows with any missing value
query = " select ticker, date, ret, " + char + " from ghz where date>='2000-01-01' "
df = pd.read_sql(query, conn).dropna()

In [44]:
# Quick look at the sample; the notebook echoes only the last expression (the earliest date)
len(df)
df["date"].min()

datetime.date(2000, 1, 31)

## Cross-sectional Method #1: Sorting

In [62]:
# Sorting function
def cut(x, q=5, bin_labels=None):
    """Assign each value of ``x`` to a quantile bin.

    Parameters
    ----------
    x : pandas.Series
        Values to sort into bins.
    q : int, default 5
        Number of quantile bins passed to ``pd.qcut``.
    bin_labels : list, optional
        Labels for the bins, lowest bin first. When omitted, the
        notebook-level ``labels`` list is used.

    Returns
    -------
    pandas.Series
        Bin label for every observation, indexed like ``x``. If ``pd.qcut``
        cannot form the bins (for example because the bin edges are not
        unique), an all-NaN series with the same index is returned instead.
    """
    if bin_labels is None:
        bin_labels = labels
    try:
        out = pd.qcut(x, q, labels=bin_labels)
    except Exception:
        # Best-effort fallback. Catching Exception rather than using a bare `except`
        # lets KeyboardInterrupt / SystemExit propagate instead of being swallowed.
        out = pd.Series(np.nan, index=x.index)
    return out

In [63]:
# Characteristic to sort on
char = 'beta'

# Sort stocks into bins separately within each date.
# group_keys=False keeps the original row index on the result; without it, pandas >= 2.0
# prepends the date to the index and the column assignment fails on the mismatched index.
df["quintile"] = df.groupby("date", group_keys=False)[char].apply(cut)

In [58]:
# Bin sizes should be roughly equal if the sort into quintiles worked
df["quintile"].value_counts()

Lo 20    91573
Hi 20    91524
Qnt 3    91431
Qnt 2    91418
Qnt 4    91418
Name: quintile, dtype: int64

In [64]:
# Equal-weighted return of each bin on each date: rows are dates, columns are bins.
# Rows that were not assigned a bin are dropped first.
df = df.dropna(subset=["quintile"])
rets = df.groupby(["date", "quintile"])["ret"].mean().unstack(level="quintile")

In [65]:
# Average each bin's portfolio return over all dates
mns = rets.mean(axis=0)
print(mns)

# Spread between the highest and lowest bin
hilo = mns.loc['Hi 20'] - mns.loc['Lo 20']
print(f'Hi-Lo average return is: {hilo: .2%}')

quintile
Lo 20    0.010107
Qnt 2    0.010880
Qnt 3    0.011299
Qnt 4    0.010744
Hi 20    0.008246
dtype: float64
Hi-Lo average return is: -0.19%


## Cross-sectional Method #2: Regression

Example #1: Single characteristic

In [24]:
# Cross-sectional OLS of returns on beta (with an intercept), fitted separately for each date
def _xs_params(d):
    """Return the fitted OLS parameters for one date's cross-section."""
    exog = sm.add_constant(d['beta'])
    return sm.OLS(d.ret, exog).fit().params

coefs = df.groupby('date').apply(_xs_params)

In [38]:
coefs.mean()  # value is discarded: the cell ends with an assignment, so nothing is echoed
# Regress the per-date beta slopes on a constant alone to test whether their mean is zero
T = coefs.shape[0]
tsreg = sm.OLS(coefs["beta"], np.ones(T)).fit()

In [39]:
# Print the statsmodels summary table for the fitted `tsreg` model
print(tsreg.summary())

                            OLS Regression Results                            
Dep. Variable:                   beta   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Wed, 16 Nov 2022   Prob (F-statistic):                nan
Time:                        09:53:41   Log-Likelihood:                 412.61
No. Observations:                 264   AIC:                            -823.2
Df Residuals:                     263   BIC:                            -819.6
Df Model:                           0                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0002      0.003     -0.050      0.9

In [30]:
# Line chart of the per-date coefficient on beta
import plotly.graph_objects as go

trace = go.Scatter(x=coefs.index, y=coefs['beta'], mode="lines")
fig = go.Figure(data=[trace])
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Coefficient on Beta', tickformat=".4f")
fig.show()


Example #2: Multiple characteristics

In [46]:
# Characteristics used jointly as regressors (replaces the earlier `chars` list)
chars = ['beta', 'mve', 'bm', 'mom12m']

# Cross-sectional OLS of returns on all characteristics (with an intercept), one fit per date
coefs = df.groupby('date').apply(
    lambda d: sm.OLS(d.ret, sm.add_constant(d[chars])).fit().params
)

# Time-series test for each characteristic: regress its per-date coefficient on a constant.
# The original loop always used `coefs.beta` and kept no result; each fit is now stored
# under its characteristic name, and `tsreg` still holds the most recent fit.
T = len(coefs)
tsregs = {}
for c in chars:
    tsreg = sm.OLS(coefs[c], np.ones(T)).fit()
    tsregs[c] = tsreg

In [50]:
# Summary table: time-series mean of every coefficient with its t-statistic and p-value
records = []
for coef in coefs.columns:
    # Regressing on a constant alone tests whether the average coefficient is zero
    tsreg = sm.OLS(coefs[coef], np.ones(T)).fit()
    records.append(
        {
            'mean': coefs[coef].mean(),
            'tstat': tsreg.tvalues['const'],
            'pval': tsreg.pvalues['const'],
        }
    )

stats = pd.DataFrame(records, index=coefs.columns, columns=['mean', 'tstat', 'pval'], dtype=float)
stats.round(4)

Unnamed: 0,mean,tstat,pval
const,0.027,3.3916,0.0008
beta,-0.0016,-0.6105,0.542
mve,-0.0011,-2.2353,0.0262
bm,-0.001,-0.6026,0.5473
mom12m,-0.0004,-0.1419,0.8872


In [52]:
# Line chart of the per-date coefficient for one chosen characteristic
coef = 'mom12m'

trace = go.Scatter(x=coefs.index, y=coefs[coef], mode="lines")
fig = go.Figure(data=[trace])
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Coefficient on '+coef, tickformat=".4f")
fig.show()