# EXAMPLE DATA

---

Created for [learn-investments.rice-business.org](https://learn-investments.rice-business.org)
    
By [Kerry Back](https://kerryback.com) and [Kevin Crotty](https://kevincrotty.rice.edu/)
    
Jones Graduate School of Business, Rice University

---


In [1]:
# Sample period: first and last calendar year to include.
start_yr = 1980
stop_yr = 2023

# Second sorting characteristic (the first sort is always market equity).
# Available choices:
#   "Book to market ratio", "Investment rate", "Momentum",
#   "Short term reversal", "Long term reversal", "Accruals", "Beta",
#   "Net equity issuance", "Variance", "Residual variance"
char = "Book to market ratio"

# GET DATA

In [2]:
from pandas_datareader import DataReader as pdr
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import statsmodels.api as sm
import plotly.express as px
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Map each characteristic name to the file in French's data library that
# holds the corresponding 5x5 portfolio returns. A single dict literal keeps
# every name next to its file, so the pairing cannot drift out of alignment
# the way two separately maintained lists can.
charsDict = {
    "Book to market ratio": "25_Portfolios_5x5",
    "Investment rate": "25_Portfolios_ME_INV_5x5",
    "Momentum": "25_Portfolios_ME_Prior_12_2",
    "Short term reversal": "25_Portfolios_ME_Prior_1_0",
    "Long term reversal": "25_Portfolios_ME_Prior_60_13",
    "Accruals": "25_Portfolios_ME_AC_5x5",
    "Beta": "25_Portfolios_ME_BETA_5x5",
    "Net equity issuance": "25_Portfolios_ME_NI_5x5",
    "Variance": "25_Portfolios_ME_VAR_5x5",
    "Residual variance": "25_Portfolios_ME_RESVAR_5x5",
}

# File names in the same order as the dict entries above.
files = list(charsDict.values())

# Characteristic names in alphabetical order.
chars = sorted(charsDict)

# Monthly market excess return and risk-free rate from French's data library.
# The first table of the download is divided by 100
# (presumably percent -> decimal; confirm against the data source).
ff = pdr("F-F_Research_Data_Factors", "famafrench", start=1900)[0] / 100

# Portfolio returns for the chosen characteristic, scaled the same way.
RETS = pdr(charsDict[char], "famafrench", start=1926)[0] / 100

if char == "Net equity issuance":
    # For this characteristic, discard every column whose second label
    # begins with "Z" or "Ne"; the remaining columns are kept as they are.
    unwanted = [
        label
        for label in RETS.columns
        if label.split(" ")[1].startswith(("Z", "Ne"))
    ]
    RETS = RETS.drop(columns=unwanted)

# Restrict to January of start_yr through December of stop_yr.
start = f"{start_yr}-01"
stop = f"{stop_yr}-12"
df = RETS.loc[start:stop].copy()

# Read the names of the two sorting variables off the second column label:
# split it on the space and strip the last character from each piece
# (presumably a group digit -- confirm against the downloaded labels).
s = df.columns[1].split(" ")
s1, s2 = s[0][:-1], s[1][:-1]  # s1: market equity, s2: other characteristic

def splitName(x):
    """Normalize a two-part portfolio label into a pair of group names.

    The label is split on single spaces and only the first two pieces are
    used.

    First piece (market equity):
      * starts with "M" -> kept unchanged
      * starts with "S" -> "ME1"
      * anything else   -> "ME5"

    Second piece (other characteristic, whose name is the global ``s2``):
      * starts with the first letter of ``s2`` -> kept unchanged
      * starts with "L"                        -> ``s2 + "1"``
      * anything else                          -> ``s2 + "5"``

    Returns the tuple ``(first, second)``.
    """
    pieces = x.split(" ")
    size, other = pieces[0], pieces[1]

    if size[0] == "S":
        size = "ME1"
    elif size[0] != "M":
        size = "ME5"

    if other[0] != s2[0]:
        other = s2 + ("1" if other[0] == "L" else "5")

    return size, other

# One normalized (market equity, characteristic) pair per column, in column order.
splits = [splitName(label) for label in df.columns]


# CALCULATIONS

In [3]:
# 5x5 table calculations
# Label columns by their (market equity, characteristic) pair so that the
# per-column statistics can be pivoted into a two-way table.
df.columns = pd.MultiIndex.from_tuples(splits)

# Subtract the RF series from every column, aligned on the row index.
df = df.subtract(ff["RF"], axis="index")

# Per-column mean and standard deviation, computed once and reused.
monthly_mean = df.mean()
monthly_std = df.std()

# Scale by 12 (mean) and sqrt(12) (mean / std), then pivot the second
# label level into columns.
means = (12 * monthly_mean).unstack()
sharpes = (np.sqrt(12) * monthly_mean / monthly_std).unstack()

# FIGURE (MEAN EXCESS RETURN HEAT MAP)

In [4]:
# Heat map of the `means` table: its columns run along x, its index along y,
# and each cell is rendered as a percentage.
heatmap_options = dict(
    z=means,
    x=means.columns.to_list(),
    y=means.index.to_list(),
    colorscale='Viridis',
    texttemplate="%{z:.1%}",
    hovertemplate="%{x} / %{y}<br>%{z:.2%}<extra></extra>",
)
trace = go.Heatmap(**heatmap_options)
means_tbl = go.Figure(data=trace)
means_tbl.show()

# FIGURE (SHARPE RATIO HEAT MAP)

In [5]:
# Heat map of the `sharpes` table: its columns run along x, its index along y.
# Sharpe ratios are unitless, so cells are formatted as plain decimals; a
# percent format would render a ratio of 0.45 as "45.0%".
trace = go.Heatmap(
    x=sharpes.columns.to_list(),
    y=sharpes.index.to_list(),
    z=sharpes,
    colorscale='Viridis',
    texttemplate="%{z:.2f}",
    hovertemplate="%{x} / %{y}<br>%{z:.3f}<extra></extra>"
)
sharpes_tbl = go.Figure(trace)
sharpes_tbl.show()

# FIGURE (ACCUMULATION)

In [6]:
import copy

# Start again from the returns in RETS (no RF subtraction here) over the
# selected date range.
df = RETS.loc[start:stop].copy()
dates = df.reset_index().iloc[:, 0].astype(str)

# Cumulative product of (1 + return): the value over time of 1 unit invested
# at the start of the window.
accum = (1 + df).cumprod()

# Replace the column labels with positions 0..24 so portfolios can be picked
# by position. Assigning exactly 25 labels raises if the table does not have
# 25 columns.
accum.columns = range(25)

# The four portfolios to plot, identified by position.
# NOTE(review): the names assume columns are ordered by market equity group
# first and characteristic group second -- confirm against the data.
names = ["small char/small cap", "large char/small cap", "small char/large cap", "large char/large cap"]
cols = [0, 4, 20, 24]
colors = ['blue', 'blue', 'red', 'red']

traces = []
for i, (col, name, color) in enumerate(zip(cols, names, colors)):
    # Every second series (the "large char" ones) is drawn dashed; the
    # others keep the default solid line.
    line_style = dict(color=color)
    if i % 2 != 0:
        line_style["dash"] = "dash"
    trace = go.Scatter(
        x=dates,
        y=accum[col],
        mode="lines",
        name=name,
        line=line_style,
        hovertemplate="%{x}<br>$%{y:.2f}<extra></extra>"
    )
    traces.append(trace)

accum_plot = go.Figure()
for trace in traces:
    accum_plot.add_trace(trace)

# Set the y-axis title before displaying so the rendered figure carries it.
accum_plot.update_layout(
    legend=dict(x=0.01, y=0.99),
    yaxis_title="Accumulation",
)
accum_plot.show()


# FIGURE (ACCUMULATION LOG SCALE)

In [7]:
# Title the linear-scale figure, then derive the log-scale figure from a copy
# of it and give the copy its own y-axis type and title.
accum_plot.update_layout(yaxis_title="Accumulation")

accum_log_plot = copy.copy(accum_plot)
accum_log_plot.update_yaxes(type="log")
accum_log_plot.update_layout(yaxis_title="Accumulation (log scale)")
accum_log_plot.show()