# Anomalies

In [1]:
import jinja2
import os
import pandas as pd
#from scipy.optimize import minimize
import statsmodels.api as sm

#%matplotlib inline

  from pandas.core import datetools


In [2]:
# F = pd.read_stata("C:/Data/Thesis/Factors.dta").set_index("date")

factors = pd.read_csv("C:/Data/Thesis/_Factors.csv")

myfactors = pd.read_csv("C:/Data/Thesis/_MyFactors.csv")
myfactors.iloc[:,1:] = myfactors.iloc[:,1:]*100

f = pd.concat([factors, myfactors[["HML", "HMLm", "PMU06", "PMU16", "PMU", "CMA", "WML"]]], axis=1)
f.date = pd.to_datetime(f.date.astype("str"), format="%Y%m")
f = f.set_index("date")

f.head()

Unnamed: 0_level_0,rf,rm,smb,hml,rmw,cma,wml,hmlm,HML,HMLm,PMU06,PMU16,PMU,CMA,WML
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1963-07-01,0.27,-0.39,-0.46,-0.81,0.72,-1.16,0.99,-0.59,-0.721155,-0.864465,0.888426,0.739828,0.35565,-1.13204,0.550726
1963-08-01,0.25,5.07,-0.81,1.65,0.42,-0.4,1.08,0.28,1.000628,-0.060107,0.212301,0.470122,0.456718,-0.070141,1.443902
1963-09-01,0.27,-1.57,-0.48,0.19,-0.8,0.23,0.13,0.27,1.045719,1.463823,-1.169019,-1.130309,-0.665664,-0.361107,0.267052
1963-10-01,0.29,2.53,-1.29,-0.09,2.75,-2.26,3.14,-2.11,-0.549914,-1.635921,2.612164,3.006009,3.007174,-1.886318,3.648531
1963-11-01,0.27,-0.85,-0.85,1.71,-0.34,2.22,-0.75,0.91,1.288533,1.400487,-0.099125,-0.481188,-0.625498,0.314912,-1.489579


In [3]:
models = {
    "CAPM": {"f": ["rm",],
             "regstr": r'R^i=a^i+\beta^iMkt',
             "coefs": ["a", "b"],
             "display_coefs": ["a", "b"]},
    "F93": {"f": ["rm", "smb", "hml"],
               "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal',
               "coefs": ["a", "b", "s", "v"],
               "display_coefs": ["a", "b", "s", "v"]},
    "C97": {"f" : ["rm", "smb", "hml", "wml"],
              "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal+m^iMom',
              "coefs": ["a", "b", "s", "v", "m"],
              "display_coefs": ["a", "v", "m"]},
    "C97b": {"f" : ["rm", "smb", "HMLm", "wml"],
               "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal+m^iMom',
               "coefs": ["a", "b", "s", "v", "m"],
               "display_coefs": ["a", "v", "m"]},
    "F15": {"f" : ["rm", "smb", "hml", "rmw", "cma"],
               "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal+m^iMom',
               "coefs": ["a", "b", "s", "v", "p", "i"],
              "display_coefs": ["a", "v", "p", "i"]},
    "F16": {"f" : ["rm", "smb", "hml", "PMU", "cma"],
               "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal+p^iProf+i^iInv',
               "coefs": ["a", "b", "s", "v", "p", "i"],
              "display_coefs": ["a", "v", "p", "i"]},
    "F17": {"f" : ["rm", "smb", "hml", "PMU", "cma", "wml"],
               "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal+p^iProf+i^iInv+m^iMom',
               "coefs": ["a", "b", "s", "v", "p", "i", "m"],
              "display_coefs": ["a", "v", "p", "i", "m"]},
    "B16": {"f" : ["rm", "smb", "HMLm", "wml", "PMU"],
              "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal+m^iMom+p^iProf',
              "coefs": ["a", "b", "s", "v", "m", "p"],
              "display_coefs": ["a", "v", "m", "p"]},
    "B16b": {"f" : ["rm", "smb", "HMLm", "wml", "PMU", "cma"],
              "regstr": r'R^i=a^i+\beta^iMkt+s^iSize+v^iVal+m^iMom+p^iProf+i^iInv',
              "coefs": ["a", "b", "s", "v", "m", "p", "i"],
              "display_coefs": ["a", "v", "m", "p", "i"]},
}

In [12]:
def tbl_content(LHS, F, factors, start, end, coef_names, K, i=5, j=5, precision=2, transpose_j=False):

    N = len(LHS.columns)
    A = {name: {"coef": [], "tval": [], "rows": []} for name in coef_names}

    # for each anomaly portfolio
    for lhsi, lhs in enumerate(LHS.columns):

        lhs = LHS[start:end][lhs] - F[start:end]["rf"]  # DataFrame
        rhs = sm.add_constant(F[start:end][factors])    # DataFrame

        # regress the "anomaly" on the factors
        model = sm.OLS(lhs, rhs)
        fit = model.fit()

        # get the coefficients and their t-values
        #coef = [round(b, precision) for b in fit.params]
        #tval = [round(t, precision) for t in fit.tvalues]

        coef = ["{:.2f}".format(b) for b in fit.params]
        tval = ["{:.2f}".format(t) for t in fit.tvalues]

        # for each coefficient
        for namei, name in enumerate(coef_names):  # ordered
            A[name]["coef"].append(coef[namei])
            A[name]["tval"].append(tval[namei])

    for coef in A:

        A[coef]["coef"] = pd.np.array(A[coef]["coef"]).reshape(K, i, j)
        A[coef]["tval"] = pd.np.array(A[coef]["tval"]).reshape(K, i, j)

        if transpose_j:
            for k, x in enumerate(A[coef]["coef"]):
                A[coef]["coef"][k] = A[coef]["coef"][k].transpose()
                A[coef]["tval"][k] = A[coef]["tval"][k].transpose()

        A[coef]["coef"] = A[coef]["coef"].tolist()
        A[coef]["tval"] = A[coef]["tval"].tolist()

    for coef in A:
        for row in range(j):
            A[coef]["rows"].append([])
            for k in range(K):
                A[coef]["rows"][row].extend(A[coef]["coef"][k][row])
                A[coef]["rows"][row].extend(A[coef]["tval"][k][row])
    return A

In [5]:
path = "C:/Users/samth/Dropbox/Thesis/Tex/Tables/"

env = jinja2.Environment(
    block_start_string='-%', block_end_string='%-',
    variable_start_string='=%', variable_end_string='%=',
    loader=jinja2.FileSystemLoader(path)
)

In [6]:
n_months, start, end = 654, "1963-07", "2017-12"

## 2x4x4

In [7]:
template = env.get_template("2x4x4_template.txt")

### Size-OP-Inv

In [None]:
a, K, j = pd.read_csv("C:/Data/FrenchDartmouth/32_Portfolios_ME_OP_INV_2x4x4.CSV"), 2, 4
a = a.iloc[:n_months, 1:]
a.index = f.index

fname_prefix = "32_Size_OP_Inv"

In [None]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K, j, 2, True)
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "OP", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

### Size-BM-Inv

In [None]:
a, K, j = pd.read_csv("C:/Data/FrenchDartmouth/32_Portfolios_ME_BEME_INV_2x4x4.CSV"), 2, 4
a = a.iloc[:n_months, 1:]
a.index = f.index

fname_prefix = "32_Size_BM_Inv"

In [None]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K, j, 2, True)
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "BM", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

### Size-BM-Prior

In [10]:
a, K, j = pd.read_csv("C:/Data/Thesis/32_Portfolios_BMm_Prior.csv"), 2, 4
a = a.iloc[:n_months, 1:]
a.index = f.index

a = a.fillna(-99.99)

fname_prefix = "32_Size_BMm_Prior"

In [11]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K, j, 2, False)  # do not transpose, I made these
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "Prior", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

CAPM F93 C97 C97b F15 F16 F17 B16 B16b -||


## 5x5

In [7]:
template = env.get_template("5x5_template.txt")

### Size-Inv

In [None]:
a, K, j = pd.read_csv("C:/Data/FrenchDartmouth/25_Portfolios_ME_INV_5x5.CSV"), 1, 5
a = a.iloc[:n_months, 1:]
a.index = f.index

fname_prefix = "25_Size_Inv"

In [None]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K, j, 2, False)
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "Inv", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

### Size-Var

In [None]:
a, K, j = pd.read_csv("C:/Data/FrenchDartmouth/25_Portfolios_ME_VAR_5x5.csv"), 1, 5
a = a.iloc[:n_months, 1:]
a.index = f.index

fname_prefix = "25_Size_Var"

In [None]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K, j, 2, False)
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "Var", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

### Size-Prior

In [8]:
a, K, j = pd.read_csv("C:/Data/FrenchDartmouth/25_Portfolios_ME_Prior_12_2.CSV"), 1, 5
a = a.iloc[:n_months, 1:]
a.index = f.index

fname_prefix = "25_Size_Prior"

In [9]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K, j, 2, False)
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "Prior", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

CAPM F93 C97 C97b F15 F16 F17 B16 B16b -||


### Size-$\beta$

In [16]:
a, K, j = pd.read_csv("C:/Data/FrenchDartmouth/25_Portfolios_ME_BETA_5x5.csv"), 1, 5
a = a.iloc[:n_months, 1:]
a.index = f.index

fname_prefix = "25_Size_Beta"

In [17]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K)
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "Prior", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

CAPM F93 C97 C97b F15 F16 F17 B16 B16b -||


## 5x7

In [21]:
template = env.get_template("5x7_template.txt")

### Size-NI

In [32]:
pd.np.array(range(35)).reshape(1, 5, 7)

array([[[ 0,  1,  2,  3,  4,  5,  6],
        [ 7,  8,  9, 10, 11, 12, 13],
        [14, 15, 16, 17, 18, 19, 20],
        [21, 22, 23, 24, 25, 26, 27],
        [28, 29, 30, 31, 32, 33, 34]]])

In [33]:
a, K, i, j = pd.read_csv("C:/Data/FrenchDartmouth/25_Portfolios_ME_NI_5x5.csv"), 1, 5, 7
a = a.iloc[:n_months, 1:]
a.index = f.index

fname_prefix = "25_Size_NI"

In [30]:
for m in models:
    print(m, end=" ")
    A = tbl_content(a, f, models[m]["f"], start, end, models[m]["coefs"], K, i, j)
    context = {
        "A": A, "K": K, "j":j, "coef_names": models[m]["display_coefs"],
        "var_name": "Prior", "title": models[m]["regstr"],
        "caption": "{} Portfolios with {} Factors {} through {}".format(
            fname_prefix.replace("_", "-"), m, start, end),
        "label": "tbl:{}_{}".format(fname_prefix, m)
    }

    fname = "{}_{}_{}_{}.tex".format(fname_prefix, m, start.replace("-", ""), end.replace("-", ""))

    with open(os.path.join(path, fname), "w") as table:
        table.write(template.render(context))
print("-||")

CAPM 

IndexError: list index out of range

In [29]:
A

{'a': {'coef': [[['0.06', '-0.13', '-0.06', '0.06', '0.21'],
    ['0.01', '-0.19', '-0.06', '-0.08', '-0.01'],
    ['-0.03', '-0.02', '0.18', '-0.10', '0.05'],
    ['0.02', '-0.07', '0.09', '0.07', '0.10'],
    ['0.04', '0.11', '-0.10', '-0.15', '-0.01'],
    ['0.16', '0.21', '0.01', '-0.02', '0.02'],
    ['-0.14', '-0.01', '0.10', '0.15', '-0.08']]],
  'rows': [['0.06',
    '-0.13',
    '-0.06',
    '0.06',
    '0.21',
    '0.99',
    '-1.41',
    '-0.71',
    '0.76',
    '2.62'],
   ['0.01',
    '-0.19',
    '-0.06',
    '-0.08',
    '-0.01',
    '0.09',
    '-1.90',
    '-0.95',
    '-0.62',
    '-0.16'],
   ['-0.03',
    '-0.02',
    '0.18',
    '-0.10',
    '0.05',
    '-0.36',
    '-0.23',
    '2.44',
    '-1.15',
    '0.70'],
   ['0.02',
    '-0.07',
    '0.09',
    '0.07',
    '0.10',
    '0.18',
    '-0.79',
    '1.10',
    '0.91',
    '1.26'],
   ['0.04',
    '0.11',
    '-0.10',
    '-0.15',
    '-0.01',
    '0.53',
    '1.53',
    '-0.77',
    '-1.82',
    '-0.12']],
  'tva

# Annual value with monthly value

In [17]:
a = pd.read_csv("C:/Data/FrenchDartmouth/25_Portfolios_5x5.CSV")

In [18]:
a.date = pd.to_datetime(a.date.astype(str), format="%Y%m")
a = a.set_index("date")

In [34]:
fit = sm.OLS(a["SMALL LoBM"], sm.add_constant(f[["rm", "smb", "hmlm", "wml", "PMU"]])).fit()
pd.concat([fit.params, fit.pvalues], axis=1).round(2)

Unnamed: 0,0,1
const,0.25,0.02
rm,1.05,0.0
smb,1.31,0.0
hmlm,-0.51,0.0
wml,-0.23,0.0
PMU,-0.5,0.0


In [35]:
fit = sm.OLS(a["SMALL LoBM"], sm.add_constant(f[["rm", "smb", "hmlm", "hml", "wml", "PMU"]])).fit()
pd.concat([fit.params, fit.pvalues], axis=1).round(2)

Unnamed: 0,0,1
const,0.17,0.09
rm,1.03,0.0
smb,1.3,0.0
hmlm,0.02,0.78
hml,-0.58,0.0
wml,-0.02,0.6
PMU,-0.51,0.0


In [36]:
fit = sm.OLS(a["SMALL HiBM"], sm.add_constant(f[["rm", "smb", "hmlm", "wml", "PMU"]])).fit()
pd.concat([fit.params, fit.pvalues], axis=1).round(2)

Unnamed: 0,0,1
const,0.41,0.0
rm,0.94,0.0
smb,1.07,0.0
hmlm,0.52,0.0
wml,0.18,0.0
PMU,0.1,0.06


In [37]:
fit = sm.OLS(a["SMALL HiBM"], sm.add_constant(f[["rm", "smb", "hmlm", "hml", "wml", "PMU"]])).fit()
pd.concat([fit.params, fit.pvalues], axis=1).round(2)

Unnamed: 0,0,1
const,0.47,0.0
rm,0.96,0.0
smb,1.08,0.0
hmlm,0.11,0.01
hml,0.45,0.0
wml,0.02,0.33
PMU,0.1,0.03
