In [50]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from statsmodels.sandbox.regression import gmm
import shutil
from pathlib import Path
os.getcwd()

'/Users/rubenexojo/Library/Mobile Documents/com~apple~CloudDocs/phd/uni/courses/finance/finance-3/hw/code'

In [46]:
def find_file(name):
    return "data/" + name

def df_essential(df):
    drop_cols=["LeadReal", "MidReal", "LagReal"]
    for col in drop_cols:
        df = df.drop(col, 1)
    return df

def df_include_constant_columns(df):
    df["const"] = [1]*len(df)
    cols = df.columns.tolist()
    cols = [cols[0]] + ["const"] + cols[1:-1]
    return df[cols]

def fix_date(date):
    year    = [date[i] for i in range(4)]
    month   = [date[i] for i in range(4,6)]
    day     = [date[i] for i in range(6,8)]
    year    = ''.join(year)
    month   = ''.join(month)
    day     = ''.join(day)
    date = [day, month, year]
    date = '/'.join(date)
    return date

def df_fix_date(df):
    date_col = df['dt']
    for i in range(len(df)):
        date_col[i] = fix_date(str(date_col[i]))
    df['dt'] = date_col
    return df
    
def df_annualise(df):
    cols=["LeadR", "MidR", "LagR", "Lead", "Mid", "Lag", "LL", "LLStrong"]
    for col in cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')*12*100
    return df

def df_excess_returns(df,  annualised = True):
    all_cols =  df.columns.tolist()
    rf = pd.to_numeric(df["rf"], errors='coerce')*12
    cols=["Lead", "Mid", "Lag"]
    move = 1
    for col in cols:
        new_col=f"ex_{col}"
        if annualised:
            loc = all_cols.index(col) + move
            df.insert(loc, new_col, pd.to_numeric(df[col], errors='coerce') - rf)
        else:
            loc = all_cols.index(col) + move
            df.insert(loc, new_col, pd.to_numeric(df[col], errors='coerce')*12*100 - rf)
        move += 1
    cols=["LL", "LLStrong"]
    for col in cols:
        new_col=f"ex_{col}"
        if annualised:
            loc = all_cols.index(col) + move
            df.insert(loc, new_col, pd.to_numeric(df[col], errors='coerce'))
        else:
            df.insert(loc, new_col, pd.to_numeric(df[col], errors='coerce'))*12*100
        move += 1
    return df

def df_merge(df1, df2):
    df = df1.merge(df2, how='inner', on="dtt", left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_left', '_right'), copy=True, indicator=True, validate="one_to_one")
    df.drop('dtt', inplace=True, axis=1)
    df.drop('_merge', inplace=True, axis=1)
    return df

def take_factor_sheet(): 
    master_file = find_file("Data_master.xlsx")
    rf_file     = find_file("FF4_monthlyKF.csv")
    # read xlsx file
    df     = pd.read_excel(master_file, sheet_name="T1_ptfs", names=["dt", "LeadR", "MidR", "LagR", "Lead", "Mid", "Lag", "LL", "LLStrong", "mktrf", "smb", "hml"], dtype=str)
    df_rf  = pd.read_csv(rf_file, sep=";", names=["dtt", "v2", "v3", "v4", "rf"], dtype=str)
    df_rf  = df_rf[["dtt", "rf"]]
    df2    = df_rf.copy()
    # df = df_essential(df)
    # include a column of ones to calculate the average return
    df = df_include_constant_columns(df)
    # annualise data (we have it daily)
    df = df_annualise(df)
    # merge
    df["dtt"]=df["dt"].str.slice(0, 6)
    df1= df.copy()
    df = df_merge(df1, df2)
    # include columns of excess returns
    df = df_excess_returns(df)
    df.to_excel('csv/mine.xlsx')
    return df

def take_factors(df):
    return df.keys().tolist()

def factors_order(factors):
    flags   = [k for k in range(len(factors))]
    zip_f   = {}
    for flag in flags:
        zip_f[factors[flag]] = flag
    return zip_f

def LL_portfolio_sorting(F, zip_f, excess = False):
    if excess:
        return [F[zip_f['ex_Lead']], F[zip_f['ex_Mid']], F[zip_f['ex_Lag']], F[zip_f['LL']], F[zip_f['LLStrong']]]
    else:
        return [F[zip_f['LeadR']], F[zip_f['MidR']], F[zip_f['LagR']], F[zip_f['LL']], F[zip_f['LLStrong']]]

def tex_file(table, title):
    return "tex/" + table + "_" + title + ".txt"
    
def create_txt(table, title, text):
    file    = Path(tex_file(table, title))
    file.write_text(f"{text}\n\n")

def reg(Y, X, lags, table, title, df):
    regs=[None]*len(Y)
    for i, y in enumerate(Y):
        y   = df[y]
        reg = sm.OLS(y.astype(float), X.astype(float)).fit(cov_type='HAC', cov_kwds={'maxlags':lags})
        regs[i]=reg  
    new_Y=[y.replace("ex_", "") for y in Y]

    sum = summary_col(results=regs, float_format='%0.2f', model_names=new_Y, stars=True, info_dict=None,  drop_omitted=True)
    text    = sum.as_latex()
    create_txt(table, title, text)

In [49]:
df      = take_factor_sheet('T1_ptfs')
F       = take_factors(df)
zip_f   = factors_order(F)
print(zip_f)
lags    = 24
table   = "T1"
# AVERAGE RETURN 
Y       = LL_portfolio_sorting(F, zip_f)
row     = "average_return"
X       = df["const"]
reg(Y, X, lags, table, row, df)
# # CAPM 
Y       = LL_portfolio_sorting(F, zip_f, True)
row     = "capm"
X       = df[["const", "mktrf"]]
reg(Y, X, lags, table, row, df)
# # FFM
# Y       = LL_portfolio_sorting(F, zip_f, True)
row     = "ffm"
X       = df[["const", "mktrf", "smb", "hml"]]
reg(Y, X, lags, table, row, df)

{'dt': 0, 'const': 1, 'LeadR': 2, 'MidR': 3, 'LagR': 4, 'Lead': 5, 'ex_Lead': 6, 'Mid': 7, 'ex_Mid': 8, 'Lag': 9, 'ex_Lag': 10, 'LL': 11, 'ex_LL': 12, 'LLStrong': 13, 'ex_LLStrong': 14, 'mktrf': 15, 'smb': 16, 'hml': 17, 'rf': 18}


In [72]:
N = 30
listN = [f"{n}" for n in range(1, N+1)]
df = pd.read_csv("data/30_industry_pfs.csv", sep=";", names=listN, dtype=str)
T = df.index
R = {}
for n in listN:
    Rn = []
    PO = df[n] 
    print(PO)
    for t in range(T):
        Rn_t = PO[t+1] / PO[t] 
        Rn.append(Rn_t)
    R[n] = Rn
R


Index(['192607', '192608', '192609', '192610', '192611', '192612', '192701',
       '192702', '192703', '192704',
       ...
       '201310', '201311', '201312', '201401', '201402', '201403', '201404',
       '201405', '201406', '201407'],
      dtype='object', length=1057)
192607     0.56
192608     2.59
192609     1.16
192610    -3.06
192611     6.35
          ...  
201403     2.75
201404     1.09
201405     4.08
201406     1.59
201407    -5.84
Name: 1, Length: 1057, dtype: object


TypeError: unsupported operand type(s) for /: 'str' and 'str'