## Initialize and Import Libraries

In [None]:
import time, sys, os, pprint
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas.tseries.offsets import *
from scipy import stats
import warnings
from google.colab import drive
from sklearn import metrics
from IPython.display import clear_output

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
# Mount Google Drive at /content/gdrive; the data files read below live under it.
drive.mount('/content/gdrive')

In [None]:
def update_progress(progress):
    """Redraw a one-line text progress bar in the cell output.

    ``progress`` is the fraction of work done. An int is converted to
    float, any other non-float value is treated as 0, and the value is
    then limited to the range 0..1 before a 20-character bar is printed.
    """
    bar_length = 20

    # Normalise the input: int -> float, float kept, anything else -> 0.
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Limit the fraction to [0, 1].
    if fraction < 0:
        fraction = 0
    elif fraction >= 1:
        fraction = 1

    filled = int(round(bar_length * fraction))
    bar = "#" * filled + "-" * (bar_length - filled)

    # Clear the previously printed bar before printing the new one.
    clear_output(wait=True)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))

## Load Data 

In [None]:
# Monthly data file on the mounted Drive.
chass_monthly_file_location = "/content/gdrive/My Drive/1977monthly.csv"

# Read the file and convert the 'datadate' column to datetimes.
data_extract_monthly = pd.read_csv(chass_monthly_file_location).assign(
    datadate=lambda frame: pd.to_datetime(frame['datadate'])
)

In [None]:
# Report how many distinct tickers and distinct dates the monthly file holds.
for label, column in (
    ('\nnumber of different stocks: ', 'ticker'),
    ('\ntotal number of months: ', 'datadate'),
):
    print(label, len(set(data_extract_monthly[column])))

## Portfolio Creation

In [None]:
# ----------------------------------------------------------------------
# Momentum portfolio parameters
#   J - formation period length in months (3 to 12)
#   K - holding period length in months (3 to 12)
# ----------------------------------------------------------------------

J = 12  # formation period length, in months
K = 12  # holding period length, in months

In [None]:
# Empty accumulator; the result-file loading loop below appends its melted rows to it.
results_extract = pd.DataFrame()

In [None]:
# Read the per-run result files "<i>results.csv" for i in [j, iterations),
# reshape each one from wide (one column per stock plus "<stock>_up" /
# "<stock>_dn" companion columns) to long format, and append everything to
# `results_extract`.
#
# Long-format columns: datadate, up, dn, variable (the stock's column name)
# and value (the content of the stock's own column).
k = 0             # files processed so far; drives the progress bar
i = 0
iterations = 18   # exclusive upper bound of the file index
j = 0             # index of the first file to read
time_frame = 12
offset = 12       # number of leading rows dropped from every file

# Substrings that mark a column as something other than a plain stock column.
_non_stock_markers = ('_ismember', '_out', '_dn', '_up', 'mean', 'std', 'median')

# Frames are collected in lists and concatenated once; calling pd.concat
# inside the loops would re-copy the accumulated data on every iteration.
extracted_frames = []
for i in range(j, iterations):
    csv_name = "/content/gdrive/My Drive/temp_out_logit1/" + str(i) + "results.csv"
    df = pd.read_csv(csv_name)
    df = df[offset:]

    id_vars = [x for x in df.columns
               if not any(marker in x for marker in _non_stock_markers)]
    value_vars = [x for x in df.columns if ('_dn' in x) or ('_up' in x)]
    # Column position lookup: O(1) membership test that still lets the
    # companion columns be emitted in the file's own column order.
    column_position = {name: pos for pos, name in enumerate(value_vars)}

    melted_frames = []
    for stock in id_vars:
        companions = sorted(
            (name for name in (stock + '_up', stock + '_dn') if name in column_position),
            key=column_position.get,
        )
        # Columns without any up/dn companion (e.g. 'datadate' itself) are skipped.
        if not companions:
            continue
        temp = pd.melt(df, id_vars=['datadate'] + companions, value_vars=[stock])
        temp.rename(columns={stock + '_up': 'up', stock + '_dn': 'dn'}, inplace=True)
        melted_frames.append(temp)

    result = pd.concat(melted_frames) if melted_frames else pd.DataFrame()
    if not result.empty:
        extracted_frames.append(result)

    k = k + 1
    # Progress is measured against the number of files actually read, so the
    # bar still reaches 100% when j is not 0.
    update_progress(k / (iterations - j))

if extracted_frames:
    if results_extract.empty:
        results_extract = pd.concat(extracted_frames)
    else:
        results_extract = pd.concat([results_extract] + extracted_frames)

In [None]:
# Work on an independent (deep) copy so results_extract itself stays untouched.
final_df = results_extract.copy()

In [None]:
def _dn_bin(dn_values):
    """Bin one date's 'dn' values into up to 10 quantile bins (0-based codes)."""
    # duplicates='drop' merges repeated bin edges, so a date can end up
    # with fewer than 10 bins.
    return pd.qcut(dn_values, 10, labels=False, duplicates='drop')


# Rank every row against the other rows that share its datadate.
final_df['momr'] = final_df.groupby('datadate')['dn'].transform(_dn_bin)

# Replace every NaN in every column with 0.
# NOTE(review): this zero-fills the data columns as well as 'momr', so rows
# that received no rank get bin 0 - confirm that this is intended.
final_df = final_df.fillna(0)

In [None]:
# Turn the 0-based bin codes into integer ranks that start at 1.
final_df['momr'] = final_df['momr'].astype(int) + 1

In [None]:
# Derive the holding window of every ranked row from its formation date.
final_df['datadate'] = pd.to_datetime(final_df['datadate'])

# End of the formation month (MonthEnd(0) leaves month-end dates unchanged).
formation_month_end = final_df['datadate'] + MonthEnd(0)

final_df = (
    final_df.assign(
        form_date=final_df['datadate'],
        # Holding starts on the first day after the formation month ...
        hdate1=formation_month_end + MonthBegin(1),
        # ... and ends K month-ends later.
        hdate2=formation_month_end + MonthEnd(K),
    )[['variable', 'form_date', 'momr', 'hdate1', 'hdate2']]
    .rename(columns={'variable': 'ticker'})
)

In [None]:
# Attach every monthly return of a ticker to every rank row of that ticker,
# then keep only the months that fall inside the rank's holding window.
# The join is on ticker alone, so the intermediate frame is large and this
# cell is slow and memory-hungry.
_tmp_ret = data_extract_monthly[['ticker', 'datadate', 'prc_mret']]
port = _tmp_ret.merge(final_df, how='inner', on=['ticker'])

in_holding_window = (port['datadate'] >= port['hdate1']) & (port['datadate'] <= port['hdate2'])
port = port[in_holding_window]

# Drop exact duplicate rows, then average the returns of each
# (month, rank, formation date) cohort.
final_df2 = port.sort_values(by=['datadate', 'momr', 'form_date', 'ticker']).drop_duplicates()
cohort_returns = final_df2.groupby(['datadate', 'momr', 'form_date'])['prc_mret']
final_df3 = cohort_returns.mean().reset_index()

In [None]:
# Create one return series per MOM group every month: the mean and the
# standard deviation of the cohort returns in final_df3, taken across
# formation dates.
monthly_groups = final_df3.groupby(['datadate', 'momr'])['prc_mret']
ewret = monthly_groups.mean().reset_index().rename(columns={'prc_mret': 'ewret'})
# After reset_index the aggregated column is still named 'prc_mret'; rename
# that column (renaming a name that does not exist is silently a no-op).
ewstd = monthly_groups.std().reset_index().rename(columns={'prc_mret': 'ewretstd'})

# One row per (datadate, momr) with columns 'ewret' and 'ewretstd'.
ewretdat = pd.merge(ewret, ewstd, on=['datadate', 'momr'], how='inner')
ewretdat = ewretdat.sort_values(by=['momr'])

# portfolio summary
ewretdat.groupby(['momr'])['ewret'].describe()[['count', 'mean', 'std']]

In [None]:
# ----------------------------------------------------------------------
# Long-short portfolio returns
# ----------------------------------------------------------------------

# One row per date, one column per rank: 'port<rank>', with rank 1 renamed
# to 'losers' and rank 10 renamed to 'winners'.
ewretdat2 = (
    ewretdat.pivot(index='datadate', columns='momr', values='ewret')
    .add_prefix('port')
    .rename(columns={'port1': 'losers', 'port10': 'winners'})
)
ewretdat2['long_short'] = ewretdat2['winners'] - ewretdat2['losers']

# Cumulative returns. ewretdat3 is the same object as ewretdat2, so the
# columns added below show up under both names.
ewretdat3 = ewretdat2

# Gross (1 + r) monthly returns.
for gross_col, return_col in (
    ('1+losers', 'losers'),
    ('1+winners', 'winners'),
    ('1+ls', 'long_short'),
):
    ewretdat3[gross_col] = 1 + ewretdat3[return_col]

# Running product of the gross returns, minus 1.
for cumulative_col, gross_col in (
    ('cumret_winners', '1+winners'),
    ('cumret_losers', '1+losers'),
    ('cumret_long_short', '1+ls'),
):
    ewretdat3[cumulative_col] = ewretdat3[gross_col].cumprod() - 1

ewretdat3.head(25)

In [None]:
from matplotlib import pyplot

# Cumulative return of the loser and winner portfolios over time.
series_to_plot = ewretdat3[['cumret_losers', 'cumret_winners']]
plot_frame = series_to_plot.reset_index()
plot_frame.plot(x='datadate', y=['cumret_losers', 'cumret_winners'])
pyplot.show()

In [None]:
from matplotlib import pyplot

# Cumulative return of the long-short (winners minus losers) portfolio over time.
series_to_plot = ewretdat3[['cumret_long_short']]
plot_frame = series_to_plot.reset_index()
plot_frame.plot(x='datadate', y=['cumret_long_short'])
pyplot.show()

In [None]:
#################################
# Portfolio Summary Statistics  #
#################################

portfolio_names = ['winners', 'losers', 'long_short']


def _ttest_row(name):
    """Return a one-row frame with the t-test of column `name` against a zero mean.

    Columns 0 and 1 hold the t statistic and the p-value; 'momr' holds `name`.
    """
    # Drop missing months first: mean() and std() below skip NaN, whereas
    # ttest_1samp would turn a single NaN into a NaN statistic and p-value.
    returns = ewretdat3[name].dropna()
    row = pd.Series(stats.ttest_1samp(returns, 0.0)).to_frame().T
    row['momr'] = name
    return row


# Mean and standard deviation of the monthly returns.
mom_mean = ewretdat3[portfolio_names].mean().to_frame()
mom_std = ewretdat3[portfolio_names].std().to_frame()
mom_mean = mom_mean.rename(columns={0: 'mean'}).reset_index()
mom_std = mom_std.rename(columns={0: 'std'}).reset_index()

# T-Value and P-Value
t_losers = _ttest_row('losers')
t_winners = _ttest_row('winners')
t_long_short = _ttest_row('long_short')

t_output = pd.concat([t_winners, t_losers, t_long_short]).rename(columns={0: 't-stat', 1: 'p-value'})

# Combine mean, t and p
mom_output = pd.merge(mom_mean, t_output, on=['momr'], how='inner')
mom_output = pd.merge(mom_output, mom_std, on=['momr'], how='inner')
# mean / std scaled by sqrt(12); no risk-free rate is subtracted.
sharpe_ratio = ((mom_output['mean']) / mom_output['std']) * np.sqrt(12)
mom_output['sharpe_ratio'] = sharpe_ratio
# Displayed with 'momr' shown as 'portfolio'; mom_output itself keeps the 'momr' column.
mom_output.rename(columns={"momr": "portfolio"})