## Initialize and Import Libraries

In [None]:
################################################
# Jegadeesh & Titman (1993) Momentum Portfolio #
# May 2018                                     #  
# Qingyi (Freda) Song Drechsler                #
################################################

import time, sys, os, pprint
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pandas.tseries.offsets import *
from scipy import stats
import warnings
from google.colab import drive
from sklearn import metrics
from IPython.display import clear_output

# Suppress every warning for the rest of the session so cell output stays
# readable. NOTE(review): this also hides pandas/numpy warnings that may
# point at real data problems.
warnings.filterwarnings("ignore")
# Mount Google Drive under /content/gdrive; the CSV paths used later in this
# notebook live below that mount point.
drive.mount('/content/gdrive')

In [None]:
def update_progress(progress):
    """Redraw a 20-character text progress bar in the cell output.

    Args:
        progress: Completed fraction. Ints are converted to float, any value
            that is neither int nor float is treated as 0, and the result is
            clamped to the range [0, 1].

    The previous cell output is cleared (waiting for the new output first) and
    a single line such as ``Progress: [#####---------------] 25.0%`` is
    printed.
    """
    width = 20

    # Normalise the input to a number we can scale the bar with.
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Clamp to [0, 1]; the two conditions are mutually exclusive.
    if fraction < 0:
        fraction = 0
    elif fraction >= 1:
        fraction = 1

    filled = int(round(width * fraction))
    bar = "#" * filled + "-" * (width - filled)

    clear_output(wait = True)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))

## Load Data 

In [None]:
# --- Monthly return extract -------------------------------------------------
chass_monthly_file_location = os.path.join("/content/gdrive/My Drive/1977monthly.csv")
data_extract_monthly = pd.read_csv(chass_monthly_file_location)
data_extract_monthly['datadate'] = pd.to_datetime(data_extract_monthly['datadate'])

# --- Index constituent history ----------------------------------------------
tsx_constituents_file_location = os.path.join("/content/gdrive/My Drive/constituents_tsx.csv")
tsx_constituents = pd.read_csv(tsx_constituents_file_location)

# Membership start ('from') and end ('thru') dates are stored as month/day/year text.
for _date_col in ('from', 'thru'):
    tsx_constituents[_date_col] = pd.to_datetime(tsx_constituents[_date_col], format='%m/%d/%Y')

# Keep only the rows that describe the S&P/TSX Composite Index.
_is_composite = tsx_constituents['conm'] == 'S&P/TSX Composite Index'
tsx_constituents = tsx_constituents[_is_composite]

In [None]:
# Quick sanity check on the size of the monthly extract:
# distinct tickers and distinct month-end dates.
_distinct_tickers = set(data_extract_monthly['ticker'])
_distinct_dates = set(data_extract_monthly['datadate'])
print('\nnumber of different stocks: ', len(_distinct_tickers))
print('\ntotal number of months: ', len(_distinct_dates))

### Map S&P TSX Composite Constituents and Create Time Series


In [None]:
# Rows of the monthly extract whose CUSIP appears in the constituent list ...
_cusip_hit = data_extract_monthly['cusip'].isin(tsx_constituents['co_cusip'])
temp1 = data_extract_monthly.loc[_cusip_hit]

# ... and rows whose ticker appears in the constituent list.
_ticker_hit = data_extract_monthly['ticker'].isin(tsx_constituents['co_tic'])
temp2 = data_extract_monthly.loc[_ticker_hit]

# Stack both selections and drop rows that matched on both keys.
# reset_index() keeps the original row labels in an 'index' column.
temp3 = pd.concat([temp1, temp2]).drop_duplicates().reset_index()

# Wide date x ticker table of monthly returns; only its column labels are kept.
test = temp3.pivot_table(values='prc_mret', index='datadate', columns='ticker')
stock_list_monthly = test.columns

In [None]:
# Display how many ticker columns the date x ticker return table has.
len(stock_list_monthly)

In [None]:
def is_member(temp3):
    """Return 1 if a monthly row falls inside an index-membership spell, else 0.

    Written for row-wise use through ``DataFrame.apply(is_member, axis=1)``.

    Membership spells are read from the module-level ``tsx_constituents``
    table. Rows are matched on the CUSIP (``co_cusip``) first; the ticker
    (``co_tic``) is used only when no CUSIP row exists. A missing ``thru``
    date is treated as a spell that is still open today.

    Args:
        temp3: One row (Series-like) exposing ``ticker``, ``cusip`` and
            ``datadate``.

    Returns:
        int: 1 when ``datadate`` lies inside at least one ``[from, thru]``
        spell (both ends inclusive), otherwise 0.
    """
    by_cusip = tsx_constituents[tsx_constituents['co_cusip'] == temp3['cusip']]
    if len(by_cusip) > 0:
        spells = by_cusip
    else:
        spells = tsx_constituents[tsx_constituents['co_tic'] == temp3['ticker']]

    spell_start = spells['from']
    spell_end = spells['thru'].fillna(pd.to_datetime('today'))
    row_date = temp3['datadate']

    # Use any() rather than a running sum: with overlapping spells a sum
    # yields 2 or more, and the later `is_member == 1` filter would then drop
    # rows that are in fact index members.
    inside_spell = (spell_start <= row_date) & (row_date <= spell_end)
    return int(inside_spell.any())

In [None]:
# Evaluate the membership flag once per row (row-wise apply, so this is slow
# on large extracts).
temp3['is_member'] = temp3.apply(is_member, axis='columns')

In [None]:
# Persist the flagged constituent rows to Drive so the row-wise membership
# pass does not have to be recomputed.
result_file_name = "/content/gdrive/My Drive/consituents_with_members.csv"
temp3.to_csv(path_or_buf=result_file_name)

## Portfolio Creation

In [None]:
#######################################################
# Create Momentum Portfolio                           #
# Measures Based on Past (J) Month Compounded Returns #
#######################################################

# J is used below as the rolling-window length for the formation-period
# cumulative return; K is the number of month-ends added to the formation
# month to obtain the end of the holding window.
J = 12 # Formation period length: J can be between 3 and 12 months
K = 12 # Holding period length: K can be between 3 and 12 months

#### Data Extraction for Returns

In [None]:
# Return history per ticker, ordered by ticker then date, indexed by date.
_return_cols = ['ticker','datadate','prc_mret','is_member']
_sort_keys = ['ticker','datadate','prc_mret']
temp = temp3[_return_cols].sort_values(by=_sort_keys).set_index('datadate')

# Replace missing return with 0, then drop rows still missing any other field
temp = temp.assign(prc_mret=temp['prc_mret'].fillna(0)).dropna(axis = 0)

In [None]:
# Calculate rolling cumulative return
# by summing log(1+ret) over the formation period
temp['logret']=np.log(1+temp['prc_mret'])
# Per-ticker rolling sum over J rows; windows with fewer than J observations
# yield NaN (min_periods=J).
# NOTE(review): the window counts rows, not calendar months - presumably each
# ticker has one row per month without gaps; confirm against the data.
umd = temp.groupby(['ticker'])['logret'].rolling(J, min_periods=J).sum()
umd = umd.reset_index()
# Convert the summed log return back to a simple compounded return.
umd['cumret']=np.exp(umd['logret'])-1
# Both frames carry a fresh 0..n-1 index here, so this assignment pairs rows
# by position. NOTE(review): it is only correct while the groupby output keeps
# the same ticker/date row order as `temp` (which was sorted by ticker and
# date when it was built) - re-check if that sort ever changes.
umd['is_member'] = temp.reset_index()['is_member']
# Drop the rows whose formation window was incomplete.
umd = umd.dropna(axis=0, subset=['cumret'])
# Keep only the months flagged with is_member equal to 1.
umd = umd[umd['is_member'] == 1].copy(deep=True)
# reset_index() without drop=True keeps the previous row labels in an 'index' column.
umd = umd.reset_index()
# Spot check: display the rows for one ticker.
umd[umd['ticker'] == 'BCE']

#### Portfolio Formation

In [None]:
########################################
# Formation of 10 Momentum Portfolios  #
########################################

def _decile_rank(cumrets):
    """Zero-based quantile bucket of each cumulative return within one date."""
    return pd.qcut(cumrets, 10, labels=False, duplicates='drop')

# For each date: assign ranking 1-10 based on cumret
# 1=lowest 10=highest cumret
# NOTE(review): with duplicates='drop' a date with tied bin edges gets fewer
# than 10 buckets, so its top rank can be below 10 - confirm this is intended.
umd['momr'] = umd.groupby('datadate')['cumret'].transform(_decile_rank)

# Missing values anywhere in the frame become 0, so an unranked row ends up
# in bucket 1 after the shift below.
umd = umd.fillna(0)
umd['momr'] = umd['momr'].astype(int) + 1

In [None]:
# Roll the formation date to its month-end (a date already on a month-end is
# left unchanged), then derive the holding window from it:
#   hdate1 = first day of the following month
#   hdate2 = K month-ends after the formation month-end
_formation_month_end = umd['datadate'] + MonthEnd(0)
umd = umd.assign(
    form_date=umd['datadate'],
    hdate1=_formation_month_end + MonthBegin(1),
    hdate2=_formation_month_end + MonthEnd(K),
)[['ticker','form_date','momr','hdate1','hdate2']]

In [None]:
# Join rank and return data together.
# Note: the join is on ticker only, so every return row is paired with every
# formation date of that ticker before filtering - memory hungry and slow.
_tmp_ret = data_extract_monthly.loc[:, ['ticker','datadate','prc_mret']]
port = _tmp_ret.merge(umd, how='inner', on='ticker')

# Keep only the return months that fall inside the holding window.
_on_or_after_start = port['datadate'] >= port['hdate1']
_on_or_before_end = port['datadate'] <= port['hdate2']
port = port.loc[_on_or_after_start & _on_or_before_end]

# Average return per (holding month, rank, formation date) cohort.
_cohort_keys = ['datadate','momr','form_date']
umd2 = port.sort_values(by=_cohort_keys + ['ticker']).drop_duplicates()
umd3 = umd2.groupby(_cohort_keys)['prc_mret'].mean().reset_index()

In [None]:
# Skip the training years of other models: keep holding months from
# 2001-02-01 onward (a fixed cut-off rather than "first year in the data + 1").
_after_cutoff = umd3['datadate'] >= '2001-02-01'
umd3 = umd3.loc[_after_cutoff].sort_values(by=['datadate','momr'])

In [None]:
# Create one return series per MOM group every month:
# the mean and the dispersion (std) across the overlapping formation cohorts.
_by_month_and_rank = umd3.groupby(['datadate','momr'])['prc_mret']
ewret = _by_month_and_rank.mean().reset_index()
ewstd = _by_month_and_rank.std().reset_index()
ewret = ewret.rename(columns={'prc_mret':'ewret'})
# The aggregated column is still called 'prc_mret' at this point. Renaming a
# label that does not exist is silently ignored by pandas, so the source
# column name must be exact for the std column to become 'ewretstd'.
ewstd = ewstd.rename(columns={'prc_mret':'ewretstd'})
ewretdat = pd.merge(ewret, ewstd, on=['datadate','momr'], how='inner')
ewretdat = ewretdat.sort_values(by=['momr'])

# portfolio summary
ewretdat.groupby(['momr'])['ewret'].describe()[['count','mean', 'std']]

In [None]:
#################################
# Long-Short Portfolio Returns  #
#################################

# One row per month, one column per momentum rank ('port1' .. 'port10').
ewretdat2 = ewretdat.pivot(index='datadate', columns='momr', values='ewret').add_prefix('port')

# The extreme ranks are the two legs of the strategy.
ewretdat2 = ewretdat2.rename(columns={'port1':'losers', 'port10':'winners'})
ewretdat2['long_short'] = ewretdat2['winners'] - ewretdat2['losers']

# Compute Long-Short Portfolio Cumulative Returns
# NOTE: ewretdat3 is the same object as ewretdat2 (no copy is made), so the
# columns added below also appear on ewretdat2.
ewretdat3 = ewretdat2

# Gross monthly returns (1 + r) for each leg ...
for _leg, _gross in (('losers', '1+losers'), ('winners', '1+winners'), ('long_short', '1+ls')):
    ewretdat3[_gross] = 1 + ewretdat3[_leg]

# ... compounded through time and converted back to a net cumulative return.
for _gross, _cumulative in (('1+winners', 'cumret_winners'),
                            ('1+losers', 'cumret_losers'),
                            ('1+ls', 'cumret_long_short')):
    ewretdat3[_cumulative] = ewretdat3[_gross].cumprod() - 1

ewretdat3.head(25)

In [None]:
from matplotlib import pyplot

# Cumulative return of the loser and winner legs over time.
_legs_to_plot = ['cumret_losers','cumret_winners']
series_to_plot = ewretdat3[_legs_to_plot]
_plot_frame = series_to_plot.reset_index()
_plot_frame.plot(x='datadate', y=_legs_to_plot)
pyplot.show()

In [None]:
from matplotlib import pyplot

# Cumulative return of the winners-minus-losers portfolio over time.
series_to_plot = ewretdat3[['cumret_long_short']]
_plot_frame = series_to_plot.reset_index()
_plot_frame.plot(x='datadate', y=['cumret_long_short'])
pyplot.show()

In [None]:
#################################
# Portfolio Summary Statistics  #
#################################

_legs = ['winners', 'losers', 'long_short']

# Mean and standard deviation of the monthly return of each leg.
# reset_index() turns the leg labels into a 'momr' column (the name carried
# by the pivoted column index), which is the merge key below.
mom_mean = ewretdat3[_legs].mean().to_frame().rename(columns={0:'mean'}).reset_index()
mom_std = ewretdat3[_legs].std().to_frame().rename(columns={0:'std'}).reset_index()


def _ttest_row(leg):
    """One-row frame holding the one-sample t-test of `leg` against zero."""
    row = pd.Series(stats.ttest_1samp(ewretdat3[leg], 0.0)).to_frame().T
    row['momr'] = leg
    return row


# T-Value and P-Value
t_losers = _ttest_row('losers')
t_winners = _ttest_row('winners')
t_long_short = _ttest_row('long_short')

t_output = pd.concat([t_winners, t_losers, t_long_short]).rename(columns={0:'t-stat', 1:'p-value'})

# Combine mean, t and p
mom_output = mom_mean.merge(t_output, on='momr', how='inner')
mom_output = mom_output.merge(mom_std, on='momr', how='inner')

# Annualise the monthly mean/std ratio with sqrt(12).
# NOTE: the column label 'sharpe_ration' is spelled as in the original output.
sharpe_ratio = (mom_output['mean'] / mom_output['std']) * np.sqrt(12)
mom_output['sharpe_ration'] = sharpe_ratio

# Display only: the renamed frame is shown, mom_output itself keeps 'momr'.
mom_output.rename(columns={"momr": "portfolio"})