In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from data import Data

## Data Preparation

In [29]:
# Create year, month, day variables
def create_time_vars(data):
    data.loc[:,"Year"] = data.Date.dt.year
    data.loc[:,"Month"] = data.Date.dt.month
    data.loc[:,"Day"] = data.Date.dt.day

# Filter out stocks that have traded for less than 5000 VND or saw a maximum of 1000 trades per day 
def filter_low_price_and_volume(data):
    low_price = np.unique(data[data["Adj_close"] <= 5.0]["Symbol"])
    max_volume = data.groupby("Symbol")["Matched_Volume"].max()
    low_volume = max_volume[max_volume <= 1000].index
    low_price_and_volume = set(low_price).union(set(low_volume))
    return data[~ data['Symbol'].isin( low_price_and_volume ) ]

In [3]:
# Reading in data
all_data = Data()
hose_listings, hnx_listings = all_data.hnx_listings,all_data.hose_listings
hose_trades, hnx_trades = all_data.get_all_records()

# Removing delisted stocks
hose_trades = hose_trades[~hose_trades['Symbol'].isin(hose_listings[hose_listings["Status"] == "Delisted"]["Symbol"])]
hnx_trades = hnx_trades[~hnx_trades['Symbol'].isin(hnx_listings[hnx_listings["Status"] == "Delisted"]["Symbol"])]

In [30]:
# Filtering our records from the last 5 years, create time dummies and remove low priced stocks
hose_trades_16_20 = hose_trades[hose_trades["Date"] > "2015-12-31"].copy(deep = True)
hnx_trades_16_20 = hnx_trades[hnx_trades["Date"] > "2015-12-31"].copy(deep = True)
create_time_vars(hose_trades_16_20)
create_time_vars(hnx_trades_16_20)
hose_trades_16_20 = filter_low_price_and_volume(hose_trades_16_20)
hnx_trades_16_20 = filter_low_price_and_volume(hnx_trades_16_20)

In [31]:
# Inspect the adjusted closing prices and matched volumes for HOSE
hose_trades_16_20[["Adj_close","Matched_Volume"]].describe()

Unnamed: 0,Adj_close,Matched_Volume
count,330030.0,330030.0
mean,33.251655,391227.3
std,31.389158,1290102.0
min,5.2,0.0
25%,14.65,1670.0
50%,23.3,28590.0
75%,38.9,238970.0
max,343.0,128443900.0


In [32]:
# Inspect the adjusted closing prices and matched volumes for HNX
hnx_trades_16_20[["Adj_close","Matched_Volume"]].describe()

Unnamed: 0,Adj_close,Matched_Volume
count,247926.0,247926.0
mean,23.497645,50369.76
std,21.103556,370115.4
min,5.1,0.0
25%,11.6,0.0
50%,16.8,100.0
75%,28.6,5000.0
max,263.3,21213220.0


In [33]:
print("HOSE")
print("Original: ", len(np.unique(hose_trades["Symbol"])))
print("After prep: ",len(np.unique(hose_trades_16_20["Symbol"])))
print("\nHNX")
print("Original: ",len(np.unique(hnx_trades["Symbol"])))
print("After prep: ",len(np.unique(hnx_trades_16_20["Symbol"])))

HOSE
Original:  433
After prep:  305

HNX
Original:  404
After prep:  222


## Calculate Momentum Strategies' Returns

In [55]:
# Get the start of the trading month.
# Because on some days only a few listings are traded, I had to specify that number of listings being traded exceed 20 to get whole-market sessions
def get_trading_month_start(trade_data):
    valid_df = trade_data.groupby(["Year","Month","Date"]).size().reset_index()
    month_starts =  valid_df[valid_df[0] >= 20].groupby(["Year","Month"])["Date"].min().reset_index()
    return month_starts

# Calculate strategy returns for every month
def calculate_strategy_returns(trade_data, month_starts, preform_period, hold_period):
    # Helper function
    def single_symbol_strategy(symbol_trade_data):
        df = symbol_trade_data.sort_values(by = "Date")
        res_df = pd.DataFrame({"Month_start": df["Date"],
                            "Preform_date": df["Date"].shift(preform_period),
                            "Hold_date": df["Date"].shift(- hold_period),
                            "Symbol":  df["Symbol"],
                            "Month_start_price": df["Adj_close"], 
                            "Preform_price": df["Adj_close"].shift( preform_period),
                            "Hold_price": df["Adj_close"].shift( - hold_period)
                             }).dropna()
        pref_rets = (res_df["Month_start_price"]-res_df["Preform_price"])/(res_df["Preform_price"])
        res_df["Preform_returns"] = pref_rets
        return res_df

    # Form dataset with prices at month start, at beginning of preformation period and at the end of holding period to facilitate returns calculations

    def form_strategy_df(trade_data, month_starts):
        df = trade_data[trade_data["Date"].isin(month_starts["Date"])][["Date", "Symbol","Adj_close","Close"]]
        periods = []
        symbols = np.unique(df["Symbol"])
        for symbol in symbols:
            single_stock = single_symbol_strategy(df[df["Symbol"]==symbol])
            periods.append( single_stock  )
        test_df =  pd.concat(periods).sort_values(by=["Month_start","Symbol"]) 
        return  test_df


    test_df = form_strategy_df(trade_data, month_starts)
    returns_df = {"Month_start":[],"Winners":[],"Losers":[],"Winners_minus_Losers":[]}
    
    for month_start in np.unique(test_df['Month_start']):
        df = test_df[test_df['Month_start'] == month_start].copy(deep = True)
        # get winners - top 10th percentile
        winners = df[df["Preform_returns"] >= df["Preform_returns"].quantile(.9)].copy(deep = True)
        # get losers - bottom 10th percentile
        losers = df[df["Preform_returns"] <= df["Preform_returns"].quantile(.1)].copy(deep = True)
        # Adjust the prices to simulate an "Equally Weighted Portfolio"
        winners["Adj_Hold_price"] = winners.Hold_price/winners.Month_start_price
        losers["Adj_Hold_price"] = losers.Hold_price/losers.Month_start_price

        og_winners = len(winners)
        og_losers = len(losers)
        hold_winners = np.sum(winners["Adj_Hold_price"])
        hold_losers = np.sum(losers["Adj_Hold_price"])

        # calculate returns for individual stocks
        winners_returns =  100 * (hold_winners - og_winners)/(og_winners)
        losers_returns = 100 * (og_losers - hold_losers)/(hold_losers)
        wml_returns = 100* (hold_winners + og_losers - (hold_losers + og_winners))/(og_winners + hold_losers)

        # Append to data frame
        returns_df["Month_start"].append(month_start)
        returns_df["Winners"].append(winners_returns)
        returns_df["Losers"].append(losers_returns)
        returns_df["Winners_minus_Losers"].append(wml_returns )
    returns_df = pd.DataFrame(returns_df)
    returns_df["Preform_Period"] = preform_period
    returns_df["Hold_Period"] = hold_period
    return returns_df

# Conduct t-test against mean returns of 0 for different combinations of preformation and holding periods
def calculate_t_test(trade_data):
    pre_hold_combo =[[3,3],
            [3,6],
            [3,9],
            [6,3],
            [6,6],
            [6,9],
            [9,3],
            [9,6],
            [9,9]]
    test_res = {"Preform":[],"Hold":[], "Strategy":[],"Mean Returns":[],"Stdev. Returns":[],"p-value":[],"t-stat": [] }
    strat_rets_all = []
    month_starts = get_trading_month_start(trade_data)
    for combo in pre_hold_combo:
        preform_period = combo[0]
        hold_period = combo[1]
        strat_rets = calculate_strategy_returns(trade_data,  month_starts, preform_period, hold_period)
        strat_rets_all.append(strat_rets)
        for strat in ["Winners","Losers","Winners_minus_Losers"]:
            test_res["Preform"].append(preform_period)
            test_res["Hold"].append(hold_period)
            t_res = stats.ttest_1samp(strat_rets[strat],0.0)
            test_res["Strategy"].append(strat)
            test_res["Mean Returns"].append(np.mean(strat_rets[strat]))
            test_res["Stdev. Returns"].append(np.std(strat_rets[strat], ddof = 1))
            test_res["p-value"].append(t_res.pvalue)
            test_res["t-stat"].append(t_res.statistic)
    test_res = pd.DataFrame(test_res)
    strat_rets_all = pd.concat(strat_rets_all)
    return strat_rets_all, test_res

# Functions to display results
def highlight_signif(s):
    is_signficant = s <= 0.05
    return ['background-color: yellow' if v else '' for v in is_signficant]

def display_test_res(test_res):
    return test_res.sort_values(by = ["Preform","Hold"]).style.apply(highlight_signif, 
                            subset=['p-value'])

### Repeating Vo and Truong's experiment

In [46]:
# Filtering out records from June 2007 to November 2015
hose_trades_07_20 = hose_trades[ hose_trades["Date"] >= pd.to_datetime("2007-06-01")]
hose_trades_07_15 = hose_trades_07_20[ hose_trades_07_20["Date"] <= pd.to_datetime("2015-11-30")].copy(deep = True)

# Create year, month, day variables
create_time_vars(hose_trades_07_15)

# Filter out low priced stocks
hose_trades_07_15 = filter_low_price_and_volume(hose_trades_07_15)

In [47]:
# Calculate returns and test results
hose_strat_rets_07_15, hose_test_res_07_15  =  calculate_t_test(hose_trades_07_15 )
display_test_res(hose_test_res_07_15)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-4.396936,22.997801,0.064106,-1.873266
1,3,3,Losers,2.855746,49.738306,0.575064,0.562554
2,3,3,Winners_minus_Losers,-5.240625,8.479088,0.0,-6.055772
3,3,6,Winners,-2.765422,33.141444,0.42307,-0.804695
4,3,6,Losers,10.035633,62.156029,0.122892,1.557051
5,3,6,Winners_minus_Losers,-4.155764,9.245391,3.7e-05,-4.33478
6,3,9,Winners,-4.595242,39.464185,0.272287,-1.104655
7,3,9,Losers,14.991741,70.29365,0.046044,2.023286
8,3,9,Winners_minus_Losers,-4.911746,10.969651,5.3e-05,-4.247803
9,6,3,Winners,-1.868317,24.128474,0.457132,-0.746728


### Using data from 2016 - 2020

In [56]:
hnx_strat_rets_16_20, hnx_test_res_16_20  =  calculate_t_test(hnx_trades_16_20)
hpse_strat_rets_16_20, hose_test_res_16_20  =  calculate_t_test(hose_trades_16_20)

In [57]:
display_test_res(hnx_test_res_16_20)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-6.141798,11.120229,0.000163,-4.058623
1,3,3,Losers,-9.968067,9.185202,0.0,-7.974787
2,3,3,Winners_minus_Losers,-8.258349,8.879042,0.0,-6.834772
3,3,6,Winners,-2.961057,11.711732,0.07701,-1.805555
4,3,6,Losers,-9.145829,10.580254,0.0,-6.173224
5,3,6,Winners_minus_Losers,-6.43103,9.131765,7e-06,-5.029339
6,3,9,Winners,-3.958468,11.133125,0.017481,-2.463376
7,3,9,Losers,-12.896333,11.05082,0.0,-8.085229
8,3,9,Winners_minus_Losers,-8.978829,9.524571,0.0,-6.531228
9,6,3,Winners,-3.53609,7.727478,0.001963,-3.267914


In [58]:
display_test_res(hose_test_res_16_20)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-4.577631,11.863766,0.006466,-2.835405
1,3,3,Losers,-7.16875,11.671134,3.6e-05,-4.513644
2,3,3,Winners_minus_Losers,-6.342404,7.933903,0.0,-5.874405
3,3,6,Winners,-1.885026,13.683549,0.329954,-0.983793
4,3,6,Losers,-6.349544,12.912481,0.000954,-3.511704
5,3,6,Winners_minus_Losers,-4.776264,8.304239,0.000148,-4.107462
6,3,9,Winners,-1.790778,16.233901,0.448534,-0.764257
7,3,9,Losers,-7.81517,15.322691,0.000932,-3.533654
8,3,9,Winners_minus_Losers,-5.882847,8.699842,2.4e-05,-4.684861
9,6,3,Winners,-2.798779,11.032182,0.076037,-1.811725


### Using data from 2016 - 2019

In [43]:
_, hnx_test_res_16_19  =  calculate_t_test(hnx_trades_16_20[hnx_trades_16_20["Date"] <= "2019-12-31"])
_, hose_test_res_16_19  =  calculate_t_test(hose_trades_16_20[hose_trades_16_20["Date"] <= "2019-12-31"])

In [44]:
display_test_res(hnx_test_res_16_19)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-6.891509,11.715516,0.000455,-3.812216
1,3,3,Losers,-9.858005,9.078404,0.0,-7.03727
2,3,3,Winners_minus_Losers,-8.503251,9.369124,1e-06,-5.881806
3,3,6,Winners,-2.178964,12.130315,0.268992,-1.121787
4,3,6,Losers,-8.534255,10.105886,6e-06,-5.273798
5,3,6,Winners_minus_Losers,-5.67119,9.206803,0.000444,-3.846783
6,3,9,Winners,-3.060658,11.999339,0.134904,-1.530413
7,3,9,Losers,-12.211562,11.50075,0.0,-6.370834
8,3,9,Winners_minus_Losers,-8.184108,10.022211,2.2e-05,-4.899582
9,6,3,Winners,-3.020359,7.371695,0.014613,-2.558725


In [45]:
display_test_res(hose_test_res_16_19)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-4.612121,12.359404,0.020116,-2.418398
1,3,3,Losers,-8.565401,10.308074,3e-06,-5.385113
2,3,3,Winners_minus_Losers,-6.999367,8.010685,1e-06,-5.662573
3,3,6,Winners,-0.666125,14.150338,0.770371,-0.293983
4,3,6,Losers,-6.832466,11.658781,0.000764,-3.659794
5,3,6,Winners_minus_Losers,-4.388383,7.600695,0.000892,-3.605649
6,3,9,Winners,0.550679,16.610557,0.843481,0.198914
7,3,9,Losers,-9.717857,14.289579,0.000247,-4.080396
8,3,9,Winners_minus_Losers,-5.765635,8.080611,0.000137,-4.281088
9,6,3,Winners,-2.231545,11.447011,0.230945,-1.217435
