In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from data import Data

## Data Preparation

In [None]:
# Reading in data
all_data = Data()
hose_listings, hnx_listings = all_data.hnx_listings,all_data.hose_listings
hose_trades, hnx_trades = all_data.get_all_records()

# Removing delisted stocks
hose_trades = hose_trades[~hose_trades['Symbol'].isin(hose_listings[hose_listings["Status"] == "Delisted"]["Symbol"])]
hnx_trades = hnx_trades[~hnx_trades['Symbol'].isin(hnx_listings[hnx_listings["Status"] == "Delisted"]["Symbol"])]

In [31]:
# Filtering our records from the last 5 years
hose_trades_15_20 = hose_trades[hose_trades["Date"] > "2014-12-31"].copy(deep = True)
hnx_trades_15_20 = hnx_trades[hnx_trades["Date"] > "2014-12-31"].copy(deep = True)

# Create year, month, day variables
def create_time_vars(data):
    data.loc[:,"Year"] = data.Date.dt.year
    data.loc[:,"Month"] = data.Date.dt.month
    data.loc[:,"Day"] = data.Date.dt.day

create_time_vars(hose_trades_15_20)
create_time_vars(hnx_trades_15_20)

In [12]:
# Inspect the adjusted closing prices and matched volumes for HOSE
hose_trades_15_20[["Adj_close","Matched_Volume"]].describe()

Unnamed: 0,Adj_close,Matched_Volume
count,519427.0,519427.0
mean,26.26778,486590.6
std,28.64859,1646994.0
min,0.0,0.0
25%,9.61,2230.0
50%,17.59,39050.0
75%,31.3,296930.0
max,343.0,128443900.0


In [13]:
# Inspect the adjusted closing prices and matched volumes for HNX
hnx_trades_15_20[["Adj_close","Matched_Volume"]].describe()

Unnamed: 0,Adj_close,Matched_Volume
count,486847.0,486847.0
mean,16.735464,100653.6
std,18.050091,669012.7
min,0.2,0.0
25%,6.8,0.0
50%,11.7,400.0
75%,19.8,13303.0
max,263.3,65589850.0


In [32]:
# Filter out stocks that have traded for less than 5000 VND or saw a maximum of 1000 trades per day 
def filter_low_price_and_volume(data):
    min_adj_close = data.groupby("Symbol")["Adj_close"].min()
    low_price = min_adj_close[min_adj_close <= 5.0].index
    max_volume = data.groupby("Symbol")["Matched_Volume"].max()
    low_volume = max_volume[max_volume <= 1000].index
    low_price_and_volume = set(low_price).union(set(low_volume))
    return data[~ data['Symbol'].isin( low_price_and_volume ) ]
hose_trades_15_20 = filter_low_price_and_volume(hose_trades_15_20)
hnx_trades_15_20 = filter_low_price_and_volume(hnx_trades_15_20)

In [6]:
print("HOSE")
print("Original: ", len(np.unique(hose_trades["Symbol"])))
print("After prep: ",len(np.unique(hose_trades_15_20["Symbol"])))
print("\nHNX")
print("Original: ",len(np.unique(hnx_trades["Symbol"])))
print("After prep: ",len(np.unique(hnx_trades_15_20["Symbol"])))

HOSE
Original:  433
After prep:  308

HNX
Original:  404
After prep:  218


## Calculate Momentum Strategies' Returns

In [35]:
# Calculate strategy returns for every month
def calculate_strategy_returns(trade_data, preform_period, hold_period):
    # Get the start of the trading month.
    # Because on some days only a few listings are traded, I had to specify that number of listings being traded exceed 200 to get whole-market sessions
    def get_trading_month_start(trade_data):
        valid_df = trade_data.groupby(["Year","Month","Date"]).size().reset_index()
        month_starts =  valid_df[valid_df[0] >= 200].groupby(["Year","Month"])["Date"].min().reset_index()
        return month_starts
    
    # Helper function
    def single_symbol_strategy(symbol_trade_data):
        df = symbol_trade_data.sort_values(by = "Date")
        preform = df["Adj_close"].shift( preform_period)
        hold = df["Adj_close"].shift( - hold_period)
        res_df = pd.DataFrame({"Month_start": df["Date"],
                            "Preform_date": df["Date"].shift( preform_period),
                            "Hold_date": df["Date"].shift(- hold_period),
                            "Symbol":  df["Symbol"],
                            "Month_start_price": df["Adj_close"], 
                            "Preform_price": preform, 
                            "Hold_price": hold }).dropna()
        return res_df

    # Form dataset with prices at month start, at beginning of preformation period and at the end of holding period to facilitate returns calculations
    def form_strategy_df(trade_data):
        month_starts = get_trading_month_start(trade_data)
        df = trade_data[trade_data["Date"].isin(month_starts["Date"])][["Date", "Symbol","Adj_close"]]
        periods = []
        symbols = np.unique(trade_data["Symbol"])
        for symbol in symbols:
            periods.append(single_symbol_strategy(df[df["Symbol"]==symbol]))
        test_df =  pd.concat(periods).sort_values(by=["Month_start","Symbol"])
        pref_rets = (test_df["Month_start_price"]-test_df["Preform_price"])*100/(test_df["Preform_price"])
        test_df["Preform_returns"] = pref_rets
        return  test_df

    test_df = form_strategy_df(trade_data)
    returns_df = {"Month_start":[],"Winners":[],"Losers":[],"Winners_minus_Losers":[]}
    
    for month_start in np.unique(test_df['Month_start']):
        df = test_df[test_df['Month_start'] == month_start].copy(deep = True)
        # get winners - top 10th percentile
        winners = df[df["Preform_returns"] > df["Preform_returns"].quantile(.9)].copy(deep = True)
        # get losers - bottom 10th percentile
        losers = df[df["Preform_returns"] < df["Preform_returns"].quantile(.1)].copy(deep = True)

        # Adjust the prices to simulate an "Equally Weighted Portfolio"
        winners["Adj_Hold_price"] = winners.Hold_price/winners.Month_start_price
        losers["Adj_Hold_price"] = losers.Hold_price/losers.Month_start_price

        # calculate returns for individual stocks
        winners_returns = (np.sum(winners["Adj_Hold_price"]) - len(winners))/(len(winners))
        losers_returns = (len(losers) - np.sum(losers["Adj_Hold_price"]))/(len(losers))
        wml_returns = (np.sum(winners["Adj_Hold_price"]) + len(losers) - len(winners) - np.sum(losers["Adj_Hold_price"]) )/(len(winners) + np.sum(losers["Adj_Hold_price"]))

        # Append to data frame
        returns_df["Month_start"].append(month_start)
        returns_df["Winners"].append(winners_returns)
        returns_df["Losers"].append(losers_returns)
        returns_df["Winners_minus_Losers"].append(wml_returns )

    returns_df = pd.DataFrame(returns_df)
    returns_df["Preform_Period"] = preform_period
    returns_df["Hold_Period"] = hold_period
    return returns_df
# Conduct t-test against mean returns of 0 for different combinations of preformation and holding periods
def calculate_t_test(trade_data):
    pre_hold_combo =[[3,3],
            [6,3],
            [3,6],
            [6,6],
            [6,9],
            [9,6],]
            # [9,9],
            # [3,9],
            # [9,3]]
    test_res = {"Preform":[],"Hold":[], "Strategy":[],"Mean Returns":[],"Stdev. Returns":[],"p-value":[],"t-stat": [] }
    for combo in pre_hold_combo:
        preform_period = combo[0]
        hold_period = combo[1]
        strat_rets = calculate_strategy_returns(trade_data,  preform_period, hold_period)
        for strat in ["Winners","Losers","Winners_minus_Losers"]:
            test_res["Preform"].append(preform_period)
            test_res["Hold"].append(hold_period)
            t_res = stats.ttest_1samp(strat_rets[strat],0.0)
            test_res["Strategy"].append(strat)
            test_res["Mean Returns"].append(np.mean(strat_rets[strat]))
            test_res["Stdev. Returns"].append(np.std(strat_rets[strat], ddof = 1))
            test_res["p-value"].append(t_res.pvalue)
            test_res["t-stat"].append(t_res.statistic)
    test_res = pd.DataFrame(test_res)
    return test_res

In [None]:
hnx_test_res_15_20  =  calculate_t_test(hnx_trades_15_20)
hose_test_res_15_20  =  calculate_t_test(hose_trades_15_20)

In [51]:
# Functions to display results
def highlight_signif(s):
    '''
    highlight significant p-values
    '''
    is_signficant = s <= 0.05
    return ['background-color: yellow' if v else '' for v in is_signficant]

def display_test_res(test_res):
    return test_res.sort_values(["Preform","Hold"]).style.apply(highlight_signif, subset=['p-value'])

In [52]:
display_test_res(hnx_test_res_15_20)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-0.045791,0.081006,0.028224,-2.398288
1,3,3,Losers,-0.098844,0.125722,0.003917,-3.335621
2,3,3,Winners_minus_Losers,-0.06522,0.076003,0.002022,-3.640702
6,3,6,Winners,-0.043832,0.064431,0.019603,-2.634767
7,3,6,Losers,-0.072414,0.146787,0.076746,-1.910657
8,3,6,Winners_minus_Losers,-0.052114,0.06835,0.010484,-2.952951
3,6,3,Winners,-0.041002,0.0506,0.007258,-3.138362
4,6,3,Losers,-0.075955,0.1218,0.029982,-2.415207
5,6,3,Winners_minus_Losers,-0.053458,0.058021,0.003086,-3.56839
9,6,6,Winners,-0.038336,0.056237,0.037719,-2.361418


In [53]:
display_test_res(hose_test_res_15_20)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-0.034362,0.11522,0.018195,-2.422839
1,3,3,Losers,-0.090125,0.139746,2e-06,-5.239352
2,3,3,Winners_minus_Losers,-0.055851,0.077511,0.0,-5.853895
6,3,6,Winners,-0.00357,0.137994,0.83797,-0.205351
7,3,6,Losers,-0.092464,0.161719,2.7e-05,-4.538176
8,3,6,Winners_minus_Losers,-0.041493,0.08558,0.000284,-3.848302
3,6,3,Winners,-0.018114,0.108146,0.188562,-1.329477
4,6,3,Losers,-0.065292,0.099903,2e-06,-5.187423
5,6,3,Winners_minus_Losers,-0.038401,0.065872,1.9e-05,-4.627217
9,6,6,Winners,-0.005901,0.132853,0.732043,-0.344036


### Repeating the exercise with 2020 left out

In [38]:
hnx_test_res_15_19  =  calculate_t_test(hnx_trades_15_20[hnx_trades_15_20["Date"] <= "2020-12-31"])
hose_test_res_15_19  =  calculate_t_test(hose_trades_15_20[hose_trades_15_20["Date"] <= "2020-12-31"])

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-0.045791,0.081006,0.028224,-2.398288
1,3,3,Losers,-0.098844,0.125722,0.003917,-3.335621
2,3,3,Winners_minus_Losers,-0.06522,0.076003,0.002022,-3.640702
3,6,3,Winners,-0.041002,0.0506,0.007258,-3.138362
4,6,3,Losers,-0.075955,0.1218,0.029982,-2.415207
5,6,3,Winners_minus_Losers,-0.053458,0.058021,0.003086,-3.56839
6,3,6,Winners,-0.043832,0.064431,0.019603,-2.634767
7,3,6,Losers,-0.072414,0.146787,0.076746,-1.910657
8,3,6,Winners_minus_Losers,-0.052114,0.06835,0.010484,-2.952951
9,6,6,Winners,-0.038336,0.056237,0.037719,-2.361418


In [54]:
display_test_res(hnx_test_res_15_19)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-0.045791,0.081006,0.028224,-2.398288
1,3,3,Losers,-0.098844,0.125722,0.003917,-3.335621
2,3,3,Winners_minus_Losers,-0.06522,0.076003,0.002022,-3.640702
6,3,6,Winners,-0.043832,0.064431,0.019603,-2.634767
7,3,6,Losers,-0.072414,0.146787,0.076746,-1.910657
8,3,6,Winners_minus_Losers,-0.052114,0.06835,0.010484,-2.952951
3,6,3,Winners,-0.041002,0.0506,0.007258,-3.138362
4,6,3,Losers,-0.075955,0.1218,0.029982,-2.415207
5,6,3,Winners_minus_Losers,-0.053458,0.058021,0.003086,-3.56839
9,6,6,Winners,-0.038336,0.056237,0.037719,-2.361418


In [55]:
display_test_res(hose_test_res_15_19)

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-0.034362,0.11522,0.018195,-2.422839
1,3,3,Losers,-0.090125,0.139746,2e-06,-5.239352
2,3,3,Winners_minus_Losers,-0.055851,0.077511,0.0,-5.853895
6,3,6,Winners,-0.00357,0.137994,0.83797,-0.205351
7,3,6,Losers,-0.092464,0.161719,2.7e-05,-4.538176
8,3,6,Winners_minus_Losers,-0.041493,0.08558,0.000284,-3.848302
3,6,3,Winners,-0.018114,0.108146,0.188562,-1.329477
4,6,3,Losers,-0.065292,0.099903,2e-06,-5.187423
5,6,3,Winners_minus_Losers,-0.038401,0.065872,1.9e-05,-4.627217
9,6,6,Winners,-0.005901,0.132853,0.732043,-0.344036
