In [1]:
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from data import Data

## Data Preparation

In [3]:
# Reading in data
all_data = Data()
hose_listings, hnx_listings = all_data.hnx_listings,all_data.hose_listings
hose_trades, hnx_trades = all_data.get_all_records()

# Removing delisted stocks
hose_trades = hose_trades[~hose_trades['Symbol'].isin(hose_listings[hose_listings["Status"] == "Delisted"]["Symbol"])]
hnx_trades = hnx_trades[~hnx_trades['Symbol'].isin(hnx_listings[hnx_listings["Status"] == "Delisted"]["Symbol"])]

# Filtering our records from the last 5 years
hose_trades_15_20 = hose_trades[hose_trades["Date"] > "2014-12-31"]
hnx_trades_15_20 = hnx_trades[hnx_trades["Date"] > "2014-12-31"]

# Create year, month, day variables
def create_time_vars(data):
    data["Year"] = data.loc[:,"Date"].dt.year
    data["Month"] = data.loc[:,"Date"].dt.month
    data["Day"] = data.loc[:,"Date"].dt.day

create_time_vars(hose_trades_15_20)
create_time_vars(hnx_trades_15_20)

In [12]:
# Inspect the adjusted closing prices and matched volumes for HOSE
hose_trades_15_20[["Adj_close","Matched_Volume"]].describe()

Unnamed: 0,Adj_close,Matched_Volume
count,519427.0,519427.0
mean,26.26778,486590.6
std,28.64859,1646994.0
min,0.0,0.0
25%,9.61,2230.0
50%,17.59,39050.0
75%,31.3,296930.0
max,343.0,128443900.0


In [13]:
# Inspect the adjusted closing prices and matched volumes for HNX
hnx_trades_15_20[["Adj_close","Matched_Volume"]].describe()

Unnamed: 0,Adj_close,Matched_Volume
count,486847.0,486847.0
mean,16.735464,100653.6
std,18.050091,669012.7
min,0.2,0.0
25%,6.8,0.0
50%,11.7,400.0
75%,19.8,13303.0
max,263.3,65589850.0


In [59]:
# Filter out stocks that at maximum, traded for less than 5000 VND or  saw less than 100000 trades per day
def filter_low_price_and_volume(data):
    max_adj_close = data.groupby("Symbol")["Adj_close"].max()
    low_price = max_adj_close[max_adj_close <= 5.0].index
    max_volume = data.groupby("Symbol")["Matched_Volume"].max()
    low_volume = max_volume[max_volume <= 100000].index
    low_price_and_volume = set(low_price).union(set(low_volume))
    return data[~ data['Symbol'].isin( low_price_and_volume ) ]
hose_trades_15_20 = filter_low_price_and_volume(hose_trades_15_20)
hnx_trades_15_20 = filter_low_price_and_volume(hnx_trades_15_20)

In [201]:
print("HOSE")
print("Original: ", len(np.unique(hose_trades["Symbol"])))
print("After prep: ",len(np.unique(hose_trades_15_20["Symbol"])))
print("\nHNX")
print("Original: ",len(np.unique(hnx_trades["Symbol"])))
print("After prep: ",len(np.unique(hnx_trades_15_20["Symbol"])))

HOSE
Original:  433
After prep:  388

HNX
Original:  404
After prep:  259


## Calculate Momentum Strategies' Returns

In [185]:
# Calculate strategy returns for every month
def calculate_strategy_returns(trade_data, preform_period, hold_period):
    # Get the start of the trading month.
    # Because on some days only a few listings are traded, I had to specify that number of listings being traded exceed 200 to get whole-market sessions
    def get_trading_month_start(trade_data):
        valid_df = trade_data.groupby(["Year","Month","Date"]).size().reset_index()
        month_starts =  valid_df[valid_df[0] >= 200].groupby(["Year","Month"])["Date"].min().reset_index()
        return month_starts
    
    # Helper function
    def single_symbol_strategy(symbol_trade_data):
        df = symbol_trade_data.sort_values(by = "Date")
        preform = df["Adj_close"].shift( preform_period)
        hold = df["Adj_close"].shift( - hold_period)
        res_df = pd.DataFrame({"Month_start": df["Date"],
                            "Preform_date": df["Date"].shift( preform_period),
                            "Hold_date": df["Date"].shift(- hold_period),
                            "Symbol":  df["Symbol"],
                            "Month_start_price": df["Adj_close"], 
                            "Preform_price": preform, 
                            "Hold_price": hold }).dropna()
        return res_df

    # Form dataset with prices at month start, at beginning of preformation period and at the end of holding period to facilitate returns calculations
    def form_strategy_df(trade_data):
        month_starts = get_trading_month_start(trade_data)
        df = trade_data[trade_data["Date"].isin(month_starts["Date"])][["Date", "Symbol","Adj_close"]]
        periods = []
        symbols = np.unique(trade_data["Symbol"])
        for symbol in symbols:
            periods.append(single_symbol_strategy(df[df["Symbol"]==symbol]))
        test_df =  pd.concat(periods).sort_values(by=["Month_start","Symbol"])
        test_df.loc[:,"Preform_returns"] = (test_df["Month_start_price"]-test_df["Preform_price"])*100/(test_df["Preform_price"])
        return  test_df
    test_df = form_strategy_df(trade_data)
    returns_df = {"Month_start":[],"Winners":[],"Losers":[],"Winners_minus_Losers":[]}
    
    for month_start in np.unique(test_df['Month_start']):
        df = test_df[test_df['Month_start'] == month_start]
        # get winners - top 10th percentile
        winners = df[df["Preform_returns"] > df["Preform_returns"].quantile(.9)]
        # get losers - bottom 10th percentile
        losers = df[df["Preform_returns"] < df["Preform_returns"].quantile(.1)]
        # calculate returns for individual stocks
        winners_returns = (winners["Hold_price"] - winners["Month_start_price"])/(winners["Month_start_price"])
        losers_returns = (losers["Month_start_price"] - losers["Hold_price"])/(losers["Hold_price"])
        # calculate returns porfolios (mean of individual stock returns)
        returns_df["Month_start"].append(month_start)
        returns_df["Winners"].append(np.mean(winners_returns))
        returns_df["Losers"].append(np.mean(losers_returns))
        returns_df["Winners_minus_Losers"].append((np.mean(winners_returns) + np.mean(losers_returns))/2 )
    returns_df = pd.DataFrame(returns_df)
    returns_df["Preform_Period"] = preform_period
    returns_df["Hold_Period"] = hold_period
    return returns_df

In [195]:
# Conduct t-test against mean returns of 0 for different combinations of preformation and holding periods
def calculate_t_test(trade_data):
    pre_hold_combo =[[3,3],
            [6,3],
            [3,6],
            [6,6],
            [6,9],
            [9,6],
            [9,9],
            [3,9],
            [9,3]]
    test_res = {"Preform":[],"Hold":[], "Strategy":[],"Mean Returns":[],"Stdev. Returns":[],"p-value":[],"t-stat": [] }
    for combo in pre_hold_combo:
        preform_period = combo[0]
        hold_period = combo[1]
        strat_rets = calculate_strategy_returns(trade_data,  preform_period, hold_period)
        for strat in ["Winners","Losers","Winners_minus_Losers"]:
            test_res["Preform"].append(preform_period)
            test_res["Hold"].append(hold_period)
            t_res = stats.ttest_1samp(strat_rets[strat],0.0)
            test_res["Strategy"].append(strat)
            test_res["Mean Returns"].append(np.mean(strat_rets[strat]))
            test_res["Stdev. Returns"].append(np.std(strat_rets[strat], ddof = 1))
            test_res["p-value"].append(t_res.pvalue)
            test_res["t-stat"].append(t_res.statistic)
    test_res = pd.DataFrame(test_res)
    return test_res

In [196]:
hnx_test_res  =  calculate_t_test(hnx_trades_15_20)
hnx_test_res

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-0.071637,0.111265,2e-06,-5.230626
1,3,3,Losers,-0.022041,0.143469,0.216471,-1.248103
2,3,3,Winners_minus_Losers,-0.046839,0.107254,0.000727,-3.547887
3,6,3,Winners,-0.040745,0.079052,0.000126,-4.091051
4,6,3,Losers,0.00409,0.118149,0.784431,0.274738
5,6,3,Winners_minus_Losers,-0.018328,0.076058,0.060416,-1.912642
6,3,6,Winners,-0.061353,0.101859,1.1e-05,-4.780852
7,3,6,Losers,0.045617,0.167977,0.035013,2.155492
8,3,6,Winners_minus_Losers,-0.007868,0.104927,0.553891,-0.595177
9,6,6,Winners,-0.047289,0.104723,0.000899,-3.497783


In [197]:
hose_test_res  =  calculate_t_test(hose_trades_15_20)
hose_test_res

Unnamed: 0,Preform,Hold,Strategy,Mean Returns,Stdev. Returns,p-value,t-stat
0,3,3,Winners,-0.042426,0.115015,0.003861,-2.996736
1,3,3,Losers,-0.019049,0.145464,0.291326,-1.063865
2,3,3,Winners_minus_Losers,-0.030737,0.100279,0.015334,-2.490174
3,6,3,Winners,-0.020179,0.100418,0.115803,-1.594975
4,6,3,Losers,0.023439,0.138704,0.184715,1.341302
5,6,3,Winners_minus_Losers,0.00163,0.089975,0.886115,0.143812
6,3,6,Winners,-0.019115,0.148585,0.311175,-1.0211
7,3,6,Losers,0.049871,0.183294,0.034681,2.15958
8,3,6,Winners_minus_Losers,0.015378,0.119151,0.309624,1.024406
9,6,6,Winners,-0.020962,0.146068,0.270807,-1.111629
