In [377]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../src')

# import pandas as pd
import fireducks.pandas as pd
import numpy as np

import time
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from utils.data_utils.data_handler import Handler
from utils.data_utils.data_transform import DataTransform
from portfolio.portfolio import Portfolio
from backtest.backtest import Backtest
from analytics.returns import calculate_portfolio_return
from analytics.plots import visualize_cummulative_returns


from glob import glob
import shutil
import logging

pd.options.plotting.backend = 'plotly'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [378]:
benchmark_csv_dir = '../data/benchmark/dc_performance.csv'
fundamental_csv_dir = '../data/fundamental_data/'
historical_csv_dir = '../data/historical_data/'
gics_csv_dir = '../data/gics/gics.csv'

handl = Handler(benchmark_csv_dir, fundamental_csv_dir, historical_csv_dir, gics_csv_dir)

In [379]:
benchmark = handl.get_benchmark_data() 
fun_data = handl.get_processed_fundamental_data() 
hist_data = handl.get_processed_historical_data() 

In [382]:
transform = DataTransform(benchmark, fun_data, hist_data, start_time='2013-07-01', end_time='2023-06-30')

In [383]:
long_daily, long_monthly = transform.get_transform_data()


divide by zero encountered in log



In [221]:
portf= Portfolio(long_daily, long_monthly)

## Momentum strategy - Non Abnormal Momentum Stocks

In [222]:
non_abnormal_bt = Backtest(portfolio=portf)
non_abnormal_bt.run(abnormal=False)

Backtest is running ...
|████████████████████████████████████████| 121/121 [100%] in 3:32.6 (0.57/s)    


In [223]:
non_abnormal_performnace_df = pd.concat(non_abnormal_bt.performance_dfs)

winner_performance, loser_performance, arbitrage_performace = calculate_portfolio_return(non_abnormal_performnace_df, cummulative=True)

visualize_cummulative_returns(winner_performance, loser_performance, arbitrage_performace)

## Momentum strategy - Abnormal Momentum Stocks

In [None]:
abnormal_bt = Backtest(portfolio=portf)
abnormal_bt.run(abnormal=True)

Backtest is running ...
|████████████████████████████████████████| 121/121 [100%] in 21:34.5 (0.09/s)   


In [670]:
abnormal_performnace_df = pd.concat(abnormal_bt.performance_dfs)

abnormal_winner_performance, abnormal_loser_performance, abnormal_arbitrage_performace = calculate_portfolio_return(abnormal_performnace_df)

visualize_cummulative_returns(abnormal_winner_performance, abnormal_loser_performance, abnormal_arbitrage_performace)

## Momentum strategy - Exclude Abnormal Momentum Stocks

In [449]:
exclude_abnormal_bt = Backtest(portfolio=portf)
exclude_abnormal_bt.run(abnormal=True, exclude_abnormal=True)

Backtest is running ...
|████████████████████████████████████████| 121/121 [100%] in 19:27.2 (0.10/s)   


In [450]:
exclude_abnormal_performnace_df = pd.concat(exclude_abnormal_bt.performance_dfs)

exclude_abnormal_winner_performance, exclude_abnormal_loser_performance, exclude_abnormal_arbitrage_performace = calcualte_portfolios_return(exclude_abnormal_performnace_df)

visualize_cummulative_returns(exclude_abnormal_winner_performance, exclude_abnormal_loser_performance, exclude_abnormal_arbitrage_performace)

## Portfolio Profiling

In [200]:
import quantstats_lumi as qs

qs.extend_pandas()

In [231]:
color_map = {
    'cmom_return': "#EEE8AA", 
    'mmom_return': "#20B2AA", 
    'tmom_return': "#DDA0DD", 
    'pmom_return': "#FF6F61"
}

fig = make_subplots(
    rows=1, 
    cols=2, 
    shared_yaxes=True,
    horizontal_spacing=0.03, 
    vertical_spacing=0.09,
    column_titles=['Loser Portfolio (Small-cap)', 'Abnormal Loser Portfolio (Small-cap)']
)

In [240]:
for col in loser_performance.xs('bottom', level=0).columns:  
    fig.add_trace(
        go.Scatter(
            x=loser_performance.xs('bottom', level=0).index,
            y=loser_performance.xs('bottom', level=0)[col],
            name=f'normal {col}',
            line=dict(color=color_map[col]),
            showlegend=False
        ),
        row=1,
        col=1
    )
    
for col in abnormal_loser_performance.xs('bottom', level=0).columns:  
    fig.add_trace(
        go.Scatter(
            x=abnormal_loser_performance.xs('bottom', level=0).index,
            y=abnormal_loser_performance.xs('bottom', level=0)[col],
            name=f'abnormal {col}',
            line=dict(color=color_map[col]),
            showlegend=False
        ),
        row=1,
        col=2
    )

fig.update_layout(
    title='Cummulative Return (Normal vs. Anormal Momentum Stocks)',
    title_x=.5,
    width=1400, 
    height=400,
    margin=dict(l=20, r=20, t=50, b=20),
    template='plotly_dark'
)
    
fig.show()


In [369]:
from empyrical import max_drawdown, annual_return, annual_volatility, sharpe_ratio, sortino_ratio, calmar_ratio, cum_returns
import pyfolio as pf


Module "zipline.assets" not found; multipliers will not be applied to position notionals.



In [668]:
non_abnormal_performnace_df.to_csv('../reports/performance/non_abnormal_performnace.csv')
abnormal_performnace_df.to_csv('../reports/performance/abnormal_performnace.csv')
exclude_abnormal_performnace_df.to_csv('../reports/performance/exclude_abnormal_performnace.csv')

In [667]:
non_abnormal_winner, non_abnormal_loser, _ = calculate_portfolio_return(non_abnormal_performnace_df, cummulative=False)
abnormal_winner, abnormal_loser, _  = calculate_portfolio_return(abnormal_performnace_df, cummulative=False)
exclude_abnormal_winner, exclude_abnormal_loser, _  =calculate_portfolio_return(exclude_abnormal_performnace_df, cummulative=False)

In [675]:
def create_portfolio_statistics(performance): 
    metrics = ["Annual return",
                "Annual volatility",
                "Max drawdown",
                "Sharpe ratio",
                "Sortino ratio",
                "Calmar ratio"]
    
    results = pd.DataFrame(index=metrics)
    
    for group, df in performance.groupby(level=0): 
        stats = [
            annual_return(df.pmom_return, period='monthly'),
            annual_volatility(df.pmom_return, period='monthly'),
            max_drawdown(df.pmom_return),
            sharpe_ratio(df.pmom_return, period='monthly'),
            sortino_ratio(df.pmom_return, period='monthly'),
            calmar_ratio(df.pmom_return, period='monthly')
        ]    
        results[group] = stats
    
    return results

In [361]:
def create_portfolio_statistics(returns): 
    stats = [
        annual_return(returns, period='monthly'),
        annual_volatility(returns, period='monthly'),
        max_drawdown(returns),
        sharpe_ratio(returns, period='monthly'),
        sortino_ratio(returns, period='monthly'),
        calmar_ratio(returns, period='monthly')
    ]    
    
    return stats

In [576]:
metrics = ["Annual return",
            "Annual volatility",
            "Max drawdown",
            "Sharpe ratio",
            "Sortino ratio",
            "Calmar ratio"]
portfolio_stats = pd.DataFrame(index=metrics)

for group, df in non_abnormal_loser.groupby(level=0): 
    portfolio_stats[group] = create_portfolio_statistics(df.pmom_return)

print('Performance of non-abnormal momentum stocks:')
portfolio_stats

Performance of non-abnormal momentum stocks:


Unnamed: 0,bottom,mid,top
Annual return,0.06745,0.037813,0.014023
Annual volatility,0.072997,0.082304,0.068252
Max drawdown,-0.164239,-0.21132,-0.169579
Sharpe ratio,0.932958,0.4922,0.238095
Sortino ratio,1.541658,0.806329,0.345925
Calmar ratio,0.410684,0.178938,0.08269


In [646]:
metrics = ["Annual return",
            "Annual volatility",
            "Max drawdown",
            "Sharpe ratio",
            "Sortino ratio",
            "Calmar ratio"]
portfolio_stats = pd.DataFrame(index=metrics)

for group, df in abnormal_loser.groupby(level=0): 
    portfolio_stats[group] = create_portfolio_statistics(df.pmom_return)

print('Performance of abnormal momentum stocks:')
portfolio_stats

Performance of abnormal momentum stocks:


Unnamed: 0,bottom,mid,top
Annual return,0.097605,0.022304,-0.046063
Annual volatility,0.134994,0.147537,0.183229
Max drawdown,-0.29709,-0.338112,-0.510182
Sharpe ratio,0.761924,0.224665,-0.144525
Sortino ratio,1.157024,0.31711,-0.168048
Calmar ratio,0.328536,0.065966,-0.090288


In [578]:
metrics = ["Annual return",
            "Annual volatility",
            "Max drawdown",
            "Sharpe ratio",
            "Sortino ratio",
            "Calmar ratio"]
portfolio_stats = pd.DataFrame(index=metrics)

for group, df in exclude_abnormal_loser.groupby(level=0): 
    portfolio_stats[group] = create_portfolio_statistics(df.pmom_return)

print('Performance of exclude abnormal momentum stocks:')
portfolio_stats

Performance of exclude abnormal momentum stocks:


Unnamed: 0,bottom,mid,top
Annual return,0.028674,0.026863,0.024316
Annual volatility,0.03623,0.036489,0.031171
Max drawdown,-0.080786,-0.065847,-0.063412
Sharpe ratio,0.798793,0.744983,0.786757
Sortino ratio,1.781881,1.563625,1.641692
Calmar ratio,0.354939,0.407961,0.383456


## Adjust Stategy

In [605]:
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from hdbscan import HDBSCAN

import numpy as np
import matplotlib.pyplot as plt

feature_cols = ['CMOM', 'MMOM', 'TMOM', 'PMOM']
feature_dfs = non_abnormal_bt.portfolio_features

feature = feature_dfs[0][feature_cols].values

sc = StandardScaler()
scaled_feature = sc.fit_transform(feature)

In [606]:
results = []

min_sample_grid = np.arange(5, 35)
min_cluster_size_grid = np.arange(5, 35)

for min_sample in min_sample_grid: 
    for min_cluster_size in min_cluster_size_grid: 
        clf = HDBSCAN(min_samples=min_sample, min_cluster_size=min_cluster_size)
        clf.fit(scaled_feature)
        if len(clf.cluster_persistence_) == 0:
            continue
        results.append([min_sample, min_cluster_size, clf.cluster_persistence_.mean()])

In [607]:
best_min_sample, best_min_cluster_size, cluster_persistence_score = sorted(results, key=lambda x : x[-1], reverse=True)[0]
print("Best min samples: ", best_min_sample)
print("Best min cluster size: ",best_min_cluster_size)
print("Persistence score for parameter pairs: ", cluster_persistence_score)

Best min samples:  5
Best min cluster size:  18
Persistence score for parameter pairs:  0.16335188216949303


In [635]:
from umap import UMAP
import plotly.express as px

clf = HDBSCAN(min_samples=6, min_cluster_size=24)
clf.fit(scaled_feature)

temp = feature_dfs[0][feature_cols].copy()
temp['label'] = clf.labels_

umap_2d = UMAP(n_components=2, init='random', random_state=0)
umap_3d = UMAP(n_components=3, init='random', random_state=0)

proj_2d = umap_2d.fit_transform(scaled_feature)
proj_3d = umap_3d.fit_transform(scaled_feature)

fig_2d = px.scatter(
    proj_2d, x=0, y=1,
    color=temp.label, labels={'color': 'label'}
)
fig_3d = px.scatter_3d(
    proj_3d, x=0, y=1, z=2,
    color=temp.label, labels={'color': 'label'}
)
fig_3d.update_traces(marker_size=5)

fig_2d.show()
fig_3d.show()


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



In [634]:
class AbnormalMomentumStatey: 
    def __hyperparameter_search(self, scaled_feature): 
        results = []

        min_sample_grid = np.arange(5, 35)
        min_cluster_size_grid = np.arange(5, 35)

        for min_sample in min_sample_grid: 
            for min_cluster_size in min_cluster_size_grid: 
                
                clf = HDBSCAN(min_samples=min_sample, min_cluster_size=min_cluster_size)
                clf.fit(scaled_feature)
                
                if len(clf.cluster_persistence_) == 0:
                    continue
                
                results.append([min_sample, min_cluster_size, clf.cluster_persistence_.mean()]) 
        
        return sorted(results, key=lambda x : x[-1], reverse=True)[0]
    
    def find_cluster(self, formed_portfolio): 
        feature_cols = ['CMOM', 'MMOM', 'TMOM', 'PMOM']
        
        feature = formed_portfolio[feature_cols].values

        sc = StandardScaler()
        scaled_feature = sc.fit_transform(feature)
        
        best_min_sample, best_min_cluster_size, _ = self.__hyperparameter_search(scaled_feature)
        
        clf = HDBSCAN(min_samples=best_min_sample, min_cluster_size=best_min_cluster_size)
        clf.fit(scaled_feature)
        
        return clf.labels_

In [146]:
ams = AbnormalMomentumStatey()

In [147]:
temp_2 = feature_dfs[1]

temp_2['mom_label'] = ams.find_cluster(feature_dfs[1])

In [None]:
non_abnormal_performnace_df.xs('bottom', level=2).groupby(level=0, group_keys=False)[['pmom_weights', 'r']].apply(lambda df: df['pmom_weights'].dot(df['r']))

time
2013-12-31   -0.019529
2014-01-31   -0.005888
2014-02-28    0.012113
2014-03-31    0.010248
2014-04-30    0.007890
                ...   
2023-08-31    0.002047
2023-09-30   -0.005752
2023-10-31   -0.000794
2023-11-30   -0.001586
2023-12-31    0.000266
Length: 121, dtype: float64

In [64]:
temp_small = (non_abnormal_performnace_df[['pmom_weights', 'r']]
             .xs('bottom', level=2)
             .query('pmom_weights < 0')
             )
temp_small['pmom_weights'] = -temp_small['pmom_weights']

In [62]:
temp_small['pmom_weights'].groupby(level=0, group_keys=False).apply(lambda x: x.rank(method='max', ascending=False, pct=True).sort_values())

time        ticker
2013-12-31  SMT       0.019608
            CYC       0.039216
            DNM       0.058824
            S55       0.078431
            SAV       0.098039
                        ...   
2023-12-31  MAC       0.964286
            KSQ       0.973214
            PFL       0.982143
            C47       0.991071
            GMX       1.000000
Name: pmom_weights, Length: 7787, dtype: float64

In [68]:
temp_small.xs('2013-12-31', level=0).pmom_weights.hist() 

In [224]:
from scipy.special import softmax
np.set_printoptions(precision=5)

In [225]:
smallcap_performance = (non_abnormal_performnace_df
                            .query('cap_groups == "bottom" and pmom_weights < 0')
                            .groupby(level=0, group_keys=False)
                            .apply(lambda df: df.nsmallest(n=round(len(df)*.3), columns='pmom_weights'))
                            [['pmom_weights', 'r']]
                        )                           

In [241]:
sml_ret = smallcap_performance['pmom_weights'].groupby(level=0).transform(lambda x: softmax(x)).mul(smallcap_performance['r']).groupby(level=0, group_keys=False).apply(np.sum)

### Scrapping Vietnam 10 Year Government Bond Yield

In [343]:
from websocket import create_connection
import json

def create_msg(ws, fun, arg): 
    ms = json.dumps({"m": fun, "p": arg})
    msg = "~m~" + str(len(ms)) + "~m~" + ms
    ws.send(msg)
    
def clean_data(data): 
    extracted_data = json.loads(data[data.find('"s":[')+4:data.find(',"ns"')])
    cleaned_data = []
    for item in extracted_data:
        cleaned_data.append(item['v'])
    df = pd.DataFrame(cleaned_data, columns=['time', 'open', 'high', 'low', 'close'])
    df['time'] = pd.to_datetime(df['time'], unit='s').dt.date
    
    return df
    
# Init WebSocketURL
socket = "wss://data.tradingview.com/socket.io/websocket"

# Create connection to WebSocket
ws = create_connection(socket)

# Create a new chart session
create_msg(ws, fun="chart_create_session", arg=["cs_WwurjQQG1oBH",""])
# Idenify stock symbol
create_msg(ws, fun="resolve_symbol", arg=["cs_WwurjQQG1oBH","sds_sym_1","={\"adjustment\":\"splits\",\"symbol\":\"TVC:VN10Y\"}"])
# Create price series
create_msg(ws, fun="create_series", arg=["cs_WwurjQQG1oBH","sds_1","s1","sds_sym_1","1D",10000,""])

while True: 
    res = ws.recv()
    print(res)
    if "series_completed" in res: 
        break
    
cleaned_data = clean_data(res)

~m~292~m~{"session_id":"0.30476.858_hkg1-charts-free-3-tvbs-166mu-1","timestamp":1739722529,"timestampMs":1739722529947,"release":"release_208-46","studies_metadata_hash":"95427ddd2ab57743f205aa54f28d5e49a228f47d","auth_scheme_vsn":2,"protocol":"json","via":"45.135.231.57:443","javastudies":["3.66"]}
~m~95~m~{"m":"series_loading","p":["cs_WwurjQQG1oBH","sds_1","s1"],"t":1739722530,"t_ms":1739722530058}~m~1297~m~{"m":"symbol_resolved","p":["cs_WwurjQQG1oBH","sds_sym_1",{"source2":{"country":"US","description":"TVC","exchange-type":"exchange","id":"TVC","name":"TVC","url":"https://tradingview.com"},"source_id":"TVC","subsession_id":"regular","provider_id":"refinitiv","country":"VN","format":"percent","formatter":"percent","pro_perm":"","value_unit_id":"PCT","measure":"unit","allowed_adjustment":"none","short_description":"Vietnam 10Y yield","variable_tick_size":"","name":"VN10Y","full_name":"TVC:VN10Y","pro_name":"TVC:VN10Y","base_name":["TVC:VN10Y"],"description":"Vietnam 10 Year Govern

In [344]:
cleaned_data = (cleaned_data
                .assign(
                    time=pd.to_datetime(cleaned_data['time'])
                )
                .set_index('time')
                ['2013-01-01':'2023-12-31']
                .resample('ME')
                .last()
                .ffill()
                [['close']]
                .apply(lambda x: x.add(1).pow(1/12).sub(1).div(100))
                )

In [346]:
cleaned_data.rename(columns={'close': 'rf'}, inplace=True)

In [347]:
(cleaned_data
 .merge(sml_ret.to_frame('sml_ret'), left_index=True, right_index=True)
 .assign(
     excess_ret=lambda x: x['sml_ret'] - x['rf']
 )).excess_ret.cumsum().plot()

In [376]:
metrics = ["Annual return",
            "Annual volatility",
            "Max drawdown",
            "Sharpe ratio",
            "Sortino ratio",
            "Calmar ratio"]
portfolio_stats = pd.DataFrame(index=metrics)
portfolio_stats['sml_stock'] = create_portfolio_statistics(sml_ret)

print('Performance of abnormal momentum stocks:')
portfolio_stats

Performance of abnormal momentum stocks:


Unnamed: 0,sml_stock
Annual return,0.352098
Annual volatility,0.303998
Max drawdown,-0.56323
Sharpe ratio,1.1509
Sortino ratio,2.273581
Calmar ratio,0.62514
