In [806]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../src')

# import pandas as pd
import fireducks.pandas as pd
import numpy as np

import time
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from utils.data_utils.data_handler import Handler
from utils.data_utils.data_transform import DataTransform
from portfolio.portfolio import Portfolio
from backtest.backtest import Backtest
from analytics.returns import calculate_portfolio_return
from analytics.plots import visualize_cummulative_returns


from glob import glob
import shutil
import logging

pd.options.plotting.backend = 'plotly'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [807]:
benchmark_csv_dir = '../data/benchmark/dc_performance.csv'
fundamental_csv_dir = '../data/fundamental_data/'
historical_csv_dir = '../data/historical_data/'
gics_csv_dir = '../data/gics/gics.csv'

handl = Handler(benchmark_csv_dir, fundamental_csv_dir, historical_csv_dir, gics_csv_dir)

In [808]:
benchmark = handl.get_benchmark_data() 
fun_data = handl.get_processed_fundamental_data() 
hist_data = handl.get_processed_historical_data() 

In [809]:
transform = DataTransform(benchmark, fun_data, hist_data, start_time='2013-01-01', end_time='2023-12-31')

In [810]:
long_daily, long_monthly = transform.get_transform_data()


divide by zero encountered in log



In [811]:
portf= Portfolio(long_daily, long_monthly)

## Price behaviour of different market capitalization groups

In [812]:
bt = Backtest(portfolio=portf)
bt.run(abnormal=False)

Backtest is running ...
|████████████████████████████████████████| 121/121 [100%] in 4:01.2 (0.50/s)    


In [813]:
performance_df = pd.concat(bt.performance_dfs)

winner_performance, loser_performance, arbitrage_performace = calculate_portfolio_return(performance_df, cummulative=True)

visualize_cummulative_returns(winner_performance, loser_performance, arbitrage_performace)

## Portfolio Performance Evaluation

In [814]:
from empyrical import max_drawdown, annual_return, annual_volatility, sharpe_ratio, sortino_ratio, calmar_ratio, cum_returns
import pyfolio as pf


In [815]:
performance_df.to_csv('../reports/performance/non_abnormal_performnace.csv')

[None]

In [875]:
winner_ret, loser_ret, _ = calculate_portfolio_return(performance_df, cummulative=False)

In [880]:
def create_portfolio_statistics(performance, level=0): 
    metrics = ["Annual return",
                "Annual volatility",
                "Max drawdown",
                "Sharpe ratio",
                "Sortino ratio",
                "Calmar ratio"]
    
    results = pd.DataFrame(index=metrics)
    
    for group, df in performance.groupby(level=level): 
        stats = [
            annual_return(df.pmom_return, period='monthly'),
            annual_volatility(df.pmom_return, period='monthly'),
            max_drawdown(df.pmom_return),
            sharpe_ratio(df.pmom_return, period='monthly'),
            sortino_ratio(df.pmom_return, period='monthly'),
            calmar_ratio(df.pmom_return, period='monthly')
        ]    
        results[group] = stats
    
    return results

In [881]:
create_portfolio_statistics(loser_ret)

Unnamed: 0,bottom,mid,top
Annual return,0.06745,0.037813,0.014023
Annual volatility,0.072997,0.082304,0.068252
Max drawdown,-0.164239,-0.21132,-0.169579
Sharpe ratio,0.932958,0.4922,0.238095
Sortino ratio,1.541658,0.806329,0.345925
Calmar ratio,0.410684,0.178938,0.08269


## Adjusting Strategy for Vietnamese Market

In [819]:
from scipy.special import softmax
np.set_printoptions(precision=5)

In [900]:
smallcap_performance = (performance_df
                            .query('cap_groups == "bottom" and pmom_weights < 0')
                            .groupby(level=0, group_keys=False)
                            .apply(lambda df: df.nsmallest(n=round(len(df)*.3), columns='pmom_weights'))
                            [['pmom_weights', 'r']]
                        )                           

In [901]:
sml_ret = smallcap_performance['pmom_weights'].groupby(level=0).transform(lambda x: softmax(x)).mul(smallcap_performance['r']).groupby(level=0, group_keys=False).apply(np.sum)

In [865]:
sml_ret = smallcap_performance['pmom_weights'].groupby(level=0).transform(lambda x: softmax(x)).mul(smallcap_performance['r']).to_frame('pmom_return')

In [899]:
loser_ret

Unnamed: 0_level_0,Unnamed: 1_level_0,cmom_return,mmom_return,tmom_return,pmom_return
cap_groups,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
bottom,2013-12-31,0.000993,0.014336,-0.000458,0.016424
bottom,2014-01-31,0.005887,0.009505,0.001565,0.013662
bottom,2014-02-28,0.022265,0.012360,0.005335,0.015703
bottom,2014-03-31,0.031344,0.019729,0.006061,0.028054
bottom,2014-04-30,-0.015644,-0.021642,-0.004283,-0.028976
...,...,...,...,...,...
top,2023-08-31,0.000961,0.001738,0.000263,0.006162
top,2023-09-30,-0.004516,-0.003848,-0.001032,-0.005311
top,2023-10-31,-0.011033,-0.018038,-0.002637,-0.022555
top,2023-11-30,0.007197,0.015571,0.001639,0.018351


### Scrapping Vietnam 10 Year Government Bond Yield

In [837]:
from websocket import create_connection
import json

def create_msg(ws, fun, arg): 
    ms = json.dumps({"m": fun, "p": arg})
    msg = "~m~" + str(len(ms)) + "~m~" + ms
    ws.send(msg)
    
def clean_data(data): 
    extracted_data = json.loads(data[data.find('"s":[')+4:data.find(',"ns"')])
    cleaned_data = []
    for item in extracted_data:
        cleaned_data.append(item['v'])
    df = pd.DataFrame(cleaned_data, columns=['time', 'open', 'high', 'low', 'close'])
    df['time'] = pd.to_datetime(df['time'], unit='s').dt.date
    
    return df
    
# Init WebSocketURL
socket = "wss://data.tradingview.com/socket.io/websocket"

# Create connection to WebSocket
ws = create_connection(socket)

# Create a new chart session
create_msg(ws, fun="chart_create_session", arg=["cs_WwurjQQG1oBH",""])
# Idenify stock symbol
create_msg(ws, fun="resolve_symbol", arg=["cs_WwurjQQG1oBH","sds_sym_1","={\"adjustment\":\"splits\",\"symbol\":\"TVC:VN10Y\"}"])
# Create price series
create_msg(ws, fun="create_series", arg=["cs_WwurjQQG1oBH","sds_1","s1","sds_sym_1","1D",10000,""])

while True: 
    res = ws.recv()
    print(res)
    if "series_completed" in res: 
        break
    
cleaned_data = clean_data(res)

~m~293~m~{"session_id":"0.11657.7694_tyo1-charts-free-4-tvbs-x88l9-2","timestamp":1740037794,"timestampMs":1740037794600,"release":"release_208-46","studies_metadata_hash":"3c98d9a6ed119cb56b13fd9057ec77e2efdce8f3","auth_scheme_vsn":2,"protocol":"json","via":"92.223.46.148:443","javastudies":["3.66"]}
~m~95~m~{"m":"series_loading","p":["cs_WwurjQQG1oBH","sds_1","s1"],"t":1740037794,"t_ms":1740037794763}~m~1297~m~{"m":"symbol_resolved","p":["cs_WwurjQQG1oBH","sds_sym_1",{"source2":{"country":"US","description":"TVC","exchange-type":"exchange","id":"TVC","name":"TVC","url":"https://tradingview.com"},"source_id":"TVC","subsession_id":"regular","provider_id":"refinitiv","country":"VN","format":"percent","formatter":"percent","pro_perm":"","value_unit_id":"PCT","measure":"unit","allowed_adjustment":"none","short_description":"Vietnam 10Y yield","variable_tick_size":"","name":"VN10Y","full_name":"TVC:VN10Y","pro_name":"TVC:VN10Y","base_name":["TVC:VN10Y"],"description":"Vietnam 10 Year Gover

In [838]:
rf = (cleaned_data
                .assign(
                    time=pd.to_datetime(cleaned_data['time'])
                )
                .set_index('time')
                ['2013-01-01':'2023-12-31']
                .resample('ME')
                .last()
                .ffill()
                [['close']]
                .apply(lambda x: x.add(1).pow(1/12).sub(1).div(100))
                )

In [839]:
rf.rename(columns={'close': 'rf'}, inplace=True)

In [840]:
excess_ret = (rf
            .merge(sml_ret.to_frame('sml_ret'), left_index=True, right_index=True)
            .assign(
                excess_ret=lambda x: x['sml_ret'] - x['rf']
            )).excess_ret
excess_ret.cumsum().plot()

In [841]:
sharpe_ratio(sml_ret, risk_free=0.002186, period='monthly')

1.0646103697833522

### Portfolio optimization

In [842]:
import cvxpy as cp

In [843]:
def get_optimal_weights(covariance_matrix, momentum_weights,scale=2.0): 
    x = cp.Variable(len(momentum_weights))
    P = cp.quad_form(x, covariance_matrix.to_numpy())
    distance_term = cp.norm2(x-momentum_weights)
    objective_func = cp.Minimize(P + scale*distance_term)
    constraints = [
        x >= 0,
        cp.sum(x) == 1
    ]
    problem = cp.Problem(objective_func, constraints)
    problem.solve()
    optimal_weights = x.value
    
    return optimal_weights

In [844]:
formed_portfolio = bt.portfolio_features

In [845]:
mom_weights = smallcap_performance['pmom_weights'].groupby(level=0).transform(lambda x: softmax(x))

In [846]:
mom_weights_dfs = [mom_weights.loc[idx] for idx in mom_weights.index.get_level_values(0).unique()]

idx = pd.IndexSlice

cov_matrix_dfs = []

for i in range(len(mom_weights_dfs)):
    cov_matrix = formed_portfolio[i]['r'].unstack(level=1)[mom_weights_dfs[i].index.get_level_values(0)].cov()
    cov_matrix_dfs.append(cov_matrix)

In [847]:
def optimize_portfolio(cov_matrix_dfs, mom_weights_dfs, scale=.01):
    optimal_weights_dfs = [] 
    for i in range(len(cov_matrix_dfs)):
        optimal_weights = get_optimal_weights(cov_matrix_dfs[i], mom_weights_dfs[i], scale)
        optimal_weights_dfs.append(optimal_weights)
    
    return optimal_weights_dfs

In [848]:
optimal_weights_dfs = optimize_portfolio(cov_matrix_dfs, mom_weights_dfs, scale=.01)
optimal_weights_df = pd.DataFrame(optimal_weights_dfs)
optimal_weights_df = optimal_weights_df.stack().to_frame('optimal_weight').set_index(smallcap_performance.droplevel(2).index)
optimized_ret = smallcap_performance['r'].mul(optimal_weights_df['optimal_weight']).groupby(level=0).sum()

In [849]:
optimal_weights_df['optimal_weight'].mul(smallcap_performance['r']).groupby(level=0).sum().cumsum().plot()
optimized_ret.cumsum().plot()