In [1]:
import os
import math
import random 
import pandas as pd
import numpy as np
import datetime as dt
from pandas_datareader import data as pdr
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from Util_def import *
from Util_model import *
from pypfopt import (
    EfficientFrontier,
    risk_models,
    expected_returns,
    objective_functions,
)

import warnings
warnings.filterwarnings('ignore')


Devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU details:  {'device_name': 'METAL'}


In [2]:
# Investable universe: 33 ETF tickers (cash, bonds, equity regions/sectors/themes, alternatives).
ETF_list = [
    'SHV',
    'BND', 'BNDX', 'JNK',
    'VT', 'VEA', 'IEMG',
    'VOO', 'QQQ', 'DIA', 'VGK', 'EWJ', 'MCHI', 'THD', 'VNM', 'INDA',
    'RXI', 'KXI', 'IXC', 'IXG', 'IXJ', 'EXI', 'IXN', 'IXP', 'JXI',
    'ITA', 'ICLN', 'SKYY', 'SMH',
    'REET', 'IGF', 'PDBC', 'GLD'
]

# Requested sample: 2015-01-01 .. 2024-12-31 (ten calendar years, not five).
# The recorded output of this cell shows the last row at 2024-12-30.
startDate = dt.datetime(2015, 1, 1)
endDate = dt.datetime(2024, 12, 31)

# First calendar year in which the portfolio is rebalanced.
# (This is startDate.year + 5; an older comment said "+ 3".)
start_rebalance_year = 2020

data = getData(ETF_list, startDate, endDate)

# Forward-fill gaps, then back-fill whatever is still missing at the start of the sample.
# `fillna(method=...)` is deprecated in pandas 2.x; `ffill()` / `bfill()` are the replacements.
# NOTE(review): back-filling copies the first available value backwards in time --
# confirm that is acceptable for any ticker whose history starts after startDate.
data = data.ffill().bfill()

# `DataFrame.info()` prints its summary itself and returns None, so it must not be wrapped in print().
data.info()
avg_days = avg_days_per_month(data)

print("=" * 50)
print("Min Date:", data.index.min())
print("Max Date:", data.index.max())
print("Start Rebalance Year:", start_rebalance_year)
print(f"Average number of trading days per month: {avg_days}", "days")
print("=" * 50)


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  33 of 33 completed

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2515 entries, 2015-01-02 to 2024-12-30
Data columns (total 33 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   SHV     2515 non-null   float64
 1   BND     2515 non-null   float64
 2   BNDX    2515 non-null   float64
 3   JNK     2515 non-null   float64
 4   VT      2515 non-null   float64
 5   VEA     2515 non-null   float64
 6   IEMG    2515 non-null   float64
 7   VOO     2515 non-null   float64
 8   QQQ     2515 non-null   float64
 9   DIA     2515 non-null   float64
 10  VGK     2515 non-null   float64
 11  EWJ     2515 non-null   float64
 12  MCHI    2515 non-null   float64
 13  THD     2515 non-null   float64
 14  VNM     2515 non-null   float64
 15  INDA    2515 non-null   float64
 16  RXI     2515 non-null   float64
 17  KXI     2515 non-null   float64
 18  IXC     2515 non-null   float64
 19  IXG     2515 non-null   float64
 20  IXJ     2515 non-null   float64
 21  EXI     2515 non-nu




In [3]:
# data.to_csv('33_ETF_data.csv', index=True)

In [4]:
# ---------------- Portfolio type ----------------
# Each type is a (lower, upper) weight range; the model uses element 0 as the per-asset floor.
long_only = (0, 1)
long_short = (-1, 1)

port_type = long_only

# ---------------- Constraints ----------------
# Ticker -> asset class, written class by class (insertion order follows ETF_list).
asset_map = {
    **dict.fromkeys(['SHV'], 'Cash_Equivalent'),
    **dict.fromkeys(['BND', 'BNDX', 'JNK'], 'Fixed_Income'),
    **dict.fromkeys(
        [
            'VT', 'VEA', 'IEMG',
            'VOO', 'QQQ', 'DIA', 'VGK', 'EWJ', 'MCHI', 'THD', 'VNM', 'INDA',
            'RXI', 'KXI', 'IXC', 'IXG', 'IXJ', 'EXI', 'IXN', 'IXP', 'JXI',
            'ITA', 'ICLN', 'SKYY', 'SMH',
        ],
        'Equity',
    ),
    **dict.fromkeys(['REET', 'IGF', 'PDBC', 'GLD'], 'Alternatives'),
}

# Aggressive profile: minimum and maximum total weight allowed per asset class.
asset_lower_aggressive = dict(
    Cash_Equivalent=0.0,
    Fixed_Income=0.0,
    Equity=0.55,
    Alternatives=0.0,
)
asset_upper_aggressive = dict(
    Cash_Equivalent=0.4,
    Fixed_Income=0.3,
    Equity=0.9,
    Alternatives=0.3,
)

# Sanity display: both counts should match (every ticker has a class).
len(ETF_list), len(asset_map)

(33, 33)

# Training

In [5]:
# Root directory under which results are written.
output_dir = 'Results'

# Architecture label: 'Transformer' or 'LSTM'.
model_type = 'Transformer'

# Positional-encoding variant. Accepted values:
#   'OriPE', 'Time2Vec', 'ConvSPE', 'SineSPE',
#   'TemporalPE', 'LearnablePE', 'AbsolutePE', 'tAPE'
pe_type = 'OriPE'

# norm_type = 'QP'            # 'QP' or 'Iter'

# Normalisation-placement label: 'PostNorm' or 'PreNorm'.
# NOTE(review): in the visible cells this value only feeds the names below; Model.build never
# reads it, and its encoder layers apply LayerNormalization after the residual add.
# Confirm the label matches the architecture actually trained.
pre_post = 'PreNorm'

# Softmax temperature of the output layer.
n_temp = 1.0

# Sequential run identifier used in the result file name.
run_no = 551

# Experiment label, e.g. '01_1_<model>_<pe>_<norm>_temp_<temperature>'.
train_type = '_'.join(('01_1', model_type, pe_type, pre_post, 'temp', str(n_temp)))

# --- Excel export: path of the workbook that receives this run's results ---
results_excel_path = "{}/{}/01_Results_{}_{}_{}_run_{}.xlsx".format(
    output_dir, train_type, model_type, pe_type, pre_post, run_no
)


In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input, Dropout, LayerNormalization, MultiHeadAttention, Embedding, GlobalAveragePooling1D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K
from dateutil.parser import parse
from tensorflow.keras import layers, Model
from tensorflow.keras.models import Model as KModel, Sequential
from tensorflow.keras import layers, Model as KModel
import keras
from tensorflow.keras.layers import Layer
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import cvxpy as cp
import cvxopt



# Report the compute devices TensorFlow can see, plus details of the first GPU when one exists.
devices = tf.config.list_physical_devices()
print(f"\nDevices:  {devices}")

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    details = tf.config.experimental.get_device_details(gpus[0])
    print(f"GPU details:  {details}")

# Silence Python warnings for the remainder of the session.
warnings.filterwarnings('ignore')


Devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU details:  {'device_name': 'METAL'}


Quadratic Programming (QP) method

In [7]:
# Transformer - Ori
class Model:
    """Neural portfolio allocator whose output weights are projected onto a constraint set.

    The network (see ``build``) maps a window of lagged features to one weight per asset and
    is trained by maximising an annualised Sharpe ratio. When constraint matrices exist, the
    softmax output is passed through a quadratic-programming projection that enforces:

      * weights sum to 1,
      * a per-asset cap of 0.30 (0.40 for "SHV"),
      * lower/upper bounds on the total weight of each asset class.

    Note: this class is named ``Model`` and therefore shadows the Keras ``Model`` imported in
    the notebook; the Keras functional model is referenced through the ``KModel`` alias.

    Parameters
    ----------
    max_weight : stored on the instance; the visible code does not read it (per-asset caps
        are hard-coded to 0.30 / 0.40).
    asset_map : dict mapping ticker -> asset-class name.
    asset_lower, asset_upper : dicts mapping asset-class name -> min / max total weight.
    port_type : (lower, upper) per-asset weight range, e.g. (0, 1) for long-only.
    """

    def __init__(self, max_weight=1, asset_map=None, asset_lower=None, asset_upper=None, port_type=None):
        self.data = None            # tensor of asset data used inside the loss (set in calc_wgts)
        self.model = None           # compiled Keras model (rebuilt on every calc_wgts call)
        self.max_weight = max_weight
        self.asset_map = asset_map or {}
        self.asset_lower = asset_lower or {}
        self.asset_upper = asset_upper or {}
        self.port_type = port_type
        self.asset_columns = None   # pandas Index of asset names, set by _create_constraint_matrices

    def _create_constraint_matrices(self, columns):
        """Build the asset-class membership matrix and its lower/upper bound vectors.

        ``columns`` must support ``get_loc`` (e.g. a pandas Index). Assets missing from
        ``asset_map`` are grouped under "Unknown"; classes missing from ``asset_lower`` /
        ``asset_upper`` default to 0.0 / 1.0.

        Sets ``constraint_matrix`` (n_types, n_assets), ``lower_bounds`` (n_types,),
        ``upper_bounds`` (n_types,) and ``asset_type_names``.
        """
        self.asset_columns = columns

        # Group assets by class, preserving first-seen order of classes.
        asset_types = {}
        for asset in columns:
            t = self.asset_map.get(asset, "Unknown")
            asset_types.setdefault(t, []).append(asset)

        mats = []
        lbs = []
        ubs = []
        names = []
        for t, assets in asset_types.items():
            # Indicator row: 1.0 where the asset belongs to class t.
            vec = np.zeros(len(columns), dtype=float)
            for a in assets:
                idx = columns.get_loc(a)
                vec[idx] = 1.0
            mats.append(vec)
            lbs.append(self.asset_lower.get(t, 0.0))
            ubs.append(self.asset_upper.get(t, 1.0))
            names.append(t)

        self.constraint_matrix = np.vstack(mats)        # shape = (n_types, n_assets)
        self.lower_bounds = np.array(lbs, dtype=float)  # shape = (n_types,)
        self.upper_bounds = np.array(ubs, dtype=float)  # shape = (n_types,)
        self.asset_type_names = names

    # 2-phase QP with constraints (fixing zeros)
    def _apply_constraints_row_qp(self, weights_row: np.ndarray,
                                tol_zero: float = 1e-12) -> np.ndarray:
        """Project one weight vector onto the constraint set with SciPy SLSQP.

        2-phase QP:
        • Phase-1 : fix original zeros (bounds = (0,0)).
        • Phase-2 : if infeasible, use normal bounds (allow zeros to move).

        If both phases fail, the input is clipped to the per-asset bounds. In every case the
        result is renormalised to sum to 1 (equal weights if the sum is not positive).
        """
        # Local import: no visible cell imports `minimize` explicitly, so do not rely on a
        # star-import to provide it.
        from scipy.optimize import minimize

        w_initial = weights_row.copy()
        n_assets  = len(w_initial)

        # ---------- (0) helper ----------
        def build_bounds(fix_zero=False):
            """return list[(lb,ub)] for each asset"""
            bounds = []
            for asset_name, w0 in zip(self.asset_columns, w_initial):
                is_zero = w0 <= tol_zero
                lb_def  = self.port_type[0]   # 0 for long-only
                ub_def  = 0.40 if asset_name == "SHV" else 0.30
                if fix_zero and is_zero:
                    bounds.append((0.0, 0.0))        # lock at 0
                else:
                    bounds.append((lb_def, ub_def))
            return bounds

        def objective(w, penalty=None):
            """(optionally weighted) least-squares distance to the initial weights"""
            if penalty is None:
                return np.sum((w - w_initial)**2)
            return np.sum(penalty * (w - w_initial)**2)

        # Constraint definitions (shared by both phases): budget and asset-class bounds.
        constraints = [
            {'type': 'eq',   'fun': lambda w: np.sum(w) - 1.0},
            {'type': 'ineq', 'fun': lambda w: self.constraint_matrix.dot(w) - self.lower_bounds},
            {'type': 'ineq', 'fun': lambda w: self.upper_bounds - self.constraint_matrix.dot(w)},
        ]

        # ---------- (1) PHASE-1 : fix zeros ----------
        result = minimize(
            fun=objective,
            x0=w_initial,
            method='SLSQP',
            bounds=tuple(build_bounds(fix_zero=True)),
            constraints=constraints,
            options={'ftol': 1e-9, 'disp': False}
        )

        # ---------- (2) if fail → PHASE-2 ----------
        if not result.success:
            result = minimize(
                fun=objective,
                x0=w_initial,
                method='SLSQP',
                bounds=tuple(build_bounds(fix_zero=False)),
                constraints=constraints,
                options={'ftol': 1e-9, 'disp': False}
            )

        # ---------- (3) fallback ----------
        if result.success:
            w = result.x
        else:
            print("⚠️ QP failed (both phases). Using clipped normalisation.")
            lb_ub = build_bounds(fix_zero=False)
            w = np.clip(w_initial, *zip(*lb_ub))

        # Normalise so the weights sum to 1.
        s = np.sum(w)
        return w / s if s > 0 else np.ones(n_assets) / n_assets

    # _apply_constraints_row_cvxpy
    def _apply_constraints_row_cvxpy(self, weights_row, tol_zero=1e-6, lambda_zero=100.0):
        """
        Applies two-phase QP constraints to a single row of weights.
        - Phase 1: Attempts to solve with a hard lock on zero-weight assets.
        - Phase 2: If Phase 1 is infeasible, it releases the lock and instead
                   applies a heavy penalty (``lambda_zero``) for moving away from zero.

        The sum-to-one constraint is only added for a long-only portfolio, i.e. when
        ``port_type`` equals (0, 1). If both phases fail, the clipped input is renormalised.
        """
        # ---------- Data Preparation for a single row ----------
        w0 = np.clip(weights_row, *self.port_type)
        n = len(w0)
        lower_w, _ = self.port_type
        # Compare against the literal range instead of the notebook-global `long_only`,
        # so the method does not depend on cell execution order.
        is_long_only = tuple(self.port_type) == (0, 1)

        # Upper bound vector (0.30, except 0.40 for SHV)
        ub_vec = np.full(n, 0.30)
        if 'SHV' in self.asset_columns:
            shv_idx = self.asset_columns.get_loc('SHV')
            ub_vec[shv_idx] = 0.40

        # Indices of assets with near-zero initial weights
        zero_idx = np.where(w0 <= tol_zero)[0]

        # ---------- Phase 1: Solve QP with locked zeros ----------
        w_p1 = cp.Variable(n)

        # Base constraints + hard lock on zeros
        cons_p1 = [w_p1 >= lower_w, w_p1 <= ub_vec]
        if len(zero_idx) > 0:
            cons_p1.append(w_p1[zero_idx] == 0)

        # Add sum-to-one and group constraints
        if is_long_only:
            cons_p1.append(cp.sum(w_p1) == 1)
        if hasattr(self, 'constraint_matrix'):
            cm, lb, ub = self.constraint_matrix, self.lower_bounds, self.upper_bounds
            cons_p1.extend([cm @ w_p1 >= lb, cm @ w_p1 <= ub])

        obj_p1 = cp.Minimize(cp.sum_squares(w_p1 - w0))
        prob_p1 = cp.Problem(obj_p1, cons_p1)
        prob_p1.solve(solver=cp.OSQP, warm_start=True)

        if prob_p1.status in [cp.OPTIMAL, cp.OPTIMAL_INACCURATE]:
            return np.array(w_p1.value).flatten()

        # ---------- Phase 2: Solve QP with penalty if Phase 1 failed ----------
        print("⚠️ Strict (zero-locked) QP failed. Falling back to penalty-based QP.")
        w_p2 = cp.Variable(n)

        # Base constraints without the hard lock
        cons_p2 = [w_p2 >= lower_w, w_p2 <= ub_vec]
        if is_long_only:
            cons_p2.append(cp.sum(w_p2) == 1)
        if hasattr(self, 'constraint_matrix'):
            cm, lb, ub = self.constraint_matrix, self.lower_bounds, self.upper_bounds
            cons_p2.extend([cm @ w_p2 >= lb, cm @ w_p2 <= ub])

        # Softer objective with penalty for moving zero-weight assets.
        # sum_squares(sqrt(p) * d) == sum(p * d^2), which keeps the problem in QP form.
        penalty = np.ones(n)
        penalty[zero_idx] = lambda_zero
        sqrt_p = np.sqrt(penalty)
        obj_p2 = cp.Minimize(cp.sum_squares(cp.multiply(sqrt_p, w_p2 - w0)))

        prob_p2 = cp.Problem(obj_p2, cons_p2)
        prob_p2.solve(solver=cp.OSQP, warm_start=True)

        if prob_p2.status in [cp.OPTIMAL, cp.OPTIMAL_INACCURATE]:
            return np.array(w_p2.value).flatten()
        else:
            # Final fallback if both phases fail
            print(f"❌ Warning: Both QP attempts failed (Final status: {prob_p2.status}). Using fallback normalization.")
            s = w0.sum()
            return (w0 / s) if s > 0 else np.ones_like(w0) / n

    # Final QP: a single weighted projection that strongly discourages moving zero weights
    def _apply_constraints_final(self, weights):
        """
        Use Quadratic Programming to enforce:
        • Σw = 1
        • per-asset cap 0.30 (SHV 0.40)
        • asset_lower / asset_upper (when constraint matrices exist)
        • minimal movement away from w0, especially at positions where w0 == 0

        Returns a 1-D float array of projected weights. If no solver produces a solution,
        the input is clipped to the per-asset bounds and renormalised to sum to 1
        (equal weights if the clipped sum is not positive).
        """
        w0 = np.asarray(weights, float).copy()
        n  = w0.size

        # ---------- Per-asset bounds -------------------------------------------
        ub = np.full(n, 0.30)
        if self.asset_columns is not None and "SHV" in self.asset_columns:
            ub[self.asset_columns.get_loc("SHV")] = 0.40

        lb = np.full(n, self.port_type[0])      # long_only → 0, long/short → -1
        # (other portfolio types can be handled by changing self.port_type)

        # ---------- QP variable -------------------------------------------------
        w = cp.Variable(n)

        constraints = [
            cp.sum(w) == 1,
            w >= lb,
            w <= ub
        ]

        # ---------- Asset-class constraints -------------------------------------
        if hasattr(self, "constraint_matrix"):
            C = self.constraint_matrix           # shape (n_types, n_assets)
            constraints += [
                C @ w >= self.lower_bounds,
                C @ w <= self.upper_bounds
            ]

        # ---------- Objective: min Σ α_i (w_i - w0_i)^2 ------------------------
        # α_i = 1 / (|w0_i| + eps): entries with w0 ≈ 0 are penalised most. The absolute
        # value keeps every α_i positive (a negative w0 would otherwise give a negative or
        # singular coefficient and a non-convex objective); for w0 >= 0 it changes nothing.
        eps = 1e-4
        alpha = 1.0 / (np.abs(w0) + eps)
        obj   = cp.Minimize(cp.sum(cp.multiply(alpha, cp.square(w - w0))))

        prob = cp.Problem(obj, constraints)

        # Pick a solver that supports QP: OSQP first, ECOS as a retry (or ECOS_BB / SCS).
        try:
            prob.solve(solver=cp.OSQP)
        except cp.error.SolverError:
            try:
                prob.solve(solver=cp.ECOS)
            except cp.error.SolverError:
                pass  # w.value stays None; handled by the fallback below

        # No solution (infeasible or solver failure): clip and renormalise.
        # (The previous `super()._apply_constraints(...)` call could not work: this class
        # has no base class providing that method.)
        if w.value is None:
            print("QP infeasible, falling back to clipped normalisation.")
            w_clipped = np.clip(w0, lb, ub)
            total = w_clipped.sum()
            return w_clipped / total if total > 0 else np.ones(n) / n

        return np.asarray(w.value).flatten()
    # ===== END V.2 =====

    def _apply_constraints(self, weights: np.ndarray) -> np.ndarray:
        """
        Accept either:
          • 1D array of shape (n_assets,), or
          • 2D array of shape (batch_size, n_assets).
        Returns a NumPy array of the same shape, after applying
        ``_apply_constraints_final`` row-wise.

        Raises ValueError for any other number of dimensions.
        """
        w = np.array(weights, copy=True)
        if w.ndim == 1:
            return self._apply_constraints_final(w)
        elif w.ndim == 2:
            return np.vstack([self._apply_constraints_final(row) for row in w])
        else:
            raise ValueError(f"_apply_constraints got ndim={w.ndim}, expected 1 or 2.")

    def build(self, input_shape, outputs, PE_type=pe_type):
        """
        Build and compile the Transformer encoder that outputs portfolio weights.

        input_shape = (seq_len, feature_dim)
        outputs     = number of assets
        PE_type     = positional-encoding variant: 'OriPE', 'Time2Vec', 'ConvSPE', 'SineSPE',
                      'TemporalPE', 'LearnablePE', 'AbsolutePE' or 'tAPE'
                      (default: the notebook-level ``pe_type`` at class-definition time)

        Returns the compiled Keras model (Adam optimiser, negative-Sharpe loss).
        Raises ValueError for an unknown PE_type.

        The encoding layers other than 'OriPE' (Time2Vector, ConvSPE, ...) and
        TemperatureSoftmax are not defined in this cell -- presumably provided by the
        notebook's utility star-imports; verify.
        """
        d_model = 512
        num_heads = 8
        ff_dim = 2048
        num_layers = 6
        dropout_rate = 0.3
        spe_kernel_size = 5
        seq_len, feature_dim = input_shape

        def get_positional_encoding(length, depth):
            """Sinusoidal encoding of shape (length, depth): sines then cosines, concatenated."""
            # Halve the depth because we will concatenate sine and cosine embeddings.
            depth = depth / 2

            # Create arrays for positions and depths.
            positions = np.arange(length)[:, np.newaxis]      # Shape: (length, 1)
            depths = np.arange(depth)[np.newaxis, :] / depth  # Shape: (1, depth)

            # Calculate the angle rates.
            angle_rates = 1 / (10000**depths)                 # Shape: (1, depth)

            # Calculate the angle radians.
            angle_rads = positions * angle_rates             # Shape: (length, depth)

            # Concatenate the sine and cosine of the angle radians to form the encoding.
            pos_encoding = np.concatenate(
                [np.sin(angle_rads), np.cos(angle_rads)],
                axis=-1)
            # Cast the final encoding to a float32 TensorFlow tensor.
            return tf.cast(pos_encoding, dtype=tf.float32)

        inputs = layers.Input(shape=(seq_len, feature_dim))
        # x = layers.Dense(d_model)(inputs)
        # pos_encoding = get_positional_encoding(seq_len, d_model)
        # x = x + pos_encoding

        # ====== Position Encoding ======
        if PE_type == 'OriPE':
            x = layers.Dense(d_model)(inputs)
            pos_encoding = get_positional_encoding(seq_len, d_model)
            x = x + pos_encoding[tf.newaxis, :]
        elif PE_type == 'Time2Vec':
            time_embedding = Time2Vector(seq_len)(inputs)
            x = layers.Concatenate(axis=-1)([inputs, time_embedding])
            x = layers.Dense(d_model)(x)
        elif PE_type == 'ConvSPE':
            x = layers.Dense(d_model)(inputs)
            x = ConvSPE(d_model=d_model, kernel_size=spe_kernel_size)(x)
        elif PE_type == 'SineSPE':
            x = layers.Dense(d_model)(inputs)
            x = SineSPE(d_model=d_model, max_len=seq_len + 100)(x)
        elif PE_type == 'TemporalPE':
            x = layers.Dense(d_model)(inputs)
            x = TemporalPositionalEncoding(d_model=d_model, max_len=seq_len + 100)(x)
        elif PE_type == 'LearnablePE':
            x = layers.Dense(d_model)(inputs)
            pos_encoding_layer = LearnablePositionalEncoding(d_model=d_model, max_len=seq_len + 100, dropout=dropout_rate)
            x = pos_encoding_layer(x)
        elif PE_type == 'AbsolutePE':
            x = layers.Dense(d_model)(inputs)
            pos_encoding_layer = AbsolutePositionalEncoding(d_model=d_model, max_len=seq_len + 100, dropout=dropout_rate)
            x = pos_encoding_layer(x)
        elif PE_type == 'tAPE':
            x = layers.Dense(d_model)(inputs)
            pos_encoding_layer = tAPE(d_model=d_model, max_len=seq_len + 100, dropout=dropout_rate)
            x = pos_encoding_layer(x)
        else:
            raise ValueError(f"Unknown Positional Encoding type: {PE_type}")

        # Encoder stack. LayerNormalization is applied AFTER each residual add (post-norm).
        # NOTE(review): the notebook-level `pre_post` label is not consulted here -- confirm
        # the experiment label matches this layout.
        for _ in range(num_layers):
            attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(x, x)
            attn_output = layers.Dropout(dropout_rate)(attn_output)
            out1 = layers.LayerNormalization(epsilon=1e-6)(x + attn_output)

            ffn = layers.Dense(ff_dim, activation="relu")(out1) # ori = relu
            ffn = layers.Dense(d_model)(ffn)
            ffn_output = layers.Dropout(dropout_rate)(ffn)
            x = layers.LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

        pooled = layers.GlobalAveragePooling1D()(x)       # (batch_size, d_model)
        # logits = layers.Dense(outputs, activation="softmax")(pooled)  # (batch_size, n_assets)

        # Temperature comes from the notebook-level `n_temp`.
        logits = layers.Dense(outputs, activation=TemperatureSoftmax(temperature=n_temp))(pooled)  # (batch_size, n_assets)
        # logits = layers.Dense(outputs)(pooled)
        # logits = TemperatureSoftmax(temperature=1.0)(logits)  # Use custom softmax layer

        if hasattr(self, "constraint_matrix"):
            print("Applying constraints to output layer... ✅")
            # Wrap in a small tf.py_function that forces a known shape.
            # NOTE(review): the projection runs NumPy/cvxpy code inside tf.py_function;
            # confirm that gradients actually reach the trainable layers through this step.
            def _tf_apply_constraints(x_tensor):
                # x_tensor: tf.Tensor(shape=[batch_size, outputs], dtype=float32)
                y = tf.py_function(self._apply_constraints, [x_tensor], tf.float32)
                # Now explicitly tell Keras "y has shape (batch_size, outputs)"
                y.set_shape([None, outputs])
                return y

            constrained = layers.Lambda(_tf_apply_constraints)(logits)
        else:
            print("No constraints applied to output layer... ❌")
            constrained = logits

        model = KModel(inputs=inputs, outputs=constrained)

        # Sharpe loss: negative annualised Sharpe ratio of the weighted portfolio.
        def sharpe_loss(_, weights):
            # Only the first row of the batch is used (training feeds a batch of one).
            w = weights[0]
            # 1. Daily portfolio series: weighted sum across assets of self.data.
            portfolio_returns_daily = tf.reduce_sum(tf.multiply(self.data, w), axis=1)
            # 2. Daily statistics.
            mean_daily_return = tf.reduce_mean(portfolio_returns_daily)
            std_daily_return = tf.math.reduce_std(portfolio_returns_daily)
            # 3. Annualise: mean scales with 252 days, volatility with sqrt(252).
            annualized_return = mean_daily_return * 252
            annualized_volatility = std_daily_return * tf.sqrt(252.0)
            # 4. Sharpe ratio in consistent (annual) units; epsilon avoids division by zero.
            risk_free_rate = 0.02
            sharpe = (annualized_return - risk_free_rate) / (annualized_volatility + 1e-8)
            return -sharpe

        optimizer = tf.keras.optimizers.Adam()
        model.compile(loss=sharpe_loss, optimizer=optimizer)
        return model

    def calc_wgts(self, lkbk: int, ep: int, data: pd.DataFrame, features: pd.DataFrame, patience=10, PE_type=pe_type):
        """Train a fresh model on one window and return its asset weights.

        Parameters
        ----------
        lkbk : number of lags stacked into each input row; the first ``lkbk`` rows are dropped.
        ep : maximum number of training epochs.
        data : per-asset frame consumed by the Sharpe loss (one column per asset).
        features : frame of model inputs; lag k is ``features.shift(k)`` with NaNs set to 0.
        patience : early-stopping patience on the training loss.
        PE_type : positional-encoding variant forwarded to ``build``.

        Returns
        -------
        1-D array with one weight per column of ``data``: the model's prediction for the
        training window (constraints already applied when the constraint layer is present).
        """
        # Ensure constraint matrices exist (built once, from the first `data.columns` seen)
        if not hasattr(self, "constraint_matrix"):
            self._create_constraint_matrices(data.columns)

        # Build the lagged copies of the features: lag 0 .. lkbk-1, each trimmed by lkbk rows
        features = [features.shift(k).fillna(0).values[lkbk:] for k in range(lkbk)]

        # Stack the lags side by side -> (n_rows - lkbk, lkbk * n_features)
        data_array = np.concatenate(features, axis=1)

        # Drop the same leading rows from the loss data so it stays aligned with the inputs
        data = data.iloc[lkbk:]

        # Convert data to tensorflow format for processing in loss function
        self.data = tf.cast(tf.constant(data), float)

        # Building a new model (create fresh model for each rebalance)
        self.model = self.build(data_array.shape, len(data.columns), PE_type=PE_type)

        early_stopping = EarlyStopping(monitor='loss', patience=patience, restore_best_weights=True)

        # Add a leading batch axis: the whole window is a single training sample
        fit_predict_data = data_array[np.newaxis, :]

        # Dummy target with one entry per asset; the loss ignores it and only needs the shape
        y = np.zeros(len(data.columns))[np.newaxis, :]

        # print(f"X shape: {fit_predict_data.shape}")
        # print("X:", fit_predict_data)
        # print("y shape:", ep)
        # print("y:", y)
        # Fit the model
        self.model.fit(fit_predict_data, y, epochs=ep, #batch_size=256,
                       shuffle=False, callbacks=[early_stopping])

        # Predict weights
        raw_weights = self.model.predict(fit_predict_data)[0]
        # raw_weights_non_zero_count = np.count_nonzero(raw_weights)
        # if raw_weights_non_zero_count < 10 or raw_weights_non_zero_count > 15:
        #     print(f"❌ Warning: Raw weights have {raw_weights_non_zero_count}")
        # print(f"Raw Weights selected ETF: {raw_weights_non_zero_count}")

        return raw_weights

In [8]:
# asset_weights = model.calc_wgts(lookback, n_epochs, train_features, train_features, patience=10)
def quarterly_walk_forward(df, lookback, n_epochs, features, asset_map=None,
                           asset_lower=None, asset_upper=None, port_type=(0, 1),
                           start_year=2024, trading_days_per_quarter=63,
                           min_train_periods=252, PE_type=pe_type):
    """Walk forward quarter by quarter: retrain, predict weights, score out of sample.

    For every date returned by ``get_rebalance_dates(df, start_year)`` the
    function takes the rows ending on the trading day before the rebalance date
    (at most ``min_train_periods + 1`` rows), trains a fresh network through
    ``Model.calc_wgts`` and applies the predicted weights to the daily returns
    from the rebalance date up to the date given by ``get_quarter_end_date``.
    A rebalance that raises during training or reporting is logged and skipped.

    Args:
        df: Price-like frame, one column per asset, indexed by trading date.
        lookback: Passed to ``Model.calc_wgts`` as ``lkbk``.
        n_epochs: Passed to ``Model.calc_wgts`` as ``ep``.
        features: Frame row-aligned with ``df``. Its training slice is handed to
            ``calc_wgts`` as both the ``data`` and the ``features`` argument.
        asset_map, asset_lower, asset_upper, port_type: Forwarded to ``Model``.
        start_year: Forwarded to ``get_rebalance_dates``.
        trading_days_per_quarter: Forwarded to ``get_quarter_end_date``.
        min_train_periods: Number of rows the training window reaches back
            from its last row.
        PE_type: Forwarded to ``Model.calc_wgts``.

    Returns:
        ``(pnl, weights_df, rebalance_summary)`` where ``pnl`` is the series of
        daily out-of-sample portfolio returns indexed by trading date,
        ``weights_df`` has one row of weights per processed rebalance and
        ``rebalance_summary`` one row of bookkeeping per processed rebalance
        (``quarter_return`` is the arithmetic sum of the daily returns).
        ``(None, None, None)`` when no rebalance succeeded.
    """
    # Slicing below is positional and runs on RangeIndex copies; the original
    # dates are kept to locate rebalance points and to label the results.
    original_index = df.index
    df_reset = df.reset_index(drop=True)
    features_reset = features.reset_index(drop=True)

    # Get rebalance dates
    rebalance_dates = get_rebalance_dates(df, start_year)

    # Result containers, appended to in lockstep for every successful rebalance
    all_rets = []
    weights_list = []
    rebalance_info = []

    # One Model wrapper for the whole run; calc_wgts builds a new network each call
    model = Model(
        max_weight=1,
        asset_map=asset_map,
        asset_lower=asset_lower,
        asset_upper=asset_upper,
        port_type=port_type
    )

    print(f"\n=== Starting Optimized Quarterly Rebalancing Analysis ===")
    print(f"Training lookback period: {lookback} days")
    print(f"Minimum training periods: {min_train_periods} days")
    print(f"Trading days per quarter: {trading_days_per_quarter} days")
    print(f"Total rebalance periods: {len(rebalance_dates)}")

    for i, rebalance_date in enumerate(rebalance_dates):
        print(f"\n--- Rebalancing {i+1}/{len(rebalance_dates)} ---")
        print(f"Rebalance Date: {rebalance_date.strftime('%Y-%m-%d')}")

        try:
            # Last row on or before the rebalance date, minus one: training must
            # end on the trading day before the portfolio is rebalanced.
            train_end_pos = original_index.get_indexer([rebalance_date], method='pad')[0] - 1
            if train_end_pos < 0:
                continue

        except Exception:
            print(f"Cannot find position for {rebalance_date}, skipping... ❌❌❌")
            continue

        # Define training period
        train_start_pos = max(0, train_end_pos - min_train_periods)

        print(f"Training period: {original_index[train_start_pos].strftime('%Y-%m-%d')} to {original_index[train_end_pos].strftime('%Y-%m-%d')}")
        print(f"Training days: {train_end_pos - train_start_pos + 1}")

        # Get quarter end
        # NOTE(review): in the recorded run the first test window ended on
        # 2020-04-01, which is also the next rebalance date, so a boundary day
        # may be scored in two consecutive quarters. Confirm the contract of
        # get_quarter_end_date.
        quarter_end = get_quarter_end_date(rebalance_date, original_index, trading_days_per_quarter)

        try:
            test_end_pos = original_index.get_loc(quarter_end)
            print(f"Test period: {rebalance_date.strftime('%Y-%m-%d')} to {quarter_end.strftime('%Y-%m-%d')}")
            print(f"Test days: {test_end_pos - train_end_pos}")
        except Exception:
            print(f"Cannot find end date for quarter, skipping... ❌❌❌")
            continue

        # Skip if insufficient future data
        if test_end_pos >= len(df_reset):
            print(f"Not enough future data, stopping at rebalance {i+1} ❌❌❌")
            break

        # Training slices. train_data is only used for the shape log below;
        # the model receives train_features for both of its frame arguments.
        train_data = df_reset.iloc[train_start_pos:train_end_pos+1].copy()
        train_features = features_reset.iloc[train_start_pos:train_end_pos+1].copy()

        train_data = train_data.ffill().bfill()
        train_features = train_features.fillna(0)

        # The test slice starts one row early so pct_change has a base price;
        # that first (return-less) row is dropped again right after.
        test_data = df_reset.iloc[train_end_pos:test_end_pos+1].copy()
        test_returns = test_data.pct_change().fillna(0).iloc[1:]

        print(f"Training data shape: {train_data.shape}")
        print(f"Test returns shape: {test_returns.shape}")

        # Train model and get weights
        try:
            # Clear session for memory management
            K.clear_session()
            tf.keras.backend.clear_session()

            asset_weights = model.calc_wgts(lookback, n_epochs, train_features, train_features, patience=5, PE_type=PE_type)

            print(f"\n{'=' * 30}")
            # The weights are expected to sum to 1 (checked at 4 decimals)
            total_weight = np.sum(asset_weights).round(4)
            if total_weight == 1:
                print(f"Total weight: {total_weight:.4f} ✅")
            else:
                print(f"Total weight: {total_weight:.4f} ❌ (should be 1.0)")

            weights_non_zero_count = np.count_nonzero(asset_weights)
            print(f"Selected ETF count: {weights_non_zero_count}")

            # Print allocation summary
            if hasattr(model, 'constraint_matrix') and hasattr(model, 'asset_type_names'):
                type_weights = np.dot(model.constraint_matrix, asset_weights)
                print("Asset Type Allocation:")
                for j, (asset_type, weight) in enumerate(zip(model.asset_type_names, type_weights)):
                    lower = model.asset_lower.get(asset_type, 0.0)
                    upper = model.asset_upper.get(asset_type, 1.0)
                    # Bounds are compared at 4 decimals, like the total above
                    if lower <= weight.round(4) <= upper:
                        status = "✅" #Within Range
                    else:
                        status = "❌" #Out of Range
                    print(f"  {asset_type}: {weight:.3f} ({weight*100:.2f}%) [Range: {lower:.2f}-{upper:.2f}] {status}")

            # Calculate out-of-sample returns
            if len(test_returns) > 0:
                # Weighted sum across assets for every test day
                oos_returns = (test_returns.values * asset_weights).sum(axis=1)
                # test_returns carries row positions (df_reset has a RangeIndex),
                # so translate them back into trading dates for the P&L series.
                oos_dates = original_index[test_returns.index.to_numpy()]
                oos_returns = pd.Series(oos_returns, index=oos_dates)

                all_rets.append(oos_returns)
                weights_list.append(asset_weights)
                rebalance_info.append({
                    'rebalance_date': rebalance_date,
                    'quarter_end': quarter_end,
                    'quarter_return': oos_returns.sum(),
                    'quarter_days': len(oos_returns)
                })

                print(f"Quarter return: {oos_returns.sum():.4f} ({oos_returns.sum()*100:.2f}%)")
            else:
                print("No test returns available ❌❌❌")

            print(f"\n{'=' * 30}")

        except Exception as e:
            # Best effort: a failed quarter is reported and the run continues
            print(f"Error in rebalancing: {str(e)} ❌❌❌")
            continue

    # Combine results
    if all_rets:
        pnl = pd.concat(all_rets, ignore_index=False)
        weights_df = pd.DataFrame(weights_list, columns=df.columns)
        rebalance_summary = pd.DataFrame(rebalance_info)

        print(f"\n=== Rebalancing Summary ===")
        print(f"Total quarters processed: {len(rebalance_info)}")
        print(f"Total return periods: {len(pnl)}")
        print(f"Average quarterly return: {rebalance_summary['quarter_return'].mean():.4f}")
        print(f"Quarterly return std: {rebalance_summary['quarter_return'].std():.4f}")

        return pnl, weights_df, rebalance_summary
    else:
        print("No successful rebalancing periods ❌❌❌")
        return None, None, None


In [9]:
# Work on a copy of the downloaded frame
data_train = data.copy()
# Day-over-day percentage change is the model feature; the first row has no
# previous value and is set to 0
features = data_train.pct_change().fillna(0)

# Reset states generated by Keras
K.clear_session()

set_seed(1)

# Window sizes, all derived from avg_days (average trading days per month)
n_lookback = avg_days * 12  # how many days the model input looks back for every day of the training window (12 months)
n_trading_days_per_quarter = avg_days * 3  # 21 days/month * 3 months = 63 days
n_min_train_periods = avg_days * 12 * 3  # 3 years of training data = 252*3 days = 756 days

for label, n_days in (
    ("Lookback period", n_lookback),
    ("Trading days per quarter", n_trading_days_per_quarter),
    ("Minimum training periods", n_min_train_periods),
):
    print(f"{label}: {n_days} days")

# Run quarterly rebalancing with constraints
walk_forward_settings = dict(
    lookback=n_lookback,
    n_epochs=100,
    features=features.fillna(0),
    asset_map=asset_map,
    asset_lower=asset_lower_aggressive,
    asset_upper=asset_upper_aggressive,
    port_type=long_only,
    start_year=start_rebalance_year,
    trading_days_per_quarter=n_trading_days_per_quarter,
    min_train_periods=n_min_train_periods,
    PE_type=pe_type,
)
pnl, model_raw_weights, rebalance_summary = quarterly_walk_forward(data_train, **walk_forward_settings)

Lookback period: 252 days
Trading days per quarter: 63 days
Minimum training periods: 756 days
Rebalance Dates: ['2020-01-02', '2020-04-01', '2020-07-01', '2020-10-01', '2021-01-04', '2021-04-01', '2021-07-01', '2021-10-01', '2022-01-03', '2022-04-01', '2022-07-01', '2022-10-03', '2023-01-03', '2023-04-03', '2023-07-03', '2023-10-02', '2024-01-02', '2024-04-01', '2024-07-01', '2024-10-01']

=== Starting Optimized Quarterly Rebalancing Analysis ===
Training lookback period: 252 days
Minimum training periods: 756 days
Trading days per quarter: 63 days
Total rebalance periods: 20

--- Rebalancing 1/20 ---
Rebalance Date: 2020-01-02
Training period: 2016-12-28 to 2019-12-31
Training days: 757
Test period: 2020-01-02 to 2020-04-01
Test days: 63
Training data shape: (757, 33)
Test returns shape: (63, 33)


2025-07-09 08:18:32.700200: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-07-09 08:18:32.700363: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-07-09 08:18:32.700369: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-07-09 08:18:32.700528: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-07-09 08:18:32.700537: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Applying constraints to output layer... ✅
Epoch 1/100


2025-07-09 08:18:35.086379: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8s/step - loss: -0.5396
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 637ms/step - loss: -0.5449
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 522ms/step - loss: -0.5293
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 500ms/step - loss: -0.5382
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 506ms/step - loss: -0.5385
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 497ms/step - loss: -0.5296
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 544ms/step - loss: -0.5396
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 673ms/step

Total weight: 1.0000 ✅
Selected ETF count: 33
Asset Type Allocation:
  Cash_Equivalent: 0.013 (1.29%) [Range: 0.00-0.40] ✅
  Fixed_Income: 0.060 (6.04%) [Range: 0.00-0.30] ✅
  Equity: 0.819 (81.95%) [Range: 0.55-0.90] ✅
  Alternatives

In [10]:
rebalance_dates = get_rebalance_dates(data, start_year=start_rebalance_year)

# Label every weight vector with the rebalance date it was produced for.
# quarterly_walk_forward appends one rebalance_summary row per stored weight
# vector and may skip dates, so slicing rebalance_dates by length could attach
# the wrong date to a row; the recorded 'rebalance_date' column cannot.
weights_df = model_raw_weights.copy()
weights_df.index = rebalance_summary['rebalance_date'].tolist()
print("\n%Weights DataFrame with Rebalance Dates:")
display(weights_df.multiply(100).round(2))

Rebalance Dates: ['2020-01-02', '2020-04-01', '2020-07-01', '2020-10-01', '2021-01-04', '2021-04-01', '2021-07-01', '2021-10-01', '2022-01-03', '2022-04-01', '2022-07-01', '2022-10-03', '2023-01-03', '2023-04-03', '2023-07-03', '2023-10-02', '2024-01-02', '2024-04-01', '2024-07-01', '2024-10-01']

%Weights DataFrame with Rebalance Dates:


Unnamed: 0,SHV,BND,BNDX,JNK,VT,VEA,IEMG,VOO,QQQ,DIA,...,IXP,JXI,ITA,ICLN,SKYY,SMH,REET,IGF,PDBC,GLD
2020-01-02,1.29,3.26,1.14,1.64,2.2,2.84,0.96,4.67,1.76,9.81,...,15.34,6.6,2.98,1.76,2.03,0.52,0.84,4.57,2.65,2.66
2020-04-01,3.05,4.55,0.54,0.51,0.33,3.48,2.62,1.2,2.7,0.6,...,2.38,6.19,3.78,0.69,1.02,2.45,0.52,0.57,2.05,1.18
2020-07-01,2.1,1.31,5.14,1.72,20.809999,3.53,3.28,3.39,1.78,1.28,...,1.04,1.57,0.49,2.38,0.69,4.7,1.56,1.74,1.06,3.73
2020-10-01,3.03,16.860001,2.02,1.51,1.87,4.14,2.69,2.32,1.3,1.02,...,2.35,1.65,2.49,1.58,5.2,2.32,0.82,1.0,14.86,4.91
2021-01-04,2.08,1.05,1.68,1.37,6.82,0.48,1.13,4.54,14.33,2.57,...,0.82,1.25,1.63,3.17,1.16,6.5,1.36,3.12,2.98,3.3
2021-04-01,2.31,6.27,3.56,3.67,3.81,1.72,2.27,10.37,7.11,2.64,...,4.11,4.69,1.24,0.61,1.03,1.92,2.75,1.8,3.61,2.49
2021-07-01,0.74,1.02,0.35,0.48,1.62,5.75,7.07,4.54,6.71,1.11,...,0.85,8.6,0.58,7.84,0.54,2.7,1.55,0.54,2.98,2.35
2021-10-01,3.45,5.99,4.84,1.57,2.59,2.97,3.22,0.66,1.05,1.93,...,1.35,1.1,1.98,3.82,1.11,1.15,1.3,1.54,3.15,14.49
2022-01-03,0.76,9.62,2.32,0.71,6.31,2.02,5.6,2.57,1.13,0.69,...,1.21,5.0,2.92,3.14,8.06,2.09,1.23,1.88,1.86,1.14
2022-04-01,1.52,2.85,1.85,0.56,4.45,7.46,2.03,2.52,1.24,1.81,...,11.86,0.59,8.61,2.96,1.58,3.57,1.84,1.43,3.93,1.86


In [11]:
# --- Validate the model weights against the per-asset-type bounds ---
violations_found = check_portfolio_constraints(
    weights_df, asset_map, asset_lower_aggressive, asset_upper_aggressive
)

if violations_found:
    # One entry per offending date, each holding a list of messages
    print("❌ Constraint violations were found:")
    for date, messages in violations_found.items():
        print(f"\nOn {date}:")
        for msg in messages:
            print(f"  - {msg}")
else:
    print("✅ All portfolio weights satisfy the constraints.")

✅ All portfolio weights satisfy the constraints.


In [12]:
# weights_df.to_csv(f'{output_dir}/{train_type}/raw_weights_df_run{run_no}.csv', index=True)
# Store the unrounded, date-labelled model weights in the 'Raw Weights' sheet
# of the results workbook.
save_dataframe_to_new_sheet(weights_df, results_excel_path, 'Raw Weights')

DataFrame saved to sheet 'Raw Weights' in new file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/01_Results_Transformer_OriPE_PreNorm_run_551.xlsx ✨


In [13]:
# os.system('say "Model training has finished"')

# Comparison

### 1. Model weights

In [14]:
# Model weights rounded to 4 decimals (a new frame; weights_df keeps full precision)
model_weights_df = weights_df.round(4)
save_dataframe_to_new_sheet(
    model_weights_df,
    results_excel_path,
    'Model Weights',
)
# model_weights_df

DataFrame saved to sheet 'Model Weights' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/01_Results_Transformer_OriPE_PreNorm_run_551.xlsx 📄


In [15]:
# Per rebalance date, count the assets whose rounded weight is strictly positive
non_zero_weights_count = model_weights_df.gt(0).sum(axis=1)
print("\nNumber of non-zero weights in each row:")
print(non_zero_weights_count)


Number of non-zero weights in each row:
2020-01-02    33
2020-04-01    33
2020-07-01    33
2020-10-01    33
2021-01-04    33
2021-04-01    33
2021-07-01    33
2021-10-01    33
2022-01-03    33
2022-04-01    33
2022-07-01    33
2022-10-03    33
2023-01-03    33
2023-04-03    33
2023-07-03    33
2023-10-02    33
2024-01-02    33
2024-04-01    33
2024-07-01    33
2024-10-01    33
dtype: int64


In [16]:
# Row totals of the rounded weights, shown at 2 decimals; each should be 1
total_weights = model_weights_df.sum(axis="columns").round(2)
print("\nTotal weights in each row (should be 1):")
print(total_weights)


Total weights in each row (should be 1):
2020-01-02    1.0
2020-04-01    1.0
2020-07-01    1.0
2020-10-01    1.0
2021-01-04    1.0
2021-04-01    1.0
2021-07-01    1.0
2021-10-01    1.0
2022-01-03    1.0
2022-04-01    1.0
2022-07-01    1.0
2022-10-03    1.0
2023-01-03    1.0
2023-04-03    1.0
2023-07-03    1.0
2023-10-02    1.0
2024-01-02    1.0
2024-04-01    1.0
2024-07-01    1.0
2024-10-01    1.0
dtype: float32


### 2. Traditional mean-variance optimization

In [17]:
risk_free = 0.02  # Example risk-free rate

# Mean-variance benchmark run with the same bounds, start year and window
# sizes as the model run; the resulting weights are rounded to 4 decimals.
mvo_weights_df = mvo_quarterly_rebalancing(
    data,
    asset_map,
    asset_lower_aggressive,
    asset_upper_aggressive,
    port_type,
    start_year=start_rebalance_year,
    trading_days_per_quarter=n_trading_days_per_quarter,
    min_train_periods=n_min_train_periods,
    risk_free_rate=risk_free,
).round(4)

save_dataframe_to_new_sheet(mvo_weights_df, results_excel_path, 'MVO Weights')
# mvo_weights_df

Rebalance Dates: ['2020-01-02', '2020-04-01', '2020-07-01', '2020-10-01', '2021-01-04', '2021-04-01', '2021-07-01', '2021-10-01', '2022-01-03', '2022-04-01', '2022-07-01', '2022-10-03', '2023-01-03', '2023-04-03', '2023-07-03', '2023-10-02', '2024-01-02', '2024-04-01', '2024-07-01', '2024-10-01']

--- Rebalancing 1/20 ---
Rebalance Date: 2020-01-02
Training period: 2016-12-28 to 2019-12-31
Training days: 757

Optimal Weights:
Alternatives: 0.1841
Cash_Equivalent: 0.0000
Equity: 0.5500
Fixed_Income: 0.2659

--- Rebalancing 2/20 ---
Rebalance Date: 2020-04-01
Training period: 2017-03-29 to 2020-03-31
Training days: 757

Optimal Weights:
Alternatives: 0.3000
Cash_Equivalent: 0.0000
Equity: 0.5500
Fixed_Income: 0.1500

--- Rebalancing 3/20 ---
Rebalance Date: 2020-07-01
Training period: 2017-06-28 to 2020-06-30
Training days: 757

Optimal Weights:
Alternatives: 0.3000
Cash_Equivalent: 0.0000
Equity: 0.5500
Fixed_Income: 0.1500

--- Rebalancing 4/20 ---
Rebalance Date: 2020-10-01
Training p

In [18]:
# --- Validate the MVO weights against the per-asset-type bounds ---
# NOTE(review): mvo_weights_df was rounded to 4 decimals before this check, so
# a group sitting exactly on a bound can be reported as violating it (the
# recorded run shows 0.5499 against a 0.55 floor); confirm against the
# unrounded weights.
violations_found = check_portfolio_constraints(
    mvo_weights_df, asset_map, asset_lower_aggressive, asset_upper_aggressive
)

if violations_found:
    print("❌ Constraint violations were found:")
    for date, messages in violations_found.items():
        print(f"\nOn {date}:")
        for msg in messages:
            print(f"  - {msg}")
else:
    print("✅ All portfolio weights satisfy the constraints.")

# Per rebalance date, count the assets whose weight is strictly positive
non_zero_weights_count = mvo_weights_df.gt(0).sum(axis=1)
print("\nNumber of non-zero weights in each row:")
print(non_zero_weights_count)

# Row totals at 2 decimals; each should be 1
total_weights = mvo_weights_df.sum(axis="columns").round(2)
print("\nTotal weights in each row (should be 1):")
print(total_weights)

❌ Constraint violations were found:

On 2020-01-02:
  - Violation in 'Equity': Total weight is 0.5499, but bounds are [0.55, 0.9].

On 2022-04-01:
  - Violation in 'Equity': Total weight is 0.5499, but bounds are [0.55, 0.9].

Number of non-zero weights in each row:
2020-01-02    7
2020-04-01    5
2020-07-01    4
2020-10-01    5
2021-01-04    5
2021-04-01    6
2021-07-01    5
2021-10-01    5
2022-01-03    4
2022-04-01    6
2022-07-01    5
2022-10-03    4
2023-01-03    4
2023-04-03    7
2023-07-03    5
2023-10-02    5
2024-01-02    6
2024-04-01    6
2024-07-01    5
2024-10-01    7
dtype: int64

Total weights in each row (should be 1):
2020-01-02    1.0
2020-04-01    1.0
2020-07-01    1.0
2020-10-01    1.0
2021-01-04    1.0
2021-04-01    1.0
2021-07-01    1.0
2021-10-01    1.0
2022-01-03    1.0
2022-04-01    1.0
2022-07-01    1.0
2022-10-03    1.0
2023-01-03    1.0
2023-04-03    1.0
2023-07-03    1.0
2023-10-02    1.0
2024-01-02    1.0
2024-04-01    1.0
2024-07-01    1.0
2024-10-01    1.

### 3. Equal weights

In [19]:
# Equal-weight portfolio on the same rebalance dates: rounded to 4 decimals,
# then passed through abs() so no weight is negative
equal_weights_df = equal_weight_portfolio(data, rebalance_dates).round(4).abs()
save_dataframe_to_new_sheet(
    equal_weights_df,
    results_excel_path,
    'Equal Weights',
)
# equal_weights_df

DataFrame saved to sheet 'Equal Weights' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/01_Results_Transformer_OriPE_PreNorm_run_551.xlsx 📄


### 4. Benchmark

In [20]:
# Start from an all-zero weight table: one row per rebalance date, one column per asset
benchmark_df = pd.DataFrame(0.0, index=rebalance_dates, columns=data.columns)

# Number of assets mapped to each asset type
asset_types = list(asset_map.values())
num_cash = asset_types.count('Cash_Equivalent')
num_fixed_income = asset_types.count('Fixed_Income')
num_alternatives = asset_types.count('Alternatives')
print(f"Number of Cash Equivalents: {num_cash}")
print(f"Number of Fixed Income: {num_fixed_income}")
print(f"Number of Alternatives: {num_alternatives}")

# Static allocation: 5% cash, 5% fixed income, 75% VT, 15% alternatives,
# each group weight divided by the asset count used for that group
for tickers, group_weight, n_assets in (
    (['SHV'], 0.05, num_cash),
    (['BND', 'BNDX', 'JNK'], 0.05, num_fixed_income),
    (['VT'], 0.75, 1),
    (['REET', 'IGF', 'PDBC', 'GLD'], 0.15, num_alternatives),
):
    benchmark_df[tickers] = group_weight / n_assets

save_dataframe_to_new_sheet(benchmark_df, results_excel_path, 'Beanchmark Weights')
# benchmark_df

Number of Cash Equivalents: 1
Number of Fixed Income: 3
Number of Alternatives: 4
DataFrame saved to sheet 'Beanchmark Weights' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/01_Results_Transformer_OriPE_PreNorm_run_551.xlsx 📄


### 5. Comparison

In [21]:
# Main analysis: score every strategy from the first rebalance date onward
start_date = rebalance_dates[0]

# Weight tables of the strategies being compared
portfolios = {
    'Model Portfolio': model_weights_df,
    'MVO Portfolio': mvo_weights_df,
    'Equal Weight': equal_weights_df,
    'Benchmark': benchmark_df
}

# Return series of each strategy
portfolio_returns = {}
for name, weights in portfolios.items():
    portfolio_returns[name] = calculate_portfolio_returns(data, weights, rebalance_dates, start_date)

# Every strategy is measured against the benchmark series; for the benchmark
# itself that is its own series, i.e. it is compared with itself
benchmark_returns = portfolio_returns['Benchmark']
performance_metrics = {}
for name, returns in portfolio_returns.items():
    metrics = calculate_performance_metrics(returns, benchmark_returns)
    performance_metrics[name] = metrics

# One row per strategy, one column per metric
performance_df = pd.DataFrame(performance_metrics).T
print("Portfolio Performance Comparison:")
print("=" * 50)
# performance_df.to_csv(f'{output_dir}/{train_type}/performance_comparison_run{run_no}.csv')
# Saved and displayed transposed: metrics as rows, strategies as columns
save_dataframe_to_new_sheet(performance_df.T, results_excel_path, 'Performance Comparison')
performance_df.round(4).T

Portfolio Performance Comparison:
DataFrame saved to sheet 'Performance Comparison' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/01_Results_Transformer_OriPE_PreNorm_run_551.xlsx 📄


Unnamed: 0,Model Portfolio,MVO Portfolio,Equal Weight,Benchmark
Total Return (%),50.9947,33.5854,45.9018,52.2475
Annualized Return (%),8.612,5.9771,7.8675,8.7921
Volatility (%),17.5688,16.395,16.9851,17.2193
Sharpe Ratio,0.4449,0.3143,0.4138,0.46
Max Drawdown (%),-30.7087,-27.9771,-30.735,-30.8091
Max Drawdown Duration (days),552.0,978.0,563.0,533.0
Sortino Ratio,0.5364,0.4148,0.4923,0.548
Treynor Ratio,0.0777,0.0649,0.0717,0.0792
Jensen's Alpha (%),-0.1547,-1.1403,-0.7411,-0.0063
Beta,1.0062,0.7944,0.9808,1.0008


In [22]:
# Two stacked panels: cumulative growth on top, drawdown underneath
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=('Cumulative Returns', 'Maximum Drawdown'),
    vertical_spacing=0.12,
    row_heights=[0.7, 0.3],
)

# One colour per strategy, shared by both panels
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']

# Top panel: growth of 1 unit invested at the start
for i, (name, returns) in enumerate(portfolio_returns.items()):
    cumulative_returns = (1 + returns).cumprod()
    growth_trace = go.Scatter(
        x=cumulative_returns.index,
        y=cumulative_returns.values,
        mode='lines',
        name=name,
        line={'color': colors[i], 'width': 2},
        showlegend=True,
    )
    fig.add_trace(growth_trace, row=1, col=1)

# Bottom panel: percentage distance from the running peak
for i, (name, returns) in enumerate(portfolio_returns.items()):
    cumulative = (1 + returns).cumprod()
    rolling_max = cumulative.expanding().max()
    drawdown = (cumulative - rolling_max) / rolling_max * 100

    drawdown_trace = go.Scatter(
        x=drawdown.index,
        y=drawdown.values,
        mode='lines',
        name=name,
        line={'color': colors[i], 'width': 2},
        showlegend=False,  # the legend entries come from the top panel
    )
    fig.add_trace(drawdown_trace, row=2, col=1)

# Horizontal legend placed above the plot, right-aligned
fig.update_layout(
    title='Portfolio Performance Comparison',
    height=800,
    hovermode='x unified',
    legend={
        'orientation': "h",
        'yanchor': "bottom",
        'y': 1.02,
        'xanchor': "right",
        'x': 1,
    },
)

fig.update_xaxes(title_text="Date", row=2, col=1)
fig.update_yaxes(title_text="Cumulative Return", row=1, col=1)
fig.update_yaxes(title_text="Drawdown (%)", row=2, col=1)

fig.show()

# Export the figure twice under the same base name: interactive HTML and PNG
plot_base_path = f'{output_dir}/{train_type}/portfolio_performance_{model_type}_run_{run_no}'
fig.write_html(f'{plot_base_path}.html')
fig.write_image(f'{plot_base_path}.png')

In [23]:
# Summary statistics
print("\nKey Performance Highlights:")
print("=" * 30)


def _report_extreme(title, column, pick, spec, unit=""):
    """Print the strategy holding the extreme value of ``column`` and return its name."""
    winner = pick(performance_df[column])
    print(f"{title}: {winner} ({performance_df.loc[winner, column]:{spec}}{unit})")
    return winner


best_return = _report_extreme("Best Annualized Return", 'Annualized Return (%)', pd.Series.idxmax, ".2f", "%")

best_sharpe = _report_extreme("Best Sharpe Ratio", 'Sharpe Ratio', pd.Series.idxmax, ".3f")

best_sortino = _report_extreme("Best Sortino Ratio", 'Sortino Ratio', pd.Series.idxmax, ".3f")

# Drawdowns are negative numbers, so the largest value is the shallowest drawdown
lowest_dd = _report_extreme("Lowest Max Drawdown", 'Max Drawdown (%)', pd.Series.idxmax, ".2f", "%")

lowest_vol = _report_extreme("Lowest Volatility", 'Volatility (%)', pd.Series.idxmin, ".2f", "%")


Key Performance Highlights:
Best Annualized Return: Benchmark (8.79%)
Best Sharpe Ratio: Benchmark (0.460)
Best Sortino Ratio: Benchmark (0.548)
Lowest Max Drawdown: MVO Portfolio (-27.98%)
Lowest Volatility: MVO Portfolio (16.39%)


In [24]:
# os.system('say "Model comparison has finished"')

# Quarterly Comparison

In [25]:
# Get quarterly periods (each entry is read below through its 'quarter' key)
quarterly_periods = get_quarterly_periods(rebalance_dates, data)

# Portfolio definitions
portfolios = {
    'Model Portfolio': model_weights_df,
    'MVO Portfolio': mvo_weights_df,
    'Equal Weight': equal_weights_df,
    'Benchmark': benchmark_df
}

# Calculate quarterly performance for each portfolio:
# quarterly_results[portfolio name][quarter label] -> metrics of that quarter
quarterly_results = {}
for portfolio_name, portfolio_weights in portfolios.items():
    quarterly_results[portfolio_name] = {}

    for period in quarterly_periods:
        quarter = period['quarter']
        returns = calculate_quarterly_portfolio_returns(data, portfolio_weights, period)
        metrics = calculate_quarterly_metrics(returns)
        quarterly_results[portfolio_name][quarter] = metrics

# Create comprehensive results DataFrame: one table per metric,
# quarters as rows and portfolios as columns
all_metrics = ['Total Return (%)', 'Annualized Return (%)',
               'Volatility (%)', 'Max Drawdown (%)',
               'Max Drawdown Duration (days)', 'Sharpe Ratio', 'Sortino Ratio']
quarterly_comparison = {}

for metric in all_metrics:
    quarterly_comparison[metric] = pd.DataFrame({
        portfolio: {quarter: quarterly_results[portfolio][quarter][metric]
                   for quarter in quarterly_results[portfolio]}
        for portfolio in portfolios.keys()
    })

# Display results
for metric in all_metrics:
    print(f"\n{metric}")
    print("-" * 40)
    print(quarterly_comparison[metric].round(4))
    # quarterly_comparison[metric].to_csv(f'{output_dir}/{train_type}/quarterly_{metric.lower().replace(" ", "_")}_run{run_no}.csv', index=True)



Total Return (%)
----------------------------------------
         Model Portfolio  MVO Portfolio  Equal Weight  Benchmark
Q1 2020         -18.9602        -8.1440      -20.4899   -20.4869
Q2 2020          24.9315        24.3225       22.1839    21.5340
Q3 2020           7.5700         6.1182        6.1387     6.2743
Q4 2020          11.6221        16.1943       14.0268    12.4655
Q1 2021           4.5561        -7.4943        4.4425     4.9436
Q2 2021           4.8071         2.2525        4.4421     5.2773
Q3 2021          -2.2416        -2.6480       -1.4958    -1.1347
Q4 2021           3.6080         4.8973        4.1704     4.8769
Q1 2022          -4.1054        -5.1376       -3.1157    -3.4918
Q2 2022         -13.1124       -12.4501      -12.3079   -13.2633
Q3 2022          -7.2982        -5.6272       -7.5308    -7.4998
Q4 2022           8.5054         3.7545        7.0251     6.5586
Q1 2023           5.4659        -0.2666        5.6967     6.0480
Q2 2023           2.1459       

In [26]:
# --- Save the quarterly comparison tables to Excel ---
qoq_excel_path = f"{output_dir}/{train_type}/02_Quarterly_{model_type}_{pe_type}_{pre_post}_run_{run_no}.xlsx"

# One sheet per metric; the sheet name is the metric name with spaces replaced by underscores
for metric in all_metrics:
    save_dataframe_to_new_sheet(
        quarterly_comparison[metric].round(4),
        qoq_excel_path,
        metric.replace(" ", "_"),
    )


DataFrame saved to sheet 'Total_Return_(%)' in new file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/02_Quarterly_Transformer_OriPE_PreNorm_run_551.xlsx ✨
DataFrame saved to sheet 'Annualized_Return_(%)' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/02_Quarterly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Volatility_(%)' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/02_Quarterly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Max_Drawdown_(%)' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/02_Quarterly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Max_Drawdown_Duration_(days)' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/02_Quarterly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Sharpe_Ratio' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/02_Quarterly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame 

In [27]:
# 2x2 grid of grouped bar charts, one panel per quarterly metric
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=('Quarterly Returns (%)', 'Quarterly Volatility (%)',
                    'Quarterly Sharpe Ratio', 'Quarterly Max Drawdown (%)'),
    vertical_spacing=0.2,
    horizontal_spacing=0.2,
)

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
quarters = list(quarterly_comparison['Total Return (%)'].index)

# (metric table, grid row, grid column, show in legend); only the first panel
# contributes legend entries so every portfolio is listed once
panel_specs = [
    ('Total Return (%)', 1, 1, True),
    ('Volatility (%)', 1, 2, False),
    ('Sharpe Ratio', 2, 1, False),
    ('Max Drawdown (%)', 2, 2, False),
]

for metric_name, panel_row, panel_col, in_legend in panel_specs:
    for i, portfolio in enumerate(portfolios.keys()):
        bars = go.Bar(
            x=quarters,
            y=quarterly_comparison[metric_name][portfolio],
            name=portfolio,
            marker_color=colors[i],
            showlegend=in_legend,
        )
        fig.add_trace(bars, row=panel_row, col=panel_col)

# Horizontal legend placed above the grid, right-aligned
fig.update_layout(
    title='Quarterly Portfolio Performance Comparison',
    height=700,
    showlegend=True,
    legend={
        'orientation': "h",
        'yanchor': "bottom",
        'y': 1.02,
        'xanchor': "right",
        'x': 1,
    },
)

# Update axes labels
for axis_title, panel_row, panel_col in (
    ("Return (%)", 1, 1),
    ("Volatility (%)", 1, 2),
    ("Sharpe Ratio", 2, 1),
    ("Max Drawdown (%)", 2, 2),
):
    fig.update_yaxes(title_text=axis_title, row=panel_row, col=panel_col)

fig.show()

# Per-quarter leaderboard: the portfolio with the highest total return, the
# highest Sharpe ratio and the lowest volatility in each quarter.
print("\n" + "="*60, "QUARTERLY PERFORMANCE SUMMARY", "="*60, sep="\n")

quarterly_return_table = quarterly_comparison['Total Return (%)']
quarterly_sharpe_table = quarterly_comparison['Sharpe Ratio']
quarterly_vol_table = quarterly_comparison['Volatility (%)']

for quarter in quarters:
    print(f"\n{quarter}:", "-" * 20, sep="\n")

    # Highest total return of the quarter (this row is not NaN-filtered).
    quarter_returns = quarterly_return_table.loc[quarter]
    best_return = quarter_returns.idxmax()
    print(f"Best Return: {best_return} ({quarter_returns[best_return]:.2f}%)")

    # Highest Sharpe ratio; the line is omitted when no portfolio has a value.
    quarter_sharpe = quarterly_sharpe_table.loc[quarter].dropna()
    if len(quarter_sharpe) > 0:
        best_sharpe = quarter_sharpe.idxmax()
        print(f"Best Sharpe: {best_sharpe} ({quarter_sharpe[best_sharpe]:.3f})")

    # Lowest volatility; the line is omitted when no portfolio has a value.
    quarter_vol = quarterly_vol_table.loc[quarter].dropna()
    if len(quarter_vol) > 0:
        lowest_vol = quarter_vol.idxmin()
        print(f"Lowest Vol: {lowest_vol} ({quarter_vol[lowest_vol]:.2f}%)")

# Ranking analysis: rank the portfolios against each other within every
# quarter by total return (rank 1 = highest return), then summarise the ranks.
print("\n" + "="*60)
print("PORTFOLIO RANKINGS BY QUARTER")
print("="*60)

# Rank across the columns of every row in a single vectorised call. The result
# has a numeric dtype, unlike an empty DataFrame that is filled row by row
# (which ends up object-typed). Ties share the better rank (method='min').
# The reindex keeps the row/column layout tied to `quarters` and `portfolios`.
ranking_df = (
    quarterly_comparison['Total Return (%)']
    .rank(axis=1, ascending=False, method='min')
    .reindex(index=quarters, columns=list(portfolios.keys()))
)

# The worst possible rank equals the number of portfolios being compared.
print(f"\nRanking by Quarterly Returns (1=Best, {len(portfolios)}=Worst):")
print(ranking_df.astype(int))

# Average ranking: mean rank of each portfolio over all quarters, best first.
avg_ranking = ranking_df.mean().sort_values()
print("\nAverage Ranking Across All Quarters:")
print("-" * 40)
for portfolio, avg_rank in avg_ranking.items():
    print(f"{portfolio}: {avg_rank:.2f}")

# Win rate: share of quarters in which a portfolio holds rank 1.
print("\nQuarterly Win Rate (% of quarters ranked #1):")
print("-" * 45)
for portfolio in portfolios.keys():
    win_rate = (ranking_df[portfolio] == 1).sum() / len(quarters) * 100
    print(f"{portfolio}: {win_rate:.1f}%")

# Export the quarter-over-quarter figure twice: as HTML and as a PNG image.
qoq_html_path = f'{output_dir}/{train_type}/QoQ_performance_{model_type}_run_{run_no}.html'
qoq_png_path = f'{output_dir}/{train_type}/QoQ_performance_{model_type}_run_{run_no}.png'
fig.write_html(qoq_html_path)
fig.write_image(qoq_png_path)


QUARTERLY PERFORMANCE SUMMARY

Q1 2020:
--------------------
Best Return: MVO Portfolio (-8.14%)
Best Sharpe: MVO Portfolio (-0.886)
Lowest Vol: MVO Portfolio (34.96%)

Q2 2020:
--------------------
Best Return: Model Portfolio (24.93%)
Best Sharpe: MVO Portfolio (4.799)
Lowest Vol: MVO Portfolio (18.40%)

Q3 2020:
--------------------
Best Return: Model Portfolio (7.57%)
Best Sharpe: Model Portfolio (2.126)
Lowest Vol: Model Portfolio (13.20%)

Q4 2020:
--------------------
Best Return: MVO Portfolio (16.19%)
Best Sharpe: Model Portfolio (4.111)
Lowest Vol: Model Portfolio (10.35%)

Q1 2021:
--------------------
Best Return: Benchmark (4.94%)
Best Sharpe: Benchmark (1.469)
Lowest Vol: Benchmark (13.00%)

Q2 2021:
--------------------
Best Return: Benchmark (5.28%)
Best Sharpe: Benchmark (2.038)
Lowest Vol: Model Portfolio (8.78%)

Q3 2021:
--------------------
Best Return: Benchmark (-1.13%)
Best Sharpe: Benchmark (-0.636)
Lowest Vol: Equal Weight (9.50%)

Q4 2021:
------------------

### Yearly Comparison

In [28]:
# Yearly performance analysis: evaluate every portfolio over each yearly
# period and collect the results into one comparison table per metric.
print("Yearly Performance Analysis")
print("=" * 50)

# Get yearly periods
yearly_periods = get_yearly_periods(rebalance_dates, data)

# Portfolio definitions: display name -> weights table
portfolios = {
    'Model Portfolio': model_weights_df,
    'MVO Portfolio': mvo_weights_df,
    'Equal Weight': equal_weights_df,
    'Benchmark': benchmark_df
}

# Calculate yearly performance for each portfolio.
# Resulting layout: yearly_results[portfolio_name][year] -> metrics mapping.
# (No copy of `weights_df` is made beforehand: the loop variable supplies the
# weights for each portfolio, so such a copy would never be read.)
yearly_results = {}
for portfolio_name, weights_df_2 in portfolios.items():
    yearly_results[portfolio_name] = {}

    for period in yearly_periods:
        year = period['year']
        returns = calculate_yearly_portfolio_returns(data, weights_df_2, period, rebalance_dates)
        metrics = calculate_yearly_metrics(returns)
        yearly_results[portfolio_name][year] = metrics

# Metrics to tabulate; each name is looked up in the per-year metrics mapping.
all_metrics = ['Total Return (%)', 'Annualized Return (%)', 'Volatility (%)',
               'Sharpe Ratio', 'Sortino Ratio', 'Max Drawdown (%)',
               'Max DD Duration (days)', 'VaR 95% (%)', 'Calmar Ratio', 'Trading Days']

# One DataFrame per metric: rows are years, columns are portfolios.
yearly_comparison = {}

for metric in all_metrics:
    yearly_comparison[metric] = pd.DataFrame({
        portfolio: {year: yearly_results[portfolio][year][metric]
                    for year in yearly_results[portfolio]}
        for portfolio in portfolios.keys()
    })

# Display results: trading-day counts as integers, everything else rounded
# to three decimals.
for metric in all_metrics:
    print(f"\n{metric}")
    print("-" * 40)
    if metric == 'Trading Days':
        print(yearly_comparison[metric].astype(int))
    else:
        print(yearly_comparison[metric].round(3))


Yearly Performance Analysis

Total Return (%)
----------------------------------------
      Model Portfolio  MVO Portfolio  Equal Weight  Benchmark
2020           17.333         40.841        14.444     12.466
2021           12.915         -2.458        14.010     17.212
2022          -13.444        -17.106       -13.372    -14.657
2023           15.485          5.529        15.389     18.062
2024           14.920         11.338        12.438     15.562

Annualized Return (%)
----------------------------------------
      Model Portfolio  MVO Portfolio  Equal Weight  Benchmark
2020           17.333         40.841        14.444     12.466
2021           12.970         -2.468        14.070     17.286
2022          -13.544        -17.230       -13.471    -14.765
2023           15.686          5.597        15.588     18.298
2024           15.048         11.433        12.543     15.695

Volatility (%)
----------------------------------------
      Model Portfolio  MVO Portfolio  Equal Weig

In [29]:
# Annual comparison figure: a 2x2 grid of grouped bar charts, one panel per
# metric and one bar series per portfolio in every panel.
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Annual Returns (%)', 'Annual Volatility (%)',
                    'Annual Sharpe Ratio', 'Annual Max Drawdown (%)'),
    vertical_spacing=0.12,
    horizontal_spacing=0.1
)

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
years = list(yearly_comparison['Total Return (%)'].index)

# One entry per panel: (key in yearly_comparison, y-axis title, row, col).
# Driving the traces from this table replaces four copy-pasted loops that
# differed only in these values.
# 'Sortino Ratio' and 'Calmar Ratio' are not plotted; showing them would need
# a third subplot row (rows=3, two more subplot titles) and two more entries.
yearly_panels = [
    ('Total Return (%)', 'Return (%)', 1, 1),
    ('Volatility (%)', 'Volatility (%)', 1, 2),
    ('Sharpe Ratio', 'Sharpe Ratio', 2, 1),
    ('Max Drawdown (%)', 'Max Drawdown (%)', 2, 2),
]

for panel_no, (panel_metric, axis_title, panel_row, panel_col) in enumerate(yearly_panels):
    for i, portfolio in enumerate(portfolios.keys()):
        fig.add_trace(
            go.Bar(
                x=years,
                y=yearly_comparison[panel_metric][portfolio],
                name=portfolio,
                # Cycle through the palette so that having more portfolios
                # than colours cannot raise IndexError.
                marker_color=colors[i % len(colors)],
                # Same group in every panel: clicking a legend entry toggles
                # that portfolio in all four panels, not only the first one.
                legendgroup=portfolio,
                # Only the first panel contributes legend entries, so each
                # portfolio is listed exactly once.
                showlegend=(panel_no == 0)
            ),
            row=panel_row, col=panel_col
        )
    fig.update_yaxes(title_text=axis_title, row=panel_row, col=panel_col)

# Horizontal legend placed just above the plotting area, right-aligned.
fig.update_layout(
    title='Annual Portfolio Performance Comparison',
    height=900,
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

fig.show()

# Export the year-over-year figure twice: as HTML and as a PNG image.
yoy_html_path = f'{output_dir}/{train_type}/YoY_performance_{model_type}_run_{run_no}.html'
yoy_png_path = f'{output_dir}/{train_type}/YoY_performance_{model_type}_run_{run_no}.png'
fig.write_html(yoy_html_path)
fig.write_image(yoy_png_path)

In [30]:
# Per-year leaderboard: the portfolio with the highest total return, the
# highest Sharpe ratio, the lowest volatility and the highest Calmar ratio.
print("\n" + "="*60, "ANNUAL PERFORMANCE SUMMARY", "="*60, sep="\n")

annual_return_table = yearly_comparison['Total Return (%)']
annual_sharpe_table = yearly_comparison['Sharpe Ratio']
annual_vol_table = yearly_comparison['Volatility (%)']
annual_calmar_table = yearly_comparison['Calmar Ratio']

for year in years:
    print(f"\n{year}:", "-" * 20, sep="\n")

    # Highest total return of the year (this row is not NaN-filtered).
    year_returns = annual_return_table.loc[year]
    best_return = year_returns.idxmax()
    print(f"Best Return: {best_return} ({year_returns[best_return]:.2f}%)")

    # Highest Sharpe ratio; the line is omitted when no portfolio has a value.
    year_sharpe = annual_sharpe_table.loc[year].dropna()
    if len(year_sharpe) > 0:
        best_sharpe = year_sharpe.idxmax()
        print(f"Best Sharpe: {best_sharpe} ({year_sharpe[best_sharpe]:.3f})")

    # Lowest volatility; the line is omitted when no portfolio has a value.
    year_vol = annual_vol_table.loc[year].dropna()
    if len(year_vol) > 0:
        lowest_vol = year_vol.idxmin()
        print(f"Lowest Vol: {lowest_vol} ({year_vol[lowest_vol]:.2f}%)")

    # Highest Calmar ratio; the line is omitted when no portfolio has a value.
    year_calmar = annual_calmar_table.loc[year].dropna()
    if len(year_calmar) > 0:
        best_calmar = year_calmar.idxmax()
        print(f"Best Calmar: {best_calmar} ({year_calmar[best_calmar]:.3f})")

# Ranking analysis: rank the portfolios against each other within every year
# by total return (rank 1 = highest return), then summarise the ranks.
print("\n" + "="*60)
print("PORTFOLIO RANKINGS BY YEAR")
print("="*60)

# Rank across the columns of every row in a single vectorised call. The result
# has a numeric dtype, unlike an empty DataFrame that is filled row by row
# (which ends up object-typed). Ties share the better rank (method='min').
# The reindex keeps the row/column layout tied to `years` and `portfolios`.
ranking_df = (
    yearly_comparison['Total Return (%)']
    .rank(axis=1, ascending=False, method='min')
    .reindex(index=years, columns=list(portfolios.keys()))
)

# The worst possible rank equals the number of portfolios being compared.
print(f"\nRanking by Annual Returns (1=Best, {len(portfolios)}=Worst):")
print(ranking_df.astype(int))

# Average ranking: mean rank of each portfolio over all years, best first.
avg_ranking = ranking_df.mean().sort_values()
print("\nAverage Ranking Across All Years:")
print("-" * 40)
for portfolio, avg_rank in avg_ranking.items():
    print(f"{portfolio}: {avg_rank:.2f}")

# Win rate: share of years in which a portfolio holds rank 1.
print("\nAnnual Win Rate (% of years ranked #1):")
print("-" * 45)
for portfolio in portfolios.keys():
    win_rate = (ranking_df[portfolio] == 1).sum() / len(years) * 100
    print(f"{portfolio}: {win_rate:.1f}%")

# Both reports below read the same table of yearly total returns
# (rows are years, columns are portfolios).
yearly_total_returns = yearly_comparison['Total Return (%)']

# Consistency: standard deviation of each portfolio's yearly total returns.
print(f"\nConsistency Analysis:", "-" * 25, sep="\n")
for portfolio in portfolios:
    returns_series = yearly_total_returns[portfolio].dropna()
    if len(returns_series) <= 1:
        # Fewer than two yearly observations: nothing is reported.
        continue
    consistency = returns_series.std()
    print(f"{portfolio} - Return Std Dev: {consistency:.2f}%")

# Extremes: the year with the highest and the lowest total return.
print(f"\nBest and Worst Years:", "-" * 25, sep="\n")
for portfolio in portfolios:
    returns_series = yearly_total_returns[portfolio].dropna()
    if returns_series.empty:
        continue
    best_year, worst_year = returns_series.idxmax(), returns_series.idxmin()
    print(f"{portfolio}:")
    print(f"  Best: {best_year} ({returns_series[best_year]:.2f}%)")
    print(f"  Worst: {worst_year} ({returns_series[worst_year]:.2f}%)")


ANNUAL PERFORMANCE SUMMARY

2020:
--------------------
Best Return: MVO Portfolio (40.84%)
Best Sharpe: MVO Portfolio (1.531)
Lowest Vol: MVO Portfolio (22.77%)
Best Calmar: MVO Portfolio (1.886)

2021:
--------------------
Best Return: Benchmark (17.21%)
Best Sharpe: Benchmark (1.341)
Lowest Vol: Benchmark (10.84%)
Best Calmar: Benchmark (3.412)

2022:
--------------------
Best Return: Equal Weight (-13.37%)
Best Sharpe: Model Portfolio (-0.754)
Lowest Vol: Equal Weight (18.39%)
Best Calmar: Model Portfolio (-0.605)

2023:
--------------------
Best Return: Benchmark (18.06%)
Best Sharpe: Benchmark (1.414)
Lowest Vol: MVO Portfolio (10.58%)
Best Calmar: Benchmark (1.926)

2024:
--------------------
Best Return: Benchmark (15.56%)
Best Sharpe: Benchmark (1.313)
Lowest Vol: Equal Weight (9.64%)
Best Calmar: Benchmark (2.416)

PORTFOLIO RANKINGS BY YEAR

Ranking by Annual Returns (1=Best, 4=Worst):
      Model Portfolio  MVO Portfolio  Equal Weight  Benchmark
2020                2       

In [31]:
# --- Start of the Excel export ---
# Each yearly metric table goes to its own sheet of one workbook; the sheet
# name is the metric name with spaces replaced by underscores.
output_excel_path = f"{output_dir}/{train_type}/03_Yearly_{model_type}_{pe_type}_{pre_post}_run_{run_no}.xlsx"

for metric in all_metrics:
    save_dataframe_to_new_sheet(
        yearly_comparison[metric].round(4),   # values rounded to 4 decimals
        output_excel_path,
        metric.replace(" ", "_"),
    )


DataFrame saved to sheet 'Total_Return_(%)' in new file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/03_Yearly_Transformer_OriPE_PreNorm_run_551.xlsx ✨
DataFrame saved to sheet 'Annualized_Return_(%)' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/03_Yearly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Volatility_(%)' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/03_Yearly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Sharpe_Ratio' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/03_Yearly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Sortino_Ratio' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/03_Yearly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Max_Drawdown_(%)' in existing file: Results/01_1_Transformer_OriPE_PreNorm_temp_1.0/03_Yearly_Transformer_OriPE_PreNorm_run_551.xlsx 📄
DataFrame saved to sheet 'Max_DD_Duration_(

In [32]:
# Display the per-year Sortino ratio table (rows are years, columns are portfolios).
yearly_comparison['Sortino Ratio']

Unnamed: 0,Model Portfolio,MVO Portfolio,Equal Weight,Benchmark
2020,0.675749,1.712821,0.586782,0.520732
2021,1.270448,-0.302262,1.507446,1.909716
2022,-1.29888,-1.712287,-1.358343,-1.422602
2023,1.901867,0.589701,2.045469,2.398409
2024,1.715328,1.139317,1.473423,1.737746


In [33]:
# Announce the end of the run by invoking the `say` shell command. os.system
# returns the command's exit status, which the notebook shows as the cell
# output (0 in the recorded run).
# NOTE(review): `say` looks like the macOS text-to-speech tool; on other
# platforms this call would presumably fail with a non-zero status — confirm.
os.system('say "All code has finished"')

0