In [1]:
import os
import math
import random 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from Util_def import *
from Util_model import *

import warnings
warnings.filterwarnings('ignore')


Devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU details:  {'device_name': 'METAL'}


In [2]:
# Investable universe (33 tickers), listed in the same asset-class order as `asset_map`.
ETF_list = (
    ['SHV']                                                                     # cash equivalent
    + ['BND', 'BNDX', 'JNK']                                                    # fixed income
    + ['VT', 'VEA', 'IEMG']                                                     # equity
    + ['VOO', 'QQQ', 'DIA', 'VGK', 'EWJ', 'MCHI', 'THD', 'VNM', 'INDA']         # equity
    + ['RXI', 'KXI', 'IXC', 'IXG', 'IXJ', 'EXI', 'IXN', 'IXP', 'JXI']           # equity
    + ['ITA', 'ICLN', 'SKYY', 'SMH']                                            # equity
    + ['REET', 'IGF', 'PDBC', 'GLD']                                            # alternatives
)

# Sample window: 2015-01-01 through 2024-12-31, i.e. ten calendar years.
# NOTE(review): `dt` is not imported in this notebook's import cell; presumably it is
# re-exported by one of the `from Util_* import *` star imports — confirm.
startDate = dt.datetime(year=2015, month=1, day=1)
endDate = dt.datetime(year=2024, month=12, day=31)

# First calendar year handed to get_rebalance_dates(); hard-coded, not derived from startDate.
start_rebalance_year = 2020

# Load the ETF data file, using the parsed 'Date' column as the index.
data = pd.read_csv(
    '33_ETF_data.csv',
    parse_dates=True,
    index_col='Date',
)

# Presumably the average number of trading days per month (judging by the helper's name) — TODO confirm.
avg_days = avg_days_per_month(data)

# print("=" * 50)
# print("Min Date:", data.index.min())
# print("Max Date:", data.index.max())
# print("Start Rebalance Year:", start_rebalance_year)
# print(f"Average number of trading days per month: {avg_days}", "days")
# print("=" * 50)


In [3]:
# Build the rebalance schedule starting in `start_rebalance_year`; the helper also prints the dates.
rebalance_dates =  get_rebalance_dates(data, start_year=start_rebalance_year)
# The first rebalance date is used as the start of the backtest.
start_date = rebalance_dates[0]  # First rebalance date

Rebalance Dates: ['2020-01-02', '2020-04-01', '2020-07-01', '2020-10-01', '2021-01-04', '2021-04-01', '2021-07-01', '2021-10-01', '2022-01-03', '2022-04-01', '2022-07-01', '2022-10-03', '2023-01-03', '2023-04-03', '2023-07-03', '2023-10-02', '2024-01-02', '2024-04-01', '2024-07-01', '2024-10-01']


In [4]:
# ---------------------------------------------------------------------------
# Portfolio type
# ---------------------------------------------------------------------------
# Each type is a 2-tuple; presumably (lower, upper) per-asset weight bounds — TODO confirm.
long_only = (0, 1)
long_short = (-1, 1)

port_type = long_only

# ---------------------------------------------------------------------------
# Asset-class constraints
# ---------------------------------------------------------------------------
# ticker -> asset class, expanded from one ticker list per class.
# Insertion order is preserved: cash, fixed income, equity, alternatives.
asset_map = {
    ticker: asset_class
    for asset_class, tickers in {
        'Cash_Equivalent': ['SHV'],
        'Fixed_Income': ['BND', 'BNDX', 'JNK'],
        'Equity': [
            'VT', 'VEA', 'IEMG',
            'VOO', 'QQQ', 'DIA', 'VGK', 'EWJ', 'MCHI', 'THD', 'VNM', 'INDA',
            'RXI', 'KXI', 'IXC', 'IXG', 'IXJ', 'EXI', 'IXN', 'IXP', 'JXI',
            'ITA', 'ICLN', 'SKYY', 'SMH',
        ],
        'Alternatives': ['REET', 'IGF', 'PDBC', 'GLD'],
    }.items()
    for ticker in tickers
}

# Aggressive profile: minimum total weight per asset class ...
asset_lower_aggressive = dict(
    Cash_Equivalent=0.0,
    Fixed_Income=0.0,
    Equity=0.55,
    Alternatives=0.0,
)
# ... and maximum total weight per asset class.
asset_upper_aggressive = dict(
    Cash_Equivalent=0.4,
    Fixed_Income=0.3,
    Equity=0.9,
    Alternatives=0.3,
)

# Display both sizes side by side; they should match (equal counts alone do not prove the tickers are identical).
len(ETF_list), len(asset_map)

(33, 33)

### Read Weight data

In [5]:

# Workbook holding one sheet of portfolio weights per strategy.
WEIGHTS_XLSX_PATH = 'data_analysis/All_best_weight.xlsx'

# Sheet names are kept exactly as the original code spelled them (including 'Beanchmark');
# presumably they match the workbook — TODO confirm before "fixing" the spelling.
WEIGHT_SHEET_NAMES = [
    'Model Weights',
    'MVO Weights',
    'Equal Weights',
    'Beanchmark Weights',
    'RNN',
    'LSTM',
    'GRU',
]

# Open and parse the workbook once: passing a list as `sheet_name` returns
# {sheet name: DataFrame}, with the same index/date options applied to every sheet.
weight_sheets = pd.read_excel(
    WEIGHTS_XLSX_PATH,
    sheet_name=WEIGHT_SHEET_NAMES,
    index_col=0,
    parse_dates=True,
)

model_weights_df = weight_sheets['Model Weights']
mvo_weights_df = weight_sheets['MVO Weights']
equal_weights_df = weight_sheets['Equal Weights']
benchmark_df = weight_sheets['Beanchmark Weights']
rnn_df = weight_sheets['RNN']
lstm_df = weight_sheets['LSTM']
gru_df = weight_sheets['GRU']

# Every sheet must share the model sheet's index, otherwise the strategies are not comparable.
sheets_with_other_index = [
    sheet
    for sheet, frame in weight_sheets.items()
    if not frame.index.equals(model_weights_df.index)
]
if sheets_with_other_index:
    raise ValueError(
        "DataFrames do not have the same index. "
        f"Sheets differing from 'Model Weights': {sheets_with_other_index}"
    )

# A missing weight would silently distort the portfolio returns, so fail fast.
sheets_with_nan = [
    sheet for sheet, frame in weight_sheets.items() if frame.isna().any().any()
]
if sheets_with_nan:
    raise ValueError(
        f"DataFrames contain NaN values. Affected sheets: {sheets_with_nan}"
    )

# results_excel_path = f"data_analysis/DA_Results/{pe_type}/01_L1_{pe_type}_{n_temp}_{no}.xlsx"


# Comparison

### 1. All periods

In [6]:
# ----- All-period analysis -----
# Rebalance schedule (the helper prints it); the first date is where the backtest starts.
rebalance_dates = get_rebalance_dates(data, start_year=start_rebalance_year)
start_date = rebalance_dates[0]

# Strategy label -> weight table.
portfolios = {
    'Our Model': model_weights_df,
    'RNN': rnn_df,
    'LSTM': lstm_df,
    'GRU': gru_df,
    'MVO': mvo_weights_df,
    'Equal Weight': equal_weights_df,
    'Benchmark': benchmark_df,
}

# Return series of every strategy over the whole backtest.
portfolio_returns = {
    label: calculate_portfolio_returns(data, weight_table, rebalance_dates, start_date)
    for label, weight_table in portfolios.items()
}

# Every strategy is scored against the benchmark series. The benchmark itself is
# therefore scored against its own returns, exactly as a dedicated branch would do.
benchmark_returns = portfolio_returns['Benchmark']
performance_metrics = {
    label: calculate_performance_metrics(series, benchmark_returns)
    for label, series in portfolio_returns.items()
}

# One row per strategy internally; displayed transposed (one column per strategy).
performance_df = pd.DataFrame(performance_metrics).T
print("Portfolio Performance Comparison:")
print("=" * 50)
performance_df.round(4).T

Rebalance Dates: ['2020-01-02', '2020-04-01', '2020-07-01', '2020-10-01', '2021-01-04', '2021-04-01', '2021-07-01', '2021-10-01', '2022-01-03', '2022-04-01', '2022-07-01', '2022-10-03', '2023-01-03', '2023-04-03', '2023-07-03', '2023-10-02', '2024-01-02', '2024-04-01', '2024-07-01', '2024-10-01']
Portfolio Performance Comparison:


Unnamed: 0,Our Model,RNN,LSTM,GRU,MVO,Equal Weight,Benchmark
Total Return (%),151.4792,111.4874,84.6282,85.0785,33.5875,45.9006,52.2477
Annualized Return (%),20.3072,16.2016,13.0802,13.1355,5.9774,7.8673,8.7921
Volatility (%),18.8483,19.5572,18.3902,19.2126,16.3945,16.986,17.2165
Sharpe Ratio,0.9696,0.7636,0.6524,0.635,0.3143,0.4138,0.4601
Max Drawdown (%),-24.6481,-27.263,-31.1025,-29.9029,-27.9695,-30.7354,-30.8061
Max Drawdown Duration (days),518.0,504.0,367.0,487.0,978.0,563.0,534.0
Sortino Ratio,1.277,0.995,0.7989,0.8002,0.4147,0.4921,0.548
Treynor Ratio,0.1952,0.1469,0.1162,0.119,0.0648,0.0716,0.0791
Jensen's Alpha (%),10.8578,6.883,3.817,4.0797,-1.1411,-0.7423,-0.0063
Beta,0.9364,1.0164,1.0328,1.0252,0.7946,0.981,1.0008


In [7]:
# Two stacked panels: cumulative growth on top, drawdown underneath.
fig = make_subplots(
    rows=2,
    cols=1,
    subplot_titles=('Cumulative Returns', 'Maximum Drawdown'),
    vertical_spacing=0.12,
    row_heights=[0.7, 0.3],
)

colors = [
    '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
    '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf',
]


def _line_trace(series, label, color, in_legend):
    """Return a 2px line trace for `series`, named `label`, drawn in `color`."""
    return go.Scatter(
        x=series.index,
        y=series.values,
        mode='lines',
        name=label,
        line=dict(color=color, width=2),
        showlegend=in_legend,
    )


# Growth of 1 unit invested, per strategy; shared by both panels.
growth_curves = {
    label: (1 + daily_returns).cumprod()
    for label, daily_returns in portfolio_returns.items()
}

# Top panel: cumulative returns (these traces own the legend entries).
for position, (label, curve) in enumerate(growth_curves.items()):
    fig.add_trace(_line_trace(curve, label, colors[position], True), row=1, col=1)

# Bottom panel: percentage distance below the running peak, same colour per strategy.
for position, (label, curve) in enumerate(growth_curves.items()):
    running_peak = curve.expanding().max()
    drawdown_pct = (curve - running_peak) / running_peak * 100
    fig.add_trace(_line_trace(drawdown_pct, label, colors[position], False), row=2, col=1)

# Horizontal legend placed above the plot area, right-aligned.
fig.update_layout(
    title='Portfolio Performance Comparison',
    height=800,
    hovermode='x unified',
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)

fig.update_xaxes(title_text="Date", row=2, col=1)
fig.update_yaxes(title_text="Cumulative Return", row=1, col=1)
fig.update_yaxes(title_text="Drawdown (%)", row=2, col=1)

fig.show()

# # Save the plot to a file
# fig.write_html(f'data_analysis/DA_Results/pic/01_All_portfolio_performance.html')
# # save image to file
# fig.write_image(f'data_analysis/DA_Results/pic/01_All_portfolio_performance.png')

In [8]:
# Collect the per-strategy return series into one frame (one column per strategy).
portfolio_returns_df = pd.DataFrame(portfolio_returns)
portfolio_returns_df.index.name = 'Date'

# Compound the period returns into a growth-of-1 curve for every strategy.
cumulative_returns_df = portfolio_returns_df.add(1).cumprod()
cumulative_returns_df

Unnamed: 0_level_0,Our Model,RNN,LSTM,GRU,MVO,Equal Weight,Benchmark
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-02,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000
2020-01-03,0.999039,0.998669,0.994434,1.001340,1.001133,0.994416,0.994104
2020-01-06,0.999962,1.004425,0.995702,1.003978,1.003512,0.995569,0.997280
2020-01-07,0.997172,1.003851,0.995162,1.004804,1.003142,0.994727,0.994787
2020-01-08,0.998488,1.005617,0.997212,1.004398,1.003025,0.995966,0.997196
...,...,...,...,...,...,...,...
2024-12-23,2.525417,2.134224,1.862724,1.862456,1.337870,1.465727,1.531259
2024-12-24,2.537384,2.144596,1.879471,1.879297,1.343592,1.474336,1.541556
2024-12-26,2.544823,2.143788,1.879899,1.876200,1.346343,1.476409,1.543449
2024-12-27,2.529925,2.132922,1.863728,1.863378,1.341616,1.468516,1.533228


In [None]:
# Persist the cumulative-return curves; the target directory must already exist.
cumulative_returns_df.to_excel(
    'data_analysis/DA_Results/final_cumulative_returns_df.xlsx',
    sheet_name='Cumulative Returns',
)

In [23]:
# Down-sample to weekly frequency, keeping the last available value of each week.
cumulative_returns_df_week = (
    cumulative_returns_df
    .resample('W')
    .last()
)
cumulative_returns_df_week.to_excel(
    'data_analysis/DA_Results/final_cumulative_returns_df_week.xlsx',
    sheet_name='Cumulative Returns Weekly',
)

### 2. QoQ

In [None]:
# Split the backtest into quarterly evaluation periods.
quarterly_periods = get_quarterly_periods(rebalance_dates, data)

# Strategies compared quarter by quarter.
# The model portfolio uses `model_weights_df` (loaded from the weights workbook):
# no executed cell defines `L1_weight`, so referencing it fails on a fresh kernel.
# NOTE: this rebinds `portfolios`, replacing the seven-strategy dict of the all-period section.
portfolios = {
    'Model Portfolio': model_weights_df,
    'MVO Portfolio': mvo_weights_df,
    'Equal Weight': equal_weights_df,
    'Benchmark': benchmark_df,
}

# quarterly_results[portfolio][quarter] -> metrics for that portfolio in that quarter.
quarterly_results = {}
for portfolio_name, portfolio_weights in portfolios.items():
    quarterly_results[portfolio_name] = {}

    for period in quarterly_periods:
        quarter = period['quarter']
        returns = calculate_quarterly_portfolio_returns(data, portfolio_weights, period)
        metrics = calculate_quarterly_metrics(returns)
        quarterly_results[portfolio_name][quarter] = metrics

# Metric names looked up in each metrics mapping; they must match the keys that
# calculate_quarterly_metrics() returns (not visible here — TODO confirm).
all_metrics = ['Total Return (%)', 'Annualized Return (%)',
               'Volatility (%)', 'Max Drawdown (%)',
               'Max Drawdown Duration (days)', 'Sharpe Ratio', 'Sortino Ratio']

# One DataFrame per metric: rows are quarters, columns are portfolios.
quarterly_comparison = {}
for metric in all_metrics:
    quarterly_comparison[metric] = pd.DataFrame({
        portfolio: {
            quarter: quarterly_results[portfolio][quarter][metric]
            for quarter in quarterly_results[portfolio]
        }
        for portfolio in portfolios
    })

# # Display results
# for metric in all_metrics:
#     print(f"\n{metric}")
#     print("-" * 40)
#     print(quarterly_comparison[metric].round(4))
#     # quarterly_comparison[metric].to_csv(f'{output_dir}/{train_type}/quarterly_{metric.lower().replace(" ", "_")}_run{run_no}.csv', index=True)


In [None]:
# Display the dict of per-metric quarterly tables (metric name -> quarters x portfolios DataFrame).
quarterly_comparison

In [None]:
# # --- Begin: code for saving the results to Excel ---
# qoq_excel_path = f"data_analysis/DA_Results/{pe_type}/02_QoQ_L1_{pe_type}_{n_temp}_{no}.xlsx"

# for metric in all_metrics:
#     sheet_name = metric.replace(" ", "_")
#     df_to_save = quarterly_comparison[metric].round(4)
#     # save_dataframe_to_new_sheet(df_to_save, qoq_excel_path, sheet_name)


### 3. YoY

In [None]:
# Year-by-year analysis
print("Yearly Performance Analysis")
print("=" * 50)

# Split the backtest into yearly evaluation periods.
yearly_periods = get_yearly_periods(rebalance_dates, data)

# Strategies compared year by year.
# The model portfolio uses `model_weights_df` (loaded from the weights workbook):
# no executed cell defines `L1_weight`, so referencing it fails on a fresh kernel.
# NOTE: this rebinds `portfolios` for the cells that follow.
portfolios = {
    'Model Portfolio': model_weights_df,
    'MVO Portfolio': mvo_weights_df,
    'Equal Weight': equal_weights_df,
    'Benchmark': benchmark_df,
}

# yearly_results[portfolio][year] -> metrics for that portfolio in that year.
yearly_results = {}
for portfolio_name, portfolio_weights in portfolios.items():
    yearly_results[portfolio_name] = {}

    for period in yearly_periods:
        year = period['year']
        returns = calculate_yearly_portfolio_returns(data, portfolio_weights, period, rebalance_dates)
        metrics = calculate_yearly_metrics(returns)
        yearly_results[portfolio_name][year] = metrics

# Metric names looked up in each metrics mapping; they must match the keys that
# calculate_yearly_metrics() returns (not visible here — TODO confirm).
all_metrics = ['Total Return (%)', 'Annualized Return (%)', 'Volatility (%)',
               'Sharpe Ratio', 'Sortino Ratio', 'Max Drawdown (%)',
               'Max DD Duration (days)', 'VaR 95% (%)', 'Calmar Ratio', 'Trading Days']

# One DataFrame per metric: rows are years, columns are portfolios.
yearly_comparison = {}
for metric in all_metrics:
    yearly_comparison[metric] = pd.DataFrame({
        portfolio: {
            year: yearly_results[portfolio][year][metric]
            for year in yearly_results[portfolio]
        }
        for portfolio in portfolios
    })


In [None]:
# Display the dict of per-metric yearly tables (metric name -> years x portfolios DataFrame).
yearly_comparison

In [None]:
# ---------------------------------------------------------------------------
# Summary statistics by year
# ---------------------------------------------------------------------------
print("\n" + "="*60)
print("ANNUAL PERFORMANCE SUMMARY")
print("="*60)

total_returns_by_year = yearly_comparison['Total Return (%)']  # rows: years, columns: portfolios
years = list(total_returns_by_year.index)

for year in years:
    print(f"\n{year}:")
    print("-" * 20)

    # Best performing portfolio this year
    year_returns = total_returns_by_year.loc[year]
    best_return = year_returns.idxmax()
    print(f"Best Return: {best_return} ({year_returns[best_return]:.2f}%)")

    # Best Sharpe ratio this year (portfolios without a value are skipped)
    year_sharpe = yearly_comparison['Sharpe Ratio'].loc[year].dropna()
    if not year_sharpe.empty:
        best_sharpe = year_sharpe.idxmax()
        print(f"Best Sharpe: {best_sharpe} ({year_sharpe[best_sharpe]:.3f})")

    # Lowest volatility this year
    year_vol = yearly_comparison['Volatility (%)'].loc[year].dropna()
    if not year_vol.empty:
        lowest_vol = year_vol.idxmin()
        print(f"Lowest Vol: {lowest_vol} ({year_vol[lowest_vol]:.2f}%)")

    # Best Calmar ratio this year
    year_calmar = yearly_comparison['Calmar Ratio'].loc[year].dropna()
    if not year_calmar.empty:
        best_calmar = year_calmar.idxmax()
        print(f"Best Calmar: {best_calmar} ({year_calmar[best_calmar]:.3f})")

# ---------------------------------------------------------------------------
# Ranking analysis
# ---------------------------------------------------------------------------
print("\n" + "="*60)
print("PORTFOLIO RANKINGS BY YEAR")
print("="*60)

# Rank the portfolios within each year by total return (1 = best; ties share the
# better rank). Ranking row-wise in one call keeps the frame numeric instead of
# filling an object-dtype frame one row at a time.
ranking_df = (
    total_returns_by_year
    .rank(axis=1, ascending=False, method='min')
    .reindex(index=years, columns=list(portfolios))
)

# Nullable integers so that a year with a missing return shows <NA> instead of raising.
print(f"\nRanking by Annual Returns (1=Best, {len(portfolios)}=Worst):")
print(ranking_df.astype('Int64'))

# Average ranking
avg_ranking = ranking_df.mean().sort_values()
print("\nAverage Ranking Across All Years:")
print("-" * 40)
for portfolio, avg_rank in avg_ranking.items():
    print(f"{portfolio}: {avg_rank:.2f}")

# Win rate analysis
print("\nAnnual Win Rate (% of years ranked #1):")
print("-" * 45)
for portfolio in portfolios:
    win_rate = (ranking_df[portfolio] == 1).sum() / len(years) * 100
    print(f"{portfolio}: {win_rate:.1f}%")

# Multi-year consistency analysis: dispersion of the yearly total returns
print("\nConsistency Analysis:")
print("-" * 25)
for portfolio in portfolios:
    returns_series = total_returns_by_year[portfolio].dropna()
    if len(returns_series) > 1:  # a standard deviation needs at least two years
        consistency = returns_series.std()
        print(f"{portfolio} - Return Std Dev: {consistency:.2f}%")

# Best and worst years
print("\nBest and Worst Years:")
print("-" * 25)
for portfolio in portfolios:
    returns_series = total_returns_by_year[portfolio].dropna()
    if len(returns_series) > 0:
        best_year = returns_series.idxmax()
        worst_year = returns_series.idxmin()
        print(f"{portfolio}:")
        print(f"  Best: {best_year} ({returns_series[best_year]:.2f}%)")
        print(f"  Worst: {worst_year} ({returns_series[worst_year]:.2f}%)")

In [None]:
# # --- Begin: code for saving the results to Excel ---
# output_excel_path = f"data_analysis/DA_Results/{pe_type}/03_YoY_{pe_type}_{n_temp}_{no}.xlsx"

# for metric in all_metrics:
#     sheet_name = metric.replace(" ", "_")
#     df_to_save = yearly_comparison[metric].round(4)
#     save_dataframe_to_new_sheet(df_to_save, output_excel_path, sheet_name)


In [None]:
# Audible "finished" notification run through the shell.
# NOTE(review): `say` is the macOS text-to-speech command; on systems without it the
# shell call just returns a non-zero status — confirm this cell is only meant for local runs.
os.system('say "All code has finished"')

## Try Other QP Formulations

In [None]:
# def create_constraint_matrices(columns, asset_map, asset_lower, asset_upper):
#     """
#     สร้างเมทริกซ์ข้อจำกัดสำหรับขอบเขตของประเภทสินทรัพย์ (asset-type bounds)

#     Args:
#         columns (pd.Index): รายชื่อคอลัมน์ของสินทรัพย์
#         asset_map (dict): แมพชื่อสินทรัพย์กับประเภทของสินทรัพย์ เช่น {'SPY': 'Equity', 'AGG': 'Bond'}
#         asset_lower (dict): ขอบเขตล่างของแต่ละประเภทสินทรัพย์ เช่น {'Equity': 0.4}
#         asset_upper (dict): ขอบเขตบนของแต่ละประเภทสินทรัพย์ เช่น {'Equity': 0.8}

#     Returns:
#         dict: ประกอบด้วยเมทริกซ์และเวกเตอร์ข้อจำกัดต่างๆ ที่จำเป็นสำหรับ Solver
#     """
#     asset_types = {}
#     for asset in columns:
#         t = asset_map.get(asset, "Unknown")
#         asset_types.setdefault(t, []).append(asset)
    
#     mats = []
#     lbs = []
#     ubs = []
#     names = []
#     for t, assets in asset_types.items():
#         vec = np.zeros(len(columns), dtype=float)
#         for a in assets:
#             idx = columns.get_loc(a)
#             vec[idx] = 1.0
#         mats.append(vec)
#         lbs.append(asset_lower.get(t, 0.0))
#         ubs.append(asset_upper.get(t, 1.0))
#         names.append(t)
    
#     constraint_info = {
#         'constraint_matrix': np.vstack(mats),
#         'lower_bounds': np.array(lbs, dtype=float),
#         'upper_bounds': np.array(ubs, dtype=float),
#         'asset_type_names': names,
#         'asset_columns': columns
#     }
    
#     return constraint_info


# def apply_constraints_final(weights, asset_columns, port_type, constraint_info, fallback_method, distance_norm='L2'):
#     """
#     ใช้ Optimization เพื่อปรับน้ำหนักพอร์ตให้เป็นไปตามข้อจำกัด

#     Args:
#         weights (np.ndarray): น้ำหนักเริ่มต้นที่ต้องการปรับ
#         asset_columns (pd.Index): รายชื่อคอลัมน์ของสินทรัพย์
#         port_type (tuple): ประเภทของพอร์ต เช่น (0,) สำหรับ long_only
#         constraint_info (dict): ผลลัพธ์ที่ได้จากฟังก์ชัน create_constraint_matrices
#         fallback_method (function): ฟังก์ชันที่จะเรียกใช้ในกรณีที่ Solver หาคำตอบไม่ได้
#         distance_norm (str): ประเภทของระยะห่างที่ใช้วัดการเปลี่ยนแปลง ('L2', 'L1', or 'L-inf')
#                              - 'L2' (default): Squared Euclidean, ปรับอย่างนุ่มนวล
#                              - 'L1': Manhattan, ส่งเสริมให้ค่าน้ำหนักน้อยๆ กลายเป็นศูนย์ (Sparsity)
#                              - 'L-inf': Chebyshev, ลดการเปลี่ยนแปลงที่มากที่สุด
#     Returns:
#         np.ndarray: น้ำหนักใหม่ที่ผ่านการปรับตามข้อจำกัดแล้ว
#     """
#     w0 = np.asarray(weights, float).copy()
#     n  = w0.size

#     # ---------- สร้างขอบรายตัว --------------------------------------------
#     ub = np.full(n, 0.30)
#     if asset_columns is not None and "SHV" in asset_columns:
#         ub[asset_columns.get_loc("SHV")] = 0.40
#     lb = np.full(n, port_type[0])      

#     # ---------- ตัวแปรและข้อจำกัดพื้นฐาน ------------------------------------
#     w = cp.Variable(n)
#     constraints = [
#         cp.sum(w) == 1,
#         w >= lb,
#         w <= ub
#     ]

#     # ---------- ข้อจำกัดรายหมวด -------------------------------------------
#     if constraint_info and 'constraint_matrix' in constraint_info:
#         C = constraint_info['constraint_matrix']
#         constraints += [
#             C @ w >= constraint_info['lower_bounds'],
#             C @ w <= constraint_info['upper_bounds']
#         ]

#     # ---------- Objective: เลือกฟังก์ชันเป้าหมายตาม distance_norm ---------
#     eps = 1e-4
#     alpha = 1.0 / (w0 + eps) # ตัวคูณลงโทษการเปลี่ยนน้ำหนักจาก 0

#     if distance_norm == 'L2':
#         # L2 Norm (Squared Euclidean): min Σ α_i * (w_i - w0_i)^2
#         # ปรับอย่างนุ่มนวล ลงโทษการเปลี่ยนแปลงใหญ่ๆ อย่างรุนแรง
#         obj = cp.Minimize(cp.sum(cp.multiply(alpha, cp.square(w - w0))))
#     elif distance_norm == 'L1':
#         # L1 Norm (Manhattan): min Σ α_i * |w_i - w0_i|
#         # ส่งเสริมให้ค่าน้ำหนักที่ไม่สำคัญกลายเป็นศูนย์ (Sparsity)
#         obj = cp.Minimize(cp.sum(cp.multiply(alpha, cp.abs(w - w0))))
#     elif distance_norm == 'L-inf':
#         # L-infinity Norm (Chebyshev): min max_i(α_i * |w_i - w0_i|)
#         # ลดการเปลี่ยนแปลงที่ "เลวร้ายที่สุด" เพียงตัวเดียว
#         obj = cp.Minimize(cp.norm(cp.multiply(alpha, (w - w0)), "inf"))
#     else:
#         raise ValueError("distance_norm ต้องเป็น 'L1', 'L2', หรือ 'L-inf' เท่านั้น")

#     prob = cp.Problem(obj, constraints)

#     # ---------- แก้ปัญหา ------------------------------------------------
#     try:
#         prob.solve(solver=cp.OSQP, verbose=False)
#     except cp.error.SolverError:
#         print("OSQP failed, trying ECOS solver.")
#         prob.solve(solver=cp.ECOS, verbose=False)

#     if w.value is None:
#         print("QP/LP is infeasible, falling back to the provided fallback method.")
#         return fallback_method(weights)

#     return np.asarray(w.value).flatten()

# def simple_fallback(weights):
#     """ฟังก์ชันสำรองแบบง่ายๆ ในกรณีที่ QP แก้ไม่ได้"""
#     print("Executing the simple fallback method.")
#     # แค่ทำให้น้ำหนักรวมเป็น 1 และตัดค่าที่น้อยกว่า 0 ออก
#     w = np.maximum(0, weights)
#     w /= w.sum()
#     return w

In [None]:
# # def create_constraint_matrices(columns, asset_map, asset_lower, asset_upper):
# #     """
# #     สร้างเมทริกซ์ข้อจำกัดสำหรับขอบเขตของประเภทสินทรัพย์ (asset-type bounds)
# #     """
# #     asset_types = {}
# #     for asset in columns:
# #         t = asset_map.get(asset, "Unknown")
# #         asset_types.setdefault(t, []).append(asset)
    
# #     mats = []
# #     lbs = []
# #     ubs = []
# #     names = []
# #     for t, assets in asset_types.items():
# #         vec = np.zeros(len(columns), dtype=float)
# #         for a in assets:
# #             idx = columns.get_loc(a)
# #             vec[idx] = 1.0
# #         mats.append(vec)
# #         lbs.append(asset_lower.get(t, 0.0))
# #         ubs.append(asset_upper.get(t, 1.0))
# #         names.append(t)
    
# #     constraint_info = {
# #         'constraint_matrix': np.vstack(mats),
# #         'lower_bounds': np.array(lbs, dtype=float),
# #         'upper_bounds': np.array(ubs, dtype=float),
# #         'asset_type_names': names,
# #         'asset_columns': columns
# #     }
    
# #     return constraint_info


# # def apply_constraints_final(weights, asset_columns, port_type, constraint_info, fallback_method, 
# #                             distance_norm='L2', cov_matrix=None):
# #     """
# #     ใช้ Optimization เพื่อปรับน้ำหนักพอร์ตให้เป็นไปตามข้อจำกัด

# #     Args:
# #         weights (np.ndarray): น้ำหนักเริ่มต้นที่ต้องการปรับ
# #         asset_columns (pd.Index): รายชื่อคอลัมน์ของสินทรัพย์
# #         port_type (tuple): ประเภทของพอร์ต เช่น (0,) สำหรับ long_only
# #         constraint_info (dict): ผลลัพธ์ที่ได้จากฟังก์ชัน create_constraint_matrices
# #         fallback_method (function): ฟังก์ชันที่จะเรียกใช้ในกรณีที่ Solver หาคำตอบไม่ได้
# #         distance_norm (str): ประเภทของระยะห่างที่ใช้ ('L2', 'L1', 'L-inf', 'KL', 'Mahalanobis')
# #         cov_matrix (np.ndarray, optional): เมทริกซ์ความแปรปรวนร่วม (Covariance Matrix)
# #                                             จำเป็นต้องใช้สำหรับ distance_norm='Mahalanobis'
# #     Returns:
# #         np.ndarray: น้ำหนักใหม่ที่ผ่านการปรับตามข้อจำกัดแล้ว
# #     """
# #     w0 = np.asarray(weights, float).copy()
# #     n  = w0.size

# #     # ---------- สร้างขอบรายตัว --------------------------------------------
# #     ub = np.full(n, 0.30)
# #     if asset_columns is not None and "SHV" in asset_columns:
# #         ub[asset_columns.get_loc("SHV")] = 0.40
# #     lb = np.full(n, port_type[0])      

# #     # ---------- ตัวแปรและข้อจำกัดพื้นฐาน ------------------------------------
# #     w = cp.Variable(n)
# #     constraints = [
# #         cp.sum(w) == 1,
# #         w >= lb,
# #         w <= ub
# #     ]

# #     # ---------- ข้อจำกัดรายหมวด -------------------------------------------
# #     if constraint_info and 'constraint_matrix' in constraint_info:
# #         C = constraint_info['constraint_matrix']
# #         constraints += [
# #             C @ w >= constraint_info['lower_bounds'],
# #             C @ w <= constraint_info['upper_bounds']
# #         ]

# #     # ---------- Objective: เลือกฟังก์ชันเป้าหมายตาม distance_norm ---------
# #     eps = 1e-8 # ค่า epsilon ที่เล็กมากๆ เพื่อป้องกันปัญหาทางคณิตศาสตร์

# #     # ตัวคูณ alpha ใช้สำหรับ L1/L2/L-inf เพื่อลงโทษการเปลี่ยนน้ำหนักจาก 0
# #     alpha = 1.0 / (w0 + eps) 

# #     if distance_norm == 'L2':
# #         obj = cp.Minimize(cp.sum(cp.multiply(alpha, cp.square(w - w0))))
# #     elif distance_norm == 'L1':
# #         obj = cp.Minimize(cp.sum(cp.multiply(alpha, cp.abs(w - w0))))
# #     elif distance_norm == 'L-inf':
# #         obj = cp.Minimize(cp.norm(cp.multiply(alpha, (w - w0)), "inf"))
# #     elif distance_norm == 'KL':
# #         # KL Divergence: min Σ w_i * log(w_i / w0_i)
# #         # เหมาะกับการเปรียบเทียบการแจกแจง และลงโทษการเพิ่มสินทรัพย์ใหม่โดยธรรมชาติ
# #         # เราต้องทำให้ w0 ไม่มีค่า 0 และผลรวมเป็น 1
# #         w0_safe = w0 + eps
# #         w0_safe /= w0_safe.sum()
# #         obj = cp.Minimize(cp.sum(cp.kl_div(w, w0_safe)))
# #     elif distance_norm == 'Mahalanobis':
# #         # Mahalanobis: min (w - w0)' * inv(Σ) * (w - w0)
# #         # พิจารณาความสัมพันธ์ระหว่างสินทรัพย์
# #         if cov_matrix is None:
# #             raise ValueError("ต้องระบุ Covariance Matrix (cov_matrix) เมื่อใช้ Mahalanobis distance")
# #         if cov_matrix.shape != (n, n):
# #             raise ValueError(f"Covariance Matrix ต้องมี shape ({n}, {n})")
        
# #         # ใช้ pseudo-inverse เพื่อความเสถียรทางตัวเลข
# #         inv_cov = np.linalg.pinv(cov_matrix)
# #         obj = cp.Minimize(cp.quad_form(w - w0, inv_cov))
# #     else:
# #         raise ValueError("distance_norm ต้องเป็น 'L1', 'L2', 'L-inf', 'KL', หรือ 'Mahalanobis'")

# #     prob = cp.Problem(obj, constraints)

# #     # ---------- แก้ปัญหา ------------------------------------------------
# #     # Solver บางตัวไม่รองรับปัญหาทุกประเภท
# #     # KL Divergence เป็นปัญหาประเภท 'Exponential Cone' ซึ่ง OSQP ไม่รองรับ
# #     solver_choice = cp.ECOS if distance_norm == 'KL' else cp.OSQP
    
# #     try:
# #         prob.solve(solver=solver_choice, verbose=False)
# #     except cp.error.SolverError:
# #         # หาก Solver แรกไม่สำเร็จ ลองใช้ตัวอื่น
# #         fallback_solver = cp.SCS if solver_choice == cp.ECOS else cp.ECOS
# #         print(f"{solver_choice} failed, trying {fallback_solver} solver.")
# #         try:
# #             prob.solve(solver=fallback_solver, verbose=False)
# #         except cp.error.SolverError:
# #             print(f"All attempted solvers failed.")
# #             w.value = None # กำหนดให้เป็น None เพื่อเข้าสู่ fallback method

# #     if prob.status in [cp.INFEASIBLE, cp.UNBOUNDED] or w.value is None:
# #         print(f"Problem is {prob.status}, falling back to the provided fallback method.")
# #         return fallback_method(weights)

# #     return np.asarray(w.value).flatten()

# # # ===== ตัวอย่างการใช้งาน =====

# # def simple_fallback(weights):
# #     print("Executing the simple fallback method.")
# #     w = np.maximum(0, weights)
# #     w /= w.sum()
# #     return w


In [None]:
# L1_weight = raw_weights_df.copy()

# # --- 2. สร้างเมทริกซ์ข้อจำกัด ---
# print("--- Creating Constraint Matrices ---")
# asset_names = pd.Index(list(asset_map.keys()))
# constraint_data = create_constraint_matrices(
#     columns=asset_names,
#     asset_map=asset_map,
#     asset_lower=asset_lower_aggressive,
#     asset_upper=asset_upper_aggressive
# )

# # --- 3. ปรับน้ำหนักด้วย QP ---
# distance_type = 'L1'  # L2, L1, L-inf
# print("--- Applying Constraints using QP ---")
# # print("Initial Weights: ", L1_weight)

# for i in range(L1_weight.shape[0]):
#     L1_weight.iloc[i] = apply_constraints_final(
#         weights=L1_weight.iloc[i],
#         asset_columns=asset_names,
#         port_type=long_only,  # Long-only port
#         constraint_info=constraint_data,
#         fallback_method=simple_fallback,
#         distance_norm=distance_type
#     )

# L1_weight = L1_weight.round(4)
# L1_weight = L1_weight.abs()

# print("\n===== Checking Constraints and Total Weights =====")
# # check total weights
# total_weights = L1_weight.sum(axis=1).round(2)
# if not total_weights.equals(pd.Series(1.0, index=total_weights.index)):
#     raise ValueError("Total weights do not sum to 1 after applying constraints.")
# else:
#     print("✅ Total weights sum to 1 for all dates after applying constraints.")

# # --- Run the Check and Print Results ---
# violations_found = check_portfolio_constraints(
#     L1_weight, 
#     asset_map, 
#     asset_lower_aggressive, 
#     asset_upper_aggressive
# )

# if not violations_found:
#     print("✅ All portfolio weights satisfy the constraints.")
# else:
#     print("❌ Constraint violations were found:")
#     for date, messages in violations_found.items():
#         print(f"\nOn {date}:")
#         for msg in messages:
#             print(f"  - {msg}")

In [None]:
# # check that model_weights_df == L1_weight
# if distance_type == 'L2':
#     if not model_weights_df.equals(L1_weight):
#         raise ValueError("Model weights do not match the adjusted weights after applying constraints.")



In [None]:
# # Main analysis
# rebalance_dates =  get_rebalance_dates(data, start_year=start_rebalance_year)
# start_date = rebalance_dates[0]  # First rebalance date

# # Calculate portfolio returns for each strategy
# portfolios = {
#     'Model Portfolio': L1_weight,
#     'MVO Portfolio': mvo_weights_df,
#     'Equal Weight': equal_weights_df,
#     'Benchmark': benchmark_df
# }

# portfolio_returns = {}
# for name, weights in portfolios.items():
#     returns = calculate_portfolio_returns(data, weights, rebalance_dates, start_date)
#     portfolio_returns[name] = returns

# # Calculate performance metrics
# performance_metrics = {}
# benchmark_returns = portfolio_returns['Benchmark']

# for name, returns in portfolio_returns.items():
#     if name == 'Benchmark':
#         metrics = calculate_performance_metrics(returns, returns)  # Self as benchmark
#     else:
#         metrics = calculate_performance_metrics(returns, benchmark_returns)
#     performance_metrics[name] = metrics

# # Create performance comparison DataFrame
# performance_df = pd.DataFrame(performance_metrics).T
# print("Portfolio Performance Comparison:")
# print("=" * 50)
# # performance_df.to_csv(f'{output_dir}/{train_type}/performance_comparison_run{run_no}.csv')
# # save_dataframe_to_new_sheet(performance_df.T, results_excel_path, 'Performance Comparison')
# performance_df.round(4).T

In [None]:
# L1_weight.to_csv(f'data_analysis/DA_Results/total_{no}_after_constraints.csv')