In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
import optuna

from scipy.optimize import minimize
from scipy.optimize import Bounds

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [2]:
path_results = Path("../data/results/")

In [3]:
cv = "cv3"

all_oof = {
    "lightgbm": pd.read_parquet(path_results / f"oof_lightgbm_int97_{cv}.parquet"),
    "lightgbm_linear": pd.read_parquet(path_results / f"oof_lightgbm_linear_uni95_{cv}.parquet"),
    "xgboost": pd.read_parquet(path_results / f"oof_xgboost_uni80_{cv}.parquet"), 
    "catboost": pd.read_parquet(path_results / f"oof_catboost_int99_{cv}.parquet"),
    # --------------------------------------------------------------------------------------------
    "mlp": pd.read_parquet(path_results / f"oof_nn-mlp_uni95_{cv}.parquet"),
    "1dcnn": pd.read_parquet(path_results / f"oof_nn-1dcnn_uni95_{cv}.parquet"),
    "gandalf": pd.read_parquet(path_results / f"oof_nn-gandalf_uni95_{cv}.parquet"),
}

combined_df = all_oof["lightgbm"][["utility_agent1_true","fold"]]
for model, df in all_oof.items():
    combined_df[f'utility_agent1_pred_{model}'] = df['utility_agent1_pred']

combined_df = combined_df.dropna(subset=["fold"], ignore_index=True)
combined_df["fold"] = combined_df["fold"].astype(int)
combined_df

Unnamed: 0,utility_agent1_true,fold,utility_agent1_pred_lightgbm,utility_agent1_pred_lightgbm_linear,utility_agent1_pred_xgboost,utility_agent1_pred_catboost,utility_agent1_pred_mlp,utility_agent1_pred_1dcnn,utility_agent1_pred_gandalf
0,-0.466667,2,-0.312711,-0.343538,-0.371282,-0.140778,-0.194608,-0.232008,0.048197
1,-0.333333,2,-0.067989,-0.130576,0.013122,-0.060329,-0.087002,0.250238,0.253728
2,-0.066667,2,0.028722,0.110679,0.175175,0.236053,0.002282,0.328399,0.336407
3,-0.333333,2,-0.027028,0.038348,0.157617,0.111223,-0.167267,0.148452,0.249325
4,-0.333333,2,0.053506,0.117267,0.196683,0.093483,-0.028701,0.366571,0.429119
...,...,...,...,...,...,...,...,...,...
208603,-0.733333,3,-0.455402,-0.408042,-0.443232,-0.465189,-0.271464,-0.527001,-0.513383
208604,0.266667,3,-0.233319,-0.216536,-0.243728,-0.096964,0.077222,-0.097327,-0.264358
208605,0.666667,3,0.005615,0.040002,-0.042721,0.086905,0.123920,-0.114086,-0.074179
208606,0.666667,3,-0.044354,-0.070769,-0.093249,0.123682,0.098985,-0.154095,-0.084955


***
## blend test

In [4]:
# Dictionary to store scores for each model
model_scores = {}

# Get all prediction columns
pred_columns = [col for col in combined_df.columns if 'pred' in col]

for model_col in pred_columns:
    scores = list()
    
    for _, df in combined_df.groupby("fold"):
        pred = df[model_col].clip(-1, 1)
        squared_errors = (df['utility_agent1_true'] - pred)**2
        rmse = np.sqrt(squared_errors.mean())
        scores.append(rmse)
        
    model_scores[model_col] = scores
    print(f"\n{model_col}:")
    print("Scores per fold:", [f"{score:.4f}" for score in scores])
    print("Average score: {:.4f}".format(np.mean(scores)))


utility_agent1_pred_lightgbm:
Scores per fold: ['0.4301', '0.4138', '0.4199', '0.4133', '0.4252']
Average score: 0.4204

utility_agent1_pred_lightgbm_linear:
Scores per fold: ['0.4372', '0.4158', '0.4355', '0.4126', '0.4317']
Average score: 0.4266

utility_agent1_pred_xgboost:
Scores per fold: ['0.4350', '0.4130', '0.4239', '0.4140', '0.4275']
Average score: 0.4227

utility_agent1_pred_catboost:
Scores per fold: ['0.4213', '0.4073', '0.4117', '0.4142', '0.4202']
Average score: 0.4150

utility_agent1_pred_mlp:
Scores per fold: ['0.4391', '0.4310', '0.4372', '0.4168', '0.4215']
Average score: 0.4291

utility_agent1_pred_1dcnn:
Scores per fold: ['0.4401', '0.4202', '0.4377', '0.4183', '0.4263']
Average score: 0.4285

utility_agent1_pred_gandalf:
Scores per fold: ['0.4344', '0.4258', '0.4279', '0.4284', '0.4376']
Average score: 0.4308


In [5]:
# Create a linear blend of all predictions 
pred_columns = [
    # 'utility_agent1_pred_lightgbm',
    # 'utility_agent1_pred_lightgbm_linear',
    # 'utility_agent1_pred_xgboost',
    'utility_agent1_pred_catboost',
    'utility_agent1_pred_mlp',
    'utility_agent1_pred_1dcnn',
    'utility_agent1_pred_gandalf',
]

# Function to calculate RMSE for given weights
def calculate_blend_rmse(weights, pred_cols, df):
    # Convert raw weights to normalized weights using softmax
    weights = np.exp(weights) / np.sum(np.exp(weights))
    
    blended_predictions = np.zeros(len(df))
    for i, col in enumerate(pred_cols):
        blended_predictions += weights[i] * df[col].values
        
    scores = []
    for _, fold_df in df.groupby("fold"):
        fold_preds = blended_predictions[fold_df.index].clip(-1, 1)
        squared_errors = (fold_df['utility_agent1_true'] - fold_preds)**2
        rmse = np.sqrt(squared_errors.mean())
        scores.append(rmse)
    return np.mean(scores)

# Initial weights - equal raw weights that will be converted to normalized weights
initial_weights = np.zeros(len(pred_columns))

# Try different optimization methods
methods = [
    'Nelder-Mead',
    'Powell',
    'CG',
    'BFGS',
    'L-BFGS-B',
    'TNC',
    'COBYLA',
    'SLSQP',
    'trust-constr',
]

for method in methods:
    print("-"*100)
    print(f"Optimizing blend using {method}:")
    try:
        result = minimize(
            lambda w: calculate_blend_rmse(w, pred_columns, combined_df),
            initial_weights,
            method=method
        )
            
        if not result.success:
            print(f"\nOptimizer {method} failed to converge")
            continue
            
        raw_weights = result.x
        optimal_weights = np.exp(raw_weights) / np.sum(np.exp(raw_weights))

        # Calculate final blended predictions with optimal weights
        blended_predictions = np.zeros(len(combined_df))
        for i, col in enumerate(pred_columns):
            blended_predictions += optimal_weights[i] * combined_df[col].values

        # Calculate and print final scores
        blend_scores = []
        for _, df in combined_df.groupby("fold"):
            fold_preds = blended_predictions[df.index].clip(-1, 1)
            squared_errors = (df['utility_agent1_true'] - fold_preds)**2
            rmse = np.sqrt(squared_errors.mean())
            blend_scores.append(rmse)

        print(f"\nOptimized blend using {method}:")
        print("Optimal weights:")
        for col, weight in zip(pred_columns, optimal_weights):
            print(f"{col}: {weight:.4f}")
        print("\nScores per fold:", [f"{score:.4f}" for score in blend_scores])
        print("Average score: {:.4f}".format(np.mean(blend_scores)))
        
    except Exception as e:
        print(f"\nOptimizer {method} failed with error: {str(e)}")

----------------------------------------------------------------------------------------------------
Optimizing blend using Nelder-Mead:

Optimized blend using Nelder-Mead:
Optimal weights:
utility_agent1_pred_catboost: 0.4806
utility_agent1_pred_mlp: 0.1569
utility_agent1_pred_1dcnn: 0.1763
utility_agent1_pred_gandalf: 0.1861

Scores per fold: ['0.4146', '0.3996', '0.4066', '0.4051', '0.4106']
Average score: 0.4073
----------------------------------------------------------------------------------------------------
Optimizing blend using Powell:

Optimized blend using Powell:
Optimal weights:
utility_agent1_pred_catboost: 0.4804
utility_agent1_pred_mlp: 0.1572
utility_agent1_pred_1dcnn: 0.1764
utility_agent1_pred_gandalf: 0.1861

Scores per fold: ['0.4146', '0.3996', '0.4066', '0.4051', '0.4106']
Average score: 0.4073
----------------------------------------------------------------------------------------------------
Optimizing blend using CG:

Optimized blend using CG:
Optimal weights

In [6]:
# optimize using optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)

def objective(trial):
    # Generate weights that sum to 1 using softmax
    raw_weights = [trial.suggest_float(f'w_{i}', -10, 10) for i in range(len(pred_columns))]
    weights = np.exp(raw_weights) / np.sum(np.exp(raw_weights))
    return calculate_blend_rmse(weights, pred_columns, combined_df)

# Create and run study
# First 1000 trials with QMC sampler
study_qmc = optuna.create_study(
    study_name="blend_optuna",
    direction='minimize',
    load_if_exists=False,
    sampler=optuna.samplers.QMCSampler()
)
study_qmc.optimize(objective, n_trials=1000)

# Next 1000 trials with TPE sampler, starting from QMC results
study_tpe = optuna.create_study(
    study_name="blend_optuna",
    direction='minimize',
    load_if_exists=True,
    sampler=optuna.samplers.TPESampler(
        n_startup_trials=1,    # Increase random sampling at start
        n_ei_candidates=100,   # Consider more candidates
        multivariate=True,     # Enable multivariate sampling
        constant_liar=True     # Help with parallel optimization
    )
)

# Transfer trials from QMC study to TPE study
for trial in study_qmc.trials:
    if trial.state == optuna.trial.TrialState.COMPLETE:
        study_tpe.add_trial(trial)

# Continue optimization with TPE sampler
study_tpe.optimize(objective, n_trials=2000)

# Get best weights using softmax
raw_weights = [study_tpe.best_params[f'w_{i}'] for i in range(len(pred_columns))]
optimal_weights = np.exp(raw_weights) / np.sum(np.exp(raw_weights))

# Calculate final blended predictions with optimal weights
blended_predictions = np.zeros(len(combined_df))
for i, col in enumerate(pred_columns):
    blended_predictions += optimal_weights[i] * combined_df[col].values

# Calculate and print final scores
blend_scores = []
for _, df in combined_df.groupby("fold"):
    fold_preds = blended_predictions[df.index].clip(-1, 1)
    squared_errors = (df['utility_agent1_true'] - fold_preds)**2
    rmse = np.sqrt(squared_errors.mean())
    blend_scores.append(rmse)

print("\nOptimized blend using Optuna:")
print("Optimal weights:")
for col, weight in zip(pred_columns, optimal_weights):
    print(f"{col}: {weight:.4f}")
print("\nScores per fold:", [f"{score:.4f}" for score in blend_scores])
print("Average score: {:.4f}".format(np.mean(blend_scores)))

  sampler=optuna.samplers.QMCSampler()



Optimized blend using Optuna:
Optimal weights:
utility_agent1_pred_catboost: 0.9976
utility_agent1_pred_mlp: 0.0000
utility_agent1_pred_1dcnn: 0.0000
utility_agent1_pred_gandalf: 0.0024

Scores per fold: ['0.4213', '0.4072', '0.4117', '0.4141', '0.4202']
Average score: 0.4149


In [7]:
# from sklearn.linear_model import LinearRegression, Ridge, RidgeCV, LassoCV, ElasticNetCV
 
# # Create a linear blend of all predictions (excluding dt_nn) using sklearn LinearRegression
# pred_columns = [
#     col for col in combined_df.columns 
#     if 'pred' in col
#     and 'dt_nn' not in col
# ]

# # Split data by fold and fit/predict on each fold
# blend_scores = []
# blended_predictions = np.zeros(len(combined_df))

# for fold in combined_df['fold'].unique():
#     # Split into train/test for this fold
#     train_mask = combined_df['fold'] != fold
#     test_mask = combined_df['fold'] == fold
    
#     X_train = combined_df.loc[train_mask, pred_columns]
#     y_train = combined_df.loc[train_mask, 'utility_agent1_true']
#     X_test = combined_df.loc[test_mask, pred_columns]
#     y_test = combined_df.loc[test_mask, 'utility_agent1_true']
    
#     # Fit linear regression
#     lr = LinearRegression(positive=True)  # Enforce positive weights
#     # lr = Ridge(positive=True)
#     # lr = RidgeCV()
#     # lr = LassoCV()
#     # lr = ElasticNetCV()
#     lr.fit(X_train, y_train)
    
#     # Make predictions
#     fold_preds = lr.predict(X_test).clip(-1, 1)
#     blended_predictions[test_mask] = fold_preds
    
#     # Calculate score for this fold
#     squared_errors = (y_test - fold_preds)**2
#     rmse = np.sqrt(squared_errors.mean())
#     blend_scores.append(rmse)

# # Get final weights by fitting on all data
# # lr_final = LinearRegression(positive=True)
# # lr_final = RidgeCV()
# # lr_final = LassoCV()
# # lr_final = ElasticNetCV()
# # lr_final = LinearSVR()

# # lr_final.fit(combined_df[pred_columns], combined_df['utility_agent1_true'])
# # optimal_weights = lr_final.coef_
# # # Normalize weights to sum to 1
# # optimal_weights = optimal_weights / np.sum(optimal_weights)

# # print("\nOptimized blend:")
# # print("Optimal weights:")
# # for col, weight in zip(pred_columns, optimal_weights):
# #     print(f"{col}: {weight:.4f}")

# print("\nScores per fold:", [f"{score:.4f}" for score in blend_scores])
# print("Average score: {:.4f}".format(np.mean(blend_scores)))

***
## post-proc test

In [None]:
oof = pd.read_csv(path_results / "oof_catboost_cv1.csv")
oof = oof.dropna(subset=["fold"], ignore_index=True)
oof["fold"] = oof["fold"].astype(int)
oof

In [10]:
def compute_cv_score(oof_df):
    scores = list()
    for _, df in oof_df.groupby("fold"):
        pred = df["utility_agent1_pred"].clip(-1, 1)
        squared_errors = (df['utility_agent1_true'] - pred)**2
        rmse = np.sqrt(squared_errors.mean())
        scores.append(rmse)
    return np.mean(scores)

In [None]:
compute_cv_score(oof)

In [None]:
_oof = oof.copy()

w = 1.1
shift=0
p_min = -0.985
p_max = 0.985

_oof["utility_agent1_pred"] = (_oof["utility_agent1_pred"] * w + shift).clip(p_min, p_max)

compute_cv_score(_oof)

***