In [1]:
import pandas as pd
import optuna
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit, cross_validate, cross_val_score
from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score


# To avoid truncating columns
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# For reproducibility
RANDOM_SEED = 1

# Ensures the splits are in chronological order
TSCV = TimeSeriesSplit(n_splits=5)

# For hyperparameter tuning
ITERATIONS = 100

# Number of CPU cores to use (-1 means for all cores)
N_JOBS = -1

# Load the Data

In [2]:
train = pd.read_csv("vct_data/train_preprocessed.csv")
train.head()

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team A,Team A Score,Team A Attacker Score,Team A Defender Score,Team A Overtime Score,Team B,Team B Score,Team B Attacker Score,Team B Defender Score,Team B Overtime Score,Duration,Rating_TeamA,Average Combat Score_TeamA,Kills_TeamA,Deaths_TeamA,Assists_TeamA,Kills - Deaths (KD)_TeamA,"Kill, Assist, Trade, Survive %_TeamA",Average Damage Per Round_TeamA,Headshot %_TeamA,First Kills_TeamA,First Deaths_TeamA,Kills - Deaths (FKD)_TeamA,Rating_TeamB,Average Combat Score_TeamB,Kills_TeamB,Deaths_TeamB,Assists_TeamB,Kills - Deaths (KD)_TeamB,"Kill, Assist, Trade, Survive %_TeamB",Average Damage Per Round_TeamB,Headshot %_TeamB,First Kills_TeamB,First Deaths_TeamB,Kills - Deaths (FKD)_TeamB,Loadout Value_TeamA,Remaining Credits_TeamA,Type_TeamA,Loadout Value_TeamB,Remaining Credits_TeamB,Type_TeamB,2k_TeamA,3k_TeamA,4k_TeamA,5k_TeamA,1v1_TeamA,1v2_TeamA,1v3_TeamA,1v4_TeamA,1v5_TeamA,Econ_TeamA,Spike Plants_TeamA,Spike Defuses_TeamA,2k_TeamB,3k_TeamB,4k_TeamB,5k_TeamB,1v1_TeamB,1v2_TeamB,1v3_TeamB,1v4_TeamB,1v5_TeamB,Econ_TeamB,Spike Plants_TeamB,Spike Defuses_TeamB,Elimination_TeamA,Detonated_TeamA,Defused_TeamA,Time Expiry (No Plant)_TeamA,Eliminated_TeamA,Defused Failed_TeamA,Detonation Denied_TeamA,Time Expiry (Failed to Plant)_TeamA,Elimination_TeamB,Detonated_TeamB,Defused_TeamB,Time Expiry (No Plant)_TeamB,Eliminated_TeamB,Defused Failed_TeamB,Detonation Denied_TeamB,Time Expiry (Failed to Plant)_TeamB,KDA_TeamA,Round Win %_TeamA,First Blood %_TeamA,Clutches_TeamA,Attacker Win %_TeamA,Defender Win %_TeamA,Overtime Win %_TeamA,Rating_RollAvg_TeamA,Average Combat Score_RollAvg_TeamA,Average Damage Per Round_RollAvg_TeamA,KDA_RollAvg_TeamA,"Kill, Assist, Trade, Survive %_RollAvg_TeamA",Round Win %_RollAvg_TeamA,Attacker Win %_RollAvg_TeamA,Defender Win %_RollAvg_TeamA,Overtime Win %_RollAvg_TeamA,First Blood %_RollAvg_TeamA,Headshot %_RollAvg_TeamA,Clutches_RollAvg_TeamA,Econ_RollAvg_TeamA,Recent Win %_TeamA,KDA_TeamB,Round Win %_TeamB,First Blood %_TeamB,Clutches_TeamB,Attacker Win %_TeamB,Defender Win %_TeamB,Overtime Win %_TeamB,Rating_RollAvg_TeamB,Average Combat Score_RollAvg_TeamB,Average Damage Per Round_RollAvg_TeamB,KDA_RollAvg_TeamB,"Kill, Assist, Trade, Survive %_RollAvg_TeamB",Round Win %_RollAvg_TeamB,Attacker Win %_RollAvg_TeamB,Defender Win %_RollAvg_TeamB,Overtime Win %_RollAvg_TeamB,First Blood %_RollAvg_TeamB,Headshot %_RollAvg_TeamB,Clutches_RollAvg_TeamB,Econ_RollAvg_TeamB,Recent Win %_TeamB,Team A Tournament Win %,Team A Map Win %,Team A H2H Win %,Team B Tournament Win %,Team B Map Win %,Team B H2H Win %,Map_Abyss,Map_Ascent,Map_Bind,Map_Breeze,Map_Fracture,Map_Haven,Map_Icebox,Map_Lotus,Map_Pearl,Map_Split,Map_Sunset,Team A_Encoded,Team B_Encoded,Winner
0,Champions Tour LATAM Stage 1: Challengers 1,Open Qualifier: LAS,Round of 16,Leviatán vs Furious Gaming,Ascent,Leviatán,7,6,1.0,0.0,Furious Gaming,13,7,6.0,0.0,41.883333,0.990122,186.0,60.0,76.0,22.0,-16.0,0.699855,119.6,0.25,10.0,10.0,0.0,0.990134,218.4,76.0,60.0,25.0,16.0,0.699858,132.8,0.19,10.0,10.0,0.0,17455.584114,8510.17329,Full buy: 20k+,17455.711283,8510.381277,Full buy: 20k+,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.517956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.51867,0.0,0.0,2.0,1.0,3.0,1.0,9.0,0.0,3.0,1.0,9.0,0.0,3.0,1.0,2.0,1.0,3.0,1.0,1.078947,0.35,0.5,0.0,0.5,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.683333,0.65,0.5,0.0,0.875,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.559545,0.534095,0
1,Champions Tour LATAM Stage 1: Challengers 1,Open Qualifier: LAS,Round of 16,Leviatán vs Furious Gaming,Ascent,Furious Gaming,13,7,6.0,0.0,Leviatán,7,6,1.0,0.0,41.883333,0.990122,218.4,76.0,60.0,25.0,16.0,0.699855,132.8,0.19,10.0,10.0,0.0,0.990134,186.0,60.0,76.0,22.0,-16.0,0.699858,119.6,0.25,10.0,10.0,0.0,17455.584114,8510.17329,Full buy: 20k+,17455.711283,8510.381277,Full buy: 20k+,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.517956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.51867,0.0,0.0,9.0,0.0,3.0,1.0,2.0,1.0,3.0,1.0,2.0,1.0,3.0,1.0,9.0,0.0,3.0,1.0,1.683333,0.65,0.5,0.0,0.875,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.078947,0.35,0.5,0.0,0.5,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.465905,0.440455,1
2,Champions Tour LATAM Stage 1: Challengers 1,Open Qualifier: LAS,Round of 16,Leviatán vs Furious Gaming,Bind,Leviatán,7,5,2.0,0.0,Furious Gaming,13,6,7.0,0.0,38.933333,0.990122,175.8,56.0,81.0,27.0,-25.0,0.699855,113.6,0.16,10.0,10.0,0.0,0.990134,228.8,81.0,56.0,35.0,25.0,0.699858,164.6,0.202,10.0,10.0,0.0,17455.584114,8510.17329,Full buy: 20k+,17455.711283,8510.381277,Full buy: 20k+,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.517956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.51867,0.0,0.0,4.0,3.0,0.0,0.0,8.0,0.0,5.0,0.0,8.0,0.0,5.0,0.0,4.0,3.0,0.0,0.0,1.024691,0.35,0.5,0.0,0.416667,0.25,0.0,0.990122,186.0,119.6,1.078947,0.699855,0.35,0.5,0.125,0.0,0.5,0.25,0.0,53.517956,0.0,2.071429,0.65,0.5,0.0,0.75,0.583333,0.0,0.990134,218.4,132.8,1.683333,0.699858,0.65,0.875,0.5,0.0,0.5,0.19,0.0,53.51867,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.559545,0.534095,0
3,Champions Tour LATAM Stage 1: Challengers 1,Open Qualifier: LAS,Round of 16,Leviatán vs Furious Gaming,Bind,Furious Gaming,13,6,7.0,0.0,Leviatán,7,5,2.0,0.0,38.933333,0.990122,228.8,81.0,56.0,35.0,25.0,0.699855,164.6,0.202,10.0,10.0,0.0,0.990134,175.8,56.0,81.0,27.0,-25.0,0.699858,113.6,0.16,10.0,10.0,0.0,17455.584114,8510.17329,Full buy: 20k+,17455.711283,8510.381277,Full buy: 20k+,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.517956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.51867,0.0,0.0,8.0,0.0,5.0,0.0,4.0,3.0,0.0,0.0,4.0,3.0,0.0,0.0,8.0,0.0,5.0,0.0,2.071429,0.65,0.5,0.0,0.75,0.583333,0.0,0.990122,218.4,132.8,1.683333,0.699855,0.65,0.875,0.5,0.0,0.5,0.19,0.0,53.517956,1.0,1.024691,0.35,0.5,0.0,0.416667,0.25,0.0,0.990134,186.0,119.6,1.078947,0.699858,0.35,0.5,0.125,0.0,0.5,0.25,0.0,53.51867,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.465905,0.440455,1
4,Champions Tour LATAM Stage 1: Challengers 1,Open Qualifier: LAS,Round of 16,Procyon Team vs KRÜ Esports,Bind,Procyon Team,1,1,0.0,0.0,KRÜ Esports,13,2,11.0,0.0,28.033333,0.990122,187.2,40.0,67.0,21.0,-27.0,0.699855,130.8,0.192,6.0,8.0,-2.0,0.990134,253.0,67.0,40.0,23.0,27.0,0.699858,150.0,0.314,8.0,6.0,2.0,17455.584114,8510.17329,Full buy: 20k+,17455.711283,8510.381277,Full buy: 20k+,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.517956,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,53.51867,0.0,0.0,0.0,0.0,1.0,0.0,9.0,3.0,1.0,0.0,9.0,3.0,1.0,0.0,0.0,0.0,1.0,0.0,0.910448,0.071429,0.428571,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.25,0.928571,0.571429,0.0,1.0,0.916667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.313396,0


In [3]:
test = pd.read_csv("vct_data/test_preprocessed.csv")
test.head()

Unnamed: 0,Tournament,Stage,Match Type,Match Name,Map,Team A,Team A Score,Team A Attacker Score,Team A Defender Score,Team A Overtime Score,Team B,Team B Score,Team B Attacker Score,Team B Defender Score,Team B Overtime Score,Duration,Rating_TeamA,Average Combat Score_TeamA,Kills_TeamA,Deaths_TeamA,Assists_TeamA,Kills - Deaths (KD)_TeamA,"Kill, Assist, Trade, Survive %_TeamA",Average Damage Per Round_TeamA,Headshot %_TeamA,First Kills_TeamA,First Deaths_TeamA,Kills - Deaths (FKD)_TeamA,Rating_TeamB,Average Combat Score_TeamB,Kills_TeamB,Deaths_TeamB,Assists_TeamB,Kills - Deaths (KD)_TeamB,"Kill, Assist, Trade, Survive %_TeamB",Average Damage Per Round_TeamB,Headshot %_TeamB,First Kills_TeamB,First Deaths_TeamB,Kills - Deaths (FKD)_TeamB,Loadout Value_TeamA,Remaining Credits_TeamA,Type_TeamA,Loadout Value_TeamB,Remaining Credits_TeamB,Type_TeamB,2k_TeamA,3k_TeamA,4k_TeamA,5k_TeamA,1v1_TeamA,1v2_TeamA,1v3_TeamA,1v4_TeamA,1v5_TeamA,Econ_TeamA,Spike Plants_TeamA,Spike Defuses_TeamA,2k_TeamB,3k_TeamB,4k_TeamB,5k_TeamB,1v1_TeamB,1v2_TeamB,1v3_TeamB,1v4_TeamB,1v5_TeamB,Econ_TeamB,Spike Plants_TeamB,Spike Defuses_TeamB,Elimination_TeamA,Detonated_TeamA,Defused_TeamA,Time Expiry (No Plant)_TeamA,Eliminated_TeamA,Defused Failed_TeamA,Detonation Denied_TeamA,Time Expiry (Failed to Plant)_TeamA,Elimination_TeamB,Detonated_TeamB,Defused_TeamB,Time Expiry (No Plant)_TeamB,Eliminated_TeamB,Defused Failed_TeamB,Detonation Denied_TeamB,Time Expiry (Failed to Plant)_TeamB,KDA_TeamA,Round Win %_TeamA,First Blood %_TeamA,Clutches_TeamA,Attacker Win %_TeamA,Defender Win %_TeamA,Overtime Win %_TeamA,Rating_RollAvg_TeamA,Average Combat Score_RollAvg_TeamA,Average Damage Per Round_RollAvg_TeamA,KDA_RollAvg_TeamA,"Kill, Assist, Trade, Survive %_RollAvg_TeamA",Round Win %_RollAvg_TeamA,Attacker Win %_RollAvg_TeamA,Defender Win %_RollAvg_TeamA,Overtime Win %_RollAvg_TeamA,First Blood %_RollAvg_TeamA,Headshot %_RollAvg_TeamA,Clutches_RollAvg_TeamA,Econ_RollAvg_TeamA,Recent Win %_TeamA,KDA_TeamB,Round Win %_TeamB,First Blood %_TeamB,Clutches_TeamB,Attacker Win %_TeamB,Defender Win %_TeamB,Overtime Win %_TeamB,Rating_RollAvg_TeamB,Average Combat Score_RollAvg_TeamB,Average Damage Per Round_RollAvg_TeamB,KDA_RollAvg_TeamB,"Kill, Assist, Trade, Survive %_RollAvg_TeamB",Round Win %_RollAvg_TeamB,Attacker Win %_RollAvg_TeamB,Defender Win %_RollAvg_TeamB,Overtime Win %_RollAvg_TeamB,First Blood %_RollAvg_TeamB,Headshot %_RollAvg_TeamB,Clutches_RollAvg_TeamB,Econ_RollAvg_TeamB,Recent Win %_TeamB,Team A Tournament Win %,Team A Map Win %,Team A H2H Win %,Team B Tournament Win %,Team B Map Win %,Team B H2H Win %,Map_Abyss,Map_Ascent,Map_Bind,Map_Breeze,Map_Fracture,Map_Haven,Map_Icebox,Map_Lotus,Map_Pearl,Map_Split,Map_Sunset,Team A_Encoded,Team B_Encoded,Winner
0,Champions Tour Malaysia & Singapore Stage 2: C...,Group Stage,Day 7,BLEED vs Galaxy Esports,Icebox,Galaxy Esports,13,4,9.0,0.0,BLEED,7,3,4.0,0.0,43.333333,1.164,222.2,82.0,60.0,18.0,22.0,0.73,145.6,0.226,11.0,9.0,2.0,0.806,179.2,60.0,82.0,24.0,-22.0,0.63,119.2,0.24,9.0,11.0,-2.0,19605.0,10715.0,Full buy: 20k+,15860.0,4590.0,Semi-buy: 10-20k,14.0,7.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,67.2,5.0,2.0,10.0,2.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,46.2,5.0,0.0,10.0,1.0,2.0,0.0,4.0,2.0,0.0,1.0,4.0,2.0,0.0,1.0,10.0,1.0,2.0,0.0,1.666667,0.65,0.55,1.0,0.5,0.75,0.0,0.924,194.4,125.38,1.301095,0.7012,0.440863,0.45,0.463308,0.0,0.513037,0.2098,1.6,50.04,0.4,1.02439,0.35,0.45,3.0,0.25,0.5,0.0,0.971,197.86,129.62,1.379461,0.7036,0.489425,0.5625,0.436111,0.025,0.475159,0.2452,1.8,52.34,0.6,0.5,0.2,0.5,0.571429,0.666667,0.5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.447334,0.414839,1
1,Champions Tour Malaysia & Singapore Stage 2: C...,Group Stage,Day 8,BLEED vs KPMOONIIBLM9,Ascent,BLEED,11,5,6.0,0.0,KPMOONIIBLM9,13,6,7.0,0.0,46.366667,0.92,196.2,80.0,86.0,35.0,-6.0,0.684,131.0,0.23,9.0,15.0,-6.0,1.128,205.8,86.0,81.0,43.0,5.0,0.726,143.2,0.29,15.0,9.0,6.0,17050.0,8370.833333,Full buy: 20k+,18195.833333,11104.166667,Full buy: 20k+,12.0,5.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,54.2,6.0,2.0,17.0,4.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,59.2,7.0,3.0,8.0,1.0,2.0,0.0,10.0,0.0,3.0,0.0,10.0,0.0,3.0,0.0,8.0,1.0,2.0,0.0,1.337209,0.458333,0.375,1.0,0.416667,0.5,0.0,0.9364,194.06,127.78,1.312059,0.6876,0.459425,0.5,0.436111,0.025,0.460159,0.2482,1.8,51.3,0.3,1.592593,0.541667,0.625,1.0,0.5,0.583333,0.0,1.0512,211.96,136.7,1.607471,0.735,0.524396,0.545014,0.516667,0.0,0.550017,0.251,1.4,57.5,0.4,0.533333,0.666667,1.0,0.68,0.666667,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.57295,0.371567,0
2,Champions Tour Malaysia & Singapore Stage 2: C...,Group Stage,Day 8,BLEED vs KPMOONIIBLM9,Ascent,KPMOONIIBLM9,13,6,7.0,0.0,BLEED,11,5,6.0,0.0,46.366667,1.128,205.8,86.0,81.0,43.0,5.0,0.726,143.2,0.29,15.0,9.0,6.0,0.92,196.2,80.0,86.0,35.0,-6.0,0.684,131.0,0.23,9.0,15.0,-6.0,18195.833333,11104.166667,Full buy: 20k+,17050.0,8370.833333,Full buy: 20k+,17.0,4.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,59.2,7.0,3.0,12.0,5.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,54.2,6.0,2.0,10.0,0.0,3.0,0.0,8.0,1.0,2.0,0.0,8.0,1.0,2.0,0.0,10.0,0.0,3.0,0.0,1.592593,0.541667,0.625,1.0,0.5,0.583333,0.0,1.0512,211.96,136.7,1.607471,0.735,0.524396,0.545014,0.516667,0.0,0.550017,0.251,1.4,57.5,0.6,1.337209,0.458333,0.375,1.0,0.416667,0.5,0.0,0.9364,194.06,127.78,1.312059,0.6876,0.459425,0.5,0.436111,0.025,0.460159,0.2482,1.8,51.3,0.7,0.68,0.666667,0.0,0.533333,0.666667,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.628433,0.414839,1
3,Champions Tour Malaysia & Singapore Stage 2: C...,Group Stage,Day 8,BLEED vs KPMOONIIBLM9,Icebox,BLEED,10,5,5.0,0.0,KPMOONIIBLM9,13,6,7.0,0.0,47.7,0.898,199.4,79.0,86.0,27.0,-7.0,0.68,131.4,0.244,16.0,7.0,9.0,1.054,208.4,86.0,79.0,28.0,7.0,0.672,134.6,0.246,7.0,16.0,-9.0,17465.217391,5791.304348,Full buy: 20k+,17930.434783,9091.304348,Full buy: 20k+,12.0,3.0,2.0,1.0,2.0,2.0,0.0,0.0,0.0,52.4,9.0,0.0,10.0,10.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,52.8,7.0,4.0,8.0,1.0,0.0,1.0,8.0,1.0,4.0,0.0,8.0,1.0,4.0,0.0,8.0,1.0,0.0,1.0,1.232558,0.434783,0.695652,4.0,0.416667,0.454545,0.0,0.9648,198.04,130.76,1.367839,0.6984,0.486508,0.491667,0.477778,0.025,0.472659,0.243,1.9,52.7,0.3,1.443038,0.565217,0.304348,2.0,0.545455,0.583333,0.0,1.0178,207.98,134.68,1.434472,0.7212,0.497312,0.520014,0.475,0.0,0.531267,0.2582,1.4,54.7,0.4,0.5,0.6,0.5,0.692308,0.7,0.5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.57295,0.371567,0
4,Champions Tour Malaysia & Singapore Stage 2: C...,Group Stage,Day 8,BLEED vs KPMOONIIBLM9,Icebox,KPMOONIIBLM9,13,6,7.0,0.0,BLEED,10,5,5.0,0.0,47.7,1.054,208.4,86.0,79.0,28.0,7.0,0.672,134.6,0.246,7.0,16.0,-9.0,0.898,199.4,79.0,86.0,27.0,-7.0,0.68,131.4,0.244,16.0,7.0,9.0,17930.434783,9091.304348,Full buy: 20k+,17465.217391,5791.304348,Full buy: 20k+,10.0,10.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,52.8,7.0,4.0,12.0,3.0,2.0,1.0,2.0,2.0,0.0,0.0,0.0,52.4,9.0,0.0,8.0,1.0,4.0,0.0,8.0,1.0,0.0,1.0,8.0,1.0,0.0,1.0,8.0,1.0,4.0,0.0,1.443038,0.565217,0.304348,2.0,0.545455,0.583333,0.0,1.0178,207.98,134.68,1.434472,0.7212,0.497312,0.520014,0.475,0.0,0.531267,0.2582,1.4,54.7,0.6,1.232558,0.434783,0.695652,4.0,0.416667,0.454545,0.0,0.9648,198.04,130.76,1.367839,0.6984,0.486508,0.491667,0.477778,0.025,0.472659,0.243,1.9,52.7,0.7,0.692308,0.7,0.5,0.5,0.6,0.5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.628433,0.414839,1


# Data Preprocessing

## Extract features and target

In [4]:
# Commented out features were not selected by Lasso

features = [
    "Team A_Encoded", 
    "Team B_Encoded",

    "Team A H2H Win %",
    "Team B H2H Win %",

    "Team A Tournament Win %",
    "Team B Tournament Win %",

    "Recent Win %_TeamA",
    "Recent Win %_TeamB",

    # "Map_Abyss",
    # "Map_Ascent",
    # "Map_Bind",
    # "Map_Breeze",
    # "Map_Fracture",
    # "Map_Haven",
    # "Map_Icebox",
    # "Map_Lotus",
    # "Map_Pearl",
    # "Map_Split",
    # "Map_Sunset",

    "Team A Map Win %",
    "Team B Map Win %",

    "Round Win %_RollAvg_TeamA",
    "Round Win %_RollAvg_TeamB",

    "Attacker Win %_RollAvg_TeamA",
    "Attacker Win %_RollAvg_TeamB",

    # "Defender Win %_RollAvg_TeamA",
    # "Defender Win %_RollAvg_TeamB",

    "Overtime Win %_RollAvg_TeamA",
    "Overtime Win %_RollAvg_TeamB",

    # "Rating_RollAvg_TeamA",
    # "Rating_RollAvg_TeamB",

    # "Average Combat Score_RollAvg_TeamA",
    # "Average Combat Score_RollAvg_TeamB", 

    # "Average Damage Per Round_RollAvg_TeamA",
    # "Average Damage Per Round_RollAvg_TeamB",

    "KDA_RollAvg_TeamA",
    "KDA_RollAvg_TeamB",

    # "Kill, Assist, Trade, Survive %_RollAvg_TeamA", 
    # "Kill, Assist, Trade, Survive %_RollAvg_TeamB",

    "First Blood %_RollAvg_TeamA",
    "First Blood %_RollAvg_TeamB",

    "Headshot %_RollAvg_TeamA",
    "Headshot %_RollAvg_TeamB",

    "Clutches_RollAvg_TeamA",
    "Clutches_RollAvg_TeamB",

    "Econ_RollAvg_TeamA",
    "Econ_RollAvg_TeamB",
]

print("Features:", len(features))

Features: 26


In [5]:
X_train = train[features]
y_train = train["Winner"]

X_train.head()

Unnamed: 0,Team A_Encoded,Team B_Encoded,Team A H2H Win %,Team B H2H Win %,Team A Tournament Win %,Team B Tournament Win %,Recent Win %_TeamA,Recent Win %_TeamB,Team A Map Win %,Team B Map Win %,Round Win %_RollAvg_TeamA,Round Win %_RollAvg_TeamB,Attacker Win %_RollAvg_TeamA,Attacker Win %_RollAvg_TeamB,Overtime Win %_RollAvg_TeamA,Overtime Win %_RollAvg_TeamB,KDA_RollAvg_TeamA,KDA_RollAvg_TeamB,First Blood %_RollAvg_TeamA,First Blood %_RollAvg_TeamB,Headshot %_RollAvg_TeamA,Headshot %_RollAvg_TeamB,Clutches_RollAvg_TeamA,Clutches_RollAvg_TeamB,Econ_RollAvg_TeamA,Econ_RollAvg_TeamB
0,0.559545,0.534095,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.465905,0.440455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.559545,0.534095,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.35,0.65,0.5,0.875,0.0,0.0,1.078947,1.683333,0.5,0.5,0.25,0.19,0.0,0.0,53.517956,53.51867
3,0.465905,0.440455,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.65,0.35,0.875,0.5,0.0,0.0,1.683333,1.078947,0.5,0.5,0.19,0.25,0.0,0.0,53.517956,53.51867
4,0.0,0.313396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
X_test = test[features]
y_test = test["Winner"]

X_test.head()

Unnamed: 0,Team A_Encoded,Team B_Encoded,Team A H2H Win %,Team B H2H Win %,Team A Tournament Win %,Team B Tournament Win %,Recent Win %_TeamA,Recent Win %_TeamB,Team A Map Win %,Team B Map Win %,Round Win %_RollAvg_TeamA,Round Win %_RollAvg_TeamB,Attacker Win %_RollAvg_TeamA,Attacker Win %_RollAvg_TeamB,Overtime Win %_RollAvg_TeamA,Overtime Win %_RollAvg_TeamB,KDA_RollAvg_TeamA,KDA_RollAvg_TeamB,First Blood %_RollAvg_TeamA,First Blood %_RollAvg_TeamB,Headshot %_RollAvg_TeamA,Headshot %_RollAvg_TeamB,Clutches_RollAvg_TeamA,Clutches_RollAvg_TeamB,Econ_RollAvg_TeamA,Econ_RollAvg_TeamB
0,0.447334,0.414839,0.5,0.5,0.5,0.571429,0.4,0.6,0.2,0.666667,0.440863,0.489425,0.45,0.5625,0.0,0.025,1.301095,1.379461,0.513037,0.475159,0.2098,0.2452,1.6,1.8,50.04,52.34
1,0.57295,0.371567,1.0,0.0,0.533333,0.68,0.3,0.4,0.666667,0.666667,0.459425,0.524396,0.5,0.545014,0.025,0.0,1.312059,1.607471,0.460159,0.550017,0.2482,0.251,1.8,1.4,51.3,57.5
2,0.628433,0.414839,0.0,1.0,0.68,0.533333,0.6,0.7,0.666667,0.666667,0.524396,0.459425,0.545014,0.5,0.0,0.025,1.607471,1.312059,0.550017,0.460159,0.251,0.2482,1.4,1.8,57.5,51.3
3,0.57295,0.371567,0.5,0.5,0.5,0.692308,0.3,0.4,0.6,0.7,0.486508,0.497312,0.491667,0.520014,0.025,0.0,1.367839,1.434472,0.472659,0.531267,0.243,0.2582,1.9,1.4,52.7,54.7
4,0.628433,0.414839,0.5,0.5,0.692308,0.5,0.6,0.7,0.7,0.6,0.497312,0.486508,0.520014,0.491667,0.0,0.025,1.434472,1.367839,0.531267,0.472659,0.2582,0.243,1.4,1.9,54.7,52.7


## Feature selection

In [7]:
# Lasso (L1 regularization)
scaler = StandardScaler()
X_train_scaled_lasso = scaler.fit_transform(X_train)

lasso = Lasso(alpha=0.001)  # This value gave the best results
lasso.fit(X_train_scaled_lasso, y_train)

feature_importance = pd.Series(lasso.coef_, index=X_train.columns)
selected_features = feature_importance[feature_importance != 0]

print("Selected Features:")
print(selected_features)

Selected Features:
Team A_Encoded                  0.048353
Team B_Encoded                  0.047219
Team A H2H Win %                0.051635
Team B H2H Win %               -0.052689
Team A Tournament Win %         0.012959
Team B Tournament Win %        -0.007982
Recent Win %_TeamA             -0.029437
Recent Win %_TeamB             -0.001861
Team A Map Win %                0.030551
Team B Map Win %               -0.029192
Round Win %_RollAvg_TeamA       0.088393
Round Win %_RollAvg_TeamB      -0.057116
Attacker Win %_RollAvg_TeamA    0.001868
Attacker Win %_RollAvg_TeamB   -0.002243
Overtime Win %_RollAvg_TeamA   -0.003405
Overtime Win %_RollAvg_TeamB    0.005126
KDA_RollAvg_TeamA               0.032866
KDA_RollAvg_TeamB              -0.034101
First Blood %_RollAvg_TeamA     0.001373
First Blood %_RollAvg_TeamB    -0.002803
Headshot %_RollAvg_TeamA        0.013860
Headshot %_RollAvg_TeamB       -0.015873
Clutches_RollAvg_TeamA         -0.018875
Clutches_RollAvg_TeamB          0.0191

## Scaling features (for Logistic Regression only)

In [8]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Selection

We will be using the following models:
- **Logistic Regression**: This is a simple, linear model used for binary classification. It estimates the probability that a given input belongs to a particular class, making it ideal for tasks where the output is categorical. In this case, the model predicts the log-odds of the binary outcome (win/loss). Despite its simplicity, logistic regression performs well when there is a linear relationship between the input features and the log-odds of the outcome.

- **Random Forest**: This is an ensemble learning method that builds multiple decision trees and combines their predictions to improve accuracy and robustness. Each tree is trained on a random subset of the data, and the final prediction is made by averaging or voting the individual trees' outputs. This approach helps mitigate overfitting and enhances performance, particularly when dealing with non-linear data.

- **XGBoost**: This is a powerful and efficient implementation of gradient-boosted decision trees. It works by sequentially training trees, where each new tree attempts to correct the errors of the previous ones. XGBoost is known for its high performance in machine learning competitions due to its speed, regularization techniques, and ability to handle large datasets and complex patterns in the data.

# Model Training and Evaluation

Function for cross-validating the models

In [9]:
SCORING = ["accuracy", "precision", "recall", "f1", "roc_auc"]

def evaluate_model(model, X_train, y_train, cv=5, scoring=SCORING):
    tscv = TimeSeriesSplit(n_splits=cv)
    scores = cross_validate(model, X_train, y_train, cv=tscv, scoring=scoring)

    for metric in SCORING:
        print(f"{metric}: {scores[f'test_{metric}'].mean()}")


def predict_test(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Precision:", precision_score(y_test, y_pred))
    print("Recall:", recall_score(y_test, y_pred))
    print("F1:", f1_score(y_test, y_pred))
    print("ROC AUC:", roc_auc_score(y_test, y_pred_proba))

## Training baseline models

In [10]:
logreg_base = LogisticRegression(random_state=RANDOM_SEED, max_iter=2000)
rf_base = RandomForestClassifier(random_state=RANDOM_SEED)
xgb_base = XGBClassifier(random_state=RANDOM_SEED)

In [11]:

print("Logistic Regression Baseline")
evaluate_model(logreg_base, X_train_scaled, y_train)

Logistic Regression Baseline
accuracy: 0.6580874722016309
precision: 0.6549252043742148
recall: 0.6690181150360532
f1: 0.6618001499723334
roc_auc: 0.7262363549912625


In [12]:
logreg_base.fit(X_train_scaled, y_train)

print("Logistic Regression Test")
predict_test(logreg_base, X_test_scaled, y_test)

Logistic Regression Test
Accuracy: 0.6080426835292955
Precision: 0.6064616582327754
Recall: 0.6156885990910888
F1: 0.6110402980684381
ROC AUC: 0.6530414516027001


In [13]:
print("Random Forest Baseline")
evaluate_model(rf_base, X_train, y_train)

Random Forest Baseline
accuracy: 0.6692068198665678
precision: 0.6721479302565166
recall: 0.6609528285994529
f1: 0.6663698382572055
roc_auc: 0.7394835795668555


In [14]:
rf_base.fit(X_train, y_train)

print("Random Forest Test")
predict_test(rf_base, X_test, y_test)

Random Forest Test
Accuracy: 0.6068570299377531
Precision: 0.6121475953565506
Recall: 0.5834815253902391
F1: 0.5974709155285787
ROC AUC: 0.654282301377737


In [15]:
print("XGBoost Baseline")
evaluate_model(xgb_base, X_train, y_train)

XGBoost Baseline
accuracy: 0.6450704225352112
precision: 0.6447617431977988
recall: 0.6463651761124469
f1: 0.6452822052051533
roc_auc: 0.7140792979909187


In [16]:
xgb_base.fit(X_train, y_train)

print("XGBoost Test")
predict_test(xgb_base, X_test, y_test)

XGBoost Test
Accuracy: 0.5999407173204229
Precision: 0.6016472478907192
Recall: 0.5917802805769611
F1: 0.5966729753959558
ROC AUC: 0.6417090546713495


## Hyperparameter tuning (Optuna)

In [17]:
# Random Forest Tuning
def tune_rf(X_train, y_train, tscv, scoring, random_state, n_iter=100):
    
    def rf_objective(trial):
        rf_params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("max_depth", 5, 50),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 32),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 32),
            "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2", None]),
        }

        rf = RandomForestClassifier(random_state=random_state, n_jobs=-1, bootstrap=True, **rf_params)
        scores = cross_val_score(rf, X_train, y_train, cv=tscv, scoring=scoring)
        return scores.mean()
    
    study = optuna.create_study(direction="maximize")
    study.optimize(rf_objective, n_trials=n_iter)
    return study.best_params, study.best_value


# Tune model
rf_params_optuna, rf_score_optuna = tune_rf(X_train, y_train, TSCV, "accuracy", RANDOM_SEED)

# Display results
print("\nRandom Forest Optuna Tuned")
print("Best params:", rf_params_optuna)
print("Best score:", rf_score_optuna)


[I 2024-09-23 05:54:42,794] A new study created in memory with name: no-name-80333f1a-c11a-416c-aff5-ea1a1499ea08
[I 2024-09-23 05:54:54,205] Trial 0 finished with value: 0.6779836916234248 and parameters: {'n_estimators': 395, 'max_depth': 28, 'min_samples_split': 19, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 0 with value: 0.6779836916234248.
[I 2024-09-23 05:55:12,922] Trial 1 finished with value: 0.6789028910303928 and parameters: {'n_estimators': 777, 'max_depth': 10, 'min_samples_split': 18, 'min_samples_leaf': 20, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.6789028910303928.
[I 2024-09-23 05:55:20,622] Trial 2 finished with value: 0.6779243884358784 and parameters: {'n_estimators': 265, 'max_depth': 27, 'min_samples_split': 25, 'min_samples_leaf': 6, 'max_features': 'log2'}. Best is trial 1 with value: 0.6789028910303928.
[I 2024-09-23 05:55:35,940] Trial 3 finished with value: 0.6758191252779837 and parameters: {'n_estimators': 826, 'max_depth': 8,


Random Forest Optuna Tuned
Best params: {'n_estimators': 816, 'max_depth': 33, 'min_samples_split': 3, 'min_samples_leaf': 10, 'max_features': 'log2'}
Best score: 0.6805930318754634


In [18]:
rf_tuned_optuna = RandomForestClassifier(random_state=RANDOM_SEED, n_jobs=-1, **rf_params_optuna)

print("Random Forest Optuna Tuned")
evaluate_model(rf_tuned_optuna, X_train, y_train)

Random Forest Optuna Tuned
accuracy: 0.6805930318754634
precision: 0.6785099210562536
recall: 0.687937476415003
f1: 0.683061404582781
roc_auc: 0.7527587896205967


In [19]:
# XGBoost Tuning
def tune_xgb(X_train, y_train, tscv, scoring, random_state, n_iter=100):
    
    def xgb_objective(trial):
        xgb_params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
            "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1, log=True),
            "max_depth": trial.suggest_int("max_depth", 3, 20),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.4, 1.0),
            "gamma": trial.suggest_float("gamma", 0, 5),
            "reg_alpha": trial.suggest_float("reg_alpha", 1e-5, 10.0, log=True),
            "reg_lambda": trial.suggest_float("reg_lambda", 1e-5, 10.0, log=True),
        }

        xgb = XGBClassifier(random_state=random_state, n_jobs=-1, **xgb_params)
        scores = cross_val_score(xgb, X_train, y_train, cv=tscv, scoring=scoring)
        return scores.mean()
    
    study = optuna.create_study(direction="maximize")
    study.optimize(xgb_objective, n_trials=n_iter)
    return study.best_params, study.best_value

# Tune model
xgb_params_optuna, xgb_score_optuna = tune_xgb(X_train, y_train, TSCV, "accuracy", RANDOM_SEED)

# Display results
print("\nXGBoost Optuna Tuned")
print("Best params:", xgb_params_optuna)
print("Best score:", xgb_score_optuna)

[I 2024-09-23 06:36:22,087] A new study created in memory with name: no-name-3027e483-df8b-4016-897a-189a75381cb5
[I 2024-09-23 06:36:44,967] Trial 0 finished with value: 0.674959229058562 and parameters: {'n_estimators': 171, 'learning_rate': 0.001764803153496475, 'max_depth': 18, 'subsample': 0.8580789884079807, 'colsample_bytree': 0.7334438817232057, 'gamma': 3.8601358111275714, 'reg_alpha': 0.019809583766441697, 'reg_lambda': 0.016245896691878446}. Best is trial 0 with value: 0.674959229058562.
[I 2024-09-23 06:38:03,218] Trial 1 finished with value: 0.6726760563380283 and parameters: {'n_estimators': 819, 'learning_rate': 0.0013976508597788777, 'max_depth': 15, 'subsample': 0.9061507175208711, 'colsample_bytree': 0.9563503455841728, 'gamma': 3.782630701302476, 'reg_alpha': 3.7558389759349234e-05, 'reg_lambda': 1.1742748395596316e-05}. Best is trial 0 with value: 0.674959229058562.
[I 2024-09-23 06:38:05,005] Trial 2 finished with value: 0.6745737583395107 and parameters: {'n_estim


XGBoost Optuna Tuned
Best params: {'n_estimators': 250, 'learning_rate': 0.011122684366810513, 'max_depth': 17, 'subsample': 0.9307675383456219, 'colsample_bytree': 0.5385619080086272, 'gamma': 4.0433339169224904, 'reg_alpha': 0.8586846894671879, 'reg_lambda': 6.41536777771546}
Best score: 0.6817197924388436


In [20]:
xgb_tuned_optuna = XGBClassifier(random_state=RANDOM_SEED, n_jobs=-1, **xgb_params_optuna)

print("\nXGBoost Optuna Tuned")
evaluate_model(xgb_tuned_optuna, X_train, y_train)


XGBoost Optuna Tuned
accuracy: 0.6817197924388436
precision: 0.6841628861006839
recall: 0.6760158385673123
f1: 0.6799024109945452
roc_auc: 0.7540751357774864


In [21]:
# Logistic Regression Tuning
def tune_logreg(X_train, y_train, tscv, scoring, random_state, n_iter=100):

    def logreg_objective(trial):
        logreg_params = {
            "C": trial.suggest_float("C", 1e-5, 100, log=True),
            "penalty": trial.suggest_categorical("penalty", ["l1", "l2"]),
            "solver": trial.suggest_categorical("solver", ["liblinear", "saga"]),
        }

        logreg = LogisticRegression(random_state=random_state, max_iter=2000, **logreg_params)
        scores = cross_val_score(logreg, X_train, y_train, cv=tscv, scoring=scoring)
        return scores.mean()
    
    study = optuna.create_study(direction="maximize")
    study.optimize(logreg_objective, n_trials=n_iter)
    return study.best_params, study.best_value


# Tune model
logreg_params_optuna, logreg_score_optuna = tune_logreg(X_train_scaled, y_train, TSCV, "accuracy", RANDOM_SEED)

# Display results
print("Logistic Regression Optuna Tuned")
print("Best params:", logreg_params_optuna)
print("Best score:", logreg_score_optuna)

[I 2024-09-23 06:54:49,930] A new study created in memory with name: no-name-911e8d41-efa7-4e8c-b3aa-ec00af71ab72
[I 2024-09-23 06:54:50,898] Trial 0 finished with value: 0.6577020014825796 and parameters: {'C': 3.954704296203501, 'penalty': 'l1', 'solver': 'saga'}. Best is trial 0 with value: 0.6577020014825796.
[I 2024-09-23 06:54:52,447] Trial 1 finished with value: 0.6577316530763528 and parameters: {'C': 22.486600865887873, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 1 with value: 0.6577316530763528.
[I 2024-09-23 06:54:53,917] Trial 2 finished with value: 0.6617049666419571 and parameters: {'C': 0.24204152499746576, 'penalty': 'l1', 'solver': 'liblinear'}. Best is trial 2 with value: 0.6617049666419571.
[I 2024-09-23 06:54:54,796] Trial 3 finished with value: 0.6607857672349889 and parameters: {'C': 0.3114511833445422, 'penalty': 'l1', 'solver': 'saga'}. Best is trial 2 with value: 0.6617049666419571.
[I 2024-09-23 06:54:55,447] Trial 4 finished with value: 0.658680504

Logistic Regression Optuna Tuned
Best params: {'C': 0.0015237843796876871, 'penalty': 'l2', 'solver': 'liblinear'}
Best score: 0.665114899925871


In [22]:
logreg_tuned_optuna = LogisticRegression(random_state=RANDOM_SEED, max_iter=2000, **logreg_params_optuna)

print("Logistic Regression Optuna Tuned")
evaluate_model(logreg_tuned_optuna, X_train_scaled, y_train)

Logistic Regression Optuna Tuned
accuracy: 0.665114899925871
precision: 0.6693956233694612
recall: 0.6519969304774957
f1: 0.660545384045801
roc_auc: 0.7316242673044859


## Model Stacking

In [None]:
# Level 1 models that train on the train set
base_models = [
    ("rf", rf_tuned_optuna),
    ("xgb", xgb_tuned_optuna)
]

# Trains on the predictions of the base models
meta_model = LogisticRegression(random_state=RANDOM_SEED, max_iter=2000)
# meta_model = RandomForestClassifier(random_state=RANDOM_SEED, n_jobs=-1, **rf_params_optuna)
# meta_model = XGBClassifier(random_state=RANDOM_SEED, **xgb_params_optuna)

# Leverages the predictions of the base models to improve predictive performance
stacking_model = StackingClassifier(estimators=base_models, 
                                    final_estimator=meta_model,
                                    n_jobs=-1)

print("Stacking Classifier")
evaluate_model(stacking_model, X_train, y_train)

# Saving Tuned Models

In [23]:
# Random Forest Optuna
rf_tuned_optuna.fit(X_train, y_train)
joblib.dump(rf_tuned_optuna, "models/rf_optuna.pkl")
print("Model trained and saved")

Model trained and saved


In [24]:
# XGBoost Optuna
xgb_tuned_optuna.fit(X_train, y_train)
joblib.dump(xgb_tuned_optuna, "models/xgb_optuna.pkl")
print("Model trained and saved")

Model trained and saved


In [25]:
# Logistic Regression Optuna
logreg_tuned_optuna.fit(X_train_scaled, y_train)
joblib.dump(logreg_tuned_optuna, "models/logreg_optuna.pkl")
print("Model trained and saved")

Model trained and saved


In [None]:
# Stacked Classifier
stacking_model.fit(X_train, y_train)
joblib.dump(stacking_model, "models/stacked_base_rf_xgb_meta_lr_v3.pkl")
print("Model trained and saved")