# Imports and Utilities

In [1]:
from typing import List, Tuple
import numpy as np
import pandas as pd
from IPython.display import Markdown
import optuna
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.model_selection import cross_val_score

COMPETITION_NAME = "march-machine-learning-mania-2023"
VERBOSE = False

pd.set_option("display.max_columns", None)

def show_df(df: pd.DataFrame, name: str = "DataFrame", verbose: bool = VERBOSE):
    """Render a quick overview of ``df`` in the notebook when ``verbose`` is truthy.

    Shows a title built from ``name`` followed by the head, the tail and the
    ``describe()`` summary of the frame. Does nothing when ``verbose`` is
    falsy. Relies on ``display`` being available as a notebook global.
    """
    if not verbose:
        return
    display(Markdown(f"## {name}"))
    # Pair each heading with the bound method producing its view; the method
    # is only invoked right before its result is displayed.
    sections = (
        ("### Head", df.head),
        ("### Tail", df.tail),
        ("### Description", df.describe),
    )
    for heading, build_view in sections:
        display(Markdown(heading))
        display(build_view())


def extract_data(filename: str, competition_name:str = COMPETITION_NAME) -> pd.DataFrame:
    """Load the men's and women's variants of a competition CSV as one frame.

    Args:
        filename: Base file name without the ``M``/``W`` prefix and without
            the ``.csv`` extension.
        competition_name: Directory name under ``/kaggle/input``.

    Returns:
        The concatenation of whichever of the two files exist (original row
        indices are kept, so index values may repeat).

    A missing file is tolerated and contributes ``None`` to the
    concatenation; if both files are missing ``pd.concat`` receives only
    ``None`` entries and raises.
    """
    candidate_paths = (
        f"/kaggle/input/{competition_name}/M{filename}.csv",
        f"/kaggle/input/{competition_name}/W{filename}.csv",
    )
    frames = []
    for path in candidate_paths:
        try:
            frame = pd.read_csv(path)
        except FileNotFoundError:
            frame = None
        frames.append(frame)
    combined = pd.concat(frames)
    show_df(combined, filename)
    return combined


def get_team_features(detailed_results: pd.DataFrame) -> pd.DataFrame:
    """Turn per-game detailed results into one feature row per (Season, TeamID).

    Works on a copy of ``detailed_results`` and runs it through, in order,
    ``clean_detailed_results``, ``transform_game_to_team`` and
    ``transform_team_results``.
    """
    cleaned = clean_detailed_results(detailed_results.copy())
    per_team_games = transform_game_to_team(cleaned)
    team_features = transform_team_results(per_team_games)
    show_df(team_features)
    return team_features


def get_seed_features(df_in: pd.DataFrame) -> pd.DataFrame:
    """Return tournament seeds after season 2002 with ``Seed`` as an integer.

    Args:
        df_in: Frame with at least ``Season`` and a string ``Seed`` column.
            It is not modified.

    Returns:
        A new frame restricted to rows with ``Season > 2002`` where every
        non-digit character has been removed from ``Seed`` and the remainder
        converted to ``int``.
    """
    # Select with .loc and take an explicit copy so the column assignment
    # below writes to an independent frame rather than to a filtered slice
    # (which is what triggers pandas' SettingWithCopyWarning).
    df = df_in.loc[df_in["Season"] > 2002].copy()
    df["Seed"] = df["Seed"].str.replace(r"\D+", "", regex=True).astype(int)
    show_df(df)
    return df


def get_ranking_features(df_in: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the latest rankings into one median row per (Season, TeamID).

    Keeps only rows whose ``RankingDayNum`` equals the largest
    ``RankingDayNum`` in the whole input, drops ``SystemName`` and
    ``RankingDayNum``, and takes the median of the remaining columns per
    (Season, TeamID). ``df_in`` is not modified.
    """
    # NOTE(review): the maximum is taken over the entire frame, not per
    # season, so a season whose last ranking day is earlier than the global
    # maximum contributes no rows -- confirm this is intended.
    latest_day = df_in["RankingDayNum"].max()
    latest = df_in[df_in["RankingDayNum"] == latest_day]
    latest = latest.drop(["SystemName", "RankingDayNum"], axis=1)
    ranking_features = (
        latest.groupby(["Season", "TeamID"]).agg("median").reset_index()
    )
    show_df(ranking_features)
    return ranking_features


def get_game_outcomes(df):
    """Expand every game row of ``df`` into the records produced by ``parse_row``.

    Returns a new DataFrame built from all records, in game order.
    """
    outcome_records = [
        record
        for game in df.to_records()
        for record in parse_row(game)
    ]
    return pd.DataFrame(outcome_records)

def parse_row(row):
    """Build the two mirrored outcome records for a single game.

    Args:
        row: Mapping-like object exposing ``'Season'``, ``'WTeamID'`` (winner)
            and ``'LTeamID'`` (loser).

    Returns:
        A list of two dicts. The first is keyed ``{season}_{small}_{big}``
        with ``Win`` telling whether the smaller team id won. The second is
        the mirrored view: the ids are swapped (so its ``LowID`` holds the
        larger id) and ``Win`` is negated.

    Raises:
        ValueError: If the two team ids are equal or cannot be ordered, since
            no low/high pairing exists in that case.
    """
    season = row['Season']
    winning_team_id = row['WTeamID']
    losing_team_id = row['LTeamID']
    if winning_team_id < losing_team_id:
        small_id, big_id = winning_team_id, losing_team_id
        outcome = True
    elif losing_team_id < winning_team_id:
        small_id, big_id = losing_team_id, winning_team_id
        outcome = False
    else:
        # Previously this case fell through and crashed with an
        # UnboundLocalError on the first use of small_id.
        raise ValueError(
            f"Cannot order team ids {winning_team_id!r} and {losing_team_id!r} "
            f"for season {season!r}"
        )
    return [
        {
            "ID": f"{season}_{small_id}_{big_id}",
            'Season': season,
            'LowID': small_id,
            'HighID': big_id,
            'Win': outcome,
        },
        {
            "ID": f"{season}_{big_id}_{small_id}",
            'Season': season,
            'LowID': big_id,
            'HighID': small_id,
            'Win': not outcome,
        },
    ]


def clean_detailed_results(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame without the ``WLoc`` and ``DayNum`` columns."""
    unused_columns = ["WLoc", "DayNum"]
    return df.drop(columns=unused_columns)


def transform_game_to_team(game_results: pd.DataFrame) -> pd.DataFrame:
    """Stack each game twice: once from the winner's and once from the loser's view.

    Columns are renamed through ``rename_columns`` for the ``"W"`` and then
    the ``"L"`` perspective, the two views are concatenated (winner rows
    first) and the ``TeamIDOpp`` column is removed.
    """
    per_team_views = [
        rename_columns(game_results, prefix) for prefix in ("W", "L")
    ]
    stacked = pd.concat(per_team_views)
    return stacked.drop(columns=["TeamIDOpp"])


def transform_team_results(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse per-game team rows into median stats per (Season, TeamID).

    After taking the per-group median of every column, six shooting ratios
    are appended, each computed as median makes divided by median attempts
    (field goals, three-pointers and free throws, for the team and for its
    opponents). ``Season`` and ``TeamID`` are returned as regular columns.
    """
    medians = df.groupby(["Season", "TeamID"]).median()
    # (new ratio column, made column, attempted column), in output order.
    ratio_specs = (
        ("FGP", "FGM", "FGA"),
        ("FGP3", "FGM3", "FGA3"),
        ("FTP", "FTM", "FTA"),
        ("FGPOpp", "FGMOpp", "FGAOpp"),
        ("FGP3Opp", "FGM3Opp", "FGA3Opp"),
        ("FTPOpp", "FTMOpp", "FTAOpp"),
    )
    for ratio_name, made, attempted in ratio_specs:
        medians[ratio_name] = medians[made] / medians[attempted]
    return medians.reset_index()
    
    
def rename_columns(df: pd.DataFrame, team_prefix: str) -> pd.DataFrame:
    """Return a copy of ``df`` whose column names went through ``rename_column``.

    ``team_prefix`` is forwarded unchanged to ``rename_column`` for every
    column; the input frame is left untouched.
    """
    renamed = df.copy()
    renamed.columns = [
        rename_column(original_name, team_prefix)
        for original_name in renamed.columns
    ]
    return renamed


def rename_column(column_name: str, team_prefix: str) -> str:
    """Rewrite a W/L-prefixed column name from one team's point of view.

    Args:
        column_name: Original column name.
        team_prefix: ``"W"`` or ``"L"`` -- the prefix marking the columns of
            the team whose perspective is taken.

    Returns:
        ``column_name`` without ``team_prefix`` if it starts with it; the
        name without the opposite prefix and with ``"Opp"`` appended if it
        starts with the opposite prefix; otherwise ``column_name`` unchanged.

    Raises:
        ValueError: If ``team_prefix`` is neither ``"W"`` nor ``"L"``.
    """
    if team_prefix == "W":
        opponent_prefix = "L"
    elif team_prefix == "L":
        opponent_prefix = "W"
    else:
        raise ValueError(f"team_prefix must be 'W' or 'L', got {team_prefix!r}")
    # Slice off exactly one prefix. str.lstrip() treats its argument as a set
    # of characters and would also eat repeated leading letters
    # (e.g. "WWins" -> "ins").
    if column_name.startswith(team_prefix):
        return column_name[len(team_prefix):]
    if column_name.startswith(opponent_prefix):
        return f"{column_name[len(opponent_prefix):]}Opp"
    return column_name


def split_winner_and_looser_columns(df: pd.DataFrame) -> Tuple[List[str], List[str]]:
    """Split the column names of ``df`` into a winner-side and a loser-side list.

    The first list holds every column that does not start with ``"L"``, the
    second every column that does not start with ``"W"``; columns with
    neither prefix therefore appear in both. Original column order is kept.
    """
    winner_side: List[str] = []
    loser_side: List[str] = []
    for column in df.columns:
        if not column.startswith("L"):
            winner_side.append(column)
        if not column.startswith("W"):
            loser_side.append(column)
    return winner_side, loser_side


def merge_features(
    season_features: pd.DataFrame,
    tournament_features: pd.DataFrame,
    seed_features: pd.DataFrame,
    ranking_features: pd.DataFrame,
) -> pd.DataFrame:
    """Join all per-team feature tables on (Season, TeamID).

    Season and tournament features are inner-joined, with overlapping column
    names suffixed ``Reg`` and ``Tou`` respectively; seeds are inner-joined
    next; rankings are left-joined last, so teams without a ranking row are
    kept with missing ranking values.
    """
    team_key = ["Season", "TeamID"]
    season_and_tournament = season_features.merge(
        tournament_features,
        how="inner",
        on=team_key,
        suffixes=("Reg", "Tou"),
    )
    with_seeds = season_and_tournament.merge(seed_features, how="inner", on=team_key)
    merged = with_seeds.merge(ranking_features, how="left", on=team_key)
    show_df(merged)
    return merged


def merge_outcomes_with_features(outcomes: pd.DataFrame, features: pd.DataFrame, how: str = "inner") -> pd.DataFrame:
    """Attach High-minus-Low feature differences to every matchup in ``outcomes``.

    ``features`` is joined twice on ``Season``: once against ``HighID`` and
    once against ``LowID``. For every feature column (anything other than
    ``Season``, ``TeamID`` and ``Gender``) a ``<name>Diff`` column holding
    the HighID team's value minus the LowID team's value replaces the two
    joined columns. The key columns are dropped and ``ID`` becomes the index.

    Args:
        outcomes: Matchups with ``ID``, ``Season``, ``LowID`` and ``HighID``.
        features: One row per (Season, TeamID).
        how: Join type used for both merges.
    """
    non_feature_columns = ("Season", "TeamID", "Gender")
    feature_names = [
        column for column in features.columns if column not in non_feature_columns
    ]
    with_high_team = outcomes.merge(
        features,
        how=how,
        left_on=["Season", "HighID"],
        right_on=["Season", "TeamID"],
    )
    # Second join: colliding columns from the first join get the "High"
    # suffix, the freshly joined ones get "Low".
    data = with_high_team.merge(
        features,
        how=how,
        left_on=["Season", "LowID"],
        right_on=["Season", "TeamID"],
        suffixes=("High", "Low"),
    )
    obsolete_columns = []
    for name in feature_names:
        high_column, low_column = f"{name}High", f"{name}Low"
        data[f"{name}Diff"] = data[high_column] - data[low_column]
        obsolete_columns.extend([high_column, low_column])
    obsolete_columns.extend(["Season", "HighID", "LowID","TeamIDHigh","TeamIDLow"])
    data = data.drop(columns=obsolete_columns).set_index("ID")
    show_df(data)
    return data


def get_submission_outcomes() -> pd.DataFrame:
    """Load the sample submission and split its ``ID`` into integer key columns.

    Returns a frame with ``ID`` plus integer ``Season``, ``LowID`` and
    ``HighID`` columns parsed from the underscore-separated ``ID``; the
    ``Pred`` column of the sample file is dropped.
    """
    submission = pd.read_csv(f"/kaggle/input/{COMPETITION_NAME}/SampleSubmission.csv")
    df = submission.drop("Pred", axis=1)
    key_columns = ["Season", "LowID", "HighID"]
    id_parts = df["ID"].str.split("_", expand=True)
    df[key_columns] = id_parts
    df[key_columns] = df[key_columns].astype(int)
    show_df(df)
    return df


def objective(trial: optuna.Trial, X_train, y_train):
    """Optuna objective: mean 5-fold negative Brier score of a LightGBM classifier.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.
        X_train: Training features passed to ``cross_val_score``.
        y_train: Training labels passed to ``cross_val_score``.

    Returns:
        The mean of the ``neg_brier_score`` values over the 5 folds.
    """
    fixed_params = {
        "objective": "binary",
        "metric": "l2",
        "verbosity": -1,
        "boosting_type": "gbdt",
    }
    # Sampled in a fixed order, one suggestion per tunable parameter.
    sampled_params = {
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    classifier = lgb.LGBMClassifier(**fixed_params, **sampled_params)
    fold_scores = cross_val_score(
        classifier, X_train, y_train, scoring="neg_brier_score", cv=5
    )
    return np.mean(fold_scores)


def run_study(X_train, y_train, n_trials: int = 100):
    """Run an Optuna hyper-parameter search that maximizes ``objective``.

    Args:
        X_train: Training features forwarded to ``objective``.
        y_train: Training labels forwarded to ``objective``.
        n_trials: Number of trials to run. Defaults to 100, the value that
            was previously hard-coded.

    Returns:
        The finished ``optuna`` study.
    """
    study = optuna.create_study(direction="maximize")
    study.optimize(
        lambda trial: objective(trial, X_train, y_train),
        n_trials=n_trials,
    )
    return study


def train_optimized_model(study, X, y):
    """Fit a LightGBM classifier on ``X``/``y`` using the study's best parameters.

    The fixed settings (binary objective, ``l2`` metric, silent output, GBDT
    boosting) are combined with ``study.best_params``; the fitted model is
    returned.
    """
    tuned_params = study.best_params
    classifier = lgb.LGBMClassifier(
        objective="binary",
        metric="l2",
        verbosity=-1,
        boosting_type="gbdt",
        **tuned_params,
    )
    return classifier.fit(X, y)

# Load Data

In [2]:
# Each call reads the "M"- and "W"-prefixed CSV of the given base name and
# concatenates whichever of the two files exists into a single DataFrame.
season_results = extract_data("RegularSeasonDetailedResults")
tournament_results = extract_data("NCAATourneyDetailedResults")
seeds = extract_data("NCAATourneySeeds")
rankings = extract_data("MasseyOrdinals")

# Transform Data

In [3]:
# Median per-team statistics for each regular season.
season_team_features = get_team_features(season_results)

# Shift tournament results forward by one season so that, when merged on
# (Season, TeamID), a team's tournament statistics line up with the FOLLOWING
# season's regular-season statistics.
# NOTE: this mutates tournament_results in place -- re-running this cell
# without reloading the data shifts the seasons again.
tournament_results["Season"] += 1
tournament_team_features = get_team_features(tournament_results)

# Integer tournament seeds (seasons after 2002 only).
seed_features = get_seed_features(seeds)

# Median ranking per (Season, TeamID) on the latest ranking day in the data.
ranking_features = get_ranking_features(rankings)

## Merge Features

In [4]:
# One row per (Season, TeamID): season + tournament + seed features
# (inner joins) with rankings attached by a left join.
features = merge_features(season_team_features, tournament_team_features, seed_features, ranking_features)

## Build Datasets

In [5]:
from sklearn.model_selection import train_test_split

# Split at the game level BEFORE expanding games into mirrored outcome
# records, so both records of a game always land in the same split.
data = extract_data("NCAATourneyCompactResults")
data_train, data_valid = train_test_split(data, random_state=0, test_size=0.1)

# Two mirrored outcome records per game, for the full set and for each split.
outcomes = get_game_outcomes(data)
outcomes_train = get_game_outcomes(data_train)
outcomes_valid = get_game_outcomes(data_valid)
# Matchups to predict, parsed from the sample submission IDs.
outcomes_submission = get_submission_outcomes()

In [6]:
# Attach High-minus-Low feature differences to every outcome record
# (default inner join: matchups without features for both teams are dropped).
features_train = merge_outcomes_with_features(outcomes_train, features)
features_valid = merge_outcomes_with_features(outcomes_valid, features)
features_full = merge_outcomes_with_features(outcomes, features)

In [7]:
# Separate the "Win" target from the feature-difference columns.
y_train = features_train["Win"]
X_train = features_train.drop("Win", axis=1)
y_valid = features_valid["Win"]
X_valid = features_valid.drop("Win", axis=1)
y = features_full["Win"]
X = features_full.drop("Win", axis=1)
# Left join keeps every submission matchup; feature differences that are
# missing after the join are filled with 0.
X_submission = merge_outcomes_with_features(outcomes_submission, features, how="left").fillna(0)

# Train a Model


In [8]:
# Hyper-parameter search on the training split; the bare expression on the
# last line makes the notebook display the best parameters found.
study = run_study(X_train, y_train)
study.best_params

[32m[I 2023-03-15 16:57:17,187][0m A new study created in memory with name: no-name-b4da9861-ad63-4f00-a892-15af649f63a7[0m




[32m[I 2023-03-15 16:57:17,764][0m Trial 0 finished with value: -0.22436824435217093 and parameters: {'lambda_l1': 0.001586910580501821, 'lambda_l2': 7.445561432009636e-05, 'num_leaves': 152, 'feature_fraction': 0.5291107145805203, 'bagging_fraction': 0.5713758130281501, 'bagging_freq': 4, 'min_child_samples': 33}. Best is trial 0 with value: -0.22436824435217093.[0m




[32m[I 2023-03-15 16:57:19,089][0m Trial 1 finished with value: -0.233143770524803 and parameters: {'lambda_l1': 0.0041862008615957645, 'lambda_l2': 8.000402038039354e-06, 'num_leaves': 215, 'feature_fraction': 0.4178189501652701, 'bagging_fraction': 0.7727452707355085, 'bagging_freq': 1, 'min_child_samples': 11}. Best is trial 0 with value: -0.22436824435217093.[0m




[32m[I 2023-03-15 16:57:20,431][0m Trial 2 finished with value: -0.21893480195281753 and parameters: {'lambda_l1': 0.0038646202101359204, 'lambda_l2': 2.2841281078604228, 'num_leaves': 73, 'feature_fraction': 0.7078331308744047, 'bagging_fraction': 0.8837785074467883, 'bagging_freq': 7, 'min_child_samples': 13}. Best is trial 2 with value: -0.21893480195281753.[0m




[32m[I 2023-03-15 16:57:21,218][0m Trial 3 finished with value: -0.21697488016584904 and parameters: {'lambda_l1': 1.9479250165215006e-08, 'lambda_l2': 2.8111098952197096, 'num_leaves': 158, 'feature_fraction': 0.7472595473361368, 'bagging_fraction': 0.6186495197880092, 'bagging_freq': 7, 'min_child_samples': 24}. Best is trial 3 with value: -0.21697488016584904.[0m




[32m[I 2023-03-15 16:57:23,369][0m Trial 4 finished with value: -0.23662500418453294 and parameters: {'lambda_l1': 1.0204621027020979e-07, 'lambda_l2': 0.00024913057746021525, 'num_leaves': 50, 'feature_fraction': 0.9148935664984016, 'bagging_fraction': 0.9879404299421728, 'bagging_freq': 6, 'min_child_samples': 19}. Best is trial 3 with value: -0.21697488016584904.[0m




[32m[I 2023-03-15 16:57:26,646][0m Trial 5 finished with value: -0.24193152988444422 and parameters: {'lambda_l1': 1.1999748675805883e-07, 'lambda_l2': 1.4241488996652772e-05, 'num_leaves': 76, 'feature_fraction': 0.6739772445692112, 'bagging_fraction': 0.9252792339847968, 'bagging_freq': 6, 'min_child_samples': 9}. Best is trial 3 with value: -0.21697488016584904.[0m




[32m[I 2023-03-15 16:57:27,221][0m Trial 6 finished with value: -0.21512519329141772 and parameters: {'lambda_l1': 1.4783172473330206e-05, 'lambda_l2': 2.811927915407899e-08, 'num_leaves': 52, 'feature_fraction': 0.8493777184311131, 'bagging_fraction': 0.7397514232368314, 'bagging_freq': 3, 'min_child_samples': 63}. Best is trial 6 with value: -0.21512519329141772.[0m




[32m[I 2023-03-15 16:57:27,951][0m Trial 7 finished with value: -0.21480871589311143 and parameters: {'lambda_l1': 0.0003627605838695089, 'lambda_l2': 0.0013183772106677267, 'num_leaves': 60, 'feature_fraction': 0.9424717749687839, 'bagging_fraction': 0.8773143153178117, 'bagging_freq': 6, 'min_child_samples': 62}. Best is trial 7 with value: -0.21480871589311143.[0m




[32m[I 2023-03-15 16:57:28,412][0m Trial 8 finished with value: -0.21009023184450348 and parameters: {'lambda_l1': 0.18029233361587282, 'lambda_l2': 9.096995586796854e-07, 'num_leaves': 46, 'feature_fraction': 0.5238304890660789, 'bagging_fraction': 0.91045841079851, 'bagging_freq': 3, 'min_child_samples': 88}. Best is trial 8 with value: -0.21009023184450348.[0m




[32m[I 2023-03-15 16:57:28,792][0m Trial 9 finished with value: -0.1993835984885131 and parameters: {'lambda_l1': 0.18353900403410023, 'lambda_l2': 8.469922453216435, 'num_leaves': 198, 'feature_fraction': 0.827207724345119, 'bagging_fraction': 0.5944585283240563, 'bagging_freq': 1, 'min_child_samples': 98}. Best is trial 9 with value: -0.1993835984885131.[0m




[32m[I 2023-03-15 16:57:29,202][0m Trial 10 finished with value: -0.19762157781207929 and parameters: {'lambda_l1': 3.939304567889168, 'lambda_l2': 0.04339000435986924, 'num_leaves': 253, 'feature_fraction': 0.976584517340128, 'bagging_fraction': 0.5077749921631407, 'bagging_freq': 1, 'min_child_samples': 94}. Best is trial 10 with value: -0.19762157781207929.[0m




[32m[I 2023-03-15 16:57:29,560][0m Trial 11 finished with value: -0.19203622193457012 and parameters: {'lambda_l1': 6.23319493857854, 'lambda_l2': 0.08423262379856582, 'num_leaves': 256, 'feature_fraction': 0.9901164024747781, 'bagging_fraction': 0.4378100180820294, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 11 with value: -0.19203622193457012.[0m




[32m[I 2023-03-15 16:57:29,902][0m Trial 12 finished with value: -0.1934519919849073 and parameters: {'lambda_l1': 7.322262852210654, 'lambda_l2': 0.028417365290758376, 'num_leaves': 252, 'feature_fraction': 0.9963540755334042, 'bagging_fraction': 0.4041168731689514, 'bagging_freq': 2, 'min_child_samples': 81}. Best is trial 11 with value: -0.19203622193457012.[0m




[32m[I 2023-03-15 16:57:30,298][0m Trial 13 finished with value: -0.19837136489269347 and parameters: {'lambda_l1': 3.8376075366783655, 'lambda_l2': 0.034660984682832, 'num_leaves': 252, 'feature_fraction': 0.9982128685871977, 'bagging_fraction': 0.42601945619480713, 'bagging_freq': 2, 'min_child_samples': 75}. Best is trial 11 with value: -0.19203622193457012.[0m




[32m[I 2023-03-15 16:57:30,668][0m Trial 14 finished with value: -0.19744222559437472 and parameters: {'lambda_l1': 3.4257183262696445, 'lambda_l2': 0.017100679467324936, 'num_leaves': 203, 'feature_fraction': 0.9971512444145848, 'bagging_fraction': 0.4008681561656816, 'bagging_freq': 2, 'min_child_samples': 78}. Best is trial 11 with value: -0.19203622193457012.[0m




[32m[I 2023-03-15 16:57:31,231][0m Trial 15 finished with value: -0.211396333657719 and parameters: {'lambda_l1': 0.2751720340927538, 'lambda_l2': 0.230478528951028, 'num_leaves': 113, 'feature_fraction': 0.8887740541686776, 'bagging_fraction': 0.4825908599997769, 'bagging_freq': 2, 'min_child_samples': 42}. Best is trial 11 with value: -0.19203622193457012.[0m




[32m[I 2023-03-15 16:57:31,566][0m Trial 16 finished with value: -0.19131107681185258 and parameters: {'lambda_l1': 9.703862679584928, 'lambda_l2': 0.002448175046292767, 'num_leaves': 220, 'feature_fraction': 0.9145208963694682, 'bagging_fraction': 0.461104318804218, 'bagging_freq': 3, 'min_child_samples': 87}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:32,030][0m Trial 17 finished with value: -0.20902985413254505 and parameters: {'lambda_l1': 0.02418632053989681, 'lambda_l2': 0.00299460034185782, 'num_leaves': 7, 'feature_fraction': 0.8981600570533556, 'bagging_fraction': 0.4979987153051481, 'bagging_freq': 4, 'min_child_samples': 63}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:32,457][0m Trial 18 finished with value: -0.20666490785822306 and parameters: {'lambda_l1': 0.5256764559949183, 'lambda_l2': 0.31186142841352327, 'num_leaves': 219, 'feature_fraction': 0.7898268880235086, 'bagging_fraction': 0.6325329001031614, 'bagging_freq': 3, 'min_child_samples': 99}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:32,862][0m Trial 19 finished with value: -0.20425581453873748 and parameters: {'lambda_l1': 0.025841159273161086, 'lambda_l2': 0.0020183005389955173, 'num_leaves': 175, 'feature_fraction': 0.864505375621656, 'bagging_fraction': 0.5305823202512632, 'bagging_freq': 5, 'min_child_samples': 87}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:33,706][0m Trial 20 finished with value: -0.19476731069436856 and parameters: {'lambda_l1': 9.754568596656897, 'lambda_l2': 0.00420809704030704, 'num_leaves': 119, 'feature_fraction': 0.9401113261401259, 'bagging_fraction': 0.6702095992911739, 'bagging_freq': 5, 'min_child_samples': 47}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:34,062][0m Trial 21 finished with value: -0.1930094954073378 and parameters: {'lambda_l1': 9.451534065309923, 'lambda_l2': 0.17871424907463557, 'num_leaves': 236, 'feature_fraction': 0.9432436004393989, 'bagging_fraction': 0.45527364361770434, 'bagging_freq': 2, 'min_child_samples': 81}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:34,441][0m Trial 22 finished with value: -0.20513340276219766 and parameters: {'lambda_l1': 0.5197359543051472, 'lambda_l2': 0.41318573846165785, 'num_leaves': 224, 'feature_fraction': 0.9281755291394546, 'bagging_fraction': 0.466130030797204, 'bagging_freq': 3, 'min_child_samples': 89}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:34,901][0m Trial 23 finished with value: -0.20865753724411232 and parameters: {'lambda_l1': 1.0329894984468233, 'lambda_l2': 0.1404527312822685, 'num_leaves': 236, 'feature_fraction': 0.8267852780890455, 'bagging_fraction': 0.5462061299704217, 'bagging_freq': 1, 'min_child_samples': 70}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:35,316][0m Trial 24 finished with value: -0.19886380007925167 and parameters: {'lambda_l1': 1.4865348658993591, 'lambda_l2': 0.009537392737943368, 'num_leaves': 180, 'feature_fraction': 0.9311277553021436, 'bagging_fraction': 0.4812839413618554, 'bagging_freq': 2, 'min_child_samples': 86}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:35,656][0m Trial 25 finished with value: -0.19340521082007509 and parameters: {'lambda_l1': 8.923466787666412, 'lambda_l2': 0.0008959668103849126, 'num_leaves': 234, 'feature_fraction': 0.8752561609094802, 'bagging_fraction': 0.4422749787660176, 'bagging_freq': 3, 'min_child_samples': 71}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:36,038][0m Trial 26 finished with value: -0.20139860825652015 and parameters: {'lambda_l1': 0.0881953543056517, 'lambda_l2': 0.08199007377568482, 'num_leaves': 184, 'feature_fraction': 0.9500061484042984, 'bagging_fraction': 0.45296143630004787, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:36,564][0m Trial 27 finished with value: -0.2078513413137831 and parameters: {'lambda_l1': 0.9697225597522064, 'lambda_l2': 0.008329856698765022, 'num_leaves': 145, 'feature_fraction': 0.9615887481090027, 'bagging_fraction': 0.5235073660605859, 'bagging_freq': 2, 'min_child_samples': 55}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:36,972][0m Trial 28 finished with value: -0.20182917548063997 and parameters: {'lambda_l1': 1.6111685795935373, 'lambda_l2': 0.9554122918827209, 'num_leaves': 199, 'feature_fraction': 0.8945975127672634, 'bagging_fraction': 0.5652186687300986, 'bagging_freq': 4, 'min_child_samples': 93}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:37,419][0m Trial 29 finished with value: -0.2089618213235705 and parameters: {'lambda_l1': 0.05324516791019398, 'lambda_l2': 0.00033700180691799403, 'num_leaves': 235, 'feature_fraction': 0.8047557870906706, 'bagging_fraction': 0.5715179073619636, 'bagging_freq': 3, 'min_child_samples': 79}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:37,814][0m Trial 30 finished with value: -0.1952216245775799 and parameters: {'lambda_l1': 9.417797296162114, 'lambda_l2': 0.09437864062297259, 'num_leaves': 152, 'feature_fraction': 0.9580999263015163, 'bagging_fraction': 0.4560594600794548, 'bagging_freq': 4, 'min_child_samples': 33}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:38,147][0m Trial 31 finished with value: -0.19209825782886517 and parameters: {'lambda_l1': 9.111136988940782, 'lambda_l2': 0.00039600389408003345, 'num_leaves': 235, 'feature_fraction': 0.8590395851910395, 'bagging_fraction': 0.4380428341115302, 'bagging_freq': 3, 'min_child_samples': 69}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:38,514][0m Trial 32 finished with value: -0.20209090748801045 and parameters: {'lambda_l1': 1.4904877754046673, 'lambda_l2': 0.006965238103350982, 'num_leaves': 214, 'feature_fraction': 0.8983937396142329, 'bagging_fraction': 0.4301937369016481, 'bagging_freq': 2, 'min_child_samples': 82}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:38,963][0m Trial 33 finished with value: -0.2073223420872627 and parameters: {'lambda_l1': 0.3588847400083465, 'lambda_l2': 0.0006028243002643421, 'num_leaves': 239, 'feature_fraction': 0.8624026693763328, 'bagging_fraction': 0.506203806581514, 'bagging_freq': 1, 'min_child_samples': 69}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:39,349][0m Trial 34 finished with value: -0.2016380645050492 and parameters: {'lambda_l1': 1.792429975142707, 'lambda_l2': 0.00011254269480505973, 'num_leaves': 220, 'feature_fraction': 0.9706682331815931, 'bagging_fraction': 0.4734463909187791, 'bagging_freq': 3, 'min_child_samples': 92}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:39,781][0m Trial 35 finished with value: -0.20407754092370797 and parameters: {'lambda_l1': 2.1267458248824, 'lambda_l2': 0.8841316993134207, 'num_leaves': 206, 'feature_fraction': 0.9362756921423633, 'bagging_fraction': 0.41348043060421663, 'bagging_freq': 2, 'min_child_samples': 57}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:40,273][0m Trial 36 finished with value: -0.20990242695281935 and parameters: {'lambda_l1': 0.004381806090601416, 'lambda_l2': 6.825937726424514e-05, 'num_leaves': 243, 'feature_fraction': 0.9120151254431901, 'bagging_fraction': 0.5337076653582257, 'bagging_freq': 4, 'min_child_samples': 84}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:40,659][0m Trial 37 finished with value: -0.19816731653452752 and parameters: {'lambda_l1': 4.213364360828258, 'lambda_l2': 0.030080492728356483, 'num_leaves': 188, 'feature_fraction': 0.7805288122249586, 'bagging_fraction': 0.4321458838105015, 'bagging_freq': 1, 'min_child_samples': 75}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:41,021][0m Trial 38 finished with value: -0.20270507476716415 and parameters: {'lambda_l1': 0.6275379945432598, 'lambda_l2': 0.0023904395491504433, 'num_leaves': 165, 'feature_fraction': 0.8458077960098196, 'bagging_fraction': 0.4826293639166706, 'bagging_freq': 4, 'min_child_samples': 92}. Best is trial 16 with value: -0.19131107681185258.[0m




[32m[I 2023-03-15 16:57:41,350][0m Trial 39 finished with value: -0.1907555870778161 and parameters: {'lambda_l1': 9.548516056151458, 'lambda_l2': 0.010006014143430782, 'num_leaves': 256, 'feature_fraction': 0.7433310061416047, 'bagging_fraction': 0.45082094637281567, 'bagging_freq': 3, 'min_child_samples': 67}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:41,779][0m Trial 40 finished with value: -0.21188784870758903 and parameters: {'lambda_l1': 0.10704740298901533, 'lambda_l2': 0.0007485853916447481, 'num_leaves': 222, 'feature_fraction': 0.7460367439822951, 'bagging_fraction': 0.4055850950801338, 'bagging_freq': 5, 'min_child_samples': 49}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:42,109][0m Trial 41 finished with value: -0.19182192756353164 and parameters: {'lambda_l1': 9.91759416517185, 'lambda_l2': 0.011033346147605549, 'num_leaves': 256, 'feature_fraction': 0.6681960187376804, 'bagging_fraction': 0.45540282765467954, 'bagging_freq': 3, 'min_child_samples': 67}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:42,499][0m Trial 42 finished with value: -0.20227047453845876 and parameters: {'lambda_l1': 3.4083859228000404, 'lambda_l2': 0.012106998512777169, 'num_leaves': 255, 'feature_fraction': 0.6591538851376385, 'bagging_fraction': 0.5026628190526539, 'bagging_freq': 3, 'min_child_samples': 66}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:42,904][0m Trial 43 finished with value: -0.2083802778625184 and parameters: {'lambda_l1': 0.5583259794858384, 'lambda_l2': 0.0036184762032264854, 'num_leaves': 246, 'feature_fraction': 0.6565611725508469, 'bagging_fraction': 0.4421977825301217, 'bagging_freq': 3, 'min_child_samples': 58}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:43,290][0m Trial 44 finished with value: -0.2002895415312964 and parameters: {'lambda_l1': 3.8805248537229606, 'lambda_l2': 0.00142973094852007, 'num_leaves': 228, 'feature_fraction': 0.6926755586991038, 'bagging_fraction': 0.4309666487049852, 'bagging_freq': 3, 'min_child_samples': 61}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:43,665][0m Trial 45 finished with value: -0.20949489831738957 and parameters: {'lambda_l1': 0.2581587182427255, 'lambda_l2': 0.04735589442410063, 'num_leaves': 256, 'feature_fraction': 0.6125915721057091, 'bagging_fraction': 0.47289753558716036, 'bagging_freq': 4, 'min_child_samples': 74}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:44,063][0m Trial 46 finished with value: -0.2028830339221786 and parameters: {'lambda_l1': 2.0415768704112445, 'lambda_l2': 0.01521493595498666, 'num_leaves': 90, 'feature_fraction': 0.7416457279824336, 'bagging_fraction': 0.5082013330761542, 'bagging_freq': 5, 'min_child_samples': 67}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:44,837][0m Trial 47 finished with value: -0.19857361152158465 and parameters: {'lambda_l1': 5.0329956160976765, 'lambda_l2': 0.0002597525951838057, 'num_leaves': 210, 'feature_fraction': 0.7206590381900313, 'bagging_fraction': 0.5559652228054841, 'bagging_freq': 3, 'min_child_samples': 96}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:45,194][0m Trial 48 finished with value: -0.19368840722946384 and parameters: {'lambda_l1': 9.356082086225596, 'lambda_l2': 0.005491300781115999, 'num_leaves': 246, 'feature_fraction': 0.820283417232682, 'bagging_fraction': 0.4015607437322786, 'bagging_freq': 6, 'min_child_samples': 51}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:46,250][0m Trial 49 finished with value: -0.23126175905589935 and parameters: {'lambda_l1': 0.7261569143765347, 'lambda_l2': 0.0208724566511487, 'num_leaves': 194, 'feature_fraction': 0.7648610966615423, 'bagging_fraction': 0.5896264764485535, 'bagging_freq': 2, 'min_child_samples': 14}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:46,652][0m Trial 50 finished with value: -0.2002786081859716 and parameters: {'lambda_l1': 2.9687746810969795, 'lambda_l2': 0.060432955262728016, 'num_leaves': 228, 'feature_fraction': 0.8396003071933273, 'bagging_fraction': 0.5215749557139573, 'bagging_freq': 7, 'min_child_samples': 77}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:47,012][0m Trial 51 finished with value: -0.19302123704327873 and parameters: {'lambda_l1': 8.580986307116083, 'lambda_l2': 0.13194106217837365, 'num_leaves': 241, 'feature_fraction': 0.9758789358216442, 'bagging_fraction': 0.4601147045533004, 'bagging_freq': 2, 'min_child_samples': 82}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:47,411][0m Trial 52 finished with value: -0.2008688909023027 and parameters: {'lambda_l1': 4.155673957510298, 'lambda_l2': 0.02487395295926857, 'num_leaves': 255, 'feature_fraction': 0.9071085833350554, 'bagging_fraction': 0.4467291751773264, 'bagging_freq': 3, 'min_child_samples': 73}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:47,800][0m Trial 53 finished with value: -0.2015622010604831 and parameters: {'lambda_l1': 1.1119911366736435, 'lambda_l2': 0.001822204462016769, 'num_leaves': 233, 'feature_fraction': 0.8631180536410692, 'bagging_fraction': 0.49165750483207477, 'bagging_freq': 2, 'min_child_samples': 90}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:48,159][0m Trial 54 finished with value: -0.1912362994832386 and parameters: {'lambda_l1': 9.9065570809858, 'lambda_l2': 0.06517302440539165, 'num_leaves': 214, 'feature_fraction': 0.9155329084937504, 'bagging_fraction': 0.42157056463847414, 'bagging_freq': 1, 'min_child_samples': 66}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:48,595][0m Trial 55 finished with value: -0.20634500398039343 and parameters: {'lambda_l1': 0.29910712310680326, 'lambda_l2': 0.006154265389519576, 'num_leaves': 215, 'feature_fraction': 0.8849961529658691, 'bagging_fraction': 0.42640632686782, 'bagging_freq': 1, 'min_child_samples': 65}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:49,103][0m Trial 56 finished with value: -0.2100192463055241 and parameters: {'lambda_l1': 2.3837679243410843, 'lambda_l2': 0.05078933457531394, 'num_leaves': 246, 'feature_fraction': 0.9190457698504915, 'bagging_fraction': 0.40010717505382204, 'bagging_freq': 1, 'min_child_samples': 39}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:49,555][0m Trial 57 finished with value: -0.20766709015974705 and parameters: {'lambda_l1': 0.8720814969805655, 'lambda_l2': 0.012717773476669199, 'num_leaves': 36, 'feature_fraction': 0.9808673646565168, 'bagging_fraction': 0.42349562632220983, 'bagging_freq': 1, 'min_child_samples': 60}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:49,937][0m Trial 58 finished with value: -0.20176195198148733 and parameters: {'lambda_l1': 4.809075225860452, 'lambda_l2': 0.0033559327509007243, 'num_leaves': 169, 'feature_fraction': 0.8007832084930349, 'bagging_fraction': 0.4653312882631791, 'bagging_freq': 3, 'min_child_samples': 64}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:50,432][0m Trial 59 finished with value: -0.21300503163026896 and parameters: {'lambda_l1': 0.19190136667076207, 'lambda_l2': 0.4619301480490317, 'num_leaves': 136, 'feature_fraction': 0.8759214861548351, 'bagging_fraction': 0.49112051097099774, 'bagging_freq': 4, 'min_child_samples': 54}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:50,885][0m Trial 60 finished with value: -0.19955504776806693 and parameters: {'lambda_l1': 5.684150708357394, 'lambda_l2': 0.0011904554452495928, 'num_leaves': 228, 'feature_fraction': 0.9212018410676198, 'bagging_fraction': 0.4409642649665528, 'bagging_freq': 2, 'min_child_samples': 46}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:51,281][0m Trial 61 finished with value: -0.19390334349191876 and parameters: {'lambda_l1': 8.237829978476462, 'lambda_l2': 0.2033723053243313, 'num_leaves': 246, 'feature_fraction': 0.9507950173939611, 'bagging_fraction': 0.4618073715731206, 'bagging_freq': 2, 'min_child_samples': 70}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:51,686][0m Trial 62 finished with value: -0.19793766711843902 and parameters: {'lambda_l1': 2.8174557375529212, 'lambda_l2': 0.09736447843068852, 'num_leaves': 206, 'feature_fraction': 0.946359064031227, 'bagging_fraction': 0.4174261434979364, 'bagging_freq': 1, 'min_child_samples': 85}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:52,037][0m Trial 63 finished with value: -0.19350597748298443 and parameters: {'lambda_l1': 8.827309885923315, 'lambda_l2': 0.028131127188988397, 'num_leaves': 237, 'feature_fraction': 0.9942646358390899, 'bagging_fraction': 0.4533220811667805, 'bagging_freq': 3, 'min_child_samples': 80}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:52,454][0m Trial 64 finished with value: -0.2029368453209827 and parameters: {'lambda_l1': 1.0200927481072266, 'lambda_l2': 0.15511621220184207, 'num_leaves': 217, 'feature_fraction': 0.9052752917577199, 'bagging_fraction': 0.476156188169419, 'bagging_freq': 2, 'min_child_samples': 77}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:52,919][0m Trial 65 finished with value: -0.2046228137576715 and parameters: {'lambda_l1': 1.8747687969066316, 'lambda_l2': 0.00995828509303122, 'num_leaves': 231, 'feature_fraction': 0.9625127615927508, 'bagging_fraction': 0.5419531317190915, 'bagging_freq': 1, 'min_child_samples': 73}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:53,316][0m Trial 66 finished with value: -0.19830578689013742 and parameters: {'lambda_l1': 4.024057181602222, 'lambda_l2': 0.054064198588710644, 'num_leaves': 248, 'feature_fraction': 0.9314130875034513, 'bagging_fraction': 0.5084267696121846, 'bagging_freq': 2, 'min_child_samples': 90}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:53,677][0m Trial 67 finished with value: -0.19857183365451553 and parameters: {'lambda_l1': 0.46742669116267244, 'lambda_l2': 4.819110790781924, 'num_leaves': 256, 'feature_fraction': 0.8843878080187839, 'bagging_fraction': 0.4495111090771696, 'bagging_freq': 3, 'min_child_samples': 95}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:54,118][0m Trial 68 finished with value: -0.2063739037869492 and parameters: {'lambda_l1': 1.2938919869410697, 'lambda_l2': 0.004536821575373205, 'num_leaves': 194, 'feature_fraction': 0.9376733647117506, 'bagging_fraction': 0.47674860345615167, 'bagging_freq': 4, 'min_child_samples': 67}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:54,475][0m Trial 69 finished with value: -0.19209684680537972 and parameters: {'lambda_l1': 5.64796244488645, 'lambda_l2': 0.30164259648640696, 'num_leaves': 239, 'feature_fraction': 0.8459446062247403, 'bagging_fraction': 0.4156116542693878, 'bagging_freq': 1, 'min_child_samples': 97}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:55,178][0m Trial 70 finished with value: -0.19306329961058039 and parameters: {'lambda_l1': 4.98290291741559, 'lambda_l2': 1.2799343367039238, 'num_leaves': 241, 'feature_fraction': 0.8401437436672297, 'bagging_fraction': 0.42070799707149326, 'bagging_freq': 1, 'min_child_samples': 97}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:56,360][0m Trial 71 finished with value: -0.19125931211168082 and parameters: {'lambda_l1': 8.737644114790662, 'lambda_l2': 0.296644071064278, 'num_leaves': 223, 'feature_fraction': 0.865633799392988, 'bagging_fraction': 0.43642705832665823, 'bagging_freq': 1, 'min_child_samples': 99}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:56,714][0m Trial 72 finished with value: -0.19499664270048656 and parameters: {'lambda_l1': 2.3444074426410983, 'lambda_l2': 0.29843076153652126, 'num_leaves': 223, 'feature_fraction': 0.8568757805208489, 'bagging_fraction': 0.4151728542065856, 'bagging_freq': 1, 'min_child_samples': 98}. Best is trial 39 with value: -0.1907555870778161.[0m




[32m[I 2023-03-15 16:57:57,039][0m Trial 73 finished with value: -0.19055111682649534 and parameters: {'lambda_l1': 9.853135492917422, 'lambda_l2': 0.4613885100467715, 'num_leaves': 211, 'feature_fraction': 0.8170615885270316, 'bagging_fraction': 0.43751923755289196, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 73 with value: -0.19055111682649534.[0m




[32m[I 2023-03-15 16:57:57,393][0m Trial 74 finished with value: -0.19387826417883716 and parameters: {'lambda_l1': 5.620539459629538, 'lambda_l2': 0.5635270824175053, 'num_leaves': 209, 'feature_fraction': 0.8315537369255538, 'bagging_fraction': 0.43685894672485315, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 73 with value: -0.19055111682649534.[0m




[32m[I 2023-03-15 16:57:57,746][0m Trial 75 finished with value: -0.1965049886710731 and parameters: {'lambda_l1': 2.2158580843389415, 'lambda_l2': 0.24281150922093464, 'num_leaves': 201, 'feature_fraction': 0.8198059305192205, 'bagging_fraction': 0.40037931769585616, 'bagging_freq': 1, 'min_child_samples': 94}. Best is trial 73 with value: -0.19055111682649534.[0m




[32m[I 2023-03-15 16:57:58,146][0m Trial 76 finished with value: -0.19904807849778702 and parameters: {'lambda_l1': 1.2600427298533472, 'lambda_l2': 1.3945435006518454, 'num_leaves': 224, 'feature_fraction': 0.8740177067272918, 'bagging_fraction': 0.49018012370322334, 'bagging_freq': 1, 'min_child_samples': 98}. Best is trial 73 with value: -0.19055111682649534.[0m




[32m[I 2023-03-15 16:57:58,531][0m Trial 77 finished with value: -0.19604261693375188 and parameters: {'lambda_l1': 5.307929294644459, 'lambda_l2': 0.08488210526694212, 'num_leaves': 188, 'feature_fraction': 0.8999538071455899, 'bagging_fraction': 0.4641183557826677, 'bagging_freq': 1, 'min_child_samples': 89}. Best is trial 73 with value: -0.19055111682649534.[0m




[32m[I 2023-03-15 16:57:58,870][0m Trial 78 finished with value: -0.18953205159658548 and parameters: {'lambda_l1': 9.61606265850398, 'lambda_l2': 0.6042813621697077, 'num_leaves': 214, 'feature_fraction': 0.8068497288474045, 'bagging_fraction': 0.420100546408946, 'bagging_freq': 1, 'min_child_samples': 92}. Best is trial 78 with value: -0.18953205159658548.[0m




[32m[I 2023-03-15 16:57:59,863][0m Trial 79 finished with value: -0.2091725787773268 and parameters: {'lambda_l1': 3.019164927611933, 'lambda_l2': 2.289009906283259, 'num_leaves': 179, 'feature_fraction': 0.7852696164216965, 'bagging_fraction': 0.43306012668834637, 'bagging_freq': 1, 'min_child_samples': 5}. Best is trial 78 with value: -0.18953205159658548.[0m




[32m[I 2023-03-15 16:58:00,253][0m Trial 80 finished with value: -0.20125537359552528 and parameters: {'lambda_l1': 0.7204782117408691, 'lambda_l2': 0.6390149931931861, 'num_leaves': 212, 'feature_fraction': 0.8064685132147126, 'bagging_fraction': 0.4485277020385231, 'bagging_freq': 1, 'min_child_samples': 87}. Best is trial 78 with value: -0.18953205159658548.[0m




[32m[I 2023-03-15 16:58:00,591][0m Trial 81 finished with value: -0.18982564901278512 and parameters: {'lambda_l1': 9.103727317492393, 'lambda_l2': 0.3566701125607326, 'num_leaves': 251, 'feature_fraction': 0.8502365625459883, 'bagging_fraction': 0.4161320599647199, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 78 with value: -0.18953205159658548.[0m




[32m[I 2023-03-15 16:58:00,922][0m Trial 82 finished with value: -0.1895162376409987 and parameters: {'lambda_l1': 9.569143681164565, 'lambda_l2': 0.560626500259768, 'num_leaves': 248, 'feature_fraction': 0.7689195539694401, 'bagging_fraction': 0.42207731297991447, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 82 with value: -0.1895162376409987.[0m




[32m[I 2023-03-15 16:58:01,253][0m Trial 83 finished with value: -0.18939667939421484 and parameters: {'lambda_l1': 8.772994195299553, 'lambda_l2': 0.49236268943892314, 'num_leaves': 248, 'feature_fraction': 0.7668271563501782, 'bagging_fraction': 0.4164242117438031, 'bagging_freq': 1, 'min_child_samples': 92}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:01,610][0m Trial 84 finished with value: -0.19402793321306172 and parameters: {'lambda_l1': 3.019217170251332, 'lambda_l2': 0.45354631580381494, 'num_leaves': 250, 'feature_fraction': 0.7605632642463609, 'bagging_fraction': 0.418592787972812, 'bagging_freq': 1, 'min_child_samples': 96}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:01,955][0m Trial 85 finished with value: -0.1916918082716183 and parameters: {'lambda_l1': 6.114538953453517, 'lambda_l2': 0.8569176419315437, 'num_leaves': 228, 'feature_fraction': 0.7711457590475095, 'bagging_fraction': 0.41331592221455243, 'bagging_freq': 1, 'min_child_samples': 92}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:02,317][0m Trial 86 finished with value: -0.1954871371842962 and parameters: {'lambda_l1': 1.6864967223211993, 'lambda_l2': 1.5593108488468785, 'num_leaves': 219, 'feature_fraction': 0.8133072001061202, 'bagging_fraction': 0.4300687392438679, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:02,692][0m Trial 87 finished with value: -0.19590442793939483 and parameters: {'lambda_l1': 3.1329665571578236, 'lambda_l2': 3.5205763062862223, 'num_leaves': 112, 'feature_fraction': 0.7972883228623463, 'bagging_fraction': 0.4689243612825433, 'bagging_freq': 1, 'min_child_samples': 93}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:03,021][0m Trial 88 finished with value: -0.19209809090373078 and parameters: {'lambda_l1': 9.444655446406742, 'lambda_l2': 0.15104254763900984, 'num_leaves': 199, 'feature_fraction': 0.7837742818522682, 'bagging_fraction': 0.44289667350676265, 'bagging_freq': 2, 'min_child_samples': 84}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:03,386][0m Trial 89 finished with value: -0.19836548216637162 and parameters: {'lambda_l1': 1.341690012836082, 'lambda_l2': 0.3617396318323925, 'num_leaves': 242, 'feature_fraction': 0.8282991522418448, 'bagging_fraction': 0.4019718922230214, 'bagging_freq': 1, 'min_child_samples': 91}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:03,757][0m Trial 90 finished with value: -0.1995506708419265 and parameters: {'lambda_l1': 0.45282182889098244, 'lambda_l2': 0.9229865610591786, 'num_leaves': 233, 'feature_fraction': 0.8161821806358068, 'bagging_fraction': 0.42790303838914756, 'bagging_freq': 1, 'min_child_samples': 95}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:04,096][0m Trial 91 finished with value: -0.19063022547601433 and parameters: {'lambda_l1': 5.91429455330934, 'lambda_l2': 0.640787541424157, 'num_leaves': 224, 'feature_fraction': 0.7745015349855007, 'bagging_fraction': 0.41041819232796306, 'bagging_freq': 1, 'min_child_samples': 93}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:04,443][0m Trial 92 finished with value: -0.19187433548184427 and parameters: {'lambda_l1': 6.36965670966335, 'lambda_l2': 0.22228641647935, 'num_leaves': 217, 'feature_fraction': 0.7732894010390129, 'bagging_fraction': 0.4154468345872786, 'bagging_freq': 1, 'min_child_samples': 87}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:04,798][0m Trial 93 finished with value: -0.1938354910259114 and parameters: {'lambda_l1': 5.972419630903758, 'lambda_l2': 0.6258119774791719, 'num_leaves': 251, 'feature_fraction': 0.7562172195669378, 'bagging_fraction': 0.44290682257761615, 'bagging_freq': 1, 'min_child_samples': 94}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:05,171][0m Trial 94 finished with value: -0.19936776170282755 and parameters: {'lambda_l1': 3.548483308670421, 'lambda_l2': 1.719380721530681, 'num_leaves': 208, 'feature_fraction': 0.7325503282104333, 'bagging_fraction': 0.48550879285846116, 'bagging_freq': 2, 'min_child_samples': 99}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:05,556][0m Trial 95 finished with value: -0.20017869711395292 and parameters: {'lambda_l1': 2.1754600513799742, 'lambda_l2': 0.363749529105231, 'num_leaves': 225, 'feature_fraction': 0.7985331536211674, 'bagging_fraction': 0.45900401780165384, 'bagging_freq': 1, 'min_child_samples': 89}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:05,890][0m Trial 96 finished with value: -0.1899806718032109 and parameters: {'lambda_l1': 9.195311845844927, 'lambda_l2': 0.10014295662338933, 'num_leaves': 232, 'feature_fraction': 0.7564249130435227, 'bagging_fraction': 0.4332718200931663, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:06,565][0m Trial 97 finished with value: -0.19346819145725558 and parameters: {'lambda_l1': 9.92599048884988, 'lambda_l2': 0.12237617316234474, 'num_leaves': 237, 'feature_fraction': 0.7548414845229086, 'bagging_fraction': 0.4102814081786128, 'bagging_freq': 1, 'min_child_samples': 26}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:07,059][0m Trial 98 finished with value: -0.193187200782391 and parameters: {'lambda_l1': 4.4839626524510585, 'lambda_l2': 0.07378221532309362, 'num_leaves': 230, 'feature_fraction': 0.7743991427834319, 'bagging_fraction': 0.4285755647436565, 'bagging_freq': 1, 'min_child_samples': 97}. Best is trial 83 with value: -0.18939667939421484.[0m




[32m[I 2023-03-15 16:58:07,433][0m Trial 99 finished with value: -0.19994934996383718 and parameters: {'lambda_l1': 0.8286486325982612, 'lambda_l2': 0.2064788540182722, 'num_leaves': 249, 'feature_fraction': 0.742845009165699, 'bagging_fraction': 0.43621999385040044, 'bagging_freq': 1, 'min_child_samples': 100}. Best is trial 83 with value: -0.18939667939421484.[0m




{'lambda_l1': 8.772994195299553,
 'lambda_l2': 0.49236268943892314,
 'num_leaves': 248,
 'feature_fraction': 0.7668271563501782,
 'bagging_fraction': 0.4164242117438031,
 'bagging_freq': 1,
 'min_child_samples': 92}

In [9]:
# Train a model from the finished study (presumably with its best
# hyperparameters -- confirm in train_optimized_model), then report how it
# does on the validation data.
model = train_optimized_model(study, X_train, y_train)
# Keep only the second probability column (presumably the positive class).
preds = model.predict_proba(X_valid)[:, 1]

brier = brier_score_loss(y_valid, preds)
print("Brier Score:", brier)
auc = roc_auc_score(y_valid, preds)
print("ROC AUC:", auc)

# Dump every parameter the fitted estimator reports, one per line.
print("Parameters")
param_lines = [
    f"- {key}: {value}"
    for key, value in model.get_params(deep=True).items()
]
print("\n".join(param_lines))

# List the features ordered from highest to lowest importance.
print("Features")
ranked_features = sorted(
    zip(model.feature_name_, model.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)
print("\n".join(f"- {name}: {imp}" for name, imp in ranked_features))


Brier Score: 0.1567189348300844
ROC AUC: 0.8659336419753085
Parameters
- boosting_type: gbdt
- class_weight: None
- colsample_bytree: 1.0
- importance_type: split
- learning_rate: 0.1
- max_depth: -1
- min_child_samples: 92
- min_child_weight: 0.001
- min_split_gain: 0.0
- n_estimators: 100
- n_jobs: -1
- num_leaves: 248
- objective: binary
- random_state: None
- reg_alpha: 0.0
- reg_lambda: 0.0
- silent: warn
- subsample: 1.0
- subsample_for_bin: 200000
- subsample_freq: 0
- metric: l2
- verbosity: -1
- lambda_l1: 8.772994195299553
- lambda_l2: 0.49236268943892314
- feature_fraction: 0.7668271563501782
- bagging_fraction: 0.4164242117438031
- bagging_freq: 1
Features
- SeedDiff: 32
- OrdinalRankDiff: 21
- FGMRegDiff: 15
- ScoreTouDiff: 12
- TOTouDiff: 12
- OROppTouDiff: 12
- ORRegDiff: 8
- AstTouDiff: 8
- FTPOppTouDiff: 8
- FGA3RegDiff: 7
- FGAOppRegDiff: 7
- OROppRegDiff: 7
- DROppRegDiff: 7
- FGP3OppRegDiff: 7
- FGAOppTouDiff: 7
- ScoreRegDiff: 6
- ScoreOppRegDiff: 6
- AstRegDiff: 6

# Submit to the competition

We'll begin by using the trained model to generate predictions, which we'll save to a CSV file.

In [10]:
# Refit the model on X and y, then take the second probability column
# (presumably the positive class) for every submission row.
model.fit(X, y)
predictions = model.predict_proba(X_submission)[:, 1]

# Pair each submission index value with its prediction and write the result
# to disk without the DataFrame's own row index.
submission_columns = {
    "ID": X_submission.index,
    "Pred": predictions,
}
output = pd.DataFrame(submission_columns)
output.to_csv("submission.csv", index=False)

# Quick sanity check: row/column counts and summary statistics.
print(output.shape)
output.describe()

(614319, 2)


Unnamed: 0,Pred
count,614319.0
mean,0.440945
std,0.023096
min,0.055278
25%,0.440439
50%,0.440439
75%,0.440439
max,0.946742
