# Imports and Utilities

In [1]:
from typing import List, Tuple
import numpy as np
import pandas as pd
from IPython.display import Markdown
import optuna
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.model_selection import cross_val_score

COMPETITION_NAME = "march-machine-learning-mania-2023"
VERBOSE = False

pd.set_option("display.max_columns", None)

def show_df(df: pd.DataFrame, name: str = "DataFrame", verbose: bool = VERBOSE):
    if verbose:
        display(Markdown(f"## {name}"))
        display(Markdown("### Head"))
        display(df.head())
        display(Markdown("### Tail"))
        display(df.tail())
        display(Markdown("### Description"))
        display(df.describe())


def extract_data(filename: str, competition_name:str = COMPETITION_NAME) -> pd.DataFrame:
    mens_filepath = f"/kaggle/input/{competition_name}/M{filename}.csv"
    womens_filepath = f"/kaggle/input/{competition_name}/W{filename}.csv"
    try:
        df_mens = pd.read_csv(mens_filepath)
    except FileNotFoundError:
        df_mens = None
    try:
        df_womens = pd.read_csv(womens_filepath)
    except FileNotFoundError:
        df_womens = None
    df = pd.concat([df_mens, df_womens])
    show_df(df, filename)
    return df


def get_team_features(detailed_results: pd.DataFrame) -> pd.DataFrame:
    df = detailed_results.copy()
    df = clean_detailed_results(df)
    df = transform_game_to_team(df)
    df = transform_team_results(df)
    show_df(df)
    return df


def get_seed_features(df_in: pd.DataFrame) -> pd.DataFrame:
    df = df_in.copy()
    mask = df["Season"] > 2002
    df = df[mask]
    df["Seed"] = df["Seed"].str.replace(r"\D+","", regex=True)
    df["Seed"] = df["Seed"].astype(int)
    show_df(df)
    return df


def get_ranking_features(df_in:pd.DataFrame) -> pd.DataFrame:
    df = df_in.copy()
    mask = df["RankingDayNum"] == df["RankingDayNum"].max()
    df = df[mask]
    df.drop(["SystemName", "RankingDayNum"], axis=1, inplace=True)
    df = df.groupby(["Season", "TeamID"]).agg("median")
    df = df.reset_index()
    show_df(df)
    return df


def get_game_outcomes(df):
    input_rows = df.to_records()
    output_rows = []
    for input_row in input_rows:
        output_rows.extend(parse_row(input_row))
    out_df = pd.DataFrame(output_rows)
    return out_df

def parse_row(row):
    season = row['Season']
    winning_team_id = row['WTeamID']
    losing_team_id = row['LTeamID']
    if winning_team_id < losing_team_id:
        small_id = winning_team_id
        big_id = losing_team_id
        outcome = True
    elif losing_team_id < winning_team_id:
        small_id = losing_team_id
        big_id = winning_team_id
        outcome = False
    records = [
        {
            "ID": f"{season}_{small_id}_{big_id}",
            'Season': season,
            'LowID': small_id,
            'HighID': big_id,
            'Win': outcome
        },
        {
            "ID": f"{season}_{big_id}_{small_id}",
            'Season': season,
            'LowID': big_id,
            'HighID': small_id,
            'Win': not outcome
        },
    ]
    return records


def clean_detailed_results(df: pd.DataFrame) -> pd.DataFrame:
    return df.drop(["WLoc", "DayNum"], axis=1)


def transform_game_to_team(game_results: pd.DataFrame) -> pd.DataFrame:
    winners = rename_columns(game_results, "W")
    loosers = rename_columns(game_results, "L")
    team_results = pd.concat((winners, loosers))
    team_results.drop(["TeamIDOpp"], axis=1, inplace=True)
    return team_results


def transform_team_results(df: pd.DataFrame) -> pd.DataFrame:
    df = df.groupby(["Season", "TeamID"]).median()
    df["FGP"] =  df["FGM"] / df["FGA"]
    df["FGP3"] =  df["FGM3"] / df["FGA3"]
    df["FTP"] =  df["FTM"] / df["FTA"]
    df["FGPOpp"] =  df["FGMOpp"] / df["FGAOpp"]
    df["FGP3Opp"] =  df["FGM3Opp"] / df["FGA3Opp"]
    df["FTPOpp"] =  df["FTMOpp"] / df["FTAOpp"]
    return df.reset_index()
    
    
def rename_columns(df: pd.DataFrame, team_prefix: str) -> pd.DataFrame:
    df = df.copy()
    df.columns =  (rename_column(column_name, team_prefix) for column_name in df.columns)
    return df


def rename_column(column_name: str, team_prefix: str) -> pd.DataFrame:
    if team_prefix == "W":
        opponent_prefix = "L"
    elif team_prefix == "L":
        opponent_prefix = "W"
    else:
        raise ValueError
    if column_name.startswith(team_prefix):
        column_name = column_name.lstrip(team_prefix)
    elif column_name.startswith(opponent_prefix):
        column_name = f"{column_name.lstrip(opponent_prefix)}Opp"
    return column_name


def split_winner_and_looser_columns(df: pd.DataFrame) -> Tuple[List[str], List[str]]:
    winner_columns = [name for name in df.columns if not name.startswith("L")]
    looser_columns = [name for name in df.columns if not name.startswith("W")]
    return winner_columns, looser_columns


def merge_features(
    season_features: pd.DataFrame, 
    tournament_features: pd.DataFrame, 
    seed_features: pd.DataFrame, 
    ranking_features: pd.DataFrame
) -> pd.DataFrame:
    features = pd.merge(
        season_features,
        tournament_features,
        how="inner",
        on=["Season", "TeamID"],
        suffixes=("Reg", "Tou")
    )
    features = features.merge(
        seed_features,
        how="inner",
        on=["Season", "TeamID"]
    )
    features = features.merge(
        ranking_features,
        how="left",
        on=["Season", "TeamID"],
    )
    show_df(features)
    return features


def merge_outcomes_with_features(outcomes: pd.DataFrame, features: pd.DataFrame, how: str = "inner") -> pd.DataFrame:
    feature_names = [name for name in features.columns if name not in ["Season", "TeamID", "Gender"]]
    data = pd.merge(
        outcomes, 
        features, 
        how=how, 
        left_on=["Season", "HighID"], 
        right_on=["Season", "TeamID"]
    )
    data = pd.merge(
        data, 
        features, 
        how=how, 
        left_on=["Season", "LowID"], 
        right_on=["Season", "TeamID"],
        suffixes=("High", "Low")
    )
    for name in feature_names:
        data[f"{name}Diff"] = data[f"{name}High"] - data[f"{name}Low"]
        data.drop([f"{name}High", f"{name}Low"], axis=1, inplace=True)
    data.drop(
        ["Season", "HighID", "LowID","TeamIDHigh","TeamIDLow"], 
        axis=1, 
        inplace=True
    )
    data.set_index("ID", inplace=True)
    show_df(data)
    return data


def get_submission_outcomes() -> pd.DataFrame:
    sample_submission = pd.read_csv(f"/kaggle/input/{COMPETITION_NAME}/SampleSubmission2023.csv")
    df = sample_submission.copy()
    df.drop("Pred", axis=1, inplace=True)
    df[["Season", "LowID", "HighID"]] = df["ID"].str.split("_", expand=True)
    df[["Season", "LowID", "HighID"]] = df[["Season", "LowID", "HighID"]].astype(int)
    show_df(df)
    return df


def objective(trial: optuna.Trial, X_train, y_train):
    params = {
        "objective": "binary",
        "metric": "l2",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    model = lgb.LGBMClassifier(**params)
    score = np.mean(cross_val_score(model, X_train, y_train, scoring="neg_brier_score", cv=5))
    return score


def run_study(X_train, y_train):
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=100)
    return study


def train_optimized_model(study, X, y):
    params = study.best_params
    model = lgb.LGBMClassifier(
        objective="binary",
        metric="l2",
        verbosity=-1,
        boosting_type="gbdt",
        **params
    )
    model = model.fit(X, y)
    return model

# Load Data

In [2]:
season_results = extract_data("RegularSeasonDetailedResults")
tournament_results = extract_data("NCAATourneyDetailedResults")
seeds = extract_data("NCAATourneySeeds")
rankings = extract_data("MasseyOrdinals_thru_Season2023_Day128")

# Transform Data

In [3]:
season_team_features = get_team_features(season_results)

tournament_results["Season"] += 1
tournament_team_features = get_team_features(tournament_results)

seed_features = get_seed_features(seeds)

ranking_features = get_ranking_features(rankings)

## Merge features

In [4]:
features = merge_features(season_team_features, tournament_team_features, seed_features, ranking_features)

## Build Datasets

In [5]:
from sklearn.model_selection import train_test_split

data = extract_data("NCAATourneyCompactResults")
data_train, data_valid = train_test_split(data, random_state=0, test_size=0.1)

outcomes = get_game_outcomes(data)
outcomes_train = get_game_outcomes(data_train)
outcomes_valid = get_game_outcomes(data_valid)
outcomes_submission = get_submission_outcomes()

In [6]:
features_train = merge_outcomes_with_features(outcomes_train, features)
features_valid = merge_outcomes_with_features(outcomes_valid, features)
features_full = merge_outcomes_with_features(outcomes, features)

In [7]:
y_train = features_train["Win"]
X_train = features_train.drop("Win", axis=1)
y_valid = features_valid["Win"]
X_valid = features_valid.drop("Win", axis=1)
y = features_full["Win"]
X = features_full.drop("Win", axis=1)
X_submission = merge_outcomes_with_features(outcomes_submission, features, how="left").fillna(0)

# Step 4: Train a model


In [8]:
study = run_study(X_train, y_train)
study.best_params

[32m[I 2023-03-15 17:17:19,111][0m A new study created in memory with name: no-name-d71d87f6-93a2-4e67-a856-c5ce27b54a6c[0m




[32m[I 2023-03-15 17:17:19,761][0m Trial 0 finished with value: -0.22386461757226833 and parameters: {'lambda_l1': 1.1933746666297976e-07, 'lambda_l2': 0.04068307042421321, 'num_leaves': 55, 'feature_fraction': 0.6842094302713124, 'bagging_fraction': 0.6913795388664588, 'bagging_freq': 6, 'min_child_samples': 39}. Best is trial 0 with value: -0.22386461757226833.[0m




[32m[I 2023-03-15 17:17:20,379][0m Trial 1 finished with value: -0.21127542921520215 and parameters: {'lambda_l1': 0.001427074272378841, 'lambda_l2': 0.0017886539532516142, 'num_leaves': 209, 'feature_fraction': 0.852619972117554, 'bagging_fraction': 0.8836980185889741, 'bagging_freq': 3, 'min_child_samples': 76}. Best is trial 1 with value: -0.21127542921520215.[0m




[32m[I 2023-03-15 17:17:21,100][0m Trial 2 finished with value: -0.21783991969386993 and parameters: {'lambda_l1': 2.0301859059450872e-06, 'lambda_l2': 1.713383798391584e-07, 'num_leaves': 142, 'feature_fraction': 0.6084654205032806, 'bagging_fraction': 0.929851641616613, 'bagging_freq': 5, 'min_child_samples': 42}. Best is trial 1 with value: -0.21127542921520215.[0m




[32m[I 2023-03-15 17:17:22,026][0m Trial 3 finished with value: -0.2251374504014149 and parameters: {'lambda_l1': 0.00035141892383074903, 'lambda_l2': 0.0037256918123561716, 'num_leaves': 28, 'feature_fraction': 0.9851338008013396, 'bagging_fraction': 0.6692373828542755, 'bagging_freq': 5, 'min_child_samples': 28}. Best is trial 1 with value: -0.21127542921520215.[0m




[32m[I 2023-03-15 17:17:22,344][0m Trial 4 finished with value: -0.2046899253679389 and parameters: {'lambda_l1': 2.530174251148736e-08, 'lambda_l2': 0.42810682199273, 'num_leaves': 92, 'feature_fraction': 0.5234121719556712, 'bagging_fraction': 0.6252379454858741, 'bagging_freq': 7, 'min_child_samples': 90}. Best is trial 4 with value: -0.2046899253679389.[0m




[32m[I 2023-03-15 17:17:22,924][0m Trial 5 finished with value: -0.21694955743362748 and parameters: {'lambda_l1': 5.728563060428261e-06, 'lambda_l2': 2.456110980548844e-08, 'num_leaves': 121, 'feature_fraction': 0.4839313032265312, 'bagging_fraction': 0.81609277257476, 'bagging_freq': 6, 'min_child_samples': 42}. Best is trial 4 with value: -0.2046899253679389.[0m




[32m[I 2023-03-15 17:17:23,875][0m Trial 6 finished with value: -0.22162916734939314 and parameters: {'lambda_l1': 0.3567736042166472, 'lambda_l2': 3.888140867362035e-08, 'num_leaves': 192, 'feature_fraction': 0.8577149200376699, 'bagging_fraction': 0.4685672041680373, 'bagging_freq': 1, 'min_child_samples': 15}. Best is trial 4 with value: -0.2046899253679389.[0m




[32m[I 2023-03-15 17:17:25,079][0m Trial 7 finished with value: -0.23286023465356168 and parameters: {'lambda_l1': 0.02598372816028187, 'lambda_l2': 3.815457430327264e-06, 'num_leaves': 48, 'feature_fraction': 0.4558366084767058, 'bagging_fraction': 0.6402270314405965, 'bagging_freq': 4, 'min_child_samples': 19}. Best is trial 4 with value: -0.2046899253679389.[0m




[32m[I 2023-03-15 17:17:25,738][0m Trial 8 finished with value: -0.21712401302650922 and parameters: {'lambda_l1': 0.2678591872644849, 'lambda_l2': 1.5743170516345806e-06, 'num_leaves': 105, 'feature_fraction': 0.5985760738982793, 'bagging_fraction': 0.9089466553783834, 'bagging_freq': 3, 'min_child_samples': 51}. Best is trial 4 with value: -0.2046899253679389.[0m




[32m[I 2023-03-15 17:17:26,434][0m Trial 9 finished with value: -0.21445333838280273 and parameters: {'lambda_l1': 3.522625165675056e-05, 'lambda_l2': 0.018255249947490868, 'num_leaves': 132, 'feature_fraction': 0.8072936857147341, 'bagging_fraction': 0.9805677884135628, 'bagging_freq': 4, 'min_child_samples': 63}. Best is trial 4 with value: -0.2046899253679389.[0m




[32m[I 2023-03-15 17:17:26,730][0m Trial 10 finished with value: -0.1975698867704465 and parameters: {'lambda_l1': 1.6945964366793083e-08, 'lambda_l2': 8.446733979368778, 'num_leaves': 255, 'feature_fraction': 0.4007606331712723, 'bagging_fraction': 0.5293761915524243, 'bagging_freq': 7, 'min_child_samples': 96}. Best is trial 10 with value: -0.1975698867704465.[0m




[32m[I 2023-03-15 17:17:27,033][0m Trial 11 finished with value: -0.19788753577915238 and parameters: {'lambda_l1': 1.3142056844453394e-08, 'lambda_l2': 4.769870419780381, 'num_leaves': 255, 'feature_fraction': 0.4054511888726441, 'bagging_fraction': 0.5288935124447721, 'bagging_freq': 7, 'min_child_samples': 100}. Best is trial 10 with value: -0.1975698867704465.[0m




[32m[I 2023-03-15 17:17:27,310][0m Trial 12 finished with value: -0.19686941139140937 and parameters: {'lambda_l1': 1.5560432223883213e-08, 'lambda_l2': 7.241331296095427, 'num_leaves': 256, 'feature_fraction': 0.40243900572252467, 'bagging_fraction': 0.4724519031401982, 'bagging_freq': 7, 'min_child_samples': 99}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:27,602][0m Trial 13 finished with value: -0.19828696129277498 and parameters: {'lambda_l1': 2.4772983477081776e-07, 'lambda_l2': 9.323183182036788, 'num_leaves': 253, 'feature_fraction': 0.40193066585409076, 'bagging_fraction': 0.4321089229356331, 'bagging_freq': 7, 'min_child_samples': 77}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:27,869][0m Trial 14 finished with value: -0.19720877715943663 and parameters: {'lambda_l1': 1.1782086558548584e-08, 'lambda_l2': 0.4370281918502161, 'num_leaves': 205, 'feature_fraction': 0.5316285953961261, 'bagging_fraction': 0.4018976757173468, 'bagging_freq': 6, 'min_child_samples': 98}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:28,172][0m Trial 15 finished with value: -0.20331761980091517 and parameters: {'lambda_l1': 8.328485769411187e-07, 'lambda_l2': 0.2954356941438219, 'num_leaves': 203, 'feature_fraction': 0.5488494192029155, 'bagging_fraction': 0.4192634247243427, 'bagging_freq': 6, 'min_child_samples': 81}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:28,519][0m Trial 16 finished with value: -0.2102062582566032 and parameters: {'lambda_l1': 1.9576653671526284e-05, 'lambda_l2': 0.000213166466657143, 'num_leaves': 177, 'feature_fraction': 0.49224784268068494, 'bagging_fraction': 0.5222615556903254, 'bagging_freq': 5, 'min_child_samples': 68}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:28,849][0m Trial 17 finished with value: -0.20268031849653867 and parameters: {'lambda_l1': 1.3740719539629868e-07, 'lambda_l2': 0.41831895252528056, 'num_leaves': 223, 'feature_fraction': 0.5633879535665742, 'bagging_fraction': 0.4042492751565689, 'bagging_freq': 1, 'min_child_samples': 88}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:29,226][0m Trial 18 finished with value: -0.2081704994944412 and parameters: {'lambda_l1': 1.0053368299164735e-08, 'lambda_l2': 1.171013586928782, 'num_leaves': 168, 'feature_fraction': 0.4773416510757485, 'bagging_fraction': 0.5791251648061045, 'bagging_freq': 6, 'min_child_samples': 65}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:29,564][0m Trial 19 finished with value: -0.2017171739354581 and parameters: {'lambda_l1': 7.520336046746846e-07, 'lambda_l2': 0.08979849096044511, 'num_leaves': 229, 'feature_fraction': 0.6282965106916386, 'bagging_fraction': 0.4781858961520673, 'bagging_freq': 3, 'min_child_samples': 88}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:30,084][0m Trial 20 finished with value: -0.21128607371826794 and parameters: {'lambda_l1': 6.485904557359207e-06, 'lambda_l2': 1.6173661683761924, 'num_leaves': 160, 'feature_fraction': 0.5367613483135107, 'bagging_fraction': 0.7699346739725619, 'bagging_freq': 6, 'min_child_samples': 55}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:30,398][0m Trial 21 finished with value: -0.19704101565716442 and parameters: {'lambda_l1': 6.403700823890569e-08, 'lambda_l2': 9.191058368844912, 'num_leaves': 235, 'feature_fraction': 0.4061966685331557, 'bagging_fraction': 0.5374169367237721, 'bagging_freq': 7, 'min_child_samples': 100}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:30,686][0m Trial 22 finished with value: -0.1971439962750569 and parameters: {'lambda_l1': 8.849911650850259e-08, 'lambda_l2': 2.600374217787327, 'num_leaves': 229, 'feature_fraction': 0.44936327820887134, 'bagging_fraction': 0.47187086925208377, 'bagging_freq': 7, 'min_child_samples': 94}. Best is trial 12 with value: -0.19686941139140937.[0m




[32m[I 2023-03-15 17:17:31,017][0m Trial 23 finished with value: -0.1967653599716818 and parameters: {'lambda_l1': 2.019064964228232e-07, 'lambda_l2': 9.267070687580263, 'num_leaves': 233, 'feature_fraction': 0.44642363566908344, 'bagging_fraction': 0.5858397796893928, 'bagging_freq': 7, 'min_child_samples': 85}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:31,367][0m Trial 24 finished with value: -0.2011798268537885 and parameters: {'lambda_l1': 2.31263352634874e-07, 'lambda_l2': 2.0801704896867728, 'num_leaves': 234, 'feature_fraction': 0.4512322708636913, 'bagging_fraction': 0.5951507899550968, 'bagging_freq': 7, 'min_child_samples': 84}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:31,710][0m Trial 25 finished with value: -0.2022655194965047 and parameters: {'lambda_l1': 7.447586674388931e-08, 'lambda_l2': 8.640322329708019, 'num_leaves': 190, 'feature_fraction': 0.43120568172556384, 'bagging_fraction': 0.5694643277169064, 'bagging_freq': 5, 'min_child_samples': 73}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:32,046][0m Trial 26 finished with value: -0.20473418813518585 and parameters: {'lambda_l1': 6.106607677822751e-07, 'lambda_l2': 0.1534065432057436, 'num_leaves': 236, 'feature_fraction': 0.4952654585049495, 'bagging_fraction': 0.5563438473319826, 'bagging_freq': 2, 'min_child_samples': 100}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:32,400][0m Trial 27 finished with value: -0.20273435251416122 and parameters: {'lambda_l1': 5.353116998922746e-08, 'lambda_l2': 1.1743902492554834, 'num_leaves': 220, 'feature_fraction': 0.44105944437849204, 'bagging_fraction': 0.6262006576699296, 'bagging_freq': 7, 'min_child_samples': 83}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:32,697][0m Trial 28 finished with value: -0.20040590084464588 and parameters: {'lambda_l1': 5.130318977086485e-07, 'lambda_l2': 0.06221515667977462, 'num_leaves': 151, 'feature_fraction': 0.40213237872924396, 'bagging_fraction': 0.510747245158116, 'bagging_freq': 6, 'min_child_samples': 90}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:33,108][0m Trial 29 finished with value: -0.2066350344216243 and parameters: {'lambda_l1': 8.646355050567141e-08, 'lambda_l2': 0.026831671242033044, 'num_leaves': 183, 'feature_fraction': 0.6545394659172009, 'bagging_fraction': 0.7096298805884013, 'bagging_freq': 6, 'min_child_samples': 93}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:34,103][0m Trial 30 finished with value: -0.2099168267861909 and parameters: {'lambda_l1': 1.523903369222342e-06, 'lambda_l2': 9.723481157076506, 'num_leaves': 77, 'feature_fraction': 0.5005530903817983, 'bagging_fraction': 0.48952250216994025, 'bagging_freq': 7, 'min_child_samples': 5}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:34,401][0m Trial 31 finished with value: -0.19805764094956152 and parameters: {'lambda_l1': 5.947985614403197e-08, 'lambda_l2': 2.394225497532907, 'num_leaves': 238, 'feature_fraction': 0.4494627171425863, 'bagging_fraction': 0.4677904039240737, 'bagging_freq': 7, 'min_child_samples': 95}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:34,706][0m Trial 32 finished with value: -0.202912694162061 and parameters: {'lambda_l1': 1.1829353417644099e-07, 'lambda_l2': 1.5881289734386705, 'num_leaves': 215, 'feature_fraction': 0.45649129202944727, 'bagging_fraction': 0.44324688707457477, 'bagging_freq': 7, 'min_child_samples': 80}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:35,082][0m Trial 33 finished with value: -0.20628283410673523 and parameters: {'lambda_l1': 4.07779109216649e-08, 'lambda_l2': 2.7611405667508517, 'num_leaves': 244, 'feature_fraction': 0.56655708920154, 'bagging_fraction': 0.5471092690355959, 'bagging_freq': 6, 'min_child_samples': 72}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:35,795][0m Trial 34 finished with value: -0.20361324557416194 and parameters: {'lambda_l1': 2.2398701045512845e-07, 'lambda_l2': 0.5565588979235518, 'num_leaves': 211, 'feature_fraction': 0.42587017348277584, 'bagging_fraction': 0.5121339715570041, 'bagging_freq': 5, 'min_child_samples': 92}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:36,095][0m Trial 35 finished with value: -0.2003703248230932 and parameters: {'lambda_l1': 3.411035199121697e-08, 'lambda_l2': 0.12911125880561852, 'num_leaves': 13, 'feature_fraction': 0.4726944201683222, 'bagging_fraction': 0.466173501677125, 'bagging_freq': 7, 'min_child_samples': 85}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:36,434][0m Trial 36 finished with value: -0.20175714127321015 and parameters: {'lambda_l1': 3.594506835416433e-06, 'lambda_l2': 4.532279498144425, 'num_leaves': 202, 'feature_fraction': 0.5120656163952193, 'bagging_fraction': 0.5967364611205215, 'bagging_freq': 4, 'min_child_samples': 96}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:36,710][0m Trial 37 finished with value: -0.19921262680162977 and parameters: {'lambda_l1': 1.8755582197020176e-06, 'lambda_l2': 0.8154579093676946, 'num_leaves': 244, 'feature_fraction': 0.4357567675088276, 'bagging_fraction': 0.4397968618902991, 'bagging_freq': 7, 'min_child_samples': 100}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:37,218][0m Trial 38 finished with value: -0.2120167116170939 and parameters: {'lambda_l1': 3.409042254085176e-08, 'lambda_l2': 2.9515758606602307, 'num_leaves': 226, 'feature_fraction': 0.5125837424651403, 'bagging_fraction': 0.4970982240442908, 'bagging_freq': 6, 'min_child_samples': 30}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:37,560][0m Trial 39 finished with value: -0.21028009769096387 and parameters: {'lambda_l1': 3.273958570533436e-07, 'lambda_l2': 0.20731680999004756, 'num_leaves': 193, 'feature_fraction': 0.47492664037443866, 'bagging_fraction': 0.546489357236255, 'bagging_freq': 5, 'min_child_samples': 76}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:37,959][0m Trial 40 finished with value: -0.2084594690892727 and parameters: {'lambda_l1': 6.754845035366759e-05, 'lambda_l2': 0.015358210792252096, 'num_leaves': 245, 'feature_fraction': 0.5897734239593493, 'bagging_fraction': 0.6582151025441565, 'bagging_freq': 7, 'min_child_samples': 86}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:38,254][0m Trial 41 finished with value: -0.20167670719368602 and parameters: {'lambda_l1': 1.0275750893755609e-08, 'lambda_l2': 0.7156786060951321, 'num_leaves': 209, 'feature_fraction': 0.5296881721394843, 'bagging_fraction': 0.4492666089449122, 'bagging_freq': 6, 'min_child_samples': 95}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:38,530][0m Trial 42 finished with value: -0.202699010955935 and parameters: {'lambda_l1': 3.171983563956363e-08, 'lambda_l2': 0.35350252273899574, 'num_leaves': 226, 'feature_fraction': 0.4279207224320498, 'bagging_fraction': 0.4078138860349454, 'bagging_freq': 7, 'min_child_samples': 90}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:38,821][0m Trial 43 finished with value: -0.1980601946303134 and parameters: {'lambda_l1': 1.1888095157410004e-07, 'lambda_l2': 3.9894813945694314, 'num_leaves': 201, 'feature_fraction': 0.4702606787618221, 'bagging_fraction': 0.4944003984000828, 'bagging_freq': 6, 'min_child_samples': 100}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:39,105][0m Trial 44 finished with value: -0.19830148172035938 and parameters: {'lambda_l1': 1.859925327151472e-08, 'lambda_l2': 0.8952894229492814, 'num_leaves': 255, 'feature_fraction': 0.5239113526033548, 'bagging_fraction': 0.4017252297857611, 'bagging_freq': 7, 'min_child_samples': 96}. Best is trial 23 with value: -0.1967653599716818.[0m




[32m[I 2023-03-15 17:17:39,399][0m Trial 45 finished with value: -0.19643303487209848 and parameters: {'lambda_l1': 1.0379661443537683e-08, 'lambda_l2': 9.785714616509413, 'num_leaves': 216, 'feature_fraction': 0.4232430103666618, 'bagging_fraction': 0.4605238211853421, 'bagging_freq': 7, 'min_child_samples': 92}. Best is trial 45 with value: -0.19643303487209848.[0m




[32m[I 2023-03-15 17:17:39,754][0m Trial 46 finished with value: -0.20080369391000463 and parameters: {'lambda_l1': 1.9203885227469518e-07, 'lambda_l2': 9.579152396177504, 'num_leaves': 121, 'feature_fraction': 0.4121848654503317, 'bagging_fraction': 0.4616628191394638, 'bagging_freq': 2, 'min_child_samples': 54}. Best is trial 45 with value: -0.19643303487209848.[0m




[32m[I 2023-03-15 17:17:40,089][0m Trial 47 finished with value: -0.20220239529670478 and parameters: {'lambda_l1': 7.893327609883802e-08, 'lambda_l2': 3.403679227197244, 'num_leaves': 242, 'feature_fraction': 0.4365911408465109, 'bagging_fraction': 0.5298408478349907, 'bagging_freq': 7, 'min_child_samples': 80}. Best is trial 45 with value: -0.19643303487209848.[0m




[32m[I 2023-03-15 17:17:40,821][0m Trial 48 finished with value: -0.1967451265592534 and parameters: {'lambda_l1': 2.2557698184369574e-08, 'lambda_l2': 5.502372000885415, 'num_leaves': 221, 'feature_fraction': 0.4021040349540727, 'bagging_fraction': 0.49395250618057857, 'bagging_freq': 7, 'min_child_samples': 91}. Best is trial 45 with value: -0.19643303487209848.[0m




[32m[I 2023-03-15 17:17:41,586][0m Trial 49 finished with value: -0.2009622421136112 and parameters: {'lambda_l1': 2.5641274778772523e-08, 'lambda_l2': 5.58247569898895, 'num_leaves': 216, 'feature_fraction': 0.40113388125572313, 'bagging_fraction': 0.4992260138735897, 'bagging_freq': 6, 'min_child_samples': 60}. Best is trial 45 with value: -0.19643303487209848.[0m




[32m[I 2023-03-15 17:17:41,919][0m Trial 50 finished with value: -0.20516830903544853 and parameters: {'lambda_l1': 1.959966788095304e-08, 'lambda_l2': 0.1790032031453617, 'num_leaves': 77, 'feature_fraction': 0.48772657438272177, 'bagging_fraction': 0.5992163526405717, 'bagging_freq': 7, 'min_child_samples': 90}. Best is trial 45 with value: -0.19643303487209848.[0m




[32m[I 2023-03-15 17:17:42,201][0m Trial 51 finished with value: -0.1962940282097425 and parameters: {'lambda_l1': 3.2975417183384987e-07, 'lambda_l2': 4.456987373185494, 'num_leaves': 233, 'feature_fraction': 0.4610768515555946, 'bagging_fraction': 0.43657142310846947, 'bagging_freq': 7, 'min_child_samples': 93}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:42,558][0m Trial 52 finished with value: -0.20363157735885823 and parameters: {'lambda_l1': 3.7665213877949107e-07, 'lambda_l2': 9.096326726773105, 'num_leaves': 256, 'feature_fraction': 0.4210247550954774, 'bagging_fraction': 0.43826349516831653, 'bagging_freq': 7, 'min_child_samples': 46}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:42,841][0m Trial 53 finished with value: -0.20043840511125946 and parameters: {'lambda_l1': 1.1374533624375996e-08, 'lambda_l2': 1.480100909540321, 'num_leaves': 173, 'feature_fraction': 0.46144504661839814, 'bagging_fraction': 0.43211044773076523, 'bagging_freq': 7, 'min_child_samples': 87}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:43,138][0m Trial 54 finished with value: -0.1994208235126363 and parameters: {'lambda_l1': 1.3262015096241538e-06, 'lambda_l2': 5.037464758786679, 'num_leaves': 235, 'feature_fraction': 0.4171640866651358, 'bagging_fraction': 0.4814893261740225, 'bagging_freq': 6, 'min_child_samples': 93}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:43,462][0m Trial 55 finished with value: -0.2021795285408247 and parameters: {'lambda_l1': 4.030806846605325e-08, 'lambda_l2': 0.713753876471803, 'num_leaves': 193, 'feature_fraction': 0.46119261549860835, 'bagging_fraction': 0.527043188262373, 'bagging_freq': 7, 'min_child_samples': 81}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:43,740][0m Trial 56 finished with value: -0.19671543325659102 and parameters: {'lambda_l1': 1.5844317720944413e-07, 'lambda_l2': 4.266376468848959, 'num_leaves': 220, 'feature_fraction': 0.4001282257075881, 'bagging_fraction': 0.4249987717950442, 'bagging_freq': 6, 'min_child_samples': 97}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:44,033][0m Trial 57 finished with value: -0.2025717327995357 and parameters: {'lambda_l1': 1.9130224940917375e-07, 'lambda_l2': 0.46022802577056643, 'num_leaves': 219, 'feature_fraction': 0.4802061838311755, 'bagging_fraction': 0.42363786424887323, 'bagging_freq': 5, 'min_child_samples': 88}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:44,339][0m Trial 58 finished with value: -0.20422819921783697 and parameters: {'lambda_l1': 6.261940909482095e-07, 'lambda_l2': 1.677541768909583, 'num_leaves': 186, 'feature_fraction': 0.43189240358746334, 'bagging_fraction': 0.4549117630853349, 'bagging_freq': 6, 'min_child_samples': 77}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:44,730][0m Trial 59 finished with value: -0.20832527953479346 and parameters: {'lambda_l1': 1.8903712976559187e-08, 'lambda_l2': 3.778249950690962, 'num_leaves': 248, 'feature_fraction': 0.40018455336722136, 'bagging_fraction': 0.421834529908746, 'bagging_freq': 6, 'min_child_samples': 36}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:45,090][0m Trial 60 finished with value: -0.21240257457281722 and parameters: {'lambda_l1': 3.7029294078670185e-07, 'lambda_l2': 0.3055646028749818, 'num_leaves': 198, 'feature_fraction': 0.45583620293927035, 'bagging_fraction': 0.5100883451376127, 'bagging_freq': 4, 'min_child_samples': 69}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:45,374][0m Trial 61 finished with value: -0.1974347178944995 and parameters: {'lambda_l1': 6.616560043066612e-08, 'lambda_l2': 5.497361736141835, 'num_leaves': 233, 'feature_fraction': 0.41963936741398433, 'bagging_fraction': 0.47650273195333204, 'bagging_freq': 7, 'min_child_samples': 98}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:45,668][0m Trial 62 finished with value: -0.2009646368598334 and parameters: {'lambda_l1': 1.3450394562484219e-07, 'lambda_l2': 1.5056592214560562, 'num_leaves': 226, 'feature_fraction': 0.4396369081695067, 'bagging_fraction': 0.45237909388003994, 'bagging_freq': 7, 'min_child_samples': 92}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:45,968][0m Trial 63 finished with value: -0.19696845215659564 and parameters: {'lambda_l1': 2.2629863018041406e-08, 'lambda_l2': 6.378317827741479, 'num_leaves': 208, 'feature_fraction': 0.41617809842447856, 'bagging_fraction': 0.4877387402996468, 'bagging_freq': 7, 'min_child_samples': 97}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:46,475][0m Trial 64 finished with value: -0.20287173654979535 and parameters: {'lambda_l1': 2.0973530088840897e-08, 'lambda_l2': 2.403508324460882, 'num_leaves': 211, 'feature_fraction': 0.5032236688158351, 'bagging_fraction': 0.49078609280858215, 'bagging_freq': 7, 'min_child_samples': 83}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:47,018][0m Trial 65 finished with value: -0.1986453161496558 and parameters: {'lambda_l1': 1.010777261640247e-08, 'lambda_l2': 5.520515205671957, 'num_leaves': 217, 'feature_fraction': 0.48812733800387414, 'bagging_fraction': 0.5644894991512031, 'bagging_freq': 7, 'min_child_samples': 97}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:47,307][0m Trial 66 finished with value: -0.20017160081842517 and parameters: {'lambda_l1': 9.774488249848444e-07, 'lambda_l2': 0.9792915260404974, 'num_leaves': 250, 'feature_fraction': 0.4447824909935024, 'bagging_fraction': 0.4214184749934101, 'bagging_freq': 6, 'min_child_samples': 92}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:47,593][0m Trial 67 finished with value: -0.20095585628090698 and parameters: {'lambda_l1': 3.707022909019958e-08, 'lambda_l2': 2.348148443083603, 'num_leaves': 165, 'feature_fraction': 0.42106814890141925, 'bagging_fraction': 0.4627115088052324, 'bagging_freq': 7, 'min_child_samples': 90}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:47,901][0m Trial 68 finished with value: -0.1964286845496916 and parameters: {'lambda_l1': 1.2185254479277004e-07, 'lambda_l2': 9.201550524459638, 'num_leaves': 207, 'feature_fraction': 0.45461749388946276, 'bagging_fraction': 0.5150992205478638, 'bagging_freq': 7, 'min_child_samples': 97}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:48,236][0m Trial 69 finished with value: -0.20087902289855375 and parameters: {'lambda_l1': 1.7266808388516696e-07, 'lambda_l2': 1.1369966926990656, 'num_leaves': 239, 'feature_fraction': 0.4530977636596919, 'bagging_fraction': 0.5115470269101986, 'bagging_freq': 6, 'min_child_samples': 87}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:48,572][0m Trial 70 finished with value: -0.19922076950487214 and parameters: {'lambda_l1': 5.117976124665328e-07, 'lambda_l2': 3.5284511064199022, 'num_leaves': 180, 'feature_fraction': 0.47072027081693907, 'bagging_fraction': 0.5512348057496949, 'bagging_freq': 7, 'min_child_samples': 94}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:48,860][0m Trial 71 finished with value: -0.19642406636086873 and parameters: {'lambda_l1': 8.482884354227867e-08, 'lambda_l2': 9.029064081109208, 'num_leaves': 209, 'feature_fraction': 0.4172853192253144, 'bagging_fraction': 0.47537928903383697, 'bagging_freq': 7, 'min_child_samples': 97}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:49,133][0m Trial 72 finished with value: -0.1965900689016289 and parameters: {'lambda_l1': 9.325850393872412e-08, 'lambda_l2': 9.761454480277925, 'num_leaves': 222, 'feature_fraction': 0.40170533868135005, 'bagging_fraction': 0.4438348483419314, 'bagging_freq': 7, 'min_child_samples': 98}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:49,443][0m Trial 73 finished with value: -0.1965117369197164 and parameters: {'lambda_l1': 7.717710286097317e-08, 'lambda_l2': 9.335843657464343, 'num_leaves': 222, 'feature_fraction': 0.4395922719988025, 'bagging_fraction': 0.4449512258057655, 'bagging_freq': 7, 'min_child_samples': 84}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:49,724][0m Trial 74 finished with value: -0.1978464508676086 and parameters: {'lambda_l1': 9.892180069107214e-08, 'lambda_l2': 4.114479794779177, 'num_leaves': 221, 'feature_fraction': 0.4332488676853676, 'bagging_fraction': 0.4365658673394896, 'bagging_freq': 7, 'min_child_samples': 90}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:50,008][0m Trial 75 finished with value: -0.1962984653764412 and parameters: {'lambda_l1': 3.3186163834690067e-07, 'lambda_l2': 9.883783511194602, 'num_leaves': 208, 'feature_fraction': 0.41760299997883976, 'bagging_fraction': 0.41287381003047746, 'bagging_freq': 7, 'min_child_samples': 83}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:50,322][0m Trial 76 finished with value: -0.1979300366825222 and parameters: {'lambda_l1': 3.0709970777424263e-06, 'lambda_l2': 2.129137056032801, 'num_leaves': 194, 'feature_fraction': 0.4879848102259276, 'bagging_fraction': 0.41316208747357835, 'bagging_freq': 7, 'min_child_samples': 97}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:50,608][0m Trial 77 finished with value: -0.19991071515218273 and parameters: {'lambda_l1': 3.098436659422338e-07, 'lambda_l2': 0.526591333076292, 'num_leaves': 205, 'feature_fraction': 0.46314281670298596, 'bagging_fraction': 0.4015926664191032, 'bagging_freq': 6, 'min_child_samples': 83}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:50,913][0m Trial 78 finished with value: -0.19690068341904118 and parameters: {'lambda_l1': 7.528823231042589e-07, 'lambda_l2': 9.787084274791114, 'num_leaves': 141, 'feature_fraction': 0.507789735711065, 'bagging_fraction': 0.44554993758623823, 'bagging_freq': 7, 'min_child_samples': 94}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:51,204][0m Trial 79 finished with value: -0.20331211659032364 and parameters: {'lambda_l1': 5.93948873741135e-08, 'lambda_l2': 2.398840151156223, 'num_leaves': 45, 'feature_fraction': 0.41998627246329145, 'bagging_fraction': 0.42885589758009474, 'bagging_freq': 6, 'min_child_samples': 78}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:51,483][0m Trial 80 finished with value: -0.19886418341739667 and parameters: {'lambda_l1': 9.273196441979875e-08, 'lambda_l2': 1.3337192505024607, 'num_leaves': 213, 'feature_fraction': 0.43477989025468833, 'bagging_fraction': 0.4574128829101792, 'bagging_freq': 7, 'min_child_samples': 99}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:51,775][0m Trial 81 finished with value: -0.19840510408731732 and parameters: {'lambda_l1': 5.2408817432506366e-08, 'lambda_l2': 5.675455609520366, 'num_leaves': 229, 'feature_fraction': 0.40507826814649034, 'bagging_fraction': 0.47237378321826645, 'bagging_freq': 7, 'min_child_samples': 92}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:52,058][0m Trial 82 finished with value: -0.19862588982036739 and parameters: {'lambda_l1': 1.562535595485373e-07, 'lambda_l2': 6.133148181851784, 'num_leaves': 202, 'feature_fraction': 0.4460344613850143, 'bagging_fraction': 0.417346806947329, 'bagging_freq': 7, 'min_child_samples': 85}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:52,391][0m Trial 83 finished with value: -0.20239188863835006 and parameters: {'lambda_l1': 3.07285398747935e-07, 'lambda_l2': 3.393960679095495, 'num_leaves': 222, 'feature_fraction': 0.403610420966725, 'bagging_fraction': 0.4501438474995559, 'bagging_freq': 7, 'min_child_samples': 74}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:52,679][0m Trial 84 finished with value: -0.1972400560646514 and parameters: {'lambda_l1': 3.4442785503047475e-08, 'lambda_l2': 9.753957693243027, 'num_leaves': 186, 'feature_fraction': 0.4179042602723034, 'bagging_fraction': 0.47829796146893166, 'bagging_freq': 7, 'min_child_samples': 88}. Best is trial 51 with value: -0.1962940282097425.[0m




[32m[I 2023-03-15 17:17:53,000][0m Trial 85 finished with value: -0.195663986355056 and parameters: {'lambda_l1': 1.2040554832688375e-06, 'lambda_l2': 5.575092013137995, 'num_leaves': 199, 'feature_fraction': 0.4719571812525823, 'bagging_fraction': 0.4355387196033531, 'bagging_freq': 7, 'min_child_samples': 95}. Best is trial 85 with value: -0.195663986355056.[0m




[32m[I 2023-03-15 17:17:53,293][0m Trial 86 finished with value: -0.19853371632986794 and parameters: {'lambda_l1': 1.100297085949065e-06, 'lambda_l2': 1.9239242903663478, 'num_leaves': 196, 'feature_fraction': 0.47611611349585053, 'bagging_fraction': 0.4217774734257021, 'bagging_freq': 7, 'min_child_samples': 96}. Best is trial 85 with value: -0.195663986355056.[0m




[32m[I 2023-03-15 17:17:53,587][0m Trial 87 finished with value: -0.19931865726190132 and parameters: {'lambda_l1': 2.2585382895855057e-07, 'lambda_l2': 0.8413669840830197, 'num_leaves': 208, 'feature_fraction': 0.4474656391810778, 'bagging_fraction': 0.439189143662596, 'bagging_freq': 3, 'min_child_samples': 100}. Best is trial 85 with value: -0.195663986355056.[0m




[32m[I 2023-03-15 17:17:53,862][0m Trial 88 finished with value: -0.19670634464795805 and parameters: {'lambda_l1': 4.1099998133718915e-07, 'lambda_l2': 3.525053552657166, 'num_leaves': 230, 'feature_fraction': 0.46425145071257345, 'bagging_fraction': 0.4001757699485465, 'bagging_freq': 7, 'min_child_samples': 95}. Best is trial 85 with value: -0.195663986355056.[0m




[32m[I 2023-03-15 17:17:54,151][0m Trial 89 finished with value: -0.19518578813741502 and parameters: {'lambda_l1': 5.370123391392152e-07, 'lambda_l2': 9.945353884750187, 'num_leaves': 238, 'feature_fraction': 0.5009761023678655, 'bagging_fraction': 0.4100356630490645, 'bagging_freq': 7, 'min_child_samples': 89}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:54,710][0m Trial 90 finished with value: -0.2103296898651786 and parameters: {'lambda_l1': 7.696985405898602e-07, 'lambda_l2': 6.82769857619915, 'num_leaves': 240, 'feature_fraction': 0.4952936899150728, 'bagging_fraction': 0.4556926967572818, 'bagging_freq': 7, 'min_child_samples': 18}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:54,989][0m Trial 91 finished with value: -0.19661968622204976 and parameters: {'lambda_l1': 1.6246635769190126e-06, 'lambda_l2': 3.297149505683604, 'num_leaves': 215, 'feature_fraction': 0.46855092027154127, 'bagging_fraction': 0.403275737309917, 'bagging_freq': 7, 'min_child_samples': 94}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:55,281][0m Trial 92 finished with value: -0.1973405066152804 and parameters: {'lambda_l1': 1.9438115769712457e-06, 'lambda_l2': 9.327029248485397, 'num_leaves': 214, 'feature_fraction': 0.5212880493598793, 'bagging_fraction': 0.43975381058441354, 'bagging_freq': 7, 'min_child_samples': 88}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:55,639][0m Trial 93 finished with value: -0.19981453390706122 and parameters: {'lambda_l1': 5.284602925314088e-07, 'lambda_l2': 2.9019524057958845, 'num_leaves': 200, 'feature_fraction': 0.4784805967379807, 'bagging_fraction': 0.46143018056053786, 'bagging_freq': 2, 'min_child_samples': 93}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:55,967][0m Trial 94 finished with value: -0.19642449137768137 and parameters: {'lambda_l1': 1.1199606688032605e-06, 'lambda_l2': 9.968665165564378, 'num_leaves': 227, 'feature_fraction': 0.4316724131879598, 'bagging_fraction': 0.43109104920429275, 'bagging_freq': 7, 'min_child_samples': 89}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:56,262][0m Trial 95 finished with value: -0.19851067867790656 and parameters: {'lambda_l1': 1.0447533352596061e-07, 'lambda_l2': 6.562602419017172, 'num_leaves': 228, 'feature_fraction': 0.4354875932616067, 'bagging_fraction': 0.4158051004163917, 'bagging_freq': 7, 'min_child_samples': 85}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:56,575][0m Trial 96 finished with value: -0.20260870637902312 and parameters: {'lambda_l1': 3.0211972669488654e-06, 'lambda_l2': 1.5495249943654148, 'num_leaves': 98, 'feature_fraction': 0.45202481103069336, 'bagging_fraction': 0.4775092139998097, 'bagging_freq': 7, 'min_child_samples': 79}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:56,867][0m Trial 97 finished with value: -0.19689870727702347 and parameters: {'lambda_l1': 6.001388065427983e-06, 'lambda_l2': 9.881643181541289, 'num_leaves': 189, 'feature_fraction': 0.43323524684902237, 'bagging_fraction': 0.43049808315993804, 'bagging_freq': 7, 'min_child_samples': 89}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:57,193][0m Trial 98 finished with value: -0.1987366447184217 and parameters: {'lambda_l1': 3.084842593173705e-07, 'lambda_l2': 6.170566861236273, 'num_leaves': 236, 'feature_fraction': 0.4970805817048882, 'bagging_fraction': 0.4667731071643576, 'bagging_freq': 7, 'min_child_samples': 86}. Best is trial 89 with value: -0.19518578813741502.[0m




[32m[I 2023-03-15 17:17:57,869][0m Trial 99 finished with value: -0.20347408963666985 and parameters: {'lambda_l1': 5.337012670112608e-08, 'lambda_l2': 1.072244239701656, 'num_leaves': 247, 'feature_fraction': 0.4268505652670811, 'bagging_fraction': 0.447752383766269, 'bagging_freq': 7, 'min_child_samples': 83}. Best is trial 89 with value: -0.19518578813741502.[0m




{'lambda_l1': 5.370123391392152e-07,
 'lambda_l2': 9.945353884750187,
 'num_leaves': 238,
 'feature_fraction': 0.5009761023678655,
 'bagging_fraction': 0.4100356630490645,
 'bagging_freq': 7,
 'min_child_samples': 89}

In [9]:
model = train_optimized_model(study, X_train, y_train)
preds = model.predict_proba(X_valid)[:,1]
print("Brier Score:", brier_score_loss(y_valid, preds))
print("ROC AUC:", roc_auc_score(y_valid, preds))
print("Parameters")
print(*(f"- {key}: {value}" for key, value in model.get_params(deep=True).items()), sep="\n")
print("Features")
print(*(f"- {name}: {imp}" for name, imp in sorted(zip(model.feature_name_, model.feature_importances_), key=lambda x: x[1], reverse=True)), sep="\n")


Brier Score: 0.16191665593248014
ROC AUC: 0.8451003086419754
Parameters
- boosting_type: gbdt
- class_weight: None
- colsample_bytree: 1.0
- importance_type: split
- learning_rate: 0.1
- max_depth: -1
- min_child_samples: 89
- min_child_weight: 0.001
- min_split_gain: 0.0
- n_estimators: 100
- n_jobs: -1
- num_leaves: 238
- objective: binary
- random_state: None
- reg_alpha: 0.0
- reg_lambda: 0.0
- silent: warn
- subsample: 1.0
- subsample_for_bin: 200000
- subsample_freq: 0
- metric: l2
- verbosity: -1
- lambda_l1: 5.370123391392152e-07
- lambda_l2: 9.945353884750187
- feature_fraction: 0.5009761023678655
- bagging_fraction: 0.4100356630490645
- bagging_freq: 7
Features
- SeedDiff: 36
- OrdinalRankDiff: 28
- TOTouDiff: 15
- FGMRegDiff: 12
- ScoreTouDiff: 12
- FGARegDiff: 11
- OROppTouDiff: 11
- ScoreRegDiff: 10
- FTPTouDiff: 10
- ScoreOppRegDiff: 9
- FGPRegDiff: 9
- AstTouDiff: 9
- BlkTouDiff: 9
- FGAOppRegDiff: 8
- DROppRegDiff: 8
- ORRegDiff: 6
- OROppRegDiff: 6
- TOOppRegDiff: 6
- 

# Submit to the competition

We"ll begin by using the trained model to generate predictions, which we"ll save to a CSV file.

In [10]:
# Use the model to generate predictions
model.fit(X, y)
predictions = model.predict_proba(X_submission)[:,1]

# Save the predictions to a CSV file
output = pd.DataFrame({"ID": X_submission.index,
                       "Pred": predictions})
output.to_csv("submission.csv", index=False)
print(output.shape)
output.describe()

(130683, 2)


Unnamed: 0,Pred
count,130683.0
mean,0.466208
std,0.020941
min,0.067112
25%,0.466034
50%,0.466034
75%,0.466034
max,0.909202
