# Imports and Utilities

In [1]:
from typing import List, Tuple
import numpy as np
import pandas as pd
from IPython.display import Markdown
import optuna
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.model_selection import cross_val_score

# Kaggle competition slug; interpolated into the /kaggle/input/<slug>/ paths
# that extract_data and get_submission_outcomes read from.
COMPETITION_NAME = "warmup-round-march-machine-learning-mania-2023"
# Default for show_df's `verbose` flag: when False, show_df displays nothing.
VERBOSE = False

# Do not truncate columns when a DataFrame is displayed.
pd.set_option("display.max_columns", None)

def show_df(df: pd.DataFrame, name: str = "DataFrame", verbose: bool = VERBOSE):
    """Render a short notebook preview of *df* (head, tail, describe).

    Nothing is displayed unless *verbose* is truthy. *name* is used as the
    section title of the preview.
    """
    if not verbose:
        return
    display(Markdown(f"## {name}"))
    # Each section is a sub-heading followed by the matching view of the frame.
    sections = (
        ("### Head", df.head),
        ("### Tail", df.tail),
        ("### Description", df.describe),
    )
    for heading, make_view in sections:
        display(Markdown(heading))
        display(make_view())


def extract_data(filename: str, competition_name:str = COMPETITION_NAME) -> pd.DataFrame:
    """Load the men's (``M`` prefix) and women's (``W`` prefix) CSV for *filename*.

    Both files are looked up under ``/kaggle/input/<competition_name>/``. A
    missing file is tolerated and contributes nothing; the frames that were
    found are concatenated (original row indices are kept) and returned.
    """
    candidate_paths = (
        f"/kaggle/input/{competition_name}/M{filename}.csv",
        f"/kaggle/input/{competition_name}/W{filename}.csv",
    )
    frames = []
    for path in candidate_paths:
        try:
            frame = pd.read_csv(path)
        except FileNotFoundError:
            # pd.concat skips None entries, so a missing file is simply ignored.
            frame = None
        frames.append(frame)
    combined = pd.concat(frames)
    show_df(combined, filename)
    return combined


def get_team_features(detailed_results: pd.DataFrame) -> pd.DataFrame:
    """Turn per-game detailed results into one feature row per (Season, TeamID).

    Works on a copy of the input and runs it through the cleaning, game-to-team
    and per-team aggregation steps, in that order.
    """
    team_features = (
        detailed_results.copy()
        .pipe(clean_detailed_results)
        .pipe(transform_game_to_team)
        .pipe(transform_team_results)
    )
    show_df(team_features)
    return team_features


def get_seed_features(df_in: pd.DataFrame) -> pd.DataFrame:
    """Return tournament seeds after season 2002 with ``Seed`` as an integer.

    Rows with ``Season <= 2002`` are discarded. Every non-digit character is
    stripped from the ``Seed`` strings before they are converted to ``int``.
    The input frame is not modified.
    """
    # Filter first and take one explicit copy of the result. Assigning a column
    # on a boolean-filtered slice would otherwise raise SettingWithCopyWarning.
    df = df_in.loc[df_in["Season"] > 2002].copy()
    df["Seed"] = df["Seed"].str.replace(r"\D+", "", regex=True).astype(int)
    show_df(df)
    return df


def get_ranking_features(df_in:pd.DataFrame) -> pd.DataFrame:
    """Reduce ranking rows to one median value per (Season, TeamID).

    Only rows whose ``RankingDayNum`` equals the maximum ``RankingDayNum`` found
    in the whole input frame are kept (a single global maximum, not one per
    season). ``SystemName`` and ``RankingDayNum`` are then dropped and the
    remaining columns are aggregated with the median.
    """
    is_last_day = df_in["RankingDayNum"] == df_in["RankingDayNum"].max()
    last_day_rankings = df_in.copy()[is_last_day].drop(
        columns=["SystemName", "RankingDayNum"]
    )
    ranking_features = (
        last_day_rankings.groupby(["Season", "TeamID"]).agg("median").reset_index()
    )
    show_df(ranking_features)
    return ranking_features


def get_game_outcomes(df):
    """Build one outcome record per game row of *df* using ``parse_row``.

    Returns a new DataFrame made from the records ``parse_row`` produces.
    """
    game_records = list(map(parse_row, df.to_records()))
    outcomes = pd.DataFrame(game_records)
    show_df(outcomes)
    return outcomes


def clean_detailed_results(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* without the ``WLoc`` and ``DayNum`` columns."""
    unused_columns = ["WLoc", "DayNum"]
    return df.drop(columns=unused_columns)


def transform_game_to_team(game_results: pd.DataFrame) -> pd.DataFrame:
    """Reshape one-row-per-game results into one row per team per game.

    Every game appears twice in the output: once from the winner's point of
    view and once from the loser's. The opponent's team id column
    (``TeamIDOpp``) is removed.
    """
    perspectives = tuple(rename_columns(game_results, prefix) for prefix in ("W", "L"))
    return pd.concat(perspectives).drop(columns=["TeamIDOpp"])


def transform_team_results(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate team-game rows to per-(Season, TeamID) medians plus shooting ratios.

    After taking the median of every column per team and season, six ratio
    columns are appended, each computed as made / attempted on those medians:
    field goals, three-pointers and free throws, for the team and for its
    opponents (``Opp`` suffix).
    """
    medians = df.groupby(["Season", "TeamID"]).median()
    # (new ratio column, "made" column, "attempted" column)
    ratio_specs = (
        ("FGP", "FGM", "FGA"),
        ("FGP3", "FGM3", "FGA3"),
        ("FTP", "FTM", "FTA"),
        ("FGPOpp", "FGMOpp", "FGAOpp"),
        ("FGP3Opp", "FGM3Opp", "FGA3Opp"),
        ("FTPOpp", "FTMOpp", "FTAOpp"),
    )
    for ratio, made, attempted in ratio_specs:
        medians[ratio] = medians[made] / medians[attempted]
    return medians.reset_index()
    
    
def rename_columns(df: pd.DataFrame, team_prefix: str) -> pd.DataFrame:
    """Return a copy of *df* whose column names are mapped through ``rename_column``.

    *team_prefix* is passed to ``rename_column`` for every column name. The
    input frame is left untouched.
    """
    renamed = df.copy()
    renamed.columns = [rename_column(label, team_prefix) for label in renamed.columns]
    return renamed


def rename_column(column_name: str, team_prefix: str) -> str:
    """Rewrite a winner/loser column name from one team's point of view.

    Args:
        column_name: Original column name, e.g. ``"WScore"`` or ``"Season"``.
        team_prefix: ``"W"`` to take the winner's perspective, ``"L"`` for the
            loser's.

    Returns:
        The name without its prefix when it starts with *team_prefix*, the name
        without its prefix plus an ``"Opp"`` suffix when it starts with the
        other prefix, and the name unchanged otherwise.

    Raises:
        ValueError: If *team_prefix* is neither ``"W"`` nor ``"L"``.
    """
    opponent_prefixes = {"W": "L", "L": "W"}
    if team_prefix not in opponent_prefixes:
        raise ValueError(f"team_prefix must be 'W' or 'L', got {team_prefix!r}")
    opponent_prefix = opponent_prefixes[team_prefix]
    # Slice off exactly one prefix. str.lstrip would strip every leading
    # occurrence of the character (e.g. "WWins" -> "ins").
    if column_name.startswith(team_prefix):
        return column_name[len(team_prefix):]
    if column_name.startswith(opponent_prefix):
        return f"{column_name[len(opponent_prefix):]}Opp"
    return column_name


def split_winner_and_looser_columns(df: pd.DataFrame) -> Tuple[List[str], List[str]]:
    """Split the column names of *df* into a winner view and a loser view.

    The winner list holds every column that does not start with ``"L"``; the
    loser list holds every column that does not start with ``"W"``. Columns
    with neither prefix therefore appear in both lists.
    """
    winner_columns: List[str] = []
    loser_columns: List[str] = []
    for label in df.columns:
        if not label.startswith("L"):
            winner_columns.append(label)
        if not label.startswith("W"):
            loser_columns.append(label)
    return winner_columns, loser_columns


def merge_features(
    season_features: pd.DataFrame, 
    tournament_features: pd.DataFrame, 
    seed_features: pd.DataFrame, 
    ranking_features: pd.DataFrame
) -> pd.DataFrame:
    """Join all per-team feature tables on (Season, TeamID).

    Season and tournament features are inner-joined, with overlapping column
    names suffixed ``Reg`` and ``Tou`` respectively. Seeds are inner-joined
    next, and rankings are left-joined last, so teams without a ranking row
    are kept.
    """
    join_keys = ["Season", "TeamID"]
    season_and_tournament = season_features.merge(
        tournament_features,
        how="inner",
        on=join_keys,
        suffixes=("Reg", "Tou"),
    )
    with_seeds = season_and_tournament.merge(seed_features, how="inner", on=join_keys)
    merged = with_seeds.merge(ranking_features, how="left", on=join_keys)
    show_df(merged)
    return merged


def parse_row(row):
    """Convert one game row into an outcome record keyed by the ordered team ids.

    Args:
        row: Mapping-like object with ``'Season'``, ``'WTeamID'`` and
            ``'LTeamID'`` entries.

    Returns:
        A dict with ``ID`` (``"<season>_<low id>_<high id>"``), ``Season``,
        ``LowID``, ``HighID`` and ``Win``. ``Win`` is True when the team with
        the lower id won the game.

    Raises:
        ValueError: If the winning and losing team ids are equal. Such a row
            has no low/high ordering and would otherwise fail with an
            UnboundLocalError.
    """
    season = row['Season']
    winning_team_id = row['WTeamID']
    losing_team_id = row['LTeamID']
    if winning_team_id == losing_team_id:
        raise ValueError(
            f"Winning and losing team ids are identical ({winning_team_id}) "
            f"in season {season}"
        )
    if winning_team_id < losing_team_id:
        small_id, big_id = winning_team_id, losing_team_id
        outcome = True
    else:
        small_id, big_id = losing_team_id, winning_team_id
        outcome = False
    return {
        "ID": f"{season}_{small_id}_{big_id}",
        'Season': season,
        'LowID': small_id,
        'HighID': big_id,
        'Win': outcome,
    }


def merge_outcomes_with_features(outcomes: pd.DataFrame, features: pd.DataFrame, how: str = "inner") -> pd.DataFrame:
    """Attach High-minus-Low feature differences to every game in *outcomes*.

    The team features are merged in twice, once for the ``HighID`` team and
    once for the ``LowID`` team, using *how* for both merges. Each feature
    (every column of *features* except ``Season``, ``TeamID`` and ``Gender``)
    is replaced by a single ``<name>Diff`` column. The id and season helper
    columns are removed and the result is indexed by ``ID``.
    """
    non_feature_columns = ("Season", "TeamID", "Gender")
    feature_names = [col for col in features.columns if col not in non_feature_columns]

    with_high_team = outcomes.merge(
        features,
        how=how,
        left_on=["Season", "HighID"],
        right_on=["Season", "TeamID"],
    )
    # The second merge repeats every feature column; suffixes tell the two teams apart.
    data = with_high_team.merge(
        features,
        how=how,
        left_on=["Season", "LowID"],
        right_on=["Season", "TeamID"],
        suffixes=("High", "Low"),
    )

    for feature in feature_names:
        high_column = f"{feature}High"
        low_column = f"{feature}Low"
        data[f"{feature}Diff"] = data[high_column] - data[low_column]
        data = data.drop(columns=[high_column, low_column])

    helper_columns = ["Season", "HighID", "LowID","TeamIDHigh","TeamIDLow"]
    data = data.drop(columns=helper_columns).set_index("ID")
    show_df(data)
    return data


def get_submission_outcomes() -> pd.DataFrame:
    """Read the sample submission and split each ``ID`` into its integer parts.

    The ``Pred`` column is dropped. ``ID`` is split on ``"_"`` into ``Season``,
    ``LowID`` and ``HighID``, which are stored as integers next to the
    original ``ID`` column.
    """
    submission_path = f"/kaggle/input/{COMPETITION_NAME}/SampleSubmissionWarmup.csv"
    outcomes = pd.read_csv(submission_path).drop(columns="Pred")
    id_parts = ["Season", "LowID", "HighID"]
    outcomes[id_parts] = outcomes["ID"].str.split("_", expand=True)
    outcomes[id_parts] = outcomes[id_parts].astype(int)
    show_df(outcomes)
    return outcomes


def objective(trial: optuna.Trial, X_train, y_train):
    """Optuna objective: mean 5-fold negative Brier score of a LightGBM classifier.

    The regularisation, tree-shape and sampling hyper-parameters are drawn from
    *trial*; the remaining settings are fixed. Higher return values are better.
    """
    fixed_params = {
        "objective": "binary",
        "metric": "l2",
        "verbosity": -1,
        "boosting_type": "gbdt",
    }
    # Suggest calls stay in their original order.
    tuned_params = {
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    classifier = lgb.LGBMClassifier(**{**fixed_params, **tuned_params})
    fold_scores = cross_val_score(
        classifier, X_train, y_train, scoring="neg_brier_score", cv=5
    )
    return np.mean(fold_scores)


def run_study(X_train, y_train, n_trials: int = 100):
    """Run an Optuna hyper-parameter search and return the finished study.

    Args:
        X_train: Training features handed to ``objective``.
        y_train: Training labels handed to ``objective``.
        n_trials: Number of trials to run. Defaults to 100, the value that was
            previously hard-coded.

    Returns:
        The ``optuna`` study after optimisation. The study maximises the value
        returned by ``objective``.
    """
    study = optuna.create_study(direction="maximize")
    study.optimize(
        lambda trial: objective(trial, X_train, y_train),
        n_trials=n_trials,
    )
    return study


def train_optimized_model(study, X, y):
    """Fit a LightGBM classifier on (X, y) using the study's best parameters.

    The fixed settings (binary objective, l2 metric, silent output, gbdt
    boosting) are combined with ``study.best_params``. The fitted model is
    returned.
    """
    fixed_settings = {
        "objective": "binary",
        "metric": "l2",
        "verbosity": -1,
        "boosting_type": "gbdt",
    }
    classifier = lgb.LGBMClassifier(**fixed_settings, **study.best_params)
    return classifier.fit(X, y)

# Load Data

In [2]:
# Each call loads the men's and women's CSV for the given file stem.
season_results = extract_data(filename="RegularSeasonDetailedResults")
tournament_results = extract_data(filename="NCAATourneyDetailedResults")
seeds = extract_data(filename="NCAATourneySeeds")
rankings = extract_data(filename="MasseyOrdinals")

# Transform Data

In [3]:
# Per-(Season, TeamID) features from the regular-season games.
season_team_features = get_team_features(season_results)

# Shift tournament results forward by one season, so the later join on
# (Season, TeamID) pairs a season's regular-season stats with the team's
# tournament stats from the season before.
# NOTE: this mutates tournament_results in place; re-running the cell shifts
# the seasons by another year.
tournament_results["Season"] += 1
tournament_team_features = get_team_features(tournament_results)

# Integer seeds for seasons after 2002.
seed_features = get_seed_features(seeds)

# Median rank per (Season, TeamID) on the latest ranking day in the data.
ranking_features = get_ranking_features(rankings)

## Merge Features

In [4]:
# One row per (Season, TeamID) combining all four feature tables.
features = merge_features(
    season_features=season_team_features,
    tournament_features=tournament_team_features,
    seed_features=seed_features,
    ranking_features=ranking_features,
)

## Build Datasets

In [5]:
from sklearn.model_selection import train_test_split

# Compact tournament results supply the game outcomes (the labels).
data = extract_data("NCAATourneyCompactResults")
# Random game-level split with train_test_split's default proportions;
# random_state is fixed so the split is reproducible.
data_train, data_valid = train_test_split(data, random_state=0)

# Outcome records for all games, for each split, and for the rows of the
# sample submission.
outcomes = get_game_outcomes(data)
outcomes_train = get_game_outcomes(data_train)
outcomes_valid = get_game_outcomes(data_valid)
outcomes_submission = get_submission_outcomes()

In [6]:
# Inner joins (the default): games without features for both teams are dropped.
features_train = merge_outcomes_with_features(outcomes=outcomes_train, features=features)
features_valid = merge_outcomes_with_features(outcomes=outcomes_valid, features=features)
features_full = merge_outcomes_with_features(outcomes=outcomes, features=features)

In [7]:
# Separate the "Win" label from the feature-difference columns for each dataset.
y_train = features_train["Win"]
X_train = features_train.drop(columns="Win")
y_valid = features_valid["Win"]
X_valid = features_valid.drop(columns="Win")
y = features_full["Win"]
X = features_full.drop(columns="Win")
# Left join keeps every submission row; missing feature differences become 0.
submission_features = merge_outcomes_with_features(outcomes_submission, features, how="left")
X_submission = submission_features.fillna(0)

# Train a Model


In [8]:
# Hyper-parameter search on the training split only.
study = run_study(X_train, y_train)
# Last expression of the cell, so the notebook displays the best parameters.
study.best_params

[32m[I 2023-03-15 16:47:26,607][0m A new study created in memory with name: no-name-2eac46fd-d02b-490d-9472-1014a48b800a[0m




[32m[I 2023-03-15 16:47:26,978][0m Trial 0 finished with value: -0.21994829304718316 and parameters: {'lambda_l1': 9.971306902267015e-08, 'lambda_l2': 0.0003128122059383428, 'num_leaves': 24, 'feature_fraction': 0.6394715984948618, 'bagging_fraction': 0.8737065008476477, 'bagging_freq': 3, 'min_child_samples': 31}. Best is trial 0 with value: -0.21994829304718316.[0m




[32m[I 2023-03-15 16:47:27,202][0m Trial 1 finished with value: -0.2044357163180759 and parameters: {'lambda_l1': 0.8288560347882109, 'lambda_l2': 0.0007511806272907411, 'num_leaves': 229, 'feature_fraction': 0.555111349452285, 'bagging_fraction': 0.47676260301270473, 'bagging_freq': 1, 'min_child_samples': 39}. Best is trial 1 with value: -0.2044357163180759.[0m




[32m[I 2023-03-15 16:47:27,484][0m Trial 2 finished with value: -0.21513092668707862 and parameters: {'lambda_l1': 7.149576225795614e-07, 'lambda_l2': 2.4027870471651297e-06, 'num_leaves': 49, 'feature_fraction': 0.8458014608974884, 'bagging_fraction': 0.8562577022652462, 'bagging_freq': 4, 'min_child_samples': 49}. Best is trial 1 with value: -0.2044357163180759.[0m




[32m[I 2023-03-15 16:47:27,650][0m Trial 3 finished with value: -0.1975608111220999 and parameters: {'lambda_l1': 0.0034832366050449623, 'lambda_l2': 5.674724956140459e-06, 'num_leaves': 252, 'feature_fraction': 0.6218115390498417, 'bagging_fraction': 0.5301818011760662, 'bagging_freq': 7, 'min_child_samples': 75}. Best is trial 3 with value: -0.1975608111220999.[0m




[32m[I 2023-03-15 16:47:27,937][0m Trial 4 finished with value: -0.20632632933021117 and parameters: {'lambda_l1': 1.8540304932321004e-07, 'lambda_l2': 1.8217314468455557, 'num_leaves': 77, 'feature_fraction': 0.8702807490571507, 'bagging_fraction': 0.8093051502091178, 'bagging_freq': 1, 'min_child_samples': 46}. Best is trial 3 with value: -0.1975608111220999.[0m




[32m[I 2023-03-15 16:47:28,451][0m Trial 5 finished with value: -0.22820291687336006 and parameters: {'lambda_l1': 0.013136605995215295, 'lambda_l2': 0.05697692511882116, 'num_leaves': 13, 'feature_fraction': 0.9990354430229181, 'bagging_fraction': 0.5526846517209361, 'bagging_freq': 5, 'min_child_samples': 13}. Best is trial 3 with value: -0.1975608111220999.[0m




[32m[I 2023-03-15 16:47:28,683][0m Trial 6 finished with value: -0.19700357156755688 and parameters: {'lambda_l1': 4.251869868619648, 'lambda_l2': 0.0012521646567345596, 'num_leaves': 167, 'feature_fraction': 0.9224151543093302, 'bagging_fraction': 0.8908987143015779, 'bagging_freq': 3, 'min_child_samples': 75}. Best is trial 6 with value: -0.19700357156755688.[0m




[32m[I 2023-03-15 16:47:28,918][0m Trial 7 finished with value: -0.20606891918603557 and parameters: {'lambda_l1': 2.710975094880878e-08, 'lambda_l2': 5.716068385064915e-07, 'num_leaves': 45, 'feature_fraction': 0.9323965529892259, 'bagging_fraction': 0.6217324460958576, 'bagging_freq': 3, 'min_child_samples': 45}. Best is trial 6 with value: -0.19700357156755688.[0m




[32m[I 2023-03-15 16:47:29,183][0m Trial 8 finished with value: -0.2134639335880383 and parameters: {'lambda_l1': 0.0020608358564900295, 'lambda_l2': 1.4442864108953946e-06, 'num_leaves': 202, 'feature_fraction': 0.8164078450235432, 'bagging_fraction': 0.764062274564844, 'bagging_freq': 5, 'min_child_samples': 49}. Best is trial 6 with value: -0.19700357156755688.[0m




[32m[I 2023-03-15 16:47:29,402][0m Trial 9 finished with value: -0.19980543727603584 and parameters: {'lambda_l1': 0.026697480750349965, 'lambda_l2': 8.407946744089777, 'num_leaves': 164, 'feature_fraction': 0.7650882759953337, 'bagging_fraction': 0.5027522517297154, 'bagging_freq': 7, 'min_child_samples': 36}. Best is trial 6 with value: -0.19700357156755688.[0m




[32m[I 2023-03-15 16:47:29,615][0m Trial 10 finished with value: -0.19032733355980708 and parameters: {'lambda_l1': 4.207619623901993, 'lambda_l2': 1.568891320609816e-08, 'num_leaves': 112, 'feature_fraction': 0.42177240261729254, 'bagging_fraction': 0.9756909848047586, 'bagging_freq': 2, 'min_child_samples': 100}. Best is trial 10 with value: -0.19032733355980708.[0m




[32m[I 2023-03-15 16:47:29,825][0m Trial 11 finished with value: -0.19225600813566346 and parameters: {'lambda_l1': 2.1365616901003297, 'lambda_l2': 1.6082255668125057e-08, 'num_leaves': 115, 'feature_fraction': 0.47505668474468665, 'bagging_fraction': 0.9944858138155152, 'bagging_freq': 2, 'min_child_samples': 100}. Best is trial 10 with value: -0.19032733355980708.[0m




[32m[I 2023-03-15 16:47:30,066][0m Trial 12 finished with value: -0.19174234913136382 and parameters: {'lambda_l1': 8.488011022578977, 'lambda_l2': 1.5279423051927414e-08, 'num_leaves': 102, 'feature_fraction': 0.405769530498596, 'bagging_fraction': 0.9936676198340365, 'bagging_freq': 2, 'min_child_samples': 96}. Best is trial 10 with value: -0.19032733355980708.[0m




[32m[I 2023-03-15 16:47:30,649][0m Trial 13 finished with value: -0.1947376382781963 and parameters: {'lambda_l1': 0.21756249335729894, 'lambda_l2': 1.1214631333132615e-08, 'num_leaves': 101, 'feature_fraction': 0.4336298368169248, 'bagging_fraction': 0.9892560082071009, 'bagging_freq': 2, 'min_child_samples': 99}. Best is trial 10 with value: -0.19032733355980708.[0m




[32m[I 2023-03-15 16:47:30,855][0m Trial 14 finished with value: -0.19421509510259855 and parameters: {'lambda_l1': 0.25700668023546563, 'lambda_l2': 9.187219926131146e-08, 'num_leaves': 148, 'feature_fraction': 0.4130629447345516, 'bagging_fraction': 0.7102548395965963, 'bagging_freq': 2, 'min_child_samples': 80}. Best is trial 10 with value: -0.19032733355980708.[0m




[32m[I 2023-03-15 16:47:31,053][0m Trial 15 finished with value: -0.18920831627419457 and parameters: {'lambda_l1': 9.341354535863696, 'lambda_l2': 1.1875663723161188e-07, 'num_leaves': 86, 'feature_fraction': 0.504792963745103, 'bagging_fraction': 0.938425555572144, 'bagging_freq': 1, 'min_child_samples': 87}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:31,241][0m Trial 16 finished with value: -0.18923314537404906 and parameters: {'lambda_l1': 9.471512084748438, 'lambda_l2': 1.9645120685207538e-07, 'num_leaves': 71, 'feature_fraction': 0.5085292569549709, 'bagging_fraction': 0.8915075798313586, 'bagging_freq': 1, 'min_child_samples': 83}. Best is trial 15 with value: -0.18920831627419457.[0m
[32m[I 2023-03-15 16:47:31,413][0m Trial 17 finished with value: -0.2007903960472436 and parameters: {'lambda_l1': 4.6612029191784705e-05, 'lambda_l2': 1.5386524429709584e-05, 'num_leaves': 71, 'feature_fraction': 0.5588293447309475, 'bagging_fraction': 0.4170297660584225, 'bagging_freq': 1, 'min_child_samples': 64}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:31,628][0m Trial 18 finished with value: -0.2004136332054073 and parameters: {'lambda_l1': 0.09553709888869015, 'lambda_l2': 3.694404194878219e-05, 'num_leaves': 74, 'feature_fraction': 0.5109892572536379, 'bagging_fraction': 0.9321107072984374, 'bagging_freq': 1, 'min_child_samples': 86}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:31,879][0m Trial 19 finished with value: -0.2047832556319651 and parameters: {'lambda_l1': 0.5607779683234949, 'lambda_l2': 3.088655545439923e-07, 'num_leaves': 137, 'feature_fraction': 0.6899184575023948, 'bagging_fraction': 0.7961360419560326, 'bagging_freq': 4, 'min_child_samples': 64}. Best is trial 15 with value: -0.18920831627419457.[0m
[32m[I 2023-03-15 16:47:32,113][0m Trial 20 finished with value: -0.20554583571151305 and parameters: {'lambda_l1': 0.06672790616238321, 'lambda_l2': 1.5706641457909142e-07, 'num_leaves': 42, 'feature_fraction': 0.5034488143538955, 'bagging_fraction': 0.8983981351697107, 'bagging_freq': 6, 'min_child_samples': 64}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:32,306][0m Trial 21 finished with value: -0.19105317880128778 and parameters: {'lambda_l1': 9.93433623797628, 'lambda_l2': 8.775430978656859e-08, 'num_leaves': 92, 'feature_fraction': 0.45671062385668126, 'bagging_fraction': 0.9339153135168669, 'bagging_freq': 1, 'min_child_samples': 89}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:32,550][0m Trial 22 finished with value: -0.19818037924725163 and parameters: {'lambda_l1': 1.3043836921712724, 'lambda_l2': 7.15605772393024e-08, 'num_leaves': 123, 'feature_fraction': 0.5309888089645737, 'bagging_fraction': 0.9404599151353346, 'bagging_freq': 2, 'min_child_samples': 86}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:32,740][0m Trial 23 finished with value: -0.1902443239972119 and parameters: {'lambda_l1': 9.319411617582514, 'lambda_l2': 7.216342052627446e-07, 'num_leaves': 62, 'feature_fraction': 0.4533459167009358, 'bagging_fraction': 0.8387100923907639, 'bagging_freq': 1, 'min_child_samples': 91}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:32,959][0m Trial 24 finished with value: -0.20194228725488447 and parameters: {'lambda_l1': 0.6332355931746642, 'lambda_l2': 9.172222687662739e-07, 'num_leaves': 59, 'feature_fraction': 0.48302385258862895, 'bagging_fraction': 0.8440926330312424, 'bagging_freq': 1, 'min_child_samples': 73}. Best is trial 15 with value: -0.18920831627419457.[0m




[32m[I 2023-03-15 16:47:33,165][0m Trial 25 finished with value: -0.1883751525451834 and parameters: {'lambda_l1': 8.400913356205104, 'lambda_l2': 8.696409927360178e-06, 'num_leaves': 30, 'feature_fraction': 0.5619428606900359, 'bagging_fraction': 0.825344731644865, 'bagging_freq': 3, 'min_child_samples': 90}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:33,642][0m Trial 26 finished with value: -0.18933626122338815 and parameters: {'lambda_l1': 0.23876251152369793, 'lambda_l2': 6.877397911483581e-06, 'num_leaves': 2, 'feature_fraction': 0.5651935075081659, 'bagging_fraction': 0.7706921661555756, 'bagging_freq': 3, 'min_child_samples': 58}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:34,392][0m Trial 27 finished with value: -0.19647775255216407 and parameters: {'lambda_l1': 1.8038994539537132, 'lambda_l2': 1.692291143874309e-05, 'num_leaves': 26, 'feature_fraction': 0.5990761630334157, 'bagging_fraction': 0.8979908698998089, 'bagging_freq': 5, 'min_child_samples': 82}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:34,614][0m Trial 28 finished with value: -0.20231638520368556 and parameters: {'lambda_l1': 1.2026649807588468, 'lambda_l2': 2.744822686101558e-06, 'num_leaves': 33, 'feature_fraction': 0.5249747119115542, 'bagging_fraction': 0.8221744980103467, 'bagging_freq': 3, 'min_child_samples': 71}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:35,049][0m Trial 29 finished with value: -0.2190654213371866 and parameters: {'lambda_l1': 0.06134524413256134, 'lambda_l2': 6.109891648343265e-05, 'num_leaves': 87, 'feature_fraction': 0.669596797745586, 'bagging_fraction': 0.883368671928269, 'bagging_freq': 3, 'min_child_samples': 25}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:35,252][0m Trial 30 finished with value: -0.1937432163489619 and parameters: {'lambda_l1': 2.919270167551605, 'lambda_l2': 3.467944385968634e-07, 'num_leaves': 20, 'feature_fraction': 0.5953156218110951, 'bagging_fraction': 0.7324968397769754, 'bagging_freq': 2, 'min_child_samples': 92}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:35,462][0m Trial 31 finished with value: -0.19523964225755486 and parameters: {'lambda_l1': 0.31680162790459104, 'lambda_l2': 5.9146186739327725e-06, 'num_leaves': 3, 'feature_fraction': 0.568970211156452, 'bagging_fraction': 0.7734668662723776, 'bagging_freq': 4, 'min_child_samples': 57}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:35,727][0m Trial 32 finished with value: -0.20354546061342 and parameters: {'lambda_l1': 1.0093942873481045, 'lambda_l2': 8.646201150155191e-05, 'num_leaves': 9, 'feature_fraction': 0.6564367043153381, 'bagging_fraction': 0.8450200873488231, 'bagging_freq': 3, 'min_child_samples': 57}. Best is trial 25 with value: -0.1883751525451834.[0m




[32m[I 2023-03-15 16:47:35,924][0m Trial 33 finished with value: -0.18757557690127297 and parameters: {'lambda_l1': 8.42578768681397, 'lambda_l2': 2.4201088034013655e-06, 'num_leaves': 27, 'feature_fraction': 0.5536963500833583, 'bagging_fraction': 0.7844954777732736, 'bagging_freq': 4, 'min_child_samples': 81}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:36,136][0m Trial 34 finished with value: -0.1933461274769995 and parameters: {'lambda_l1': 3.478076175940174, 'lambda_l2': 1.940738596649301e-06, 'num_leaves': 51, 'feature_fraction': 0.4896834103326158, 'bagging_fraction': 0.8660426269910105, 'bagging_freq': 4, 'min_child_samples': 82}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:36,343][0m Trial 35 finished with value: -0.18842632681553667 and parameters: {'lambda_l1': 7.696401459353781, 'lambda_l2': 2.416699938611456e-07, 'num_leaves': 35, 'feature_fraction': 0.6373214441842879, 'bagging_fraction': 0.8092294391316306, 'bagging_freq': 5, 'min_child_samples': 78}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:36,566][0m Trial 36 finished with value: -0.20037917803820576 and parameters: {'lambda_l1': 0.6529843261080504, 'lambda_l2': 3.0828810657378347e-06, 'num_leaves': 39, 'feature_fraction': 0.6348448418501542, 'bagging_fraction': 0.8020137106052191, 'bagging_freq': 6, 'min_child_samples': 77}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:36,776][0m Trial 37 finished with value: -0.1920853096112754 and parameters: {'lambda_l1': 3.005436204844525, 'lambda_l2': 4.727275421769274e-08, 'num_leaves': 26, 'feature_fraction': 0.6196194597176856, 'bagging_fraction': 0.6765060081988712, 'bagging_freq': 5, 'min_child_samples': 69}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:36,976][0m Trial 38 finished with value: -0.19699108901122422 and parameters: {'lambda_l1': 0.014192277077710943, 'lambda_l2': 6.47147957472455e-07, 'num_leaves': 57, 'feature_fraction': 0.5466024945545851, 'bagging_fraction': 0.7441022015320424, 'bagging_freq': 6, 'min_child_samples': 91}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:37,489][0m Trial 39 finished with value: -0.21806223206512673 and parameters: {'lambda_l1': 1.4784236765414183, 'lambda_l2': 0.0001586965034553733, 'num_leaves': 19, 'feature_fraction': 0.6017323359269409, 'bagging_fraction': 0.8165371019716442, 'bagging_freq': 5, 'min_child_samples': 6}. Best is trial 33 with value: -0.18757557690127297.[0m
[32m[I 2023-03-15 16:47:37,710][0m Trial 40 finished with value: -0.20192930038405307 and parameters: {'lambda_l1': 0.13787096333890583, 'lambda_l2': 0.0005619950464398402, 'num_leaves': 36, 'feature_fraction': 0.5774306417354327, 'bagging_fraction': 0.8581058315721863, 'bagging_freq': 4, 'min_child_samples': 79}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:37,906][0m Trial 41 finished with value: -0.18817883572721641 and parameters: {'lambda_l1': 8.666193709499238, 'lambda_l2': 3.6333501267775244e-07, 'num_leaves': 81, 'feature_fraction': 0.5351661727349697, 'bagging_fraction': 0.8698176858782695, 'bagging_freq': 4, 'min_child_samples': 86}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:38,118][0m Trial 42 finished with value: -0.19057955183609937 and parameters: {'lambda_l1': 4.3547399442581085, 'lambda_l2': 9.858107683356326e-07, 'num_leaves': 87, 'feature_fraction': 0.5333312917965936, 'bagging_fraction': 0.8136107264144794, 'bagging_freq': 4, 'min_child_samples': 94}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:38,359][0m Trial 43 finished with value: -0.19615966127302925 and parameters: {'lambda_l1': 0.6233672920061212, 'lambda_l2': 3.164799165437228e-07, 'num_leaves': 220, 'feature_fraction': 0.6344699651461534, 'bagging_fraction': 0.8628311435018474, 'bagging_freq': 5, 'min_child_samples': 86}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:38,578][0m Trial 44 finished with value: -0.19503445069996092 and parameters: {'lambda_l1': 4.255346385877523, 'lambda_l2': 3.202198038199965e-08, 'num_leaves': 50, 'feature_fraction': 0.5452712840114028, 'bagging_fraction': 0.785636959970363, 'bagging_freq': 4, 'min_child_samples': 68}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:38,780][0m Trial 45 finished with value: -0.1898886254646907 and parameters: {'lambda_l1': 9.978276913571877, 'lambda_l2': 1.2610604843893654e-07, 'num_leaves': 67, 'feature_fraction': 0.5808656535902015, 'bagging_fraction': 0.8298861449668741, 'bagging_freq': 6, 'min_child_samples': 74}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:38,996][0m Trial 46 finished with value: -0.19599779140528253 and parameters: {'lambda_l1': 1.7761945547081859, 'lambda_l2': 1.9133902482382826e-06, 'num_leaves': 84, 'feature_fraction': 0.7192955604121923, 'bagging_fraction': 0.7944063807806291, 'bagging_freq': 5, 'min_child_samples': 96}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:39,218][0m Trial 47 finished with value: -0.1944673859008991 and parameters: {'lambda_l1': 0.5407083883693653, 'lambda_l2': 3.7948167888051544e-08, 'num_leaves': 99, 'feature_fraction': 0.6157695167136454, 'bagging_fraction': 0.7503100356504542, 'bagging_freq': 3, 'min_child_samples': 87}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:39,465][0m Trial 48 finished with value: -0.19433764387683042 and parameters: {'lambda_l1': 3.3417237907107977, 'lambda_l2': 4.3096196059486717e-07, 'num_leaves': 187, 'feature_fraction': 0.46743524279594223, 'bagging_fraction': 0.9168798503873602, 'bagging_freq': 4, 'min_child_samples': 77}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:39,683][0m Trial 49 finished with value: -0.19865735581958086 and parameters: {'lambda_l1': 0.004664492091174656, 'lambda_l2': 1.393075977892028e-07, 'num_leaves': 15, 'feature_fraction': 0.5015565336060466, 'bagging_fraction': 0.9524121594739117, 'bagging_freq': 4, 'min_child_samples': 97}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:40,087][0m Trial 50 finished with value: -0.22135039004393983 and parameters: {'lambda_l1': 0.2981320560993987, 'lambda_l2': 4.454918015926495e-06, 'num_leaves': 31, 'feature_fraction': 0.5495255896271055, 'bagging_fraction': 0.9630314932706298, 'bagging_freq': 5, 'min_child_samples': 27}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:40,295][0m Trial 51 finished with value: -0.18877801890839013 and parameters: {'lambda_l1': 9.224118880945829, 'lambda_l2': 1.7619480235648776e-07, 'num_leaves': 79, 'feature_fraction': 0.5202590207033225, 'bagging_fraction': 0.8805475102005996, 'bagging_freq': 1, 'min_child_samples': 83}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:40,532][0m Trial 52 finished with value: -0.19218706907672095 and parameters: {'lambda_l1': 4.9598138219405765, 'lambda_l2': 1.3298424434636824e-06, 'num_leaves': 110, 'feature_fraction': 0.5738505497556007, 'bagging_fraction': 0.9161238454912963, 'bagging_freq': 2, 'min_child_samples': 83}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:40,748][0m Trial 53 finished with value: -0.18953558363877138 and parameters: {'lambda_l1': 5.318480752242002, 'lambda_l2': 3.390352874817813e-07, 'num_leaves': 133, 'feature_fraction': 0.5198679230144188, 'bagging_fraction': 0.8745840851265394, 'bagging_freq': 1, 'min_child_samples': 79}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:40,972][0m Trial 54 finished with value: -0.19510875538967945 and parameters: {'lambda_l1': 1.7967712582796087, 'lambda_l2': 3.476992631419096e-08, 'num_leaves': 79, 'feature_fraction': 0.48098592572492127, 'bagging_fraction': 0.9087120809580569, 'bagging_freq': 3, 'min_child_samples': 89}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:41,512][0m Trial 55 finished with value: -0.19019643480141132 and parameters: {'lambda_l1': 8.280326015170841, 'lambda_l2': 2.0503557907487485e-07, 'num_leaves': 48, 'feature_fraction': 0.4384138076990484, 'bagging_fraction': 0.8707661787139704, 'bagging_freq': 2, 'min_child_samples': 94}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:41,772][0m Trial 56 finished with value: -0.19292528319587093 and parameters: {'lambda_l1': 2.357416449822832, 'lambda_l2': 8.848760900999233e-08, 'num_leaves': 63, 'feature_fraction': 0.5148045520804532, 'bagging_fraction': 0.8310469798474172, 'bagging_freq': 7, 'min_child_samples': 85}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:42,106][0m Trial 57 finished with value: -0.21194998751183164 and parameters: {'lambda_l1': 0.9780930547504394, 'lambda_l2': 6.645148769483056e-07, 'num_leaves': 252, 'feature_fraction': 0.5438830867607268, 'bagging_fraction': 0.9704668702825917, 'bagging_freq': 1, 'min_child_samples': 42}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:42,346][0m Trial 58 finished with value: -0.19209328949393156 and parameters: {'lambda_l1': 5.843065549711569, 'lambda_l2': 9.548785655793609e-06, 'num_leaves': 147, 'feature_fraction': 0.5892520669958116, 'bagging_fraction': 0.8851137110935255, 'bagging_freq': 3, 'min_child_samples': 76}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:42,549][0m Trial 59 finished with value: -0.1881604878877305 and parameters: {'lambda_l1': 8.944050381398624, 'lambda_l2': 1.6842631571273997e-06, 'num_leaves': 124, 'feature_fraction': 0.4924900686813626, 'bagging_fraction': 0.8554048980651837, 'bagging_freq': 4, 'min_child_samples': 89}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:42,761][0m Trial 60 finished with value: -0.19246467572094725 and parameters: {'lambda_l1': 2.6541851999255193, 'lambda_l2': 1.4405317164522683e-05, 'num_leaves': 165, 'feature_fraction': 0.4528531500702908, 'bagging_fraction': 0.8406534590807746, 'bagging_freq': 4, 'min_child_samples': 100}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:42,961][0m Trial 61 finished with value: -0.18871739879774907 and parameters: {'lambda_l1': 9.744604892081783, 'lambda_l2': 1.0440107150875786e-06, 'num_leaves': 113, 'feature_fraction': 0.4876277081154704, 'bagging_fraction': 0.8548527873441348, 'bagging_freq': 4, 'min_child_samples': 89}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:43,183][0m Trial 62 finished with value: -0.19046469167240734 and parameters: {'lambda_l1': 5.35579385338147, 'lambda_l2': 1.5290908550064744e-06, 'num_leaves': 114, 'feature_fraction': 0.48922145978351705, 'bagging_fraction': 0.8056175217274513, 'bagging_freq': 4, 'min_child_samples': 90}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:43,413][0m Trial 63 finished with value: -0.19567569315072236 and parameters: {'lambda_l1': 1.577080441572612, 'lambda_l2': 4.350538229597347e-06, 'num_leaves': 126, 'feature_fraction': 0.5327634850940213, 'bagging_fraction': 0.8490147644763696, 'bagging_freq': 4, 'min_child_samples': 81}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:43,609][0m Trial 64 finished with value: -0.18880410323005725 and parameters: {'lambda_l1': 8.780968722812576, 'lambda_l2': 1.1421972787810105e-06, 'num_leaves': 145, 'feature_fraction': 0.561449464724569, 'bagging_fraction': 0.7808251002379255, 'bagging_freq': 5, 'min_child_samples': 94}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:43,821][0m Trial 65 finished with value: -0.19571298633800632 and parameters: {'lambda_l1': 0.45971476548990137, 'lambda_l2': 3.111795966926938e-06, 'num_leaves': 106, 'feature_fraction': 0.42653090335474153, 'bagging_fraction': 0.8170676536630507, 'bagging_freq': 4, 'min_child_samples': 84}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:44,051][0m Trial 66 finished with value: -0.19714982963560218 and parameters: {'lambda_l1': 1.1991330609634347, 'lambda_l2': 2.1423760726587876e-07, 'num_leaves': 97, 'feature_fraction': 0.4952711351833171, 'bagging_fraction': 0.8801481073636334, 'bagging_freq': 5, 'min_child_samples': 88}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:44,274][0m Trial 67 finished with value: -0.19359930602994013 and parameters: {'lambda_l1': 5.115165376973207, 'lambda_l2': 5.823640547267564e-07, 'num_leaves': 117, 'feature_fraction': 0.4724394365004842, 'bagging_fraction': 0.8508846806138177, 'bagging_freq': 3, 'min_child_samples': 71}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:44,493][0m Trial 68 finished with value: -0.19286966053333074 and parameters: {'lambda_l1': 2.6179813581093776, 'lambda_l2': 2.982967373411695e-05, 'num_leaves': 77, 'feature_fraction': 0.5183518057424958, 'bagging_fraction': 0.9017496403877887, 'bagging_freq': 4, 'min_child_samples': 92}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:44,695][0m Trial 69 finished with value: -0.19634808847728388 and parameters: {'lambda_l1': 0.868905041665357, 'lambda_l2': 8.9294459564812e-06, 'num_leaves': 140, 'feature_fraction': 0.4026664322770201, 'bagging_fraction': 0.834832708084303, 'bagging_freq': 5, 'min_child_samples': 97}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:44,924][0m Trial 70 finished with value: -0.20167049873749715 and parameters: {'lambda_l1': 0.3661711181896284, 'lambda_l2': 2.2394955482502185e-06, 'num_leaves': 153, 'feature_fraction': 0.5603024676141274, 'bagging_fraction': 0.793830308915905, 'bagging_freq': 4, 'min_child_samples': 79}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:45,129][0m Trial 71 finished with value: -0.18890469920143566 and parameters: {'lambda_l1': 8.473100193037775, 'lambda_l2': 1.425202738019845e-06, 'num_leaves': 175, 'feature_fraction': 0.5581517166062143, 'bagging_fraction': 0.7763046639177672, 'bagging_freq': 5, 'min_child_samples': 94}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:45,336][0m Trial 72 finished with value: -0.1892679441628604 and parameters: {'lambda_l1': 9.027682631259186, 'lambda_l2': 1.0027156639161105e-06, 'num_leaves': 156, 'feature_fraction': 0.5301599966986146, 'bagging_fraction': 0.8604218807662194, 'bagging_freq': 6, 'min_child_samples': 89}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:45,552][0m Trial 73 finished with value: -0.19317637068538723 and parameters: {'lambda_l1': 2.755176641014804, 'lambda_l2': 4.232542350383294e-07, 'num_leaves': 124, 'feature_fraction': 0.4985513303507496, 'bagging_fraction': 0.7594010975735422, 'bagging_freq': 5, 'min_child_samples': 84}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:45,758][0m Trial 74 finished with value: -0.19075533935556402 and parameters: {'lambda_l1': 5.3930208015207795, 'lambda_l2': 2.2822521918920767e-07, 'num_leaves': 41, 'feature_fraction': 0.5900342988641938, 'bagging_fraction': 0.7820805672081192, 'bagging_freq': 4, 'min_child_samples': 94}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:45,976][0m Trial 75 finished with value: -0.1887391582498819 and parameters: {'lambda_l1': 9.59396349883297, 'lambda_l2': 8.572215272551369e-07, 'num_leaves': 140, 'feature_fraction': 0.569488666201591, 'bagging_fraction': 0.8212381228560265, 'bagging_freq': 6, 'min_child_samples': 81}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:46,221][0m Trial 76 finished with value: -0.20144944379476354 and parameters: {'lambda_l1': 1.0652492976534884, 'lambda_l2': 5.866884896435223e-07, 'num_leaves': 54, 'feature_fraction': 0.6170638835701067, 'bagging_fraction': 0.8077253086946596, 'bagging_freq': 7, 'min_child_samples': 66}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:46,473][0m Trial 77 finished with value: -0.19904724031314358 and parameters: {'lambda_l1': 2.1788452487568795, 'lambda_l2': 6.668698859766727e-08, 'num_leaves': 131, 'feature_fraction': 0.5103011066195265, 'bagging_fraction': 0.8256067524533742, 'bagging_freq': 6, 'min_child_samples': 61}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:46,739][0m Trial 78 finished with value: -0.19965153056645701 and parameters: {'lambda_l1': 3.330877157893565, 'lambda_l2': 4.913351994850746e-06, 'num_leaves': 120, 'feature_fraction': 0.5751236196215443, 'bagging_fraction': 0.8940437305503629, 'bagging_freq': 6, 'min_child_samples': 50}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:46,977][0m Trial 79 finished with value: -0.20338665295829514 and parameters: {'lambda_l1': 0.17083225215470751, 'lambda_l2': 1.4964705602652852e-07, 'num_leaves': 29, 'feature_fraction': 0.5369141195731922, 'bagging_fraction': 0.8576445608527078, 'bagging_freq': 4, 'min_child_samples': 72}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:47,213][0m Trial 80 finished with value: -0.1993101348805125 and parameters: {'lambda_l1': 0.728720870371931, 'lambda_l2': 2.6444145565407042e-06, 'num_leaves': 8, 'feature_fraction': 0.6015307385886994, 'bagging_fraction': 0.8735507926264936, 'bagging_freq': 3, 'min_child_samples': 81}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:47,430][0m Trial 81 finished with value: -0.18840732554963108 and parameters: {'lambda_l1': 6.666324402544942, 'lambda_l2': 1.1977012595050902e-06, 'num_leaves': 145, 'feature_fraction': 0.5654891736041047, 'bagging_fraction': 0.7923836607871545, 'bagging_freq': 5, 'min_child_samples': 87}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:47,645][0m Trial 82 finished with value: -0.18841413712159652 and parameters: {'lambda_l1': 5.4620363419432, 'lambda_l2': 7.354651106569065e-07, 'num_leaves': 138, 'feature_fraction': 0.5499876830370559, 'bagging_fraction': 0.8220679920820326, 'bagging_freq': 5, 'min_child_samples': 86}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:47,868][0m Trial 83 finished with value: -0.19277631018138108 and parameters: {'lambda_l1': 4.25959973581846, 'lambda_l2': 9.188412245324681e-07, 'num_leaves': 140, 'feature_fraction': 0.5514942997610423, 'bagging_fraction': 0.7998095322737565, 'bagging_freq': 5, 'min_child_samples': 86}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:48,110][0m Trial 84 finished with value: -0.19701841799791459 and parameters: {'lambda_l1': 1.8658360210260008, 'lambda_l2': 3.525134320740674e-07, 'num_leaves': 130, 'feature_fraction': 0.5805915965598338, 'bagging_fraction': 0.8213195652757533, 'bagging_freq': 5, 'min_child_samples': 77}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:48,326][0m Trial 85 finished with value: -0.18947145055955317 and parameters: {'lambda_l1': 5.951537319084063, 'lambda_l2': 1.9053863847368828e-06, 'num_leaves': 159, 'feature_fraction': 0.6029855000559915, 'bagging_fraction': 0.8397410403592347, 'bagging_freq': 6, 'min_child_samples': 92}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:48,548][0m Trial 86 finished with value: -0.1930670838110309 and parameters: {'lambda_l1': 3.4311336647827804, 'lambda_l2': 3.9463700924683325e-06, 'num_leaves': 173, 'feature_fraction': 0.5390937794807324, 'bagging_fraction': 0.7661573365803754, 'bagging_freq': 4, 'min_child_samples': 88}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:48,785][0m Trial 87 finished with value: -0.19921051937255205 and parameters: {'lambda_l1': 1.3301895427292227, 'lambda_l2': 6.497409623484216e-06, 'num_leaves': 137, 'feature_fraction': 0.5640906076173225, 'bagging_fraction': 0.8121885780090763, 'bagging_freq': 5, 'min_child_samples': 75}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:49,007][0m Trial 88 finished with value: -0.18963800522058644 and parameters: {'lambda_l1': 6.664893750264052, 'lambda_l2': 6.631517390391869e-07, 'num_leaves': 150, 'feature_fraction': 0.6358326749497476, 'bagging_fraction': 0.7917780889430569, 'bagging_freq': 3, 'min_child_samples': 98}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:49,228][0m Trial 89 finished with value: -0.19211999527159734 and parameters: {'lambda_l1': 3.7990675348232443, 'lambda_l2': 1.3144389943509205e-06, 'num_leaves': 93, 'feature_fraction': 0.4837331097925356, 'bagging_fraction': 0.7300946417818486, 'bagging_freq': 4, 'min_child_samples': 81}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:49,441][0m Trial 90 finished with value: -0.18949308863325415 and parameters: {'lambda_l1': 9.660575219150033, 'lambda_l2': 2.7819896243839806e-06, 'num_leaves': 108, 'feature_fraction': 0.5058781188025117, 'bagging_fraction': 0.830198895804382, 'bagging_freq': 7, 'min_child_samples': 86}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:49,661][0m Trial 91 finished with value: -0.19701650172434793 and parameters: {'lambda_l1': 2.0814687587157636, 'lambda_l2': 2.488521871447267e-07, 'num_leaves': 21, 'feature_fraction': 0.5248299743917517, 'bagging_fraction': 0.8507814502385852, 'bagging_freq': 6, 'min_child_samples': 83}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:49,889][0m Trial 92 finished with value: -0.19157769535320368 and parameters: {'lambda_l1': 5.772824822344226, 'lambda_l2': 5.567733413811721e-07, 'num_leaves': 160, 'feature_fraction': 0.5490947938430621, 'bagging_fraction': 0.8694744137294237, 'bagging_freq': 4, 'min_child_samples': 79}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:50,086][0m Trial 93 finished with value: -0.1898637524951779 and parameters: {'lambda_l1': 9.937243395109604, 'lambda_l2': 1.2994492783751808e-07, 'num_leaves': 35, 'feature_fraction': 0.5207492922201975, 'bagging_fraction': 0.8820368533887591, 'bagging_freq': 5, 'min_child_samples': 90}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:50,305][0m Trial 94 finished with value: -0.1919846234836716 and parameters: {'lambda_l1': 3.203460467162391, 'lambda_l2': 3.9809580785421286e-07, 'num_leaves': 43, 'feature_fraction': 0.5713901524775946, 'bagging_fraction': 0.8417475946634639, 'bagging_freq': 4, 'min_child_samples': 85}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:50,513][0m Trial 95 finished with value: -0.19027078298847094 and parameters: {'lambda_l1': 6.612664057413739, 'lambda_l2': 9.622105962166755e-07, 'num_leaves': 13, 'feature_fraction': 0.49521510389111917, 'bagging_fraction': 0.8177201834315536, 'bagging_freq': 5, 'min_child_samples': 92}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:50,815][0m Trial 96 finished with value: -0.20502751768537938 and parameters: {'lambda_l1': 1.3618753086736586, 'lambda_l2': 2.555374374248569e-07, 'num_leaves': 68, 'feature_fraction': 0.4661470874384662, 'bagging_fraction': 0.8054515167327627, 'bagging_freq': 2, 'min_child_samples': 35}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:51,052][0m Trial 97 finished with value: -0.1905029712496248 and parameters: {'lambda_l1': 4.090666035787813, 'lambda_l2': 1.8450423075745337e-06, 'num_leaves': 144, 'feature_fraction': 0.5481034068753673, 'bagging_fraction': 0.8636241938834719, 'bagging_freq': 4, 'min_child_samples': 83}. Best is trial 33 with value: -0.18757557690127297.[0m




[32m[I 2023-03-15 16:47:51,437][0m Trial 98 finished with value: -0.20977431770717447 and parameters: {'lambda_l1': 2.1057503044370467, 'lambda_l2': 6.532556519606388e-08, 'num_leaves': 104, 'feature_fraction': 0.5305437036193085, 'bagging_fraction': 0.8257022369051376, 'bagging_freq': 3, 'min_child_samples': 18}. Best is trial 33 with value: -0.18757557690127297.[0m
[32m[I 2023-03-15 16:47:51,655][0m Trial 99 finished with value: -0.19531425943472164 and parameters: {'lambda_l1': 0.7474601890471756, 'lambda_l2': 7.421180366306739e-07, 'num_leaves': 133, 'feature_fraction': 0.5085545239313585, 'bagging_fraction': 0.7911891770806574, 'bagging_freq': 5, 'min_child_samples': 88}. Best is trial 33 with value: -0.18757557690127297.[0m




{'lambda_l1': 8.42578768681397,
 'lambda_l2': 2.4201088034013655e-06,
 'num_leaves': 27,
 'feature_fraction': 0.5536963500833583,
 'bagging_fraction': 0.7844954777732736,
 'bagging_freq': 4,
 'min_child_samples': 81}

In [9]:
# Build the model from the finished Optuna study and the training split.
# NOTE(review): train_optimized_model is defined elsewhere in the notebook;
# presumably it fits a classifier using the study's best parameters -- confirm.
model = train_optimized_model(study, X_train, y_train)

# Keep only the second column of predict_proba (presumably the positive-class
# probability) and score it against the X_valid / y_valid split.
preds = model.predict_proba(X_valid)[:, 1]
print("Brier Score:", brier_score_loss(y_valid, preds))
print("ROC AUC:", roc_auc_score(y_valid, preds))

# Dump every estimator parameter, one "- name: value" line each.
print("Parameters")
print("\n".join(
    f"- {param}: {setting}"
    for param, setting in model.get_params(deep=True).items()
))

# List the features from highest to lowest importance value.
print("Features")
print("\n".join(
    f"- {feature}: {score}"
    for feature, score in sorted(
        zip(model.feature_name_, model.feature_importances_),
        key=lambda pair: pair[1],
        reverse=True,
    )
))


Brier Score: 0.19014310675875265
ROC AUC: 0.7866698518872433
Parameters
- boosting_type: gbdt
- class_weight: None
- colsample_bytree: 1.0
- importance_type: split
- learning_rate: 0.1
- max_depth: -1
- min_child_samples: 81
- min_child_weight: 0.001
- min_split_gain: 0.0
- n_estimators: 100
- n_jobs: -1
- num_leaves: 27
- objective: binary
- random_state: None
- reg_alpha: 0.0
- reg_lambda: 0.0
- silent: warn
- subsample: 1.0
- subsample_for_bin: 200000
- subsample_freq: 0
- metric: l2
- verbosity: -1
- lambda_l1: 8.42578768681397
- lambda_l2: 2.4201088034013655e-06
- feature_fraction: 0.5536963500833583
- bagging_fraction: 0.7844954777732736
- bagging_freq: 4
Features
- SeedDiff: 39
- OROppRegDiff: 13
- OrdinalRankDiff: 12
- FGMRegDiff: 11
- DROppRegDiff: 11
- ScoreTouDiff: 10
- TOTouDiff: 9
- OROppTouDiff: 9
- ScoreOppRegDiff: 8
- AstTouDiff: 8
- FGARegDiff: 7
- ORRegDiff: 7
- FGPRegDiff: 7
- FGM3RegDiff: 6
- AstOppRegDiff: 6
- PFTouDiff: 6
- FTAOppTouDiff: 6
- ORTouDiff: 5
- DRTouD

# Submit to the competition

We'll begin by using the trained model to generate predictions, which we'll save to a CSV file.

In [10]:
# Refit the model on X / y, then take the second predict_proba column for
# every row of X_submission.
# NOTE(review): the recorded run of this cell shows identical 25%/50%/75%
# quantiles for Pred -- presumably many submission rows share the same
# (possibly missing) feature values; verify upstream feature construction.
model.fit(X, y)
predictions = model.predict_proba(X_submission)[:, 1]

# Assemble the two-column submission table (ID taken from the index of
# X_submission) and write it out without the DataFrame's own row index.
output = pd.DataFrame({"ID": X_submission.index}).assign(Pred=predictions)
output.to_csv(path_or_buf="submission.csv", index=False)

# Report the table shape; the trailing expression renders summary statistics
# as the cell's output.
print(output.shape)
output.describe()

(614319, 2)


Unnamed: 0,Pred
count,614319.0
mean,0.648322
std,0.023777
min,0.066631
25%,0.649453
50%,0.649453
75%,0.649453
max,0.949437
