# Imports and Utilities

In [1]:
from typing import List, Tuple
import numpy as np
import pandas as pd
from IPython.display import Markdown
import optuna
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.model_selection import cross_val_score

COMPETITION_NAME = "warmup-round-march-machine-learning-mania-2023"
VERBOSE = False

pd.set_option("display.max_columns", None)

def show_df(df: pd.DataFrame, name: str = "DataFrame", verbose: bool = VERBOSE):
    """Render a quick notebook preview of ``df`` when ``verbose`` is true.

    Displays a Markdown title built from ``name`` followed by three
    sections: the head, the tail and the ``describe()`` summary of the frame.
    Does nothing when ``verbose`` is false.
    """
    if not verbose:
        return
    display(Markdown(f"## {name}"))
    # (section title, name of the DataFrame method that produces its content)
    sections = (("Head", "head"), ("Tail", "tail"), ("Description", "describe"))
    for title, method_name in sections:
        display(Markdown(f"### {title}"))
        display(getattr(df, method_name)())


def extract_data(filename: str, competition_name:str = COMPETITION_NAME) -> pd.DataFrame:
    """Load the men's and/or women's CSV for ``filename`` and stack them.

    Looks for ``M{filename}.csv`` and ``W{filename}.csv`` under
    ``/kaggle/input/{competition_name}/``. A missing file is skipped, so a
    dataset that exists for only one of the two prefixes still loads.

    Args:
        filename: Base file name without the ``M``/``W`` prefix and without
            the ``.csv`` extension.
        competition_name: Name of the Kaggle input directory to read from.

    Returns:
        The row-wise concatenation of the files that were found. Original
        row indexes are kept, so index values can repeat.

    Raises:
        FileNotFoundError: If neither file exists.
    """
    input_dir = f"/kaggle/input/{competition_name}"
    frames = []
    missing_paths = []
    for prefix in ("M", "W"):
        filepath = f"{input_dir}/{prefix}{filename}.csv"
        try:
            frames.append(pd.read_csv(filepath))
        except FileNotFoundError:
            missing_paths.append(filepath)
    if not frames:
        # Fail with the real cause instead of pandas' generic
        # "All objects passed were None" error from concat.
        raise FileNotFoundError(
            f"No data file found for {filename!r}; tried: {', '.join(missing_paths)}"
        )
    df = pd.concat(frames)
    show_df(df, filename)
    return df


def get_team_features(detailed_results: pd.DataFrame) -> pd.DataFrame:
    """Build per-(Season, TeamID) features from detailed game results.

    Works on a copy of ``detailed_results`` and runs it through
    ``clean_detailed_results``, ``transform_game_to_team`` and
    ``transform_team_results``, in that order.
    """
    team_features = (
        detailed_results.copy()
        .pipe(clean_detailed_results)
        .pipe(transform_game_to_team)
        .pipe(transform_team_results)
    )
    show_df(team_features)
    return team_features


def get_seed_features(df_in: pd.DataFrame) -> pd.DataFrame:
    """Return seeds for seasons after 2002 with ``Seed`` as an integer.

    Args:
        df_in: Frame with at least ``Season`` and a string ``Seed`` column.
            It is not modified.

    Returns:
        A new frame containing only rows with ``Season > 2002``, where every
        non-digit character has been stripped from ``Seed`` and the rest
        cast to ``int``.
    """
    # Filter into an explicit copy so the column assignment below writes to an
    # independent frame rather than to a selection of another frame (which
    # pandas flags with SettingWithCopyWarning).
    df = df_in.loc[df_in["Season"] > 2002].copy()
    df["Seed"] = df["Seed"].str.replace(r"\D+", "", regex=True).astype(int)
    show_df(df)
    return df


def get_ranking_features(df_in:pd.DataFrame) -> pd.DataFrame:
    """Aggregate the latest-day rankings to one median row per team and season.

    Keeps only rows whose ``RankingDayNum`` equals the maximum value found in
    the whole input, drops ``SystemName`` and ``RankingDayNum``, and takes the
    median of the remaining columns per ``(Season, TeamID)``.

    Args:
        df_in: Rankings frame with ``Season``, ``TeamID``, ``SystemName`` and
            ``RankingDayNum`` columns plus the value column(s) to aggregate.
            It is not modified.

    Returns:
        A frame with ``Season`` and ``TeamID`` as regular columns and the
        per-group medians of the remaining columns.
    """
    # NOTE(review): the maximum is taken over the whole frame, not per season,
    # so a season whose last ranking day is earlier than the global maximum
    # contributes no rows - confirm this is intended.
    is_latest_day = df_in["RankingDayNum"] == df_in["RankingDayNum"].max()
    # Drop without inplace=True: mutating a boolean-filtered selection in place
    # is the chained-assignment pattern pandas warns about.
    df = df_in.loc[is_latest_day].drop(columns=["SystemName", "RankingDayNum"])
    df = df.groupby(["Season", "TeamID"]).agg("median").reset_index()
    show_df(df)
    return df


def get_game_outcomes(df):
    """Expand every game row of ``df`` into its outcome records.

    Each row (taken from ``df.to_records()``) is handed to ``parse_row`` and
    all returned records are collected, in order, into a new DataFrame.
    """
    outcome_records = [
        record
        for game in df.to_records()
        for record in parse_row(game)
    ]
    return pd.DataFrame(outcome_records)

def parse_row(row):
    """Turn one game result into two mirrored matchup records.

    Args:
        row: Mapping-like object indexable by ``'Season'``, ``'WTeamID'``
            (winner) and ``'LTeamID'`` (loser).

    Returns:
        A list of two dicts with keys ``ID``, ``Season``, ``LowID``,
        ``HighID`` and ``Win``:

        1. the matchup keyed ``{season}_{smaller id}_{larger id}``, where
           ``Win`` is True when the smaller-ID team won;
        2. the mirrored matchup keyed ``{season}_{larger id}_{smaller id}``
           with ``Win`` negated. In this record ``LowID`` holds the larger
           ID and ``HighID`` the smaller one.

    Raises:
        ValueError: If winner and loser IDs are equal (previously this
            surfaced as an UnboundLocalError further down).
    """
    season = row['Season']
    winning_team_id = row['WTeamID']
    losing_team_id = row['LTeamID']
    if winning_team_id == losing_team_id:
        raise ValueError(
            f"Winner and loser share the same team ID ({winning_team_id}) in season {season}"
        )
    # Plain bool so 'Win' has the same type regardless of the ID dtype.
    outcome = bool(winning_team_id < losing_team_id)
    if outcome:
        small_id, big_id = winning_team_id, losing_team_id
    else:
        small_id, big_id = losing_team_id, winning_team_id
    records = [
        {
            "ID": f"{season}_{small_id}_{big_id}",
            'Season': season,
            'LowID': small_id,
            'HighID': big_id,
            'Win': outcome
        },
        {
            "ID": f"{season}_{big_id}_{small_id}",
            'Season': season,
            'LowID': big_id,
            'HighID': small_id,
            'Win': not outcome
        },
    ]
    return records


def clean_detailed_results(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` without the ``WLoc`` and ``DayNum`` columns."""
    unused_columns = ["WLoc", "DayNum"]
    return df.drop(columns=unused_columns)


def transform_game_to_team(game_results: pd.DataFrame) -> pd.DataFrame:
    """Reshape one-row-per-game results into one row per team per game.

    The game table is renamed once from the winner's point of view and once
    from the loser's (via ``rename_columns``); the two views are stacked,
    winners first, and the ``TeamIDOpp`` column is removed.
    """
    per_team_views = [
        rename_columns(game_results, team_prefix) for team_prefix in ("W", "L")
    ]
    return pd.concat(per_team_views).drop(columns=["TeamIDOpp"])


def transform_team_results(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate per-game team rows to per-season medians plus shooting ratios.

    Rows are grouped by ``(Season, TeamID)`` and reduced with the median.
    Six ratio columns are then appended, each computed from the grouped
    medians as made / attempted: ``FGP``, ``FGP3``, ``FTP`` for the team and
    ``FGPOpp``, ``FGP3Opp``, ``FTPOpp`` for its opponents.
    """
    season_medians = df.groupby(["Season", "TeamID"]).median()
    # (made column, attempted column, resulting ratio column)
    ratio_definitions = (
        ("FGM", "FGA", "FGP"),
        ("FGM3", "FGA3", "FGP3"),
        ("FTM", "FTA", "FTP"),
    )
    # Team ratios first, then the opponent ("Opp") ratios, matching column order.
    for suffix in ("", "Opp"):
        for made, attempted, ratio in ratio_definitions:
            season_medians[f"{ratio}{suffix}"] = (
                season_medians[f"{made}{suffix}"] / season_medians[f"{attempted}{suffix}"]
            )
    return season_medians.reset_index()
    
    
def rename_columns(df: pd.DataFrame, team_prefix: str) -> pd.DataFrame:
    """Return a copy of ``df`` whose column names went through ``rename_column``.

    ``team_prefix`` is forwarded unchanged to ``rename_column`` for every
    column; the input frame is left untouched.
    """
    renamed = df.copy()
    renamed.columns = [
        rename_column(original_name, team_prefix) for original_name in renamed.columns
    ]
    return renamed


def rename_column(column_name: str, team_prefix: str) -> str:
    """Rename a game-level column from the point of view of one team.

    Args:
        column_name: Original column name, e.g. ``"WScore"`` or ``"LFGM"``.
        team_prefix: ``"W"`` to take the winner's view, ``"L"`` for the
            loser's view.

    Returns:
        * the name without its prefix when it starts with ``team_prefix``
          (``"WScore"`` -> ``"Score"`` for ``"W"``);
        * the name without its prefix plus an ``"Opp"`` suffix when it
          starts with the other team's prefix (``"LScore"`` -> ``"ScoreOpp"``);
        * the unchanged name otherwise.

    Raises:
        ValueError: If ``team_prefix`` is neither ``"W"`` nor ``"L"``.
    """
    if team_prefix == "W":
        opponent_prefix = "L"
    elif team_prefix == "L":
        opponent_prefix = "W"
    else:
        raise ValueError(f"team_prefix must be 'W' or 'L', got {team_prefix!r}")
    # Slice off exactly one prefix. str.lstrip() treats its argument as a set
    # of characters and would also eat repeated leading letters
    # (e.g. "WWins" -> "ins").
    if column_name.startswith(team_prefix):
        return column_name[len(team_prefix):]
    if column_name.startswith(opponent_prefix):
        return f"{column_name[len(opponent_prefix):]}Opp"
    return column_name


def split_winner_and_looser_columns(df: pd.DataFrame) -> Tuple[List[str], List[str]]:
    """Split the column names of ``df`` into a winner view and a loser view.

    Returns a pair ``(winner_columns, looser_columns)``: the first list holds
    every column that does not start with ``"L"``, the second every column
    that does not start with ``"W"``. Columns starting with neither letter
    appear in both lists.
    """
    winner_columns: List[str] = []
    looser_columns: List[str] = []
    for column in df.columns:
        if not column.startswith("L"):
            winner_columns.append(column)
        if not column.startswith("W"):
            looser_columns.append(column)
    return winner_columns, looser_columns


def merge_features(
    season_features: pd.DataFrame, 
    tournament_features: pd.DataFrame, 
    seed_features: pd.DataFrame, 
    ranking_features: pd.DataFrame
) -> pd.DataFrame:
    """Join all per-team feature tables on ``(Season, TeamID)``.

    Season and tournament features are inner-joined first; columns present in
    both get the suffixes ``Reg`` and ``Tou`` respectively. Seeds are then
    inner-joined and rankings left-joined, so a team without rankings is
    kept with missing ranking values.
    """
    join_keys = ["Season", "TeamID"]
    merged = season_features.merge(
        tournament_features, how="inner", on=join_keys, suffixes=("Reg", "Tou")
    )
    merged = merged.merge(seed_features, how="inner", on=join_keys)
    merged = merged.merge(ranking_features, how="left", on=join_keys)
    show_df(merged)
    return merged


def merge_outcomes_with_features(outcomes: pd.DataFrame, features: pd.DataFrame, how: str = "inner") -> pd.DataFrame:
    """Attach team features to matchups and reduce them to High-minus-Low diffs.

    ``features`` is merged onto ``outcomes`` twice: first for the team in
    ``HighID``, then for the team in ``LowID`` (overlapping columns get the
    suffixes ``High``/``Low``). For every feature column other than
    ``Season``, ``TeamID`` and ``Gender`` a ``{name}Diff`` column
    (High - Low) replaces the two per-team columns. The key columns are
    dropped and ``ID`` becomes the index.

    ``how`` is passed to both merges.
    """
    non_feature_columns = ("Season", "TeamID", "Gender")
    diff_sources = [
        column for column in features.columns if column not in non_feature_columns
    ]

    # First pass: features of the HighID team (no name clash yet, so no suffix).
    merged = outcomes.merge(
        features,
        how=how,
        left_on=["Season", "HighID"],
        right_on=["Season", "TeamID"],
    )
    # Second pass: features of the LowID team; clashes are resolved as High/Low.
    merged = merged.merge(
        features,
        how=how,
        left_on=["Season", "LowID"],
        right_on=["Season", "TeamID"],
        suffixes=("High", "Low"),
    )

    for column in diff_sources:
        high_column = f"{column}High"
        low_column = f"{column}Low"
        merged[f"{column}Diff"] = merged[high_column] - merged[low_column]
        merged = merged.drop(columns=[high_column, low_column])

    merged = merged.drop(
        columns=["Season", "HighID", "LowID", "TeamIDHigh", "TeamIDLow"]
    )
    merged = merged.set_index("ID")
    show_df(merged)
    return merged


def get_submission_outcomes() -> pd.DataFrame:
    """Load the sample submission and split its ``ID`` into matchup keys.

    Reads ``SampleSubmissionWarmup.csv`` from the competition input folder,
    removes the ``Pred`` column and adds integer ``Season``, ``LowID`` and
    ``HighID`` columns taken from the three underscore-separated parts of
    ``ID``.
    """
    submission_path = f"/kaggle/input/{COMPETITION_NAME}/SampleSubmissionWarmup.csv"
    df = pd.read_csv(submission_path).drop(columns="Pred")
    key_columns = ["Season", "LowID", "HighID"]
    df[key_columns] = df["ID"].str.split("_", expand=True).astype(int)
    show_df(df)
    return df


def objective(trial: optuna.Trial, X_train, y_train):
    """Optuna objective: mean 5-fold cross-validated ROC AUC of a LightGBM classifier.

    Args:
        trial: Optuna trial used to sample the regularisation, tree-shape and
            sub-sampling hyper-parameters below.
        X_train: Training features passed to ``cross_val_score``.
        y_train: Training labels passed to ``cross_val_score``.

    Returns:
        The mean of the per-fold ``roc_auc`` scores (higher is better).
    """
    params = {
        "objective": "binary",
        # LightGBM names this metric "auc"; "roc_auc" is the scikit-learn
        # scorer name (used for the cross-validation below), not a LightGBM one.
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    model = lgb.LGBMClassifier(**params)
    # The value Optuna optimises comes from scikit-learn's scorer, not from
    # LightGBM's own "metric" setting.
    score = np.mean(cross_val_score(model, X_train, y_train, scoring="roc_auc", cv=5))
    return score


def run_study(X_train, y_train, n_trials=100, seed=None):
    """Run an Optuna search that maximises ``objective`` on the training data.

    Args:
        X_train: Training features forwarded to ``objective``.
        y_train: Training labels forwarded to ``objective``.
        n_trials: Number of trials to run (default 100, the previously
            hard-coded value).
        seed: Optional integer seed for the sampler so a search can be
            reproduced. With the default ``None`` the study is created exactly
            as before, with Optuna's default sampler.

    Returns:
        The finished ``optuna.Study``.
    """
    # Only override the sampler when a seed is requested, so the default call
    # keeps Optuna's own default behaviour.
    sampler = None if seed is None else optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction="maximize", sampler=sampler)
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)
    return study


def train_optimized_model(study, X, y):
    """Fit a LightGBM classifier on ``X``/``y`` using the study's best parameters.

    Args:
        study: Finished Optuna study; its ``best_params`` are passed to the
            classifier as keyword arguments.
        X: Feature matrix to fit on.
        y: Labels to fit on.

    Returns:
        The fitted ``lgb.LGBMClassifier``.
    """
    params = study.best_params
    model = lgb.LGBMClassifier(
        objective="binary",
        # LightGBM's metric name is "auc"; "roc_auc" is a scikit-learn scorer
        # name and not a LightGBM metric.
        metric="auc",
        verbosity=-1,
        boosting_type="gbdt",
        **params
    )
    model = model.fit(X, y)
    return model

# Load Data

In [2]:
# Load the raw competition tables. extract_data looks for a men's ("M" prefix)
# and a women's ("W" prefix) CSV with the given base name and stacks whichever
# of the two exist.
season_results = extract_data("RegularSeasonDetailedResults")
tournament_results = extract_data("NCAATourneyDetailedResults")
seeds = extract_data("NCAATourneySeeds")
rankings = extract_data("MasseyOrdinals")

# Transform Data

In [3]:
# Per-(Season, TeamID) aggregates of the regular-season games.
season_team_features = get_team_features(season_results)

# Label each tournament game with the following season, so that joining on
# Season pairs a team's regular-season stats with its stats from the previous
# season's tournament.
# NOTE(review): this mutates tournament_results in place, so re-running this
# cell without reloading the data shifts Season by one more year each time.
tournament_results["Season"] += 1
tournament_team_features = get_team_features(tournament_results)

# Integer seeds for seasons after 2002.
seed_features = get_seed_features(seeds)

# Median ranking per (Season, TeamID) on the latest ranking day in the data.
ranking_features = get_ranking_features(rankings)

## Merge features

In [4]:
# One feature row per (Season, TeamID): season and tournament stats and seeds
# are inner-joined, rankings are left-joined.
features = merge_features(season_team_features, tournament_team_features, seed_features, ranking_features)

## Build Datasets

In [5]:
from sklearn.model_selection import train_test_split

# Split at game level (90% train / 10% validation, fixed seed) *before*
# expanding games into mirrored matchup rows, so both rows derived from one
# game always land in the same split.
data = extract_data("NCAATourneyCompactResults")
data_train, data_valid = train_test_split(data, random_state=0, test_size=0.1)

# Matchup records for all games, for each split, and for the submission file.
outcomes = get_game_outcomes(data)
outcomes_train = get_game_outcomes(data_train)
outcomes_valid = get_game_outcomes(data_valid)
outcomes_submission = get_submission_outcomes()

In [6]:
# Attach High-minus-Low feature differences to every matchup. The default
# inner join drops matchups for which either team has no feature row.
features_train = merge_outcomes_with_features(outcomes_train, features)
features_valid = merge_outcomes_with_features(outcomes_valid, features)
# NOTE(review): features_full is built from all games, i.e. it also contains
# the validation games.
features_full = merge_outcomes_with_features(outcomes, features)

In [7]:
# Separate the "Win" label from the feature-difference columns for each set.
y_train = features_train["Win"]
X_train = features_train.drop("Win", axis=1)
y_valid = features_valid["Win"]
X_valid = features_valid.drop("Win", axis=1)
y = features_full["Win"]
X = features_full.drop("Win", axis=1)
# The left join keeps every submission row; feature differences that are
# missing (no feature row for one of the teams) are replaced with 0.
X_submission = merge_outcomes_with_features(outcomes_submission, features, how="left").fillna(0)

# Step 4: Train a model


In [8]:
# Hyper-parameter search on the training split only.
study = run_study(X_train, y_train)
# Bare expression: shown as the notebook cell's output.
study.best_params

[32m[I 2023-03-15 19:55:34,760][0m A new study created in memory with name: no-name-4107a562-9c1c-41bd-83d2-3e4b6192fad7[0m




[32m[I 2023-03-15 19:55:35,643][0m Trial 0 finished with value: 0.7471700946581329 and parameters: {'lambda_l1': 0.010866154873643726, 'lambda_l2': 1.538255429324344e-08, 'num_leaves': 228, 'feature_fraction': 0.5327030895319131, 'bagging_fraction': 0.6436407427328232, 'bagging_freq': 1, 'min_child_samples': 20}. Best is trial 0 with value: 0.7471700946581329.[0m




[32m[I 2023-03-15 19:55:36,499][0m Trial 1 finished with value: 0.7335838735958353 and parameters: {'lambda_l1': 1.2334837767772882e-06, 'lambda_l2': 2.025211033038898e-07, 'num_leaves': 30, 'feature_fraction': 0.720085190505509, 'bagging_fraction': 0.5607941602810422, 'bagging_freq': 1, 'min_child_samples': 20}. Best is trial 0 with value: 0.7471700946581329.[0m




[32m[I 2023-03-15 19:55:36,972][0m Trial 2 finished with value: 0.7557121971595656 and parameters: {'lambda_l1': 0.0006954861082281284, 'lambda_l2': 0.008115146983156235, 'num_leaves': 207, 'feature_fraction': 0.7478509164927757, 'bagging_fraction': 0.7381442630572352, 'bagging_freq': 6, 'min_child_samples': 76}. Best is trial 2 with value: 0.7557121971595656.[0m




[32m[I 2023-03-15 19:55:37,683][0m Trial 3 finished with value: 0.7201994310845985 and parameters: {'lambda_l1': 8.508645958780645e-08, 'lambda_l2': 3.8245404452720296e-08, 'num_leaves': 91, 'feature_fraction': 0.7839582966434789, 'bagging_fraction': 0.41938438012091983, 'bagging_freq': 5, 'min_child_samples': 20}. Best is trial 2 with value: 0.7557121971595656.[0m




[32m[I 2023-03-15 19:55:37,980][0m Trial 4 finished with value: 0.7628201012172303 and parameters: {'lambda_l1': 3.0349857131909856, 'lambda_l2': 0.00046002783084678076, 'num_leaves': 44, 'feature_fraction': 0.4722288397785271, 'bagging_fraction': 0.5609380086910223, 'bagging_freq': 7, 'min_child_samples': 72}. Best is trial 4 with value: 0.7628201012172303.[0m




[32m[I 2023-03-15 19:55:39,768][0m Trial 5 finished with value: 0.7511859020830313 and parameters: {'lambda_l1': 2.6329504590684797e-08, 'lambda_l2': 0.001621847293419118, 'num_leaves': 23, 'feature_fraction': 0.4649188426438081, 'bagging_fraction': 0.6629714970548459, 'bagging_freq': 7, 'min_child_samples': 6}. Best is trial 4 with value: 0.7628201012172303.[0m




[32m[I 2023-03-15 19:55:41,552][0m Trial 6 finished with value: 0.7461895095866389 and parameters: {'lambda_l1': 0.11117749239557266, 'lambda_l2': 0.04877517635875305, 'num_leaves': 218, 'feature_fraction': 0.8479734198688117, 'bagging_fraction': 0.5445495865255063, 'bagging_freq': 6, 'min_child_samples': 10}. Best is trial 4 with value: 0.7628201012172303.[0m




[32m[I 2023-03-15 19:55:42,218][0m Trial 7 finished with value: 0.7449343054606212 and parameters: {'lambda_l1': 0.00010810366357865439, 'lambda_l2': 0.014397600237340708, 'num_leaves': 111, 'feature_fraction': 0.7257328143322894, 'bagging_fraction': 0.7708162899301712, 'bagging_freq': 5, 'min_child_samples': 44}. Best is trial 4 with value: 0.7628201012172303.[0m




[32m[I 2023-03-15 19:55:42,752][0m Trial 8 finished with value: 0.7464227027624157 and parameters: {'lambda_l1': 7.061590543310019e-05, 'lambda_l2': 0.00011012853212856867, 'num_leaves': 160, 'feature_fraction': 0.7474807425303165, 'bagging_fraction': 0.7661500183199492, 'bagging_freq': 6, 'min_child_samples': 62}. Best is trial 4 with value: 0.7628201012172303.[0m




[32m[I 2023-03-15 19:55:43,046][0m Trial 9 finished with value: 0.7685939193714313 and parameters: {'lambda_l1': 1.805251828281933e-07, 'lambda_l2': 0.11938208489126814, 'num_leaves': 249, 'feature_fraction': 0.6973978865769771, 'bagging_fraction': 0.4625131690418318, 'bagging_freq': 3, 'min_child_samples': 100}. Best is trial 9 with value: 0.7685939193714313.[0m




[32m[I 2023-03-15 19:55:43,643][0m Trial 10 finished with value: 0.7585416709818623 and parameters: {'lambda_l1': 2.0825719403427163e-06, 'lambda_l2': 1.8017009552362744, 'num_leaves': 256, 'feature_fraction': 0.9910354199951891, 'bagging_fraction': 0.9154620934515069, 'bagging_freq': 3, 'min_child_samples': 100}. Best is trial 9 with value: 0.7685939193714313.[0m




[32m[I 2023-03-15 19:55:43,978][0m Trial 11 finished with value: 0.7759498608780906 and parameters: {'lambda_l1': 1.9711662840394, 'lambda_l2': 9.874566469886835, 'num_leaves': 74, 'feature_fraction': 0.5902835637623557, 'bagging_fraction': 0.40168460328717037, 'bagging_freq': 3, 'min_child_samples': 98}. Best is trial 11 with value: 0.7759498608780906.[0m




[32m[I 2023-03-15 19:55:44,246][0m Trial 12 finished with value: 0.7794427528877289 and parameters: {'lambda_l1': 4.113751383349978, 'lambda_l2': 8.047158162474878, 'num_leaves': 76, 'feature_fraction': 0.6024739707607051, 'bagging_fraction': 0.4015078199001848, 'bagging_freq': 3, 'min_child_samples': 99}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:44,526][0m Trial 13 finished with value: 0.777939425008803 and parameters: {'lambda_l1': 6.284295454098079, 'lambda_l2': 8.714114714347936, 'num_leaves': 73, 'feature_fraction': 0.5793975217370986, 'bagging_fraction': 0.4338243885986218, 'bagging_freq': 3, 'min_child_samples': 86}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:44,837][0m Trial 14 finished with value: 0.7727125147579693 and parameters: {'lambda_l1': 7.294613480763004, 'lambda_l2': 0.9872187052457418, 'num_leaves': 148, 'feature_fraction': 0.6104290537906291, 'bagging_fraction': 0.4845688172850146, 'bagging_freq': 2, 'min_child_samples': 84}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:45,121][0m Trial 15 finished with value: 0.7624289718787327 and parameters: {'lambda_l1': 0.16632333066710217, 'lambda_l2': 8.051569319887601, 'num_leaves': 66, 'feature_fraction': 0.4255240132880088, 'bagging_fraction': 0.48312436886145493, 'bagging_freq': 4, 'min_child_samples': 86}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:45,426][0m Trial 16 finished with value: 0.7741978914227718 and parameters: {'lambda_l1': 9.725305987826047, 'lambda_l2': 0.357875903129097, 'num_leaves': 6, 'feature_fraction': 0.6197127475384556, 'bagging_fraction': 0.42320951646206934, 'bagging_freq': 2, 'min_child_samples': 46}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:45,803][0m Trial 17 finished with value: 0.7406006407202579 and parameters: {'lambda_l1': 0.1899464826683312, 'lambda_l2': 0.3978619417161249, 'num_leaves': 119, 'feature_fraction': 0.5444823627425038, 'bagging_fraction': 0.5129885893149955, 'bagging_freq': 4, 'min_child_samples': 61}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:46,105][0m Trial 18 finished with value: 0.7673617583904665 and parameters: {'lambda_l1': 0.7169357520394403, 'lambda_l2': 5.86973253400049, 'num_leaves': 176, 'feature_fraction': 0.6327456464242857, 'bagging_fraction': 0.4054762324455777, 'bagging_freq': 2, 'min_child_samples': 87}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:46,480][0m Trial 19 finished with value: 0.743137457969994 and parameters: {'lambda_l1': 0.026615818096711156, 'lambda_l2': 0.9829190602943103, 'num_leaves': 63, 'feature_fraction': 0.5192104978601391, 'bagging_fraction': 0.6047515682654796, 'bagging_freq': 3, 'min_child_samples': 72}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:46,820][0m Trial 20 finished with value: 0.7611046383175569 and parameters: {'lambda_l1': 0.7622341095225388, 'lambda_l2': 0.08511184590646993, 'num_leaves': 95, 'feature_fraction': 0.6469609476647075, 'bagging_fraction': 0.4693714010935146, 'bagging_freq': 4, 'min_child_samples': 90}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:47,091][0m Trial 21 finished with value: 0.7730364996513321 and parameters: {'lambda_l1': 1.6101730369910592, 'lambda_l2': 7.783854670674628, 'num_leaves': 74, 'feature_fraction': 0.572604099405069, 'bagging_fraction': 0.40251219405719496, 'bagging_freq': 3, 'min_child_samples': 95}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:47,375][0m Trial 22 finished with value: 0.7765151515151516 and parameters: {'lambda_l1': 9.28007557052996, 'lambda_l2': 6.7131521589442515, 'num_leaves': 49, 'feature_fraction': 0.5806011099565709, 'bagging_fraction': 0.45304591541189104, 'bagging_freq': 2, 'min_child_samples': 80}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:47,688][0m Trial 23 finished with value: 0.7700865455650145 and parameters: {'lambda_l1': 6.570775691971798, 'lambda_l2': 1.164521682632154, 'num_leaves': 52, 'feature_fraction': 0.662635765242933, 'bagging_fraction': 0.4689345892712544, 'bagging_freq': 2, 'min_child_samples': 79}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:48,060][0m Trial 24 finished with value: 0.7576556750001726 and parameters: {'lambda_l1': 0.5593065803902535, 'lambda_l2': 0.22364072252918965, 'num_leaves': 97, 'feature_fraction': 0.5721377844178842, 'bagging_fraction': 0.5144733007937641, 'bagging_freq': 1, 'min_child_samples': 65}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:48,335][0m Trial 25 finished with value: 0.7756801611466683 and parameters: {'lambda_l1': 9.408769068572065, 'lambda_l2': 1.6268131937912964, 'num_leaves': 135, 'feature_fraction': 0.5190934024749468, 'bagging_fraction': 0.44990479071522127, 'bagging_freq': 2, 'min_child_samples': 91}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:48,688][0m Trial 26 finished with value: 0.7586214157984493 and parameters: {'lambda_l1': 0.032970628824164745, 'lambda_l2': 3.0573948543555045, 'num_leaves': 22, 'feature_fraction': 0.6649630383708219, 'bagging_fraction': 0.5204778972262477, 'bagging_freq': 3, 'min_child_samples': 81}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:49,135][0m Trial 27 finished with value: 0.739031549258822 and parameters: {'lambda_l1': 1.0209847207584102, 'lambda_l2': 0.37090527816058744, 'num_leaves': 42, 'feature_fraction': 0.5755251690247161, 'bagging_fraction': 0.584189012620728, 'bagging_freq': 5, 'min_child_samples': 56}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:49,552][0m Trial 28 finished with value: 0.7401408997700863 and parameters: {'lambda_l1': 0.004363987255051377, 'lambda_l2': 0.0395975605241203, 'num_leaves': 87, 'feature_fraction': 0.47905096157923865, 'bagging_fraction': 0.44288738712096315, 'bagging_freq': 4, 'min_child_samples': 38}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:49,950][0m Trial 29 finished with value: 0.7510554105649799 and parameters: {'lambda_l1': 0.2399863296103323, 'lambda_l2': 2.626419873471427, 'num_leaves': 8, 'feature_fraction': 0.5282571161080545, 'bagging_fraction': 0.6315931971609589, 'bagging_freq': 1, 'min_child_samples': 69}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:50,277][0m Trial 30 finished with value: 0.7624508930729026 and parameters: {'lambda_l1': 3.0860070820772583, 'lambda_l2': 0.4603633545118012, 'num_leaves': 110, 'feature_fraction': 0.6067975102852747, 'bagging_fraction': 0.4992266778889823, 'bagging_freq': 2, 'min_child_samples': 90}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:50,554][0m Trial 31 finished with value: 0.7732053964111381 and parameters: {'lambda_l1': 1.7382361911952764, 'lambda_l2': 9.622733990292994, 'num_leaves': 78, 'feature_fraction': 0.572212642567684, 'bagging_fraction': 0.4033347074723424, 'bagging_freq': 3, 'min_child_samples': 95}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:50,866][0m Trial 32 finished with value: 0.7755951517913241 and parameters: {'lambda_l1': 1.9535511707719646, 'lambda_l2': 9.69949688209165, 'num_leaves': 56, 'feature_fraction': 0.5902958969687155, 'bagging_fraction': 0.45408708172617435, 'bagging_freq': 3, 'min_child_samples': 96}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:51,172][0m Trial 33 finished with value: 0.7582060350601021 and parameters: {'lambda_l1': 0.3407922052936041, 'lambda_l2': 2.6228423727421797, 'num_leaves': 34, 'feature_fraction': 0.6403841700800476, 'bagging_fraction': 0.40118178296479895, 'bagging_freq': 2, 'min_child_samples': 80}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:51,497][0m Trial 34 finished with value: 0.7685006248403379 and parameters: {'lambda_l1': 3.2033340599373967, 'lambda_l2': 2.5585530210722753, 'num_leaves': 78, 'feature_fraction': 0.548019679762633, 'bagging_fraction': 0.4375582420037643, 'bagging_freq': 1, 'min_child_samples': 93}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:52,102][0m Trial 35 finished with value: 0.7498094409577662 and parameters: {'lambda_l1': 0.08020682525895416, 'lambda_l2': 0.8272117694308864, 'num_leaves': 126, 'feature_fraction': 0.68379921254942, 'bagging_fraction': 0.542246658051968, 'bagging_freq': 4, 'min_child_samples': 75}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:52,816][0m Trial 36 finished with value: 0.7349343572429696 and parameters: {'lambda_l1': 0.5154028725435159, 'lambda_l2': 0.2014643915563808, 'num_leaves': 45, 'feature_fraction': 0.6094569215049166, 'bagging_fraction': 0.4282354454015154, 'bagging_freq': 3, 'min_child_samples': 30}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:53,127][0m Trial 37 finished with value: 0.7680111090398173 and parameters: {'lambda_l1': 2.79648825494061, 'lambda_l2': 3.8001352934940678, 'num_leaves': 102, 'feature_fraction': 0.6740643402906261, 'bagging_fraction': 0.4923687260267892, 'bagging_freq': 4, 'min_child_samples': 100}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:53,442][0m Trial 38 finished with value: 0.7556969213667778 and parameters: {'lambda_l1': 0.007476261132794276, 'lambda_l2': 0.023374442711632012, 'num_leaves': 24, 'feature_fraction': 0.5007108621906994, 'bagging_fraction': 0.43838895467182926, 'bagging_freq': 1, 'min_child_samples': 86}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:53,819][0m Trial 39 finished with value: 0.7453815323432548 and parameters: {'lambda_l1': 0.05377945822267353, 'lambda_l2': 0.004140624859260781, 'num_leaves': 187, 'feature_fraction': 0.5489166680831485, 'bagging_fraction': 0.5482670093935296, 'bagging_freq': 5, 'min_child_samples': 76}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:54,127][0m Trial 40 finished with value: 0.7694091461435958 and parameters: {'lambda_l1': 4.049588091790745, 'lambda_l2': 0.1530971214020632, 'num_leaves': 69, 'feature_fraction': 0.49295370873276645, 'bagging_fraction': 0.4707302521876172, 'bagging_freq': 3, 'min_child_samples': 83}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:54,411][0m Trial 41 finished with value: 0.7725149650986971 and parameters: {'lambda_l1': 7.0103408319694465, 'lambda_l2': 1.5445768802690705, 'num_leaves': 141, 'feature_fraction': 0.5164266187875974, 'bagging_fraction': 0.4430611430469214, 'bagging_freq': 2, 'min_child_samples': 91}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:54,700][0m Trial 42 finished with value: 0.770868890545924 and parameters: {'lambda_l1': 1.4429316951004294, 'lambda_l2': 9.252915610545525, 'num_leaves': 138, 'feature_fraction': 0.44424918869139185, 'bagging_fraction': 0.44792849613178776, 'bagging_freq': 2, 'min_child_samples': 95}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:55,000][0m Trial 43 finished with value: 0.7790785503704164 and parameters: {'lambda_l1': 9.517923564309175, 'lambda_l2': 2.9191277688947808, 'num_leaves': 166, 'feature_fraction': 0.4514878070984584, 'bagging_fraction': 0.42499367939002447, 'bagging_freq': 2, 'min_child_samples': 89}. Best is trial 12 with value: 0.7794427528877289.[0m




[32m[I 2023-03-15 19:55:55,276][0m Trial 44 finished with value: 0.7794709742676249 and parameters: {'lambda_l1': 9.62773295936816, 'lambda_l2': 3.7915251250888033, 'num_leaves': 191, 'feature_fraction': 0.40626437446157904, 'bagging_fraction': 0.40185061254874155, 'bagging_freq': 3, 'min_child_samples': 100}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:55,581][0m Trial 45 finished with value: 0.7603189274839993 and parameters: {'lambda_l1': 0.4112432735029969, 'lambda_l2': 0.6184383522409873, 'num_leaves': 199, 'feature_fraction': 0.4003496380530198, 'bagging_fraction': 0.42690068944684617, 'bagging_freq': 3, 'min_child_samples': 87}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:55,894][0m Trial 46 finished with value: 0.7747783715487064 and parameters: {'lambda_l1': 9.629151956234388, 'lambda_l2': 0.07598752824780904, 'num_leaves': 161, 'feature_fraction': 0.45611314575935985, 'bagging_fraction': 0.4943857954494737, 'bagging_freq': 2, 'min_child_samples': 70}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:56,199][0m Trial 47 finished with value: 0.770898233876703 and parameters: {'lambda_l1': 3.862826028451494, 'lambda_l2': 3.929295494127823, 'num_leaves': 227, 'feature_fraction': 0.44801926291797545, 'bagging_fraction': 0.520165553803302, 'bagging_freq': 3, 'min_child_samples': 100}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:56,529][0m Trial 48 finished with value: 0.7573860615726644 and parameters: {'lambda_l1': 0.13380817684077434, 'lambda_l2': 0.8456694062398918, 'num_leaves': 170, 'feature_fraction': 0.48036149274790396, 'bagging_fraction': 0.467981682487095, 'bagging_freq': 2, 'min_child_samples': 77}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:56,847][0m Trial 49 finished with value: 0.7649792525390613 and parameters: {'lambda_l1': 1.0326971078091314, 'lambda_l2': 3.3604105141051033, 'num_leaves': 215, 'feature_fraction': 0.42881844837049027, 'bagging_fraction': 0.42277419620083323, 'bagging_freq': 1, 'min_child_samples': 89}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:57,148][0m Trial 50 finished with value: 0.774246739438127 and parameters: {'lambda_l1': 9.771305172603446, 'lambda_l2': 0.1852793138435572, 'num_leaves': 183, 'feature_fraction': 0.46901826013333725, 'bagging_fraction': 0.5704467394605106, 'bagging_freq': 4, 'min_child_samples': 83}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:57,433][0m Trial 51 finished with value: 0.7735705482715052 and parameters: {'lambda_l1': 3.428534342074072, 'lambda_l2': 4.840722519248651, 'num_leaves': 87, 'feature_fraction': 0.4173079930834841, 'bagging_fraction': 0.4055956979562526, 'bagging_freq': 3, 'min_child_samples': 96}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:57,722][0m Trial 52 finished with value: 0.7731818354426011 and parameters: {'lambda_l1': 1.3027527002395436, 'lambda_l2': 1.5770890668264124, 'num_leaves': 200, 'feature_fraction': 0.507409449701685, 'bagging_fraction': 0.4245116766587865, 'bagging_freq': 3, 'min_child_samples': 99}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:58,037][0m Trial 53 finished with value: 0.762044142725961 and parameters: {'lambda_l1': 0.2794132404525471, 'lambda_l2': 4.945252510107113, 'num_leaves': 59, 'feature_fraction': 0.5468592587556474, 'bagging_fraction': 0.4791782641395437, 'bagging_freq': 2, 'min_child_samples': 93}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:58,320][0m Trial 54 finished with value: 0.776229399255715 and parameters: {'lambda_l1': 4.604166502793297, 'lambda_l2': 0.6709476254406367, 'num_leaves': 157, 'feature_fraction': 0.6290685080533548, 'bagging_fraction': 0.4001706198564959, 'bagging_freq': 3, 'min_child_samples': 98}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:58,863][0m Trial 55 finished with value: 0.7519733389948702 and parameters: {'lambda_l1': 4.79550483553394, 'lambda_l2': 0.5928888231241758, 'num_leaves': 151, 'feature_fraction': 0.6299809336894411, 'bagging_fraction': 0.4575141044820871, 'bagging_freq': 4, 'min_child_samples': 15}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:59,183][0m Trial 56 finished with value: 0.7618206155885581 and parameters: {'lambda_l1': 0.638672758554968, 'lambda_l2': 1.2951935428884085, 'num_leaves': 158, 'feature_fraction': 0.7070682073946101, 'bagging_fraction': 0.4176847158990964, 'bagging_freq': 3, 'min_child_samples': 86}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:59,497][0m Trial 57 finished with value: 0.7707665341038548 and parameters: {'lambda_l1': 4.620493757890624, 'lambda_l2': 0.33457712944595386, 'num_leaves': 188, 'feature_fraction': 0.6518128653224756, 'bagging_fraction': 0.4615158918305049, 'bagging_freq': 5, 'min_child_samples': 93}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:55:59,775][0m Trial 58 finished with value: 0.7697520315941369 and parameters: {'lambda_l1': 2.0456484430536728, 'lambda_l2': 1.6799687619206554, 'num_leaves': 174, 'feature_fraction': 0.40094558247153933, 'bagging_fraction': 0.49052831106050143, 'bagging_freq': 7, 'min_child_samples': 98}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:56:00,084][0m Trial 59 finished with value: 0.7614303492892011 and parameters: {'lambda_l1': 0.9371359897650698, 'lambda_l2': 5.455413050601203, 'num_leaves': 118, 'feature_fraction': 0.5945556860742928, 'bagging_fraction': 0.4215814973523705, 'bagging_freq': 2, 'min_child_samples': 84}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:56:00,431][0m Trial 60 finished with value: 0.7595240684355516 and parameters: {'lambda_l1': 5.238793511709803, 'lambda_l2': 0.08253584370724337, 'num_leaves': 163, 'feature_fraction': 0.6286278731921081, 'bagging_fraction': 0.40095103635577783, 'bagging_freq': 4, 'min_child_samples': 49}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:56:00,731][0m Trial 61 finished with value: 0.7738753736959478 and parameters: {'lambda_l1': 1.9994810804637138, 'lambda_l2': 5.651409412073777, 'num_leaves': 50, 'feature_fraction': 0.5879278410699544, 'bagging_fraction': 0.4417869278315539, 'bagging_freq': 3, 'min_child_samples': 97}. Best is trial 44 with value: 0.7794709742676249.[0m




[32m[I 2023-03-15 19:56:00,991][0m Trial 62 finished with value: 0.7824044443063581 and parameters: {'lambda_l1': 9.25460943999764, 'lambda_l2': 2.178249332211151, 'num_leaves': 38, 'feature_fraction': 0.56329562301042, 'bagging_fraction': 0.4013740644347293, 'bagging_freq': 3, 'min_child_samples': 90}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:01,262][0m Trial 63 finished with value: 0.777969113555238 and parameters: {'lambda_l1': 7.799910714539653, 'lambda_l2': 0.8881992812813099, 'num_leaves': 36, 'feature_fraction': 0.5602319640444355, 'bagging_fraction': 0.43085972093167446, 'bagging_freq': 3, 'min_child_samples': 89}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:01,540][0m Trial 64 finished with value: 0.7769609112312461 and parameters: {'lambda_l1': 9.159703912259477, 'lambda_l2': 2.2365273242116728, 'num_leaves': 35, 'feature_fraction': 0.5384563686991543, 'bagging_fraction': 0.4592726997674984, 'bagging_freq': 2, 'min_child_samples': 89}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:01,818][0m Trial 65 finished with value: 0.7787570682905611 and parameters: {'lambda_l1': 9.89480101877158, 'lambda_l2': 2.1702919015847817, 'num_leaves': 35, 'feature_fraction': 0.49017163982726797, 'bagging_fraction': 0.47360088086185803, 'bagging_freq': 3, 'min_child_samples': 88}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:02,098][0m Trial 66 finished with value: 0.7639193541705505 and parameters: {'lambda_l1': 0.9646124761299821, 'lambda_l2': 0.35026126663236873, 'num_leaves': 17, 'feature_fraction': 0.48882891802521433, 'bagging_fraction': 0.4289721320764082, 'bagging_freq': 4, 'min_child_samples': 92}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:02,378][0m Trial 67 finished with value: 0.7776542768767649 and parameters: {'lambda_l1': 2.442633905826746, 'lambda_l2': 1.0861008787826327, 'num_leaves': 3, 'feature_fraction': 0.5612852322551691, 'bagging_fraction': 0.4794293416914662, 'bagging_freq': 3, 'min_child_samples': 88}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:02,726][0m Trial 68 finished with value: 0.7556167450306206 and parameters: {'lambda_l1': 0.3794304129462838, 'lambda_l2': 1.5249570707811133, 'num_leaves': 31, 'feature_fraction': 0.5260983597944512, 'bagging_fraction': 0.5027625137987765, 'bagging_freq': 3, 'min_child_samples': 73}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:03,388][0m Trial 69 finished with value: 0.770348736855914 and parameters: {'lambda_l1': 5.441871317649665, 'lambda_l2': 2.8945543953768524, 'num_leaves': 12, 'feature_fraction': 0.46590341139751257, 'bagging_fraction': 0.4391018234455605, 'bagging_freq': 4, 'min_child_samples': 83}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:03,702][0m Trial 70 finished with value: 0.752532156838377 and parameters: {'lambda_l1': 0.19009234054509788, 'lambda_l2': 0.25911976820145166, 'num_leaves': 41, 'feature_fraction': 0.5048085950350633, 'bagging_fraction': 0.4159853769077955, 'bagging_freq': 3, 'min_child_samples': 79}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:04,032][0m Trial 71 finished with value: 0.7678939946284444 and parameters: {'lambda_l1': 2.314431843292163, 'lambda_l2': 0.7279516238129788, 'num_leaves': 18, 'feature_fraction': 0.5614219544715695, 'bagging_fraction': 0.46876757135374275, 'bagging_freq': 3, 'min_child_samples': 88}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:04,329][0m Trial 72 finished with value: 0.7710805077431873 and parameters: {'lambda_l1': 2.396796832145401, 'lambda_l2': 9.521828445102278, 'num_leaves': 5, 'feature_fraction': 0.5619485231807668, 'bagging_fraction': 0.48232417777642, 'bagging_freq': 3, 'min_child_samples': 94}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:04,618][0m Trial 73 finished with value: 0.7777254776058603 and parameters: {'lambda_l1': 9.974363331868929, 'lambda_l2': 0.9672672545133528, 'num_leaves': 27, 'feature_fraction': 0.5294365438262438, 'bagging_fraction': 0.5272498993305845, 'bagging_freq': 3, 'min_child_samples': 91}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:04,894][0m Trial 74 finished with value: 0.7774768360294676 and parameters: {'lambda_l1': 5.160532978562448, 'lambda_l2': 3.9853812073604113, 'num_leaves': 26, 'feature_fraction': 0.5282173727238787, 'bagging_fraction': 0.41613729903499513, 'bagging_freq': 3, 'min_child_samples': 91}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:05,202][0m Trial 75 finished with value: 0.7604261169452557 and parameters: {'lambda_l1': 1.1273991662088183, 'lambda_l2': 2.4711058801653283, 'num_leaves': 39, 'feature_fraction': 0.4863691407149025, 'bagging_fraction': 0.4462325661668042, 'bagging_freq': 4, 'min_child_samples': 86}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:05,497][0m Trial 76 finished with value: 0.772361171523851 and parameters: {'lambda_l1': 7.0481290587334735, 'lambda_l2': 0.48947201649641425, 'num_leaves': 64, 'feature_fraction': 0.5107243550751667, 'bagging_fraction': 0.5301983467889266, 'bagging_freq': 3, 'min_child_samples': 96}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:05,849][0m Trial 77 finished with value: 0.7779458114984431 and parameters: {'lambda_l1': 9.405487799330126, 'lambda_l2': 6.2678403959710645, 'num_leaves': 29, 'feature_fraction': 0.6032181509532079, 'bagging_fraction': 0.43449430768046104, 'bagging_freq': 2, 'min_child_samples': 29}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:06,247][0m Trial 78 finished with value: 0.75789568618516 and parameters: {'lambda_l1': 1.4446655069378005, 'lambda_l2': 5.82876399954654, 'num_leaves': 54, 'feature_fraction': 0.6148874842523384, 'bagging_fraction': 0.43175564187762655, 'bagging_freq': 2, 'min_child_samples': 42}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:06,861][0m Trial 79 finished with value: 0.7439153151473725 and parameters: {'lambda_l1': 0.6886759894121708, 'lambda_l2': 2.1562902334930176, 'num_leaves': 71, 'feature_fraction': 0.6046218974226701, 'bagging_fraction': 0.5057373425982254, 'bagging_freq': 2, 'min_child_samples': 23}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:07,279][0m Trial 80 finished with value: 0.7681741371334672 and parameters: {'lambda_l1': 3.9127549281075185, 'lambda_l2': 9.552607213308246, 'num_leaves': 47, 'feature_fraction': 0.5804518177591231, 'bagging_fraction': 0.4591721202032756, 'bagging_freq': 6, 'min_child_samples': 35}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:07,568][0m Trial 81 finished with value: 0.7784803779421005 and parameters: {'lambda_l1': 8.866465171209942, 'lambda_l2': 1.0604149191611592, 'num_leaves': 27, 'feature_fraction': 0.5317818529017175, 'bagging_fraction': 0.41485504501466675, 'bagging_freq': 3, 'min_child_samples': 66}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:07,873][0m Trial 82 finished with value: 0.7723153441454877 and parameters: {'lambda_l1': 6.174959495155805, 'lambda_l2': 4.6296518151288, 'num_leaves': 18, 'feature_fraction': 0.46077549851298505, 'bagging_fraction': 0.41220950531861933, 'bagging_freq': 3, 'min_child_samples': 55}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:08,192][0m Trial 83 finished with value: 0.7611904244081277 and parameters: {'lambda_l1': 2.99646602724385, 'lambda_l2': 2.005833176707583, 'num_leaves': 34, 'feature_fraction': 0.5958735832313478, 'bagging_fraction': 0.43249368704212987, 'bagging_freq': 2, 'min_child_samples': 82}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:08,496][0m Trial 84 finished with value: 0.7722681359044995 and parameters: {'lambda_l1': 6.768875399869932, 'lambda_l2': 3.1288501181763686, 'num_leaves': 80, 'feature_fraction': 0.5422329360455869, 'bagging_fraction': 0.4140947653159871, 'bagging_freq': 2, 'min_child_samples': 65}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:08,983][0m Trial 85 finished with value: 0.7557264373053847 and parameters: {'lambda_l1': 3.0273842195270864, 'lambda_l2': 1.194394659106173, 'num_leaves': 13, 'feature_fraction': 0.4793401761815154, 'bagging_fraction': 0.45400600029987415, 'bagging_freq': 1, 'min_child_samples': 23}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:09,320][0m Trial 86 finished with value: 0.7552107714189054 and parameters: {'lambda_l1': 1.63577609297589, 'lambda_l2': 7.01588167124862, 'num_leaves': 106, 'feature_fraction': 0.4371824272864986, 'bagging_fraction': 0.436632538831545, 'bagging_freq': 4, 'min_child_samples': 52}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:10,210][0m Trial 87 finished with value: 0.7355663608055952 and parameters: {'lambda_l1': 0.5613472447798623, 'lambda_l2': 0.14033221094796083, 'num_leaves': 40, 'feature_fraction': 0.49226097704764826, 'bagging_fraction': 0.41779402974326446, 'bagging_freq': 3, 'min_child_samples': 8}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:11,343][0m Trial 88 finished with value: 0.7728443008347314 and parameters: {'lambda_l1': 6.796558089392016, 'lambda_l2': 0.5644085985451285, 'num_leaves': 60, 'feature_fraction': 0.5643986533531316, 'bagging_fraction': 0.4758264728654672, 'bagging_freq': 3, 'min_child_samples': 64}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:11,664][0m Trial 89 finished with value: 0.773730037904679 and parameters: {'lambda_l1': 9.842902967724292, 'lambda_l2': 3.563139034900169, 'num_leaves': 30, 'feature_fraction': 0.5146365811946195, 'bagging_fraction': 0.4017750291964485, 'bagging_freq': 2, 'min_child_samples': 31}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:11,997][0m Trial 90 finished with value: 0.7657387269827461 and parameters: {'lambda_l1': 3.6529539265985953, 'lambda_l2': 0.9676341068774862, 'num_leaves': 246, 'feature_fraction': 0.45262304015776406, 'bagging_fraction': 0.4519396472118823, 'bagging_freq': 3, 'min_child_samples': 68}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:12,291][0m Trial 91 finished with value: 0.7767959844514868 and parameters: {'lambda_l1': 9.56497081222117, 'lambda_l2': 1.3239431023877786, 'num_leaves': 25, 'feature_fraction': 0.535899218221667, 'bagging_fraction': 0.4300220409634823, 'bagging_freq': 3, 'min_child_samples': 59}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:12,573][0m Trial 92 finished with value: 0.7794324827219563 and parameters: {'lambda_l1': 9.723816861028173, 'lambda_l2': 5.762098677344314, 'num_leaves': 45, 'feature_fraction': 0.5792303034607308, 'bagging_fraction': 0.4490554279842221, 'bagging_freq': 3, 'min_child_samples': 85}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:12,897][0m Trial 93 finished with value: 0.7742268895378943 and parameters: {'lambda_l1': 4.947330133515466, 'lambda_l2': 5.837877907528585, 'num_leaves': 54, 'feature_fraction': 0.5791172112629979, 'bagging_fraction': 0.44552936202735915, 'bagging_freq': 3, 'min_child_samples': 78}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:13,188][0m Trial 94 finished with value: 0.7640756505589043 and parameters: {'lambda_l1': 1.5570029441767677, 'lambda_l2': 2.181243495056649, 'num_leaves': 47, 'feature_fraction': 0.5553801185060969, 'bagging_fraction': 0.413130019945476, 'bagging_freq': 3, 'min_child_samples': 85}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:13,542][0m Trial 95 finished with value: 0.7689090149616465 and parameters: {'lambda_l1': 2.972722773165681, 'lambda_l2': 7.094886517979191, 'num_leaves': 209, 'feature_fraction': 0.5935262928271007, 'bagging_fraction': 0.4917513999408041, 'bagging_freq': 2, 'min_child_samples': 81}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:14,108][0m Trial 96 finished with value: 0.7774112450547858 and parameters: {'lambda_l1': 6.065335409411812, 'lambda_l2': 4.071723047609254, 'num_leaves': 93, 'feature_fraction': 0.6131986900367893, 'bagging_fraction': 0.4664347130377661, 'bagging_freq': 3, 'min_child_samples': 100}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:14,514][0m Trial 97 finished with value: 0.7640632227952803 and parameters: {'lambda_l1': 2.2721048296547055, 'lambda_l2': 0.2537792267716766, 'num_leaves': 38, 'feature_fraction': 0.5532211110962141, 'bagging_fraction': 0.42970060671910515, 'bagging_freq': 4, 'min_child_samples': 94}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:14,851][0m Trial 98 finished with value: 0.7593082223464999 and parameters: {'lambda_l1': 0.9240776592953627, 'lambda_l2': 1.8292057486251017, 'num_leaves': 195, 'feature_fraction': 0.5753992356565527, 'bagging_fraction': 0.40896443419915307, 'bagging_freq': 3, 'min_child_samples': 75}. Best is trial 62 with value: 0.7824044443063581.[0m




[32m[I 2023-03-15 19:56:15,156][0m Trial 99 finished with value: 0.767373409418864 and parameters: {'lambda_l1': 4.185169175636314, 'lambda_l2': 2.7525862759465975, 'num_leaves': 84, 'feature_fraction': 0.5429792429790566, 'bagging_fraction': 0.448485641853016, 'bagging_freq': 2, 'min_child_samples': 89}. Best is trial 62 with value: 0.7824044443063581.[0m




{'lambda_l1': 9.25460943999764,
 'lambda_l2': 2.178249332211151,
 'num_leaves': 38,
 'feature_fraction': 0.56329562301042,
 'bagging_fraction': 0.4013740644347293,
 'bagging_freq': 3,
 'min_child_samples': 90}

In [9]:
# Build the classifier from the Optuna study and the training split
# (train_optimized_model is defined earlier in the notebook).
model = train_optimized_model(study, X_train, y_train)

# Keep only the second predict_proba column (presumably P(label == 1) — confirm
# against model.classes_).
preds = model.predict_proba(X_valid)[:, 1]

# Validation metrics on the held-out split.
print("Brier Score:", brier_score_loss(y_valid, preds))
print("ROC AUC:", roc_auc_score(y_valid, preds))

# One "- key: value" line per estimator parameter.
print("Parameters")
print("\n".join(f"- {key}: {value}" for key, value in model.get_params(deep=True).items()))

# Feature importances, largest first; ties keep their original feature order.
print("Features")
feature_ranking = sorted(
    zip(model.feature_name_, model.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)
print("\n".join(f"- {name}: {imp}" for name, imp in feature_ranking))


Brier Score: 0.15803938620525926
ROC AUC: 0.8614969135802469
Parameters
- boosting_type: gbdt
- class_weight: None
- colsample_bytree: 1.0
- importance_type: split
- learning_rate: 0.1
- max_depth: -1
- min_child_samples: 90
- min_child_weight: 0.001
- min_split_gain: 0.0
- n_estimators: 100
- n_jobs: -1
- num_leaves: 38
- objective: binary
- random_state: None
- reg_alpha: 0.0
- reg_lambda: 0.0
- silent: warn
- subsample: 1.0
- subsample_for_bin: 200000
- subsample_freq: 0
- metric: roc_auc
- verbosity: -1
- lambda_l1: 9.25460943999764
- lambda_l2: 2.178249332211151
- feature_fraction: 0.56329562301042
- bagging_fraction: 0.4013740644347293
- bagging_freq: 3
Features
- OrdinalRankDiff: 28
- SeedDiff: 26
- FGMRegDiff: 13
- TOTouDiff: 13
- ScoreOppTouDiff: 11
- OROppTouDiff: 11
- ScoreOppRegDiff: 10
- ScoreTouDiff: 10
- ScoreRegDiff: 9
- DROppRegDiff: 9
- FGARegDiff: 8
- FGPRegDiff: 8
- FGAOppTouDiff: 7
- FGA3RegDiff: 6
- BlkRegDiff: 6
- AstTouDiff: 6
- AstRegDiff: 5
- AstOppRegDiff: 5


# Submit to the competition

We'll begin by using the trained model to generate predictions, which we'll save to a CSV file.

In [10]:
# Refit the already-configured model on X, y (presumably the full labelled
# set — defined earlier in the notebook) and score the submission rows.
model.fit(X, y)
predictions = model.predict_proba(X_submission)[:, 1]

# Assemble the two-column submission table: the submission index becomes the
# "ID" column and the second predict_proba column becomes "Pred".
output = pd.DataFrame({"ID": X_submission.index}).assign(Pred=predictions)

# Write the file without the positional row index.
output.to_csv("submission.csv", index=False)

# Quick sanity check: row/column count, then summary statistics as cell output.
print(output.shape)
output.describe()

(614319, 2)


Unnamed: 0,Pred
count,614319.0
mean,0.500449
std,0.02156
min,0.061142
25%,0.50043
50%,0.50043
75%,0.50043
max,0.948891
