# Imports and Utilities

In [1]:
from typing import List, Tuple
import numpy as np
import pandas as pd
from IPython.display import Markdown
import optuna
import lightgbm as lgb
from sklearn.metrics import roc_auc_score, brier_score_loss
from sklearn.model_selection import cross_val_score

COMPETITION_NAME = "march-machine-learning-mania-2023"
VERBOSE = False

pd.set_option("display.max_columns", None)

def show_df(df: pd.DataFrame, name: str = "DataFrame", verbose: bool = VERBOSE):
    if verbose:
        display(Markdown(f"## {name}"))
        display(Markdown("### Head"))
        display(df.head())
        display(Markdown("### Tail"))
        display(df.tail())
        display(Markdown("### Description"))
        display(df.describe())


def extract_data(filename: str, competition_name:str = COMPETITION_NAME) -> pd.DataFrame:
    mens_filepath = f"/kaggle/input/{competition_name}/M{filename}.csv"
    womens_filepath = f"/kaggle/input/{competition_name}/W{filename}.csv"
    try:
        df_mens = pd.read_csv(mens_filepath)
    except FileNotFoundError:
        df_mens = None
    try:
        df_womens = pd.read_csv(womens_filepath)
    except FileNotFoundError:
        df_womens = None
    df = pd.concat([df_mens, df_womens])
    show_df(df, filename)
    return df


def get_team_features(detailed_results: pd.DataFrame) -> pd.DataFrame:
    df = detailed_results.copy()
    df = clean_detailed_results(df)
    df = transform_game_to_team(df)
    df = transform_team_results(df)
    show_df(df)
    return df


def get_seed_features(df_in: pd.DataFrame) -> pd.DataFrame:
    df = df_in.copy()
    mask = df["Season"] > 2002
    df = df[mask]
    df["Seed"] = df["Seed"].str.replace(r"\D+","", regex=True)
    df["Seed"] = df["Seed"].astype(int)
    show_df(df)
    return df


def get_ranking_features(df_in:pd.DataFrame) -> pd.DataFrame:
    df = df_in.copy()
    mask = df["RankingDayNum"] == df["RankingDayNum"].max()
    df = df[mask]
    df.drop(["SystemName", "RankingDayNum"], axis=1, inplace=True)
    df = df.groupby(["Season", "TeamID"]).agg("median")
    df = df.reset_index()
    show_df(df)
    return df


def get_game_outcomes(df):
    input_rows = df.to_records()
    output_rows = []
    for input_row in input_rows:
        output_rows.extend(parse_row(input_row))
    out_df = pd.DataFrame(output_rows)
    return out_df

def parse_row(row):
    season = row['Season']
    winning_team_id = row['WTeamID']
    losing_team_id = row['LTeamID']
    if winning_team_id < losing_team_id:
        small_id = winning_team_id
        big_id = losing_team_id
        outcome = True
    elif losing_team_id < winning_team_id:
        small_id = losing_team_id
        big_id = winning_team_id
        outcome = False
    records = [
        {
            "ID": f"{season}_{small_id}_{big_id}",
            'Season': season,
            'LowID': small_id,
            'HighID': big_id,
            'Win': outcome
        },
        {
            "ID": f"{season}_{big_id}_{small_id}",
            'Season': season,
            'LowID': big_id,
            'HighID': small_id,
            'Win': not outcome
        },
    ]
    return records


def clean_detailed_results(df: pd.DataFrame) -> pd.DataFrame:
    return df.drop(["WLoc", "DayNum"], axis=1)


def transform_game_to_team(game_results: pd.DataFrame) -> pd.DataFrame:
    winners = rename_columns(game_results, "W")
    loosers = rename_columns(game_results, "L")
    team_results = pd.concat((winners, loosers))
    team_results.drop(["TeamIDOpp"], axis=1, inplace=True)
    return team_results


def transform_team_results(df: pd.DataFrame) -> pd.DataFrame:
    df = df.groupby(["Season", "TeamID"]).median()
    df["FGP"] =  df["FGM"] / df["FGA"]
    df["FGP3"] =  df["FGM3"] / df["FGA3"]
    df["FTP"] =  df["FTM"] / df["FTA"]
    df["FGPOpp"] =  df["FGMOpp"] / df["FGAOpp"]
    df["FGP3Opp"] =  df["FGM3Opp"] / df["FGA3Opp"]
    df["FTPOpp"] =  df["FTMOpp"] / df["FTAOpp"]
    return df.reset_index()
    
    
def rename_columns(df: pd.DataFrame, team_prefix: str) -> pd.DataFrame:
    df = df.copy()
    df.columns =  (rename_column(column_name, team_prefix) for column_name in df.columns)
    return df


def rename_column(column_name: str, team_prefix: str) -> pd.DataFrame:
    if team_prefix == "W":
        opponent_prefix = "L"
    elif team_prefix == "L":
        opponent_prefix = "W"
    else:
        raise ValueError
    if column_name.startswith(team_prefix):
        column_name = column_name.lstrip(team_prefix)
    elif column_name.startswith(opponent_prefix):
        column_name = f"{column_name.lstrip(opponent_prefix)}Opp"
    return column_name


def split_winner_and_looser_columns(df: pd.DataFrame) -> Tuple[List[str], List[str]]:
    winner_columns = [name for name in df.columns if not name.startswith("L")]
    looser_columns = [name for name in df.columns if not name.startswith("W")]
    return winner_columns, looser_columns


def merge_features(
    season_features: pd.DataFrame, 
    tournament_features: pd.DataFrame, 
    seed_features: pd.DataFrame, 
    ranking_features: pd.DataFrame
) -> pd.DataFrame:
    features = pd.merge(
        season_features,
        tournament_features,
        how="inner",
        on=["Season", "TeamID"],
        suffixes=("Reg", "Tou")
    )
    features = features.merge(
        seed_features,
        how="inner",
        on=["Season", "TeamID"]
    )
    features = features.merge(
        ranking_features,
        how="left",
        on=["Season", "TeamID"],
    )
    show_df(features)
    return features


def merge_outcomes_with_features(outcomes: pd.DataFrame, features: pd.DataFrame, how: str = "inner") -> pd.DataFrame:
    feature_names = [name for name in features.columns if name not in ["Season", "TeamID", "Gender"]]
    data = pd.merge(
        outcomes, 
        features, 
        how=how, 
        left_on=["Season", "HighID"], 
        right_on=["Season", "TeamID"]
    )
    data = pd.merge(
        data, 
        features, 
        how=how, 
        left_on=["Season", "LowID"], 
        right_on=["Season", "TeamID"],
        suffixes=("High", "Low")
    )
    for name in feature_names:
        data[f"{name}Diff"] = data[f"{name}High"] - data[f"{name}Low"]
        data.drop([f"{name}High", f"{name}Low"], axis=1, inplace=True)
    data.drop(
        ["Season", "HighID", "LowID","TeamIDHigh","TeamIDLow"], 
        axis=1, 
        inplace=True
    )
    data.set_index("ID", inplace=True)
    show_df(data)
    return data


def get_submission_outcomes() -> pd.DataFrame:
    sample_submission = pd.read_csv(f"/kaggle/input/{COMPETITION_NAME}/SampleSubmission2023.csv")
    df = sample_submission.copy()
    df.drop("Pred", axis=1, inplace=True)
    df[["Season", "LowID", "HighID"]] = df["ID"].str.split("_", expand=True)
    df[["Season", "LowID", "HighID"]] = df[["Season", "LowID", "HighID"]].astype(int)
    show_df(df)
    return df


def objective(trial: optuna.Trial, X_train, y_train):
    params = {
        "objective": "binary",
        "metric": "roc_auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    model = lgb.LGBMClassifier(**params)
    score = np.mean(cross_val_score(model, X_train, y_train, scoring="roc_auc", cv=5))
    return score


def run_study(X_train, y_train):
    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=100)
    return study


def train_optimized_model(study, X, y):
    params = study.best_params
    model = lgb.LGBMClassifier(
        objective="binary",
        metric="roc_auc",
        verbosity=-1,
        boosting_type="gbdt",
        **params
    )
    model = model.fit(X, y)
    return model

# Load Data

In [2]:
season_results = extract_data("RegularSeasonDetailedResults")
tournament_results = extract_data("NCAATourneyDetailedResults")
seeds = extract_data("NCAATourneySeeds")
rankings = extract_data("MasseyOrdinals_thru_Season2023_Day128")

# Transform Data

In [3]:
season_team_features = get_team_features(season_results)

tournament_results["Season"] += 1
tournament_team_features = get_team_features(tournament_results)

seed_features = get_seed_features(seeds)

ranking_features = get_ranking_features(rankings)

## Merge features

In [4]:
features = merge_features(season_team_features, tournament_team_features, seed_features, ranking_features)

## Build Datasets

In [5]:
from sklearn.model_selection import train_test_split

data = extract_data("NCAATourneyCompactResults")
data_train, data_valid = train_test_split(data, random_state=0, test_size=0.1)

outcomes = get_game_outcomes(data)
outcomes_train = get_game_outcomes(data_train)
outcomes_valid = get_game_outcomes(data_valid)
outcomes_submission = get_submission_outcomes()

In [6]:
features_train = merge_outcomes_with_features(outcomes_train, features)
features_valid = merge_outcomes_with_features(outcomes_valid, features)
features_full = merge_outcomes_with_features(outcomes, features)

In [7]:
y_train = features_train["Win"]
X_train = features_train.drop("Win", axis=1)
y_valid = features_valid["Win"]
X_valid = features_valid.drop("Win", axis=1)
y = features_full["Win"]
X = features_full.drop("Win", axis=1)
X_submission = merge_outcomes_with_features(outcomes_submission, features, how="left").fillna(0)

# Step 4: Train a model


In [8]:
study = run_study(X_train, y_train)
study.best_params

[32m[I 2023-03-15 19:53:12,641][0m A new study created in memory with name: no-name-f36b7f1a-7481-4c17-b702-a68c31a685d2[0m




[32m[I 2023-03-15 19:53:13,364][0m Trial 0 finished with value: 0.7459189468160761 and parameters: {'lambda_l1': 0.005336196346899993, 'lambda_l2': 5.8279407579683e-05, 'num_leaves': 85, 'feature_fraction': 0.6878851478876501, 'bagging_fraction': 0.9790623191528176, 'bagging_freq': 3, 'min_child_samples': 75}. Best is trial 0 with value: 0.7459189468160761.[0m




[32m[I 2023-03-15 19:53:14,239][0m Trial 1 finished with value: 0.7450804179871164 and parameters: {'lambda_l1': 0.34549202613780483, 'lambda_l2': 0.03235389294398412, 'num_leaves': 83, 'feature_fraction': 0.9455395056642674, 'bagging_fraction': 0.5416017760438439, 'bagging_freq': 1, 'min_child_samples': 29}. Best is trial 0 with value: 0.7459189468160761.[0m




[32m[I 2023-03-15 19:53:15,280][0m Trial 2 finished with value: 0.7682617356062332 and parameters: {'lambda_l1': 9.228562114324456, 'lambda_l2': 0.13944629542127346, 'num_leaves': 102, 'feature_fraction': 0.6424671812046476, 'bagging_fraction': 0.8144566614963602, 'bagging_freq': 2, 'min_child_samples': 20}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:16,019][0m Trial 3 finished with value: 0.7417063664671321 and parameters: {'lambda_l1': 1.2973743707499544e-08, 'lambda_l2': 9.436165351428764e-06, 'num_leaves': 93, 'feature_fraction': 0.6997522252430173, 'bagging_fraction': 0.9931829028505946, 'bagging_freq': 2, 'min_child_samples': 69}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:16,482][0m Trial 4 finished with value: 0.7535161940664332 and parameters: {'lambda_l1': 0.4919463646329871, 'lambda_l2': 0.016137882087548776, 'num_leaves': 82, 'feature_fraction': 0.9753837922509475, 'bagging_fraction': 0.5541056132174602, 'bagging_freq': 1, 'min_child_samples': 95}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:17,035][0m Trial 5 finished with value: 0.7514059770638718 and parameters: {'lambda_l1': 4.565896688530319e-08, 'lambda_l2': 3.500489431994821e-08, 'num_leaves': 247, 'feature_fraction': 0.6323114298958775, 'bagging_fraction': 0.7456212045636127, 'bagging_freq': 1, 'min_child_samples': 74}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:18,004][0m Trial 6 finished with value: 0.7411743028369822 and parameters: {'lambda_l1': 2.71848914854202, 'lambda_l2': 0.00015741768932184318, 'num_leaves': 223, 'feature_fraction': 0.513457565324115, 'bagging_fraction': 0.6954253894336063, 'bagging_freq': 5, 'min_child_samples': 9}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:18,923][0m Trial 7 finished with value: 0.7350856480043084 and parameters: {'lambda_l1': 0.0004278745341905984, 'lambda_l2': 1.452895902650947e-06, 'num_leaves': 51, 'feature_fraction': 0.75517447734934, 'bagging_fraction': 0.5957686412704597, 'bagging_freq': 6, 'min_child_samples': 26}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:20,467][0m Trial 8 finished with value: 0.7479131713581475 and parameters: {'lambda_l1': 2.41645177667437, 'lambda_l2': 0.03491186696189261, 'num_leaves': 213, 'feature_fraction': 0.9821049388632659, 'bagging_fraction': 0.7912092973657112, 'bagging_freq': 7, 'min_child_samples': 8}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:20,827][0m Trial 9 finished with value: 0.7586971906349897 and parameters: {'lambda_l1': 0.00039256277394450565, 'lambda_l2': 8.264644241896802e-06, 'num_leaves': 231, 'feature_fraction': 0.49016643778108027, 'bagging_fraction': 0.4579443319807814, 'bagging_freq': 3, 'min_child_samples': 82}. Best is trial 2 with value: 0.7682617356062332.[0m




[32m[I 2023-03-15 19:53:21,193][0m Trial 10 finished with value: 0.7745865179477619 and parameters: {'lambda_l1': 0.024156813461944183, 'lambda_l2': 8.051393659073694, 'num_leaves': 3, 'feature_fraction': 0.4245661966071606, 'bagging_fraction': 0.857454300435189, 'bagging_freq': 4, 'min_child_samples': 48}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:21,758][0m Trial 11 finished with value: 0.7446123055572815 and parameters: {'lambda_l1': 0.023854273662583993, 'lambda_l2': 8.082774227000215, 'num_leaves': 11, 'feature_fraction': 0.4057363529816282, 'bagging_fraction': 0.8522739231453806, 'bagging_freq': 4, 'min_child_samples': 47}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:22,252][0m Trial 12 finished with value: 0.7606763982960155 and parameters: {'lambda_l1': 7.599657480789207, 'lambda_l2': 2.682361489876599, 'num_leaves': 157, 'feature_fraction': 0.40932084768866267, 'bagging_fraction': 0.8777969701221726, 'bagging_freq': 4, 'min_child_samples': 52}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:23,147][0m Trial 13 finished with value: 0.7455702790032934 and parameters: {'lambda_l1': 0.04304296513733013, 'lambda_l2': 0.6385701950511337, 'num_leaves': 147, 'feature_fraction': 0.567076299520156, 'bagging_fraction': 0.8418258680239835, 'bagging_freq': 3, 'min_child_samples': 34}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:23,651][0m Trial 14 finished with value: 0.7681711164964753 and parameters: {'lambda_l1': 9.097254433240508, 'lambda_l2': 0.4805995522743526, 'num_leaves': 19, 'feature_fraction': 0.5824966508731512, 'bagging_fraction': 0.9123919766670572, 'bagging_freq': 5, 'min_child_samples': 43}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:24,327][0m Trial 15 finished with value: 0.7408959727141545 and parameters: {'lambda_l1': 0.055791974758637314, 'lambda_l2': 0.002194291945414916, 'num_leaves': 182, 'feature_fraction': 0.7945147329151871, 'bagging_fraction': 0.7118066135520447, 'bagging_freq': 2, 'min_child_samples': 63}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:25,467][0m Trial 16 finished with value: 0.7495387918832895 and parameters: {'lambda_l1': 0.0035365942662842044, 'lambda_l2': 7.302570978314088, 'num_leaves': 115, 'feature_fraction': 0.503419124428247, 'bagging_fraction': 0.791384812629222, 'bagging_freq': 5, 'min_child_samples': 12}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:27,578][0m Trial 17 finished with value: 0.7497679287751058 and parameters: {'lambda_l1': 2.5090462785564806e-05, 'lambda_l2': 0.2886749537356372, 'num_leaves': 49, 'feature_fraction': 0.6309106324584643, 'bagging_fraction': 0.9450592466250605, 'bagging_freq': 2, 'min_child_samples': 19}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:28,367][0m Trial 18 finished with value: 0.7467389203035136 and parameters: {'lambda_l1': 0.40857367058020316, 'lambda_l2': 0.17462703285588696, 'num_leaves': 45, 'feature_fraction': 0.4576954978147938, 'bagging_fraction': 0.9273107632748526, 'bagging_freq': 7, 'min_child_samples': 36}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:28,998][0m Trial 19 finished with value: 0.7463552131016246 and parameters: {'lambda_l1': 0.2079892783180344, 'lambda_l2': 2.2340937207188776, 'num_leaves': 127, 'feature_fraction': 0.5541211240117581, 'bagging_fraction': 0.8092838991313358, 'bagging_freq': 3, 'min_child_samples': 60}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:29,928][0m Trial 20 finished with value: 0.7381900860967846 and parameters: {'lambda_l1': 1.196514491626357, 'lambda_l2': 0.001892112245283727, 'num_leaves': 171, 'feature_fraction': 0.4567610130084637, 'bagging_fraction': 0.8857274222611122, 'bagging_freq': 6, 'min_child_samples': 21}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:30,689][0m Trial 21 finished with value: 0.7470952691646472 and parameters: {'lambda_l1': 1.8302997388583893, 'lambda_l2': 0.4100260873232301, 'num_leaves': 15, 'feature_fraction': 0.6043092331442392, 'bagging_fraction': 0.9258897282169257, 'bagging_freq': 5, 'min_child_samples': 44}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:31,131][0m Trial 22 finished with value: 0.7741341991341992 and parameters: {'lambda_l1': 8.18653215645121, 'lambda_l2': 1.4822076821161398, 'num_leaves': 5, 'feature_fraction': 0.5596302470034582, 'bagging_fraction': 0.8866323940163737, 'bagging_freq': 6, 'min_child_samples': 41}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:31,629][0m Trial 23 finished with value: 0.7688801894543522 and parameters: {'lambda_l1': 9.03817685864414, 'lambda_l2': 1.731497770645872, 'num_leaves': 32, 'feature_fraction': 0.5547571575729559, 'bagging_fraction': 0.8525786228918251, 'bagging_freq': 6, 'min_child_samples': 38}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:32,243][0m Trial 24 finished with value: 0.7566626621650545 and parameters: {'lambda_l1': 0.3116085304348016, 'lambda_l2': 9.129051294011564, 'num_leaves': 31, 'feature_fraction': 0.5499162882882637, 'bagging_fraction': 0.8622751346384462, 'bagging_freq': 6, 'min_child_samples': 54}. Best is trial 10 with value: 0.7745865179477619.[0m




[32m[I 2023-03-15 19:53:32,543][0m Trial 25 finished with value: 0.7906928133004688 and parameters: {'lambda_l1': 0.09627901158633201, 'lambda_l2': 1.4296485728772927, 'num_leaves': 2, 'feature_fraction': 0.5301366572925041, 'bagging_fraction': 0.9609568602709685, 'bagging_freq': 6, 'min_child_samples': 38}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:32,883][0m Trial 26 finished with value: 0.777205065004108 and parameters: {'lambda_l1': 0.09993473494083976, 'lambda_l2': 1.3427880043064337, 'num_leaves': 3, 'feature_fraction': 0.4565042877411428, 'bagging_fraction': 0.9698503320969145, 'bagging_freq': 7, 'min_child_samples': 53}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:33,157][0m Trial 27 finished with value: 0.7854715128040487 and parameters: {'lambda_l1': 0.00757787092584181, 'lambda_l2': 0.0993432640811918, 'num_leaves': 2, 'feature_fraction': 0.40125951298834384, 'bagging_fraction': 0.9749686100263983, 'bagging_freq': 7, 'min_child_samples': 51}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:33,824][0m Trial 28 finished with value: 0.7403353424884525 and parameters: {'lambda_l1': 0.005636474816392404, 'lambda_l2': 0.07692809591046756, 'num_leaves': 54, 'feature_fraction': 0.45950304523818836, 'bagging_fraction': 0.9967084544743334, 'bagging_freq': 7, 'min_child_samples': 58}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:34,322][0m Trial 29 finished with value: 0.7423564075477952 and parameters: {'lambda_l1': 0.004962009582349095, 'lambda_l2': 0.008011282958498438, 'num_leaves': 68, 'feature_fraction': 0.4045115072391363, 'bagging_fraction': 0.9765087210609075, 'bagging_freq': 7, 'min_child_samples': 82}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:34,996][0m Trial 30 finished with value: 0.7394995753847429 and parameters: {'lambda_l1': 0.06903914080152311, 'lambda_l2': 0.08852656920118034, 'num_leaves': 30, 'feature_fraction': 0.5080004989934623, 'bagging_fraction': 0.9633939449968779, 'bagging_freq': 7, 'min_child_samples': 54}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:36,429][0m Trial 31 finished with value: 0.7586644814515628 and parameters: {'lambda_l1': 0.01052048123566727, 'lambda_l2': 0.9599018050104693, 'num_leaves': 6, 'feature_fraction': 0.46305576415575694, 'bagging_fraction': 0.9558207307680147, 'bagging_freq': 4, 'min_child_samples': 49}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:37,727][0m Trial 32 finished with value: 0.7506609153738341 and parameters: {'lambda_l1': 0.0017219344992449172, 'lambda_l2': 2.5465171269826223, 'num_leaves': 29, 'feature_fraction': 0.43779649566190204, 'bagging_fraction': 0.996386826567812, 'bagging_freq': 7, 'min_child_samples': 29}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:37,993][0m Trial 33 finished with value: 0.7830343765750464 and parameters: {'lambda_l1': 0.12552580053913562, 'lambda_l2': 0.2624121129442522, 'num_leaves': 2, 'feature_fraction': 0.42770576465174814, 'bagging_fraction': 0.9131615717887283, 'bagging_freq': 6, 'min_child_samples': 66}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:38,555][0m Trial 34 finished with value: 0.7445429172103812 and parameters: {'lambda_l1': 0.07625961102491582, 'lambda_l2': 0.07589639959948645, 'num_leaves': 71, 'feature_fraction': 0.4783005289660172, 'bagging_fraction': 0.9212155707079073, 'bagging_freq': 6, 'min_child_samples': 70}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:39,147][0m Trial 35 finished with value: 0.7453650482956704 and parameters: {'lambda_l1': 0.10824126694468386, 'lambda_l2': 0.20227187620510753, 'num_leaves': 23, 'feature_fraction': 0.4384529275528374, 'bagging_fraction': 0.9678162843439648, 'bagging_freq': 7, 'min_child_samples': 65}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:39,711][0m Trial 36 finished with value: 0.7519685922795971 and parameters: {'lambda_l1': 0.17693594437226684, 'lambda_l2': 0.013513425458662607, 'num_leaves': 67, 'feature_fraction': 0.5200008235322199, 'bagging_fraction': 0.9400176209723689, 'bagging_freq': 6, 'min_child_samples': 76}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:40,168][0m Trial 37 finished with value: 0.7463098172428315 and parameters: {'lambda_l1': 0.6107752712465531, 'lambda_l2': 0.6742560251140485, 'num_leaves': 41, 'feature_fraction': 0.40347961551177036, 'bagging_fraction': 0.9995288030545597, 'bagging_freq': 7, 'min_child_samples': 97}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:40,839][0m Trial 38 finished with value: 0.7485002105815501 and parameters: {'lambda_l1': 0.017645826288945544, 'lambda_l2': 0.03353382185872461, 'num_leaves': 99, 'feature_fraction': 0.47871454517239803, 'bagging_fraction': 0.9015702613166646, 'bagging_freq': 5, 'min_child_samples': 58}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:41,445][0m Trial 39 finished with value: 0.7525731511975531 and parameters: {'lambda_l1': 0.7633046343137426, 'lambda_l2': 0.17177075560691596, 'num_leaves': 18, 'feature_fraction': 0.5268366467206576, 'bagging_fraction': 0.9578476561072105, 'bagging_freq': 6, 'min_child_samples': 68}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:41,716][0m Trial 40 finished with value: 0.7858204395285735 and parameters: {'lambda_l1': 0.13891714820080564, 'lambda_l2': 0.04595130356030333, 'num_leaves': 2, 'feature_fraction': 0.43124166359241184, 'bagging_fraction': 0.9048931366164854, 'bagging_freq': 7, 'min_child_samples': 88}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:41,985][0m Trial 41 finished with value: 0.7859073475700271 and parameters: {'lambda_l1': 0.19691421059256184, 'lambda_l2': 0.055945033189447525, 'num_leaves': 2, 'feature_fraction': 0.4402839308185982, 'bagging_fraction': 0.9054680274531512, 'bagging_freq': 7, 'min_child_samples': 93}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:42,460][0m Trial 42 finished with value: 0.7498570807183247 and parameters: {'lambda_l1': 0.2140684217021822, 'lambda_l2': 0.04461270667159215, 'num_leaves': 38, 'feature_fraction': 0.42907804266990196, 'bagging_fraction': 0.9028295443276899, 'bagging_freq': 7, 'min_child_samples': 90}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:42,978][0m Trial 43 finished with value: 0.7557651877627954 and parameters: {'lambda_l1': 0.04219016393769883, 'lambda_l2': 0.01452426138046203, 'num_leaves': 15, 'feature_fraction': 0.485165171762459, 'bagging_fraction': 0.933057953798697, 'bagging_freq': 6, 'min_child_samples': 94}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:43,461][0m Trial 44 finished with value: 0.7522275903256765 and parameters: {'lambda_l1': 0.748031324958581, 'lambda_l2': 0.00615069509869276, 'num_leaves': 54, 'feature_fraction': 0.4321500750414692, 'bagging_fraction': 0.8988147219393238, 'bagging_freq': 7, 'min_child_samples': 86}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:43,728][0m Trial 45 finished with value: 0.7840255770279694 and parameters: {'lambda_l1': 0.022741397680585997, 'lambda_l2': 0.032669128587040235, 'num_leaves': 2, 'feature_fraction': 0.40287169685845853, 'bagging_fraction': 0.8170417469273994, 'bagging_freq': 6, 'min_child_samples': 99}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:44,143][0m Trial 46 finished with value: 0.7471151190648799 and parameters: {'lambda_l1': 0.010879272817705968, 'lambda_l2': 0.028674661763788, 'num_leaves': 86, 'feature_fraction': 0.4031288459847192, 'bagging_fraction': 0.8377262433102248, 'bagging_freq': 7, 'min_child_samples': 99}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:44,632][0m Trial 47 finished with value: 0.7496747205479263 and parameters: {'lambda_l1': 0.028048397018412153, 'lambda_l2': 0.0033739432268597457, 'num_leaves': 21, 'feature_fraction': 0.4999129673386224, 'bagging_fraction': 0.8757619827316581, 'bagging_freq': 5, 'min_child_samples': 91}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:45,129][0m Trial 48 finished with value: 0.748332090556971 and parameters: {'lambda_l1': 0.0010006023716065363, 'lambda_l2': 0.0009844181013368796, 'num_leaves': 18, 'feature_fraction': 0.47917307158080114, 'bagging_fraction': 0.8234075887526582, 'bagging_freq': 6, 'min_child_samples': 80}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:45,652][0m Trial 49 finished with value: 0.7453158550646589 and parameters: {'lambda_l1': 0.021101672607572973, 'lambda_l2': 0.0005667685002274954, 'num_leaves': 60, 'feature_fraction': 0.5286288770333174, 'bagging_fraction': 0.9407564517683281, 'bagging_freq': 7, 'min_child_samples': 100}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:46,112][0m Trial 50 finished with value: 0.7533334023764646 and parameters: {'lambda_l1': 3.0706862191203417, 'lambda_l2': 0.02038796808829609, 'num_leaves': 37, 'feature_fraction': 0.4448145093910847, 'bagging_fraction': 0.872080504032676, 'bagging_freq': 6, 'min_child_samples': 87}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:46,396][0m Trial 51 finished with value: 0.7847149726934416 and parameters: {'lambda_l1': 0.10956079405451159, 'lambda_l2': 0.2748215717630676, 'num_leaves': 2, 'feature_fraction': 0.4224209431532046, 'bagging_fraction': 0.9133442372505717, 'bagging_freq': 6, 'min_child_samples': 91}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:46,690][0m Trial 52 finished with value: 0.7861137865324468 and parameters: {'lambda_l1': 0.26937110031326217, 'lambda_l2': 0.085432503570527, 'num_leaves': 2, 'feature_fraction': 0.40149064564997955, 'bagging_fraction': 0.9744037422032691, 'bagging_freq': 5, 'min_child_samples': 94}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:47,197][0m Trial 53 finished with value: 0.7423565801556233 and parameters: {'lambda_l1': 0.28816376585566583, 'lambda_l2': 0.09027776612662033, 'num_leaves': 13, 'feature_fraction': 0.43099512673606366, 'bagging_fraction': 0.9787673273276398, 'bagging_freq': 5, 'min_child_samples': 93}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:48,175][0m Trial 54 finished with value: 0.7440805871427882 and parameters: {'lambda_l1': 0.35753746620882, 'lambda_l2': 0.4145682916996255, 'num_leaves': 25, 'feature_fraction': 0.42606908404123545, 'bagging_fraction': 0.9446017775077289, 'bagging_freq': 5, 'min_child_samples': 87}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:48,733][0m Trial 55 finished with value: 0.7424897470950104 and parameters: {'lambda_l1': 0.0647353007979224, 'lambda_l2': 0.134319146988983, 'num_leaves': 11, 'feature_fraction': 0.457052806876763, 'bagging_fraction': 0.9784314464241451, 'bagging_freq': 4, 'min_child_samples': 77}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:49,223][0m Trial 56 finished with value: 0.7493522891250165 and parameters: {'lambda_l1': 1.2545442614608644, 'lambda_l2': 0.4878143860586446, 'num_leaves': 42, 'feature_fraction': 0.49306006418702103, 'bagging_fraction': 0.9181235802886141, 'bagging_freq': 7, 'min_child_samples': 90}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:49,729][0m Trial 57 finished with value: 0.74623507805326 and parameters: {'lambda_l1': 0.19487381387554437, 'lambda_l2': 4.236611731117561, 'num_leaves': 202, 'feature_fraction': 0.4182801204804511, 'bagging_fraction': 0.9514161604673746, 'bagging_freq': 5, 'min_child_samples': 83}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:50,351][0m Trial 58 finished with value: 0.7530011323073523 and parameters: {'lambda_l1': 0.048188350405041595, 'lambda_l2': 0.7863735089209836, 'num_leaves': 12, 'feature_fraction': 0.40025937436660075, 'bagging_fraction': 0.9059528056502169, 'bagging_freq': 6, 'min_child_samples': 30}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:50,924][0m Trial 59 finished with value: 0.7509532267307387 and parameters: {'lambda_l1': 0.5440268834275294, 'lambda_l2': 0.2176808319425092, 'num_leaves': 256, 'feature_fraction': 0.4720377405349707, 'bagging_fraction': 0.8747882938020682, 'bagging_freq': 7, 'min_child_samples': 71}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:51,398][0m Trial 60 finished with value: 0.7538519162921077 and parameters: {'lambda_l1': 3.231215151348018, 'lambda_l2': 0.04752323987176947, 'num_leaves': 25, 'feature_fraction': 0.45019012221074955, 'bagging_fraction': 0.981108542026993, 'bagging_freq': 6, 'min_child_samples': 95}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:51,749][0m Trial 61 finished with value: 0.7666996347618357 and parameters: {'lambda_l1': 0.034505988951037694, 'lambda_l2': 0.05118494431631793, 'num_leaves': 4, 'feature_fraction': 0.41611700430996523, 'bagging_fraction': 0.891087403899241, 'bagging_freq': 6, 'min_child_samples': 97}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:52,080][0m Trial 62 finished with value: 0.7763557481858918 and parameters: {'lambda_l1': 0.01197828670118359, 'lambda_l2': 0.12130899861770876, 'num_leaves': 3, 'feature_fraction': 0.4442747410488294, 'bagging_fraction': 0.934830825394083, 'bagging_freq': 6, 'min_child_samples': 85}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:52,513][0m Trial 63 finished with value: 0.747273573051085 and parameters: {'lambda_l1': 0.1261202551245165, 'lambda_l2': 0.009008465472442379, 'num_leaves': 11, 'feature_fraction': 0.4003471861669688, 'bagging_fraction': 0.8310065095892947, 'bagging_freq': 1, 'min_child_samples': 91}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:52,786][0m Trial 64 finished with value: 0.7821469997307318 and parameters: {'lambda_l1': 0.08603679898237114, 'lambda_l2': 0.02575211232393331, 'num_leaves': 2, 'feature_fraction': 0.4205230570594731, 'bagging_fraction': 0.8602533957408962, 'bagging_freq': 5, 'min_child_samples': 98}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:53,291][0m Trial 65 finished with value: 0.7465645000932082 and parameters: {'lambda_l1': 0.026482091643043177, 'lambda_l2': 0.3747031558966844, 'num_leaves': 32, 'feature_fraction': 0.4600984935913785, 'bagging_fraction': 0.9223057524892404, 'bagging_freq': 7, 'min_child_samples': 93}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:53,797][0m Trial 66 finished with value: 0.7531257551592481 and parameters: {'lambda_l1': 0.28520588566062494, 'lambda_l2': 1.0716195286660901, 'num_leaves': 21, 'feature_fraction': 0.49392560942171815, 'bagging_fraction': 0.7749776155764586, 'bagging_freq': 6, 'min_child_samples': 79}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:54,393][0m Trial 67 finished with value: 0.743558966286239 and parameters: {'lambda_l1': 0.007633333842077221, 'lambda_l2': 0.06846671055351944, 'num_leaves': 11, 'feature_fraction': 0.4670023465751432, 'bagging_fraction': 0.9579389957146095, 'bagging_freq': 7, 'min_child_samples': 73}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:55,060][0m Trial 68 finished with value: 0.7486132687089625 and parameters: {'lambda_l1': 0.0031249094956552937, 'lambda_l2': 3.228122941608874, 'num_leaves': 149, 'feature_fraction': 0.443250061263644, 'bagging_fraction': 0.8467183806949067, 'bagging_freq': 4, 'min_child_samples': 46}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:56,074][0m Trial 69 finished with value: 0.7434601483046459 and parameters: {'lambda_l1': 0.013608663772441715, 'lambda_l2': 0.2991881094017534, 'num_leaves': 119, 'feature_fraction': 0.41687401071071023, 'bagging_fraction': 0.8898927142712145, 'bagging_freq': 7, 'min_child_samples': 23}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:57,069][0m Trial 70 finished with value: 0.7398653313725084 and parameters: {'lambda_l1': 0.04074738818201876, 'lambda_l2': 0.13315123496181522, 'num_leaves': 30, 'feature_fraction': 0.5145818889226002, 'bagging_fraction': 0.9830106256416876, 'bagging_freq': 6, 'min_child_samples': 33}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:57,361][0m Trial 71 finished with value: 0.7855735240304618 and parameters: {'lambda_l1': 0.10320300589192742, 'lambda_l2': 0.20079454511195163, 'num_leaves': 2, 'feature_fraction': 0.42516521175502187, 'bagging_fraction': 0.9331398356241977, 'bagging_freq': 6, 'min_child_samples': 89}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:57,882][0m Trial 72 finished with value: 0.7510168327153973 and parameters: {'lambda_l1': 0.14627746837151215, 'lambda_l2': 1.4995393103977501, 'num_leaves': 9, 'feature_fraction': 0.4174136888426874, 'bagging_fraction': 0.9591408340851837, 'bagging_freq': 5, 'min_child_samples': 88}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:58,380][0m Trial 73 finished with value: 0.7469999033396162 and parameters: {'lambda_l1': 0.0677628855674045, 'lambda_l2': 0.6272237888505731, 'num_leaves': 20, 'feature_fraction': 0.4422796761704893, 'bagging_fraction': 0.9161774481437758, 'bagging_freq': 6, 'min_child_samples': 97}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:59,415][0m Trial 74 finished with value: 0.7573699227407362 and parameters: {'lambda_l1': 0.4078203086688506, 'lambda_l2': 0.02391350609825566, 'num_leaves': 9, 'feature_fraction': 0.4626632730562678, 'bagging_fraction': 0.9391123741230714, 'bagging_freq': 6, 'min_child_samples': 41}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:53:59,772][0m Trial 75 finished with value: 0.7756785213723013 and parameters: {'lambda_l1': 0.14915951828262075, 'lambda_l2': 0.052687284695185625, 'num_leaves': 3, 'feature_fraction': 0.4295811666933647, 'bagging_fraction': 0.9933623863393883, 'bagging_freq': 7, 'min_child_samples': 16}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:00,279][0m Trial 76 finished with value: 0.7464825113748559 and parameters: {'lambda_l1': 0.021214046621847574, 'lambda_l2': 0.18672646795609613, 'num_leaves': 49, 'feature_fraction': 0.40000300233349856, 'bagging_fraction': 0.9662770474267051, 'bagging_freq': 7, 'min_child_samples': 83}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:00,793][0m Trial 77 finished with value: 0.7507763037069257 and parameters: {'lambda_l1': 0.08426155731171901, 'lambda_l2': 0.08830364937945852, 'num_leaves': 35, 'feature_fraction': 0.48261650137635465, 'bagging_fraction': 0.9024326806576126, 'bagging_freq': 6, 'min_child_samples': 95}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:01,422][0m Trial 78 finished with value: 0.7479495916098786 and parameters: {'lambda_l1': 1.061356885315915, 'lambda_l2': 0.883148036190111, 'num_leaves': 26, 'feature_fraction': 0.44368415809502654, 'bagging_fraction': 0.9271159402179591, 'bagging_freq': 7, 'min_child_samples': 51}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:01,879][0m Trial 79 finished with value: 0.7467669690755814 and parameters: {'lambda_l1': 0.4731240686727858, 'lambda_l2': 0.014389894804855112, 'num_leaves': 20, 'feature_fraction': 0.41558093770028265, 'bagging_fraction': 0.8640242357417224, 'bagging_freq': 5, 'min_child_samples': 100}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:02,403][0m Trial 80 finished with value: 0.7522232751299737 and parameters: {'lambda_l1': 0.23536681608482637, 'lambda_l2': 0.039726809427994995, 'num_leaves': 15, 'feature_fraction': 0.4783945155390559, 'bagging_fraction': 0.9446824322629415, 'bagging_freq': 6, 'min_child_samples': 90}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:02,715][0m Trial 81 finished with value: 0.7832693821330186 and parameters: {'lambda_l1': 0.13217694773530378, 'lambda_l2': 0.281554214300364, 'num_leaves': 2, 'feature_fraction': 0.4286819925040938, 'bagging_fraction': 0.9111647988565881, 'bagging_freq': 6, 'min_child_samples': 93}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:02,997][0m Trial 82 finished with value: 0.7850982483757603 and parameters: {'lambda_l1': 0.05403094523673016, 'lambda_l2': 0.2895611813565662, 'num_leaves': 2, 'feature_fraction': 0.4326274397358509, 'bagging_fraction': 0.8879042565677975, 'bagging_freq': 6, 'min_child_samples': 94}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:03,512][0m Trial 83 finished with value: 0.7494052797282462 and parameters: {'lambda_l1': 0.048553441173599624, 'lambda_l2': 0.12318542605687241, 'num_leaves': 9, 'feature_fraction': 0.4539600853597048, 'bagging_fraction': 0.8896828587667386, 'bagging_freq': 6, 'min_child_samples': 85}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:03,998][0m Trial 84 finished with value: 0.7493044767566298 and parameters: {'lambda_l1': 0.032323492267637144, 'lambda_l2': 0.45291531058369794, 'num_leaves': 17, 'feature_fraction': 0.4148873088264771, 'bagging_fraction': 0.9312170111357425, 'bagging_freq': 5, 'min_child_samples': 89}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:04,515][0m Trial 85 finished with value: 0.747730811187749 and parameters: {'lambda_l1': 0.09511503500210061, 'lambda_l2': 1.821265244095636, 'num_leaves': 9, 'feature_fraction': 0.4688634155873549, 'bagging_fraction': 0.9687335604799263, 'bagging_freq': 7, 'min_child_samples': 96}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:05,004][0m Trial 86 finished with value: 0.7517283221828677 and parameters: {'lambda_l1': 0.2693834623444289, 'lambda_l2': 0.23629653310382392, 'num_leaves': 26, 'feature_fraction': 0.43637621649436364, 'bagging_fraction': 0.8820425964987089, 'bagging_freq': 6, 'min_child_samples': 92}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:05,668][0m Trial 87 finished with value: 0.7427045575370934 and parameters: {'lambda_l1': 0.05616091814125676, 'lambda_l2': 0.07106863938343458, 'num_leaves': 16, 'feature_fraction': 0.4973958589164525, 'bagging_fraction': 0.9522927232590053, 'bagging_freq': 7, 'min_child_samples': 58}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:06,135][0m Trial 88 finished with value: 0.7642936542458074 and parameters: {'lambda_l1': 0.01667808073209151, 'lambda_l2': 0.6908308802351795, 'num_leaves': 7, 'feature_fraction': 0.40915164204129634, 'bagging_fraction': 0.8488802863666525, 'bagging_freq': 6, 'min_child_samples': 37}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:06,682][0m Trial 89 finished with value: 0.7571684031014174 and parameters: {'lambda_l1': 0.00718504925428862, 'lambda_l2': 4.827844644930566, 'num_leaves': 44, 'feature_fraction': 0.44994525489157916, 'bagging_fraction': 0.9202458328890509, 'bagging_freq': 7, 'min_child_samples': 81}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:08,084][0m Trial 90 finished with value: 0.7448175362649048 and parameters: {'lambda_l1': 0.18914988192547053, 'lambda_l2': 0.03443129271477132, 'num_leaves': 35, 'feature_fraction': 0.42815267001380247, 'bagging_fraction': 0.8689662342397222, 'bagging_freq': 4, 'min_child_samples': 95}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:08,424][0m Trial 91 finished with value: 0.772758859959817 and parameters: {'lambda_l1': 0.12045374642464439, 'lambda_l2': 0.28949478220498637, 'num_leaves': 3, 'feature_fraction': 0.43316647699416777, 'bagging_fraction': 0.8989052222797795, 'bagging_freq': 6, 'min_child_samples': 93}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:08,719][0m Trial 92 finished with value: 0.7847027175376458 and parameters: {'lambda_l1': 0.7080741592783104, 'lambda_l2': 0.15883373620936223, 'num_leaves': 2, 'feature_fraction': 0.42360009739590543, 'bagging_fraction': 0.9076716720648489, 'bagging_freq': 6, 'min_child_samples': 85}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:09,233][0m Trial 93 finished with value: 0.7456790219350028 and parameters: {'lambda_l1': 0.7106323866592915, 'lambda_l2': 0.15664081019975473, 'num_leaves': 15, 'feature_fraction': 0.411328972630086, 'bagging_fraction': 0.9482057203445127, 'bagging_freq': 6, 'min_child_samples': 85}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:10,179][0m Trial 94 finished with value: 0.7436589062187148 and parameters: {'lambda_l1': 0.3871124192177969, 'lambda_l2': 0.10365545408568562, 'num_leaves': 24, 'feature_fraction': 0.46864306128967903, 'bagging_fraction': 0.9845048233253753, 'bagging_freq': 6, 'min_child_samples': 98}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:10,665][0m Trial 95 finished with value: 0.7554129814895366 and parameters: {'lambda_l1': 1.6446227515476315, 'lambda_l2': 0.5095301661028133, 'num_leaves': 7, 'feature_fraction': 0.4488750550752302, 'bagging_fraction': 0.9119150062251645, 'bagging_freq': 5, 'min_child_samples': 88}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:11,213][0m Trial 96 finished with value: 0.7426545875708555 and parameters: {'lambda_l1': 0.03254718965285599, 'lambda_l2': 1.2036323240915492, 'num_leaves': 14, 'feature_fraction': 0.422575629757421, 'bagging_fraction': 0.9350839387382577, 'bagging_freq': 6, 'min_child_samples': 79}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:11,673][0m Trial 97 finished with value: 0.7448485193700505 and parameters: {'lambda_l1': 0.22319356727871045, 'lambda_l2': 0.018872924336249847, 'num_leaves': 21, 'feature_fraction': 0.4014551497114172, 'bagging_fraction': 0.96812512847953, 'bagging_freq': 7, 'min_child_samples': 100}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:12,177][0m Trial 98 finished with value: 0.7549873305854168 and parameters: {'lambda_l1': 0.8507837592962434, 'lambda_l2': 0.06850490340488277, 'num_leaves': 188, 'feature_fraction': 0.48938406284554786, 'bagging_fraction': 0.8914468969486267, 'bagging_freq': 7, 'min_child_samples': 91}. Best is trial 25 with value: 0.7906928133004688.[0m




[32m[I 2023-03-15 19:54:12,685][0m Trial 99 finished with value: 0.7457908718076182 and parameters: {'lambda_l1': 0.06996858910921865, 'lambda_l2': 0.17232320521776442, 'num_leaves': 8, 'feature_fraction': 0.43736816576727583, 'bagging_fraction': 0.9986741552156729, 'bagging_freq': 5, 'min_child_samples': 84}. Best is trial 25 with value: 0.7906928133004688.[0m




{'lambda_l1': 0.09627901158633201,
 'lambda_l2': 1.4296485728772927,
 'num_leaves': 2,
 'feature_fraction': 0.5301366572925041,
 'bagging_fraction': 0.9609568602709685,
 'bagging_freq': 6,
 'min_child_samples': 38}

In [9]:
model = train_optimized_model(study, X_train, y_train)
preds = model.predict_proba(X_valid)[:,1]
print("Brier Score:", brier_score_loss(y_valid, preds))
print("ROC AUC:", roc_auc_score(y_valid, preds))
print("Parameters")
print(*(f"- {key}: {value}" for key, value in model.get_params(deep=True).items()), sep="\n")
print("Features")
print(*(f"- {name}: {imp}" for name, imp in sorted(zip(model.feature_name_, model.feature_importances_), key=lambda x: x[1], reverse=True)), sep="\n")


Brier Score: 0.15521970815645744
ROC AUC: 0.8686342592592592
Parameters
- boosting_type: gbdt
- class_weight: None
- colsample_bytree: 1.0
- importance_type: split
- learning_rate: 0.1
- max_depth: -1
- min_child_samples: 38
- min_child_weight: 0.001
- min_split_gain: 0.0
- n_estimators: 100
- n_jobs: -1
- num_leaves: 2
- objective: binary
- random_state: None
- reg_alpha: 0.0
- reg_lambda: 0.0
- silent: warn
- subsample: 1.0
- subsample_for_bin: 200000
- subsample_freq: 0
- metric: roc_auc
- verbosity: -1
- lambda_l1: 0.09627901158633201
- lambda_l2: 1.4296485728772927
- feature_fraction: 0.5301366572925041
- bagging_fraction: 0.9609568602709685
- bagging_freq: 6
Features
- SeedDiff: 35
- FGMRegDiff: 13
- FGMOppRegDiff: 11
- FGPRegDiff: 7
- TOTouDiff: 7
- OROppTouDiff: 7
- FTAOppRegDiff: 6
- AstTouDiff: 4
- DROppRegDiff: 2
- ScoreTouDiff: 2
- TOOppTouDiff: 2
- OrdinalRankDiff: 2
- ScoreRegDiff: 1
- OROppRegDiff: 1
- ScoreOppRegDiff: 0
- NumOTRegDiff: 0
- FGARegDiff: 0
- FGM3RegDiff: 0

# Submit to the competition

We"ll begin by using the trained model to generate predictions, which we"ll save to a CSV file.

In [10]:
# Use the model to generate predictions
model.fit(X, y)
predictions = model.predict_proba(X_submission)[:,1]

# Save the predictions to a CSV file
output = pd.DataFrame({"ID": X_submission.index,
                       "Pred": predictions})
output.to_csv("submission.csv", index=False)
print(output.shape)
output.describe()

(130683, 2)


Unnamed: 0,Pred
count,130683.0
mean,0.496223
std,0.022257
min,0.054388
25%,0.496158
50%,0.496158
75%,0.496158
max,0.948906
