In [None]:
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt
import lightgbm as lgbm
from tools import make_submission

In [None]:
# Inputs: the pickle read in the loading cell and the CSV path handed to
# make_submission at the end of the notebook.
DATA_PATH = "../data/data_remove_redundant.pkl"
TEST_PATH = "../data/UnlabeledWiDS2021.csv"

# Outputs: directory and file name that are joined into the submission path.
SUBMISSION_PATH = "../submissions"
SUB_FILE = "lgbm_remove_redundant.csv"

## Loading data

In [None]:
# Read the dictionary of prepared splits from disk.
# NOTE: pickle.load can execute arbitrary code while deserialising; only point
# DATA_PATH at files produced by this project.
with open(DATA_PATH, "rb") as pkl_file:
    data = pickle.load(pkl_file)

# Train / validation pair used for the early-stopped run.
x_train, y_train = data["x_train"], data["y_train"]
x_val, y_val = data["x_val"], data["y_val"]

# Combined train + validation data for the final refit, plus the test features.
x_train_val, y_train_val = data["x_train_val"], data["y_train_val"]
x_test = data["x_test"]

In [None]:
# Wrap the splits in LightGBM Dataset objects.

# Train + validation combined: used for the final refit on all labelled data.
lgbm_train_val = lgbm.Dataset(data=x_train_val, label=y_train_val)

# Separate train / validation pair for the early-stopped run.
lgbm_train = lgbm.Dataset(data=x_train, label=y_train)
# The validation Dataset references the training Dataset so that both are
# binned with the same feature discretisation, as the LightGBM docs recommend
# for validation data.
lgbm_val = lgbm.Dataset(data=x_val, label=y_val, reference=lgbm_train)

## Training on train/validation data

In [None]:
EARLY_STOPPING = 100  # rounds without validation improvement before stopping
N_TREES = 1000        # upper bound on the number of boosting rounds
VERBOSE = 200         # log the validation metric every VERBOSE rounds
SEED = 1337           # shared by every seed-like parameter below

final_params = {
    # Core setup: binary classification scored by AUC.
    'learning_rate': 0.1,
    'objective': 'binary',
    'metric': 'auc',
    'boosting_type': 'gbdt',
    # Tree shape and sampling.
    'max_depth': 15,
    # NOTE(review): LightGBM only performs row subsampling when a bagging
    # frequency (subsample_freq / bagging_freq) > 0 is set; none is set here,
    # so this value may currently have no effect -- confirm before changing,
    # since enabling it would alter the tuned model.
    'subsample': 0.2,
    'colsample_bytree': 0.3,
    # Regularisation.
    'reg_alpha': 0.54,
    'reg_lambda': 0.4,
    'min_split_gain': 0.7,
    'min_child_weight': 26,
    # Runtime and reproducibility.
    'nthread': -1,
    'seed': SEED,
    'feature_fraction_seed': SEED,
    'bagging_seed': SEED,
    'drop_seed': SEED,  # NOTE(review): documented as dart-only; likely unused with gbdt
    'data_random_seed': SEED,
    'verbose': -1,
    'is_unbalance': True,
}

# Early stopping and periodic logging are passed as callbacks: the
# `early_stopping_rounds=` / `verbose_eval=` keyword arguments of lgbm.train
# were deprecated in LightGBM 3.3 and removed in 4.0. `log_evaluation` was
# called `print_evaluation` before 3.3, so fall back to the old name there.
log_evaluation = getattr(lgbm, "log_evaluation", None)
if log_evaluation is None:
    log_evaluation = lgbm.print_evaluation

model = lgbm.train(
    final_params,
    train_set=lgbm_train,
    num_boost_round=N_TREES,
    valid_sets=[lgbm_val],
    callbacks=[
        lgbm.early_stopping(EARLY_STOPPING),
        log_evaluation(period=VERBOSE),
    ],
)

## Training with all data

In [None]:
# Number of boosting rounds for the refit on train + validation data.
# NOTE(review): presumably the best iteration reported by the early-stopped
# run on the train/validation split -- confirm, and update it whenever the
# parameters or the data change.
N_TREES_FULL = 424

# There is no validation set in this run, so there is nothing to log or to
# early-stop on. The former `verbose_eval=` argument was therefore a no-op
# (and is rejected by LightGBM >= 4.0), so it is omitted.
model = lgbm.train(final_params, train_set=lgbm_train_val, num_boost_round=N_TREES_FULL)

# Score the test features with the refitted model.
test_pred = model.predict(x_test)

In [None]:
# Build the output path from the configured directory and file name.
sub_name = os.path.join(SUBMISSION_PATH, SUB_FILE)
# Hand the predictions, the test CSV path and the output path to the project
# helper. NOTE(review): presumably it reads ids from TEST_PATH and writes the
# submission CSV to sub_name -- confirm against tools.make_submission.
make_submission(test_pred, TEST_PATH, sub_name)