In [71]:
import time
import datetime

import numpy as np
import pandas as pd
import optgbm as opt
from sklearn.metrics import log_loss
from sklearn.preprocessing import StandardScaler

In [72]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

train_features = train_df.drop(['target', 'id'], 1)
test_features = test_df.drop('id', 1)

In [73]:
def do_transform(df, scaler):
    copy = df.copy()
    copy['sum_no_scale'] = copy.sum(axis=1)
    copy['mean_no_scale'] = copy.mean(axis=1)
    copy['median_no_scale'] = copy.median(axis=1)
    copy['gtr_1'] = (train_features.transpose() > 1).sum()
    copy['gtr_5'] = (train_features.transpose() > 5).sum()
    copy['gtr_10'] = (train_features.transpose() > 10).sum()
    copy['gtr_20'] = (train_features.transpose() > 20).sum()
    
    scaled_df = scaler.transform(df)
    copy['sum_scale'] = scaled_df.sum(axis=1)
    copy['mean_scale'] = scaled_df.mean(axis=1)
    copy['median_scale'] = np.median(scaled_df)
    return copy

In [74]:
scaler = StandardScaler().fit(train_features)

train_transformed = do_transform(train_features, scaler)
test_trainsformed = do_transform(test_features, scaler)

In [75]:
print(datetime.datetime.now())
start = time.time()

X_train = train_transformed
y_train = train_df['target']

opt_obgm = opt.OGBMClassifier()
opt_obgm.fit(X_train, y_train)

minutes = (time.time() - start) / 60
print(round(minutes, 2))
print(datetime.datetime.now())

[32m[I 2021-06-27 21:34:58,502][0m A new study created in memory with name: no-name-15061b80-69be-40f3-bccb-21f1604ccb87[0m
Searching the best hyperparameters...


2021-06-27 21:34:58.451324


[32m[I 2021-06-27 21:36:00,460][0m Trial 0 finished with value: 1.7626761927222432 and parameters: {'feature_fraction': 0.35, 'max_depth': 5, 'num_leaves': 2, 'min_data_in_leaf': 9400, 'lambda_l1': 2.5789016605368165e-06, 'lambda_l2': 4.363344108437796e-07, 'bagging_fraction': 0.7, 'bagging_freq': 5}. Best is trial 0 with value: 1.7626761927222432.[0m
[32m[I 2021-06-27 21:37:04,636][0m Trial 1 finished with value: 1.7498471771526116 and parameters: {'feature_fraction': 0.35, 'max_depth': 4, 'num_leaves': 7, 'min_data_in_leaf': 9014, 'lambda_l1': 0.0032065562837036814, 'lambda_l2': 2.975173865031074e-06, 'bagging_fraction': 0.5, 'bagging_freq': 2}. Best is trial 1 with value: 1.7498471771526116.[0m
[32m[I 2021-06-27 21:37:40,427][0m Trial 2 finished with value: 1.7646888311746376 and parameters: {'feature_fraction': 0.65, 'max_depth': 1, 'num_leaves': 2, 'min_data_in_leaf': 33369, 'lambda_l1': 6.221553161507119e-09, 'lambda_l2': 3.896369015445055, 'bagging_fraction': 0.850000000

[32m[I 2021-06-27 22:05:40,101][0m Trial 23 finished with value: 1.746968365801764 and parameters: {'feature_fraction': 0.15000000000000002, 'max_depth': 7, 'num_leaves': 59, 'min_data_in_leaf': 2924, 'lambda_l1': 0.00968872653090523, 'lambda_l2': 0.0028843425853577595, 'bagging_fraction': 0.95, 'bagging_freq': 7}. Best is trial 23 with value: 1.746968365801764.[0m
[32m[I 2021-06-27 22:07:01,808][0m Trial 24 finished with value: 1.7495104376190853 and parameters: {'feature_fraction': 0.1, 'max_depth': 7, 'num_leaves': 60, 'min_data_in_leaf': 2883, 'lambda_l1': 0.010059666188863267, 'lambda_l2': 0.0029909809403917214, 'bagging_fraction': 0.95, 'bagging_freq': 7}. Best is trial 23 with value: 1.746968365801764.[0m
[32m[I 2021-06-27 22:08:22,671][0m Trial 25 finished with value: 1.7478593337830155 and parameters: {'feature_fraction': 0.2, 'max_depth': 7, 'num_leaves': 82, 'min_data_in_leaf': 2406, 'lambda_l1': 0.002092996456881035, 'lambda_l2': 0.3241706939721242, 'bagging_fractio

52.84
2021-06-27 22:27:49.003867


In [80]:
print(datetime.datetime.now())
X_test = test_trainsformed
test_preds = opt_obgm.predict_proba(X_test)
print(datetime.datetime.now())

2021-06-27 22:30:54.153827
2021-06-27 22:30:55.475946


In [81]:
submission = pd.DataFrame(test_preds)
submission.columns = ['Class_1', 'Class_2', 'Class_3', 'Class_4', 'Class_5', 'Class_6', 'Class_7', 'Class_8', 'Class_9']
submission['id'] = test_df['id']

submission.to_csv("submission_fe_opt_obgm.csv", index=False)