In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline


In [5]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

categorical_features = ['district']
numerical_features = [col for col in train_df.columns if col.startswith('SRP_')] + ['self_eval', 'teacher_eval', 'extracurricular']


X = train_df[numerical_features + categorical_features]
y = train_df['y']
X_test = test_df[numerical_features + categorical_features]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ], remainder='passthrough')


model = LGBMRegressor()

my_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                              ('model', model)])


In [7]:
my_pipeline.fit(X_train, y_train)

predictions = my_pipeline.predict(X_val)
print(f'R^2 Score on Validation Set: {r2_score(y_val, predictions)}')


python(24815) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000795 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 6400, number of used features: 60
[LightGBM] [Info] Start training from score 0.549380
R^2 Score on Validation Set: 0.7294274034465796


In [8]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'model__num_leaves': [31, 50, 100],
    'model__max_depth': [-1, 10, 20],
    'model__learning_rate': [0.05, 0.1, 0.2],
    'model__n_estimators': [100, 200, 500],
}

grid_search = GridSearchCV(my_pipeline, param_grid, cv=5, scoring='r2', n_jobs=-1)

grid_search.fit(X_train, y_train)

print(f'Best R^2 Score: {grid_search.best_score_}')
print(f'Best Parameters: {grid_search.best_params_}')


python(24822) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24823) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24824) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24825) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24826) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24827) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24828) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24829) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24830) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(24831) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004255 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001436 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001138 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005893 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004353 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004993 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001427 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004460 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001445 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001455 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006853 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se







[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004364 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007836 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001199 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005428 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009813 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004756 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009811 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009789 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005074 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013223 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005808 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001136 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001131 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006675 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001504 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013084 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001434 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGB

[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005436 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001422 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead o

[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005757 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010799 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[L

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001444 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001563 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005222 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001424 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001141 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enoug

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001413 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006961 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004922 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750






[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001566 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001471 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009924 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005710 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005281 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004480 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start tra



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001451 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004712 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001733 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001409 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001064 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004643 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enoug

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006375 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001168 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004880 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001433 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001418 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004601 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001425 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004787 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003744 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001616 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008925 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006444 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005658 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004445 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001489 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750






[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002658 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 6400, number of used features: 60
[LightGBM] [Info] Start training from score 0.549380
Best R^2 Score: 0.779009820682319
Best Parameters: {'model__learning_rate': 0.05, 'model__max_depth': 10, 'model__n_estimators': 500, 'model__num_leaves': 31}


In [9]:
predictions = grid_search.predict(X_test)



In [12]:
submission = pd.DataFrame({
    'SEQN': test_df['SEQN'],
    'y': predictions
})
submission.to_csv('submit_2.csv', index=False)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001452 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006170 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002559 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.556176
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005672 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004937 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004723 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002940 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005340 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004617 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008301 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001435 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.546045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004896 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004773 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001674 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001450 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bi

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004703 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.551750
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001407 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001628 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.547618
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001408 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12787
[LightGBM] [Info] Number of data points in the train set: 5120, number of used features: 60
[LightGBM] [Info] Start training from score 0.545313
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005186 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bi

In [15]:
predictions.shape

(4000,)