### Load libraries and data

In [1]:
import gc
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from xgboost import plot_importance

def plot_features(booster, figsize):
    """Plot the feature importances of `booster` on a fresh single-axes figure.

    Parameters
    ----------
    booster : the fitted model handed straight to `plot_importance`.
    figsize : (width, height) forwarded to `plt.subplots`.

    Returns
    -------
    Whatever `plot_importance` returns for the newly created axes.
    """
    _, axes = plt.subplots(1, 1, figsize=figsize)
    importance_plot = plot_importance(booster=booster, ax=axes)
    return importance_plot

# Folder that holds the pickled `grid` frame, relative to the notebook.
data_folder = "../input/"
# NOTE(review): unpickling can execute arbitrary code; only load pickles you
# produced yourself or otherwise trust.
grid_path = data_folder + 'grid.pkl'
grid = pd.read_pickle(grid_path)

### Feature selection

In [2]:
# Keep only the leading columns of the grid (positional slice, not by name).
# NOTE(review): later cells read `date_block_num` and `item_cnt_month`, so both
# presumably sit within these first 40 columns - confirm.
N_LEADING_COLUMNS = 40
grid = grid.iloc[:, :N_LEADING_COLUMNS]

In [3]:
# Disabled experiment: would collect the names of every column from position 40 onward.
# NOTE(review): the preceding cell already truncates `grid` to its first 40 columns,
# so re-enabling this line as-is would produce an empty list.
#above_40 = list(grid.iloc[:, 40:].columns)

In [None]:
# Disabled experiment: would narrow `grid` to the selected features plus the target.
# NOTE(review): `imp_cols` is only assigned further down (after the first model fit)
# and `above_40` only exists in commented-out form, so this cannot run
# top-to-bottom as written.
#grid = grid[imp_cols + above_40 + ['item_cnt_month']]

In [None]:
# Time-based split on `date_block_num`: blocks below 33 are used for training,
# block 33 for validation, and block 34 is the one predictions are made for.
TARGET = 'item_cnt_month'
block = grid.date_block_num
in_train, in_valid, in_test = block < 33, block == 33, block == 34

X_train = grid.loc[in_train].drop(columns=[TARGET])
Y_train = grid.loc[in_train, TARGET]
X_valid = grid.loc[in_valid].drop(columns=[TARGET])
Y_valid = grid.loc[in_valid, TARGET]
# The test block only needs its features; its target is never read.
X_test = grid.loc[in_test].drop(columns=[TARGET])

In [None]:
# The train/valid/test splits were built from `grid` in the previous cell, so the
# full frame is dropped here (presumably to release memory before training).
# After this cell the name `grid` no longer exists: anything further down must
# work from X_train / Y_train / X_valid / Y_valid / X_test instead.
del grid
# Force a collection pass right away; the returned count is shown as the cell output.
gc.collect()

In [None]:
# Hyper-parameters for the exploratory fit. n_estimators is only an upper bound
# here because early stopping (below) can end training sooner.
xgb_params = dict(
    max_depth=8,
    n_estimators=1000,
    min_child_weight=300,
    colsample_bytree=0.8,
    subsample=0.8,
    eta=0.3,
    seed=42,
)
model = XGBRegressor(**xgb_params)

# RMSE is reported on both sets each round; the validation set is listed last.
eval_sets = [(X_train, Y_train), (X_valid, Y_valid)]

# NOTE(review): newer xgboost releases deprecate passing `eval_metric` and
# `early_stopping_rounds` to fit() in favour of the constructor - confirm
# against the installed version before upgrading.
model.fit(
    X_train,
    Y_train,
    eval_metric="rmse",
    eval_set=eval_sets,
    verbose=True,
    early_stopping_rounds=10,
)

### Plot the feature importance

In [None]:
# Importance chart for the model fitted above, on a tall 10x14 figure.
plot_features(booster=model, figsize=(10, 14))

In [None]:
# Keep the features whose importance is strictly above the median importance.
importances = model.feature_importances_
med = np.median(importances)
above_median = importances > med
imp_cols = list(X_train.columns[above_median])

In [None]:
# Show the selected feature names as the cell output.
imp_cols

### Final training and submission generation

In [None]:
# Refit on every labelled month (date_block_num < 34, i.e. the training and
# validation splits together) before predicting the test month.
#
# The full `grid` frame was deleted right after the splits were built, so
# re-filtering it here raised NameError on a fresh Restart & Run All. The
# training data is rebuilt from the splits that are still in memory instead.
# NOTE(review): concat puts the block-33 rows after all earlier rows; this matches
# the old `grid[grid.date_block_num < 34]` row order only if `grid` was sorted by
# date_block_num - confirm if exact reproducibility of the subsampling matters.
X_full = pd.concat([X_train, X_valid])
Y_full = pd.concat([Y_train, Y_valid])

model = XGBRegressor(
    max_depth=8,
    # NOTE(review): presumably the round count found by the early-stopped fit
    # above - confirm it still matches after any feature or parameter change.
    n_estimators=62,
    min_child_weight=300,
    colsample_bytree=0.8,
    subsample=0.8,
    eta=0.3,
    seed=42)
model.fit(X_full, Y_full)

In [None]:
# Predict the test month and clamp every prediction into the range [0, 20].
preds = model.predict(X_test).clip(0,20)
submission = pd.DataFrame({
    # Hard-coded row count; it doubles as a length check, because pandas raises
    # if `preds` does not have exactly this many entries.
    "ID": range(214200),
    "item_cnt_month": preds
    })
# index=False: the frame already carries an explicit ID column. Without it,
# to_csv also writes the DataFrame index as an extra unnamed first column.
submission.to_csv('xgb.csv', index=False)