# Stacking model
**A simple notebook to train a stacking model**

# Load the preprocessed data

In [None]:
import pandas as pd
# Path of the pickled feature table consumed by the rest of the notebook.
# NOTE(review): unpickling can execute arbitrary code; only load files from a trusted source.
preprocessed_path = '../input/preprocessingdata1/df.pkl'
df = pd.read_pickle(preprocessed_path)

In [None]:
# Print a summary of the loaded object (presumably a DataFrame: columns, dtypes,
# non-null counts and memory usage) as a quick sanity check before splitting.
df.info()

In [None]:
# Time-based split on the month index `date_block_num`:
#   blocks below 33 -> training, block 33 -> validation, block 34 -> test features.
# `item_cnt_month` is the regression target and is removed from every feature frame.
month = df['date_block_num']
train_rows = df.loc[month < 33]
valid_rows = df.loc[month == 33]
test_rows = df.loc[month == 34]

X_train = train_rows.drop(columns=['item_cnt_month'])
Y_train = train_rows['item_cnt_month']
X_valid = valid_rows.drop(columns=['item_cnt_month'])
Y_valid = valid_rows['item_cnt_month']
X_test = test_rows.drop(columns=['item_cnt_month'])

# Release the full frame and the temporary slices; only the splits are needed from here on.
del df, month, train_rows, valid_rows, test_rows

**Train the XGBoost model**

In [None]:
from xgboost import XGBRegressor
from xgboost import plot_importance

# Hyper-parameters for the first base learner, gathered in one place for easy tuning.
xgb_params = dict(
    n_estimators=300,
    max_depth=10,
    min_child_weight=1,
    colsample_bytree=0.8,
    subsample=0.8,
    eta=0.1,
    # tree_method='gpu_hist',  # left disabled, as in the original cell
    seed=42,
)
xgb_model = XGBRegressor(**xgb_params)

# RMSE is reported on both splits every round; training stops early after 30
# rounds without improvement.
# NOTE(review): newer xgboost releases expect `eval_metric` and
# `early_stopping_rounds` on the constructor rather than on fit(); confirm
# against the installed version.
eval_pairs = [(X_train, Y_train), (X_valid, Y_valid)]
xgb_model.fit(
    X_train,
    Y_train,
    eval_metric="rmse",
    eval_set=eval_pairs,
    verbose=True,
    early_stopping_rounds=30,
)

**Train the random forest model**

In [None]:
from sklearn.ensemble import RandomForestRegressor
# Second base learner: a shallow random forest, seeded for repeatability and
# using every available core (n_jobs=-1).
rf_params = {
    'n_estimators': 50,
    'max_depth': 7,
    'random_state': 0,
    'n_jobs': -1,
}
rf_model = RandomForestRegressor(**rf_params)
rf_model.fit(X=X_train, y=Y_train)

**Get the validation and test predictions of the two models**

In [None]:
def _clipped_predictions(model, features):
    """Return ``model.predict(features)`` with every value limited to [0, 20]."""
    return model.predict(features).clip(0, 20)


# Column list taken from the training frame; every frame passed to predict()
# is re-indexed with it so the feature order matches what the models were fit on.
feature_name = X_train.columns.tolist()

xgb_Y_valid = _clipped_predictions(xgb_model, X_valid[feature_name])
rf_Y_valid = _clipped_predictions(rf_model, X_valid[feature_name])

xgb_Y_test = _clipped_predictions(xgb_model, X_test[feature_name])
rf_Y_test = _clipped_predictions(rf_model, X_test[feature_name])

**Inspect the outputs of the two models**

In [None]:
# Side-by-side table of both base models' clipped validation predictions;
# these two columns become the inputs of the second-level model.
valid_df = pd.DataFrame({'rf': rf_Y_valid, 'xgbm': xgb_Y_valid})
valid_df.head(20)

In [None]:
# Same two-column layout as the validation table, built from the test-set predictions.
test_df = pd.DataFrame({'rf': rf_Y_test, 'xgbm': xgb_Y_test})
test_df.head(20)

**Use a ridge regression model as the second-level (meta) model**

In [None]:
from sklearn.linear_model import Ridge
# Meta-learner: ridge regression fit on the base models' validation predictions
# (columns 'rf' and 'xgbm') against the true validation targets.
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X=valid_df, y=Y_valid)

In [None]:
# Blend the base models' test predictions with the fitted meta-learner, then
# limit the result to the same [0, 20] range applied to the base predictions.
ridge_raw_predictions = ridge_model.predict(test_df)
final_ridge_predictions = ridge_raw_predictions.clip(0, 20)

**Write the stacked model's submission file**

In [None]:
# Read the competition's test file; its row index is used as the submission ID.
# NOTE(review): assumes the rows of test.csv are in the same order as X_test, so
# that prediction i belongs to row i. Verify against the preprocessing step.
test = pd.read_csv('../input/competitive-data-science-predict-future-sales/test.csv')

submission_columns = {
    "ID": test.index,
    "item_cnt_month": final_ridge_predictions,
}
stack_submission = pd.DataFrame(submission_columns)

# Written without the DataFrame index so the file holds only the two named columns.
stack_submission.to_csv('stack_submission.csv', index=False)

stack_submission.head(10)