In this notebook I want to share the `init_model` parameter of LightGBM's `train` function. Since we compete on the 4th and even the 5th decimal place in this competition, we need to squeeze as much as we can out of the data. So I wanted to share this trick, which I also used here for the first time.

In [None]:
import pandas as pd
import numpy as np
import lightgbm as lgbm
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error


In [None]:
# Read the competition's train and test tables into DataFrames.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-jan-2021/train.csv",
        "../input/tabular-playground-series-jan-2021/test.csv",
    )
)

In [None]:
# Model inputs: every column of `train` whose name begins with "cont".
cont_features = list(filter(lambda name: name.startswith("cont"), train.columns))
len(cont_features)

In [None]:
# Baseline: 5-fold cross-validation of a single-stage LightGBM regressor.
# Outputs:
#   oof        - out-of-fold prediction for every training row
#   score_list - validation RMSE of each fold
#   test_preds - one test-set prediction vector per (fold, seed)
y = train["target"]
kf = KFold(n_splits=5, shuffle=True, random_state=1)
oof = np.zeros(len(train))
score_list = []
test_preds = []

# Hyper-parameters shared by every fold; the seed is added per run below.
base_params = {
    "objective": "regression",
    "metric": "rmse",
    "verbosity": -1,
    "boosting_type": "gbdt",
    "feature_fraction": 0.5,
    "num_leaves": 200,
    "lambda_l1": 2,
    "lambda_l2": 2,
    "learning_rate": 0.01,
    "min_child_samples": 50,
    "bagging_fraction": 0.7,
    "bagging_freq": 1,
}

# LightGBM 4.0 removed the `verbose_eval` / `early_stopping_rounds` keyword
# arguments of `lgbm.train`, so logging and early stopping are passed as
# callbacks instead. `log_evaluation` only exists from 3.3 on; older releases
# expose the same callback as `print_evaluation`.
log_every = getattr(lgbm, "log_evaluation", None) or lgbm.print_evaluation

for fold, (train_index, test_index) in enumerate(kf.split(train), start=1):
    # NOTE(review): abs() is applied to the training fold only; validation and
    # test rows are scored on their raw values. Kept as in the original run -
    # confirm whether this asymmetry is intended.
    X_train = train.iloc[train_index][cont_features].abs()
    X_val = train.iloc[test_index][cont_features]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]

    y_pred_list = []
    for seed in [1]:
        print(seed)
        dtrain = lgbm.Dataset(X_train, y_train)
        dvalid = lgbm.Dataset(X_val, y_val)

        model = lgbm.train(
            {**base_params, "seed": seed},
            dtrain,
            valid_sets=[dtrain, dvalid],
            num_boost_round=100000,
            # Fresh callback objects for every call: early_stopping keeps state.
            callbacks=[lgbm.early_stopping(100), log_every(100)],
        )

        y_pred_list.append(model.predict(X_val))
        test_preds.append(model.predict(test[cont_features]))

    # Average over seeds, then score this fold's held-out rows.
    oof[test_index] = np.mean(y_pred_list, axis=0)
    score = np.sqrt(mean_squared_error(y_val, oof[test_index]))
    score_list.append(score)
    print(f"RMSE Fold-{fold} : {score}")

np.mean(score_list)

In [None]:
# Per-fold RMSEs on the first line, their mean on the second.
print(score_list, np.mean(score_list), sep="\n")

## Now we'll train a 2nd model on top of each 1st model with a lower learning rate, using LightGBM's `init_model` parameter.

In [None]:
# Two-stage variant of the 5-fold CV: train the stage-1 model as before, then
# continue boosting from it (via `init_model`) with a lower learning rate.
# Outputs:
#   oof        - out-of-fold prediction for every training row
#   score_list - validation RMSE of each fold
#   test_preds - one test-set prediction vector per (fold, seed)
y = train["target"]
kf = KFold(n_splits=5, shuffle=True, random_state=1)
oof = np.zeros(len(train))
score_list = []
test_preds = []

# Stage 1: hyper-parameters of the initial model (seed is added per run below).
stage1_params = {
    "objective": "regression",
    "metric": "rmse",
    "verbosity": -1,
    "boosting_type": "gbdt",
    "feature_fraction": 0.5,
    "num_leaves": 200,
    "lambda_l1": 2,
    "lambda_l2": 2,
    "learning_rate": 0.01,
    "min_child_samples": 50,
    "bagging_fraction": 0.7,
    "bagging_freq": 1,
}
# Stage 2 ("extra boosting"): identical except for larger trees and a smaller step.
stage2_params = {**stage1_params, "num_leaves": 300, "learning_rate": 0.003}

# LightGBM 4.0 removed the `verbose_eval` / `early_stopping_rounds` keyword
# arguments of `lgbm.train`, so logging and early stopping are passed as
# callbacks instead. `log_evaluation` only exists from 3.3 on; older releases
# expose the same callback as `print_evaluation`.
log_every = getattr(lgbm, "log_evaluation", None) or lgbm.print_evaluation

for fold, (train_index, test_index) in enumerate(kf.split(train), start=1):
    # NOTE(review): abs() is applied to the training fold only; validation and
    # test rows are scored on their raw values. Kept as in the original run -
    # confirm whether this asymmetry is intended.
    X_train = train.iloc[train_index][cont_features].abs()
    X_val = train.iloc[test_index][cont_features]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]

    y_pred_list = []
    for seed in [1]:
        print(seed)

        # Stage 1: initial model.
        dtrain = lgbm.Dataset(X_train, y_train)
        dvalid = lgbm.Dataset(X_val, y_val)
        model = lgbm.train(
            {**stage1_params, "seed": seed},
            dtrain,
            valid_sets=[dtrain, dvalid],
            num_boost_round=100000,
            # Fresh callback objects for every call: early_stopping keeps state.
            callbacks=[lgbm.early_stopping(100), log_every(100)],
        )

        # Stage 2: extra boosting. The Dataset objects are rebuilt, as in the
        # original, before continuing from the stage-1 booster.
        dtrain = lgbm.Dataset(X_train, y_train)
        dvalid = lgbm.Dataset(X_val, y_val)
        model = lgbm.train(
            {**stage2_params, "seed": seed},
            dtrain,
            valid_sets=[dtrain, dvalid],
            num_boost_round=1000,
            init_model=model,
            callbacks=[lgbm.early_stopping(100), log_every(100)],
        )

        y_pred_list.append(model.predict(X_val))
        test_preds.append(model.predict(test[cont_features]))

    # Average over seeds, then score this fold's held-out rows.
    oof[test_index] = np.mean(y_pred_list, axis=0)
    score = np.sqrt(mean_squared_error(y_val, oof[test_index]))
    score_list.append(score)
    print(f"RMSE Fold-{fold} : {score}")

np.mean(score_list)

In [None]:
# Per-fold RMSEs on the first line, their mean on the second.
print(score_list, np.mean(score_list), sep="\n")

### We can see that the results have improved.
* 0.6957978243963224 -> 0.695761435448342
* Roughly a 3-point improvement in the 5th decimal. Not so bad for this competition :)

## What else can be done with this method?
1. You can tune the parameters of the extra boosting model :)
2. You can stop the first model earlier (e.g. at a fixed number of rounds instead of relying on early stopping) and run the extra boosting model for longer.
3. You can try a 3rd extra boosting model on top of the 2nd.
