In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# ** Install pycaret **

In [None]:
!pip install pycaret

# ** Install shap **

In [None]:
!pip install shap

# ** Read the training data **

In [None]:
import pandas as pd

# Load the labelled training CSV (comma-separated, latin-1 encoded) and preview the first rows.
dataset = pd.read_csv(
    '/kaggle/input/credit-score-prediction/CreditScore_train.csv',
    sep=',',
    encoding='latin-1',
)
dataset.head()

In [None]:
# Display the (rows, columns) shape of the training data loaded above.
dataset.shape

# ** Separate out the Train/Test and Validation Data **

In [None]:
# Take a seeded 90% sample of the rows for modeling; the remaining 10% is kept aside as
# "unseen" data that the experiment never touches until the final prediction step.
data = dataset.sample(frac=0.9, random_state=786)
data_unseen = dataset.drop(index=data.index).reset_index(drop=True)
# Renumber the modeling rows only after the unseen rows were selected via the original index.
data = data.reset_index(drop=True)

print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')

# ** Setup the Model **

In [None]:
from pycaret.regression import *
# Configure the PyCaret regression experiment on the modeling split, with `y` as the target.
exp_reg102 = setup(
    data=data,
    target='y',
    session_id=123,  # fixed session id so repeated runs are comparable
    # Feature / target preprocessing switches
    normalize=True,
    transformation=True,
    transform_target=True,
    # Rare categorical level handling (threshold 0.05)
    combine_rare_levels=True,
    rare_level_threshold=0.05,
    # Multicollinearity handling (threshold 0.95)
    remove_multicollinearity=True,
    multicollinearity_threshold=0.95,
    # Experiment logging
    log_experiment=True,
    experiment_name='Credit_Score_1',
)

# ** Create a Light GBM Model **

In [None]:
# Baseline LightGBM model: fold=10, metrics shown with round=6.
light_gbm = create_model(
    'lightgbm',
    fold=10,
    round=6,
)

# ** Tune the Light GBM Model **

In [None]:
# Hyperparameter-tune the baseline LightGBM model with the same fold / rounding settings.
tuned_light_gbm = tune_model(
    light_gbm,
    fold=10,
    round=6,
)

# ** Plot the Light GBM Model - Error **

In [None]:
# Error plot for the tuned LightGBM model.
plot_model(
    tuned_light_gbm,
    plot='error',
)

# ** Plot the Light GBM Model - Feature Importance **

In [None]:
# Feature-importance plot for the tuned LightGBM model.
plot_model(
    tuned_light_gbm,
    plot='feature',
)

# ** Evaluate the Tuned Light GBM Model **

In [None]:
# Run PyCaret's evaluate_model on the tuned LightGBM model.
# NOTE(review): this presumably renders an interactive widget, so its output may not
# survive a non-interactive "Run All" — confirm.
evaluate_model(tuned_light_gbm)

# ** Interpret the Tuned Light GBM Model **

In [None]:
# 'reason' interpretation plot for the tuned model at observation=10
# (depends on the shap package installed earlier in the notebook).
interpret_model(
    tuned_light_gbm,
    plot='reason',
    observation=10,
)

# ** Ensemble Model - Light GBM - Bagging **

In [None]:
# Build a Bagging ensemble of 100 estimators on top of the tuned LightGBM model.
bagged_light_gbm = ensemble_model(
    tuned_light_gbm,
    method='Bagging',
    n_estimators=100,
)

# ** Predict on Test / Hold-out Sample - Bagged Light GBM **

In [None]:
# predict_model is called without a `data` argument; per this section's heading it scores
# the test / hold-out sample of the experiment with the bagged LightGBM model.
predict_model(bagged_light_gbm)

# ** Finalize Model for Deployment  - Bagged Light GBM **

In [None]:
# Finalize the bagged LightGBM model for deployment.
# NOTE(review): finalize_model presumably refits on the full modeling data including the
# hold-out split — confirm against the PyCaret documentation.
final_bagged_light_gbm = finalize_model(bagged_light_gbm)

In [None]:
# Print the plain-text representation of the finalized model object for inspection.
print(final_bagged_light_gbm)

# ** Predict on Unseen Data  - Bagged Light GBM **

In [None]:
# Score the 10% of rows that were held back before the experiment was set up,
# then preview the first rows of the returned frame.
unseen_predictions_bagged_light_gbm = predict_model(
    final_bagged_light_gbm,
    data=data_unseen,
)
unseen_predictions_bagged_light_gbm.head()

# ** Evaluate metrics - Bagged Light GBM **

In [None]:
from pycaret.utils import check_metric
# R2 on the held-back rows: actual target column `y` vs. predicted column `Label`.
check_metric(
    unseen_predictions_bagged_light_gbm['y'],
    unseen_predictions_bagged_light_gbm['Label'],
    'R2',
)

In [None]:
# MSE on the held-back rows: actual target column `y` vs. predicted column `Label`.
check_metric(
    unseen_predictions_bagged_light_gbm['y'],
    unseen_predictions_bagged_light_gbm['Label'],
    'MSE',
)

# ** Save the Final Model  - Bagged Light GBM **

In [None]:
# Persist the finalized model under the Kaggle working directory.
save_model(
    final_bagged_light_gbm,
    '/kaggle/working/Final Bagged Light GBM Model 14Nov2020',
)

# ** Load the Final Model  - Bagged Light GBM  **

In [None]:
# Reload the model that was just saved, using the same path passed to save_model.
# NOTE(review): only load model files from a trusted source.
saved_final_bagged_light_gbm = load_model(
    '/kaggle/working/Final Bagged Light GBM Model 14Nov2020'
)

# ** Read the Brand New Test Data  - Bagged Light GBM **

In [None]:
# Load the separate test CSV (comma-separated, latin-1 encoded) and preview the first rows.
test_dataset = pd.read_csv(
    '/kaggle/input/credit-score-prediction/CreditScore_test.csv',
    sep=',',
    encoding='latin-1',
)
test_dataset.head()

# ** Predict Brand New Unseen Data - Bagged Light GBM ** 

In [None]:
# Score the test CSV with the model reloaded from disk, then preview the first rows.
brand_new_unseen_predictions_bagged_light_gbm = predict_model(
    saved_final_bagged_light_gbm,
    data=test_dataset,
)
brand_new_unseen_predictions_bagged_light_gbm.head()

# ** Evaluate the results on brand new unseen data  - Bagged Light GBM **

In [None]:
from pycaret.utils import check_metric
# R2 on the test CSV predictions: actual target column `y` vs. predicted column `Label`.
check_metric(
    brand_new_unseen_predictions_bagged_light_gbm['y'],
    brand_new_unseen_predictions_bagged_light_gbm['Label'],
    'R2',
)

In [None]:
# MSE on the test CSV predictions; kept in `mse` because the next cell derives RMSE from it.
mse = check_metric(
    brand_new_unseen_predictions_bagged_light_gbm['y'],
    brand_new_unseen_predictions_bagged_light_gbm['Label'],
    'MSE',
)
mse

In [None]:
# RMSE derived as the square root of the MSE computed in the previous cell
# (requires `mse` and `np` to be in scope).
# NOTE(review): if check_metric rounds its result, this RMSE inherits that rounding — confirm.
rmse = np.sqrt(mse)
rmse

In [None]:
# MAPE on the test CSV predictions: actual target column `y` vs. predicted column `Label`.
check_metric(
    brand_new_unseen_predictions_bagged_light_gbm['y'],
    brand_new_unseen_predictions_bagged_light_gbm['Label'],
    'MAPE',
)

In [None]:
# MAE on the test CSV predictions: actual target column `y` vs. predicted column `Label`.
check_metric(
    brand_new_unseen_predictions_bagged_light_gbm['y'],
    brand_new_unseen_predictions_bagged_light_gbm['Label'],
    'MAE',
)

In [None]:
# RMSLE on the test CSV predictions: actual target column `y` vs. predicted column `Label`.
check_metric(
    brand_new_unseen_predictions_bagged_light_gbm['y'],
    brand_new_unseen_predictions_bagged_light_gbm['Label'],
    'RMSLE',
)