In [None]:
import pandas as pd
# Lift the column cap so DataFrame previews render every column.
pd.options.display.max_columns = None

# Read the competition training set and preview its first rows.
TRAIN_CSV = '../input/tabular-playground-series-sep-2021/train.csv'
data = pd.read_csv(TRAIN_CSV)
data.head()

In [None]:
# Summary statistics for the columns of the training data.
data.describe()

In [None]:
# Share of each `claim` value, expressed as a percentage of all rows (2 decimals).
claim_counts = data['claim'].value_counts()
(claim_counts * 100 / len(data)).round(2)

In [None]:
# Per-column dtype / non-null summary; verbose=True forces the full column listing.
data.info(verbose=True)

In [None]:
# Percentage of missing values per column, sorted with the largest first (2 decimals).
null_counts = data.isnull().sum().sort_values(ascending=False)
(null_counts * 100 / len(data)).round(2)

In [None]:
# Number of distinct values in each column.
data.nunique()

In [None]:
# (rows, columns) of the training data.
data.shape

In [None]:
# Skewness of each column's distribution.
data.skew()

In [None]:
!pip install pycaret

In [None]:
from pycaret.classification import *

In [None]:
# Column labels of the training data (the `target` / `ignore_features` names used in setup() come from here).
data.columns

In [None]:
# Configure the PyCaret classification experiment on the training data.
# The returned object is kept in `claim`; the modelling calls in later cells
# (create_model, tune_model, ...) operate on the experiment configured here.
claim = setup(
    data=data,
    target='claim',
    session_id=42,            # fixed seed so the experiment is repeatable
    train_size=0.8,           # fraction of rows used for training; the rest is held out
    ignore_features=['id'],   # `id` is a row identifier, not a predictor
    # Skip the interactive data-type confirmation prompt: without this, setup()
    # waits for keyboard input and blocks "Run All" / non-interactive kernels.
    silent=True,
    log_experiment=True,
    # Scaling / transformation of features.
    normalize=True,
    normalize_method='robust',
    transformation=True,
    # Categorical handling.
    handle_unknown_categorical=True,
    unknown_categorical_method='most_frequent',
    ignore_low_variance=True,        # remove categorical features with statistically insignificant variance
    combine_rare_levels=True,        # merge categorical levels below rare_level_threshold into a single level
    # Drop one of each pair of features that are highly correlated with each other.
    remove_multicollinearity=True,
)

In [None]:
# Train a CatBoost classifier within the experiment configured by setup().
catboost = create_model('catboost')

In [None]:
# Print the text representation of the trained model object.
print(catboost)

In [None]:
# Tune the CatBoost model's hyperparameters, using AUC as the metric to optimise.
tuned_catboost = tune_model(catboost,optimize = 'AUC')

In [None]:
# ROC / AUC plot for the tuned model.
plot_model(tuned_catboost, plot = 'auc')

In [None]:
# Precision-recall curve for the tuned model.
plot_model(tuned_catboost, plot = 'pr')

In [None]:
# Feature-importance plot for the tuned model.
plot_model(tuned_catboost, plot='feature')

In [None]:
# Confusion matrix for the tuned model.
plot_model(tuned_catboost, plot = 'confusion_matrix')

In [None]:
# Calibrate the tuned model's predicted probabilities.
# NOTE: despite the `_dt` suffix, this is built from the tuned CatBoost model, not a decision tree.
calibrated_dt = calibrate_model(tuned_catboost)

In [None]:
# Confusion matrix for the calibrated model, for comparison with the uncalibrated one.
plot_model(calibrated_dt, plot = 'confusion_matrix')

In [None]:
# Open PyCaret's evaluation view for the tuned model.
# NOTE(review): this appears to be an interactive widget; it may render nothing in a non-interactive run — confirm.
evaluate_model(tuned_catboost)

In [None]:
# No `data` argument is passed here; per the PyCaret docs this scores the hold-out split
# reserved by setup() (the rows outside `train_size`).
predict_model(tuned_catboost)

In [None]:
# Load the competition test set and preview its first rows.
data_unseen=pd.read_csv('../input/tabular-playground-series-sep-2021/test.csv')
data_unseen.head()

In [None]:
# Score the test set with the tuned model. The result carries the prediction
# columns (`Label`, `Score`) that the following cells read.
unseen_predictions = predict_model(tuned_catboost, data=data_unseen)
unseen_predictions.head()

In [None]:
# Average of the `Score` column over all test-set predictions, rounded to 2 decimals.
mean_score = round(unseen_predictions['Score'].mean(), 2)
print("Confidence Score :   {}".format(mean_score))

In [None]:
unseen_predictions=unseen_predictions[['id','Label']]
unseen_predictions.head()

In [None]:
unseen_predictions.columns = ['id', 'claim']
unseen_predictions.head()

In [None]:
unseen_predictions.to_csv('unseen_predictions1.csv', index=False)