In [2]:
import pandas as pd
import pickle as pkl
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split

In [3]:
ratings = pd.read_csv("Cleansed Set B Corporate Rating.csv")

df = pd.DataFrame(ratings)

x = ratings.drop(columns=['Risk Rating', 'Bond Rating'])
y = ratings['Risk Rating']

custom_mapping = {
    'In Default': 5,
    'Highest Risk': 4,
    'High Risk': 3,
    'Medium Risk': 2,
    'Low Risk': 1,
    'Lowest Risk': 0
}

ros = RandomOverSampler()

x_resampled, y_resampled = ros.fit_resample(x, y)
ratings_resampled = pd.concat([pd.DataFrame(x_resampled), pd.DataFrame(y_resampled, columns=['Risk Rating'])], axis=1)
ratings_resampled['Risk Rating'] = ratings_resampled['Risk Rating'].map(custom_mapping)
#print(ratings_resampled)

# Import the ClassificationExperiment
from pycaret.classification import ClassificationExperiment

# Create an experiment instance
exp = ClassificationExperiment()

# Initialize the experiment
exp.setup(ratings_resampled, target='Risk Rating', session_id=123)

# Compare models
best_model = exp.compare_models()
XGBoost_model = exp.create_model('xgboost')
# View results
print(best_model)


Unnamed: 0,Description,Value
0,Session id,123
1,Target,Risk Rating
2,Target type,Multiclass
3,Original data shape,"(7938, 18)"
4,Transformed data shape,"(7938, 18)"
5,Transformed train set shape,"(5556, 18)"
6,Transformed test set shape,"(2382, 18)"
7,Numeric features,17
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9671,0.9985,0.9671,0.9677,0.9671,0.9605,0.9606,0.043
lightgbm,Light Gradient Boosting Machine,0.9617,0.9976,0.9617,0.9623,0.9615,0.954,0.9542,0.506
rf,Random Forest Classifier,0.96,0.9978,0.96,0.9609,0.9599,0.9521,0.9523,0.099
xgboost,Extreme Gradient Boosting,0.9591,0.997,0.9591,0.9598,0.9589,0.951,0.9512,0.148
dt,Decision Tree Classifier,0.9366,0.962,0.9366,0.9366,0.936,0.924,0.9242,0.01
gbc,Gradient Boosting Classifier,0.9244,0.0,0.9244,0.9249,0.9242,0.9093,0.9095,0.923
knn,K Neighbors Classifier,0.8717,0.9739,0.8717,0.8738,0.8684,0.846,0.8478,0.138
lda,Linear Discriminant Analysis,0.6821,0.0,0.6821,0.7019,0.6857,0.6186,0.6211,0.006
ridge,Ridge Classifier,0.6334,0.0,0.6334,0.616,0.6103,0.56,0.5681,0.007
lr,Logistic Regression,0.6219,0.0,0.6219,0.6314,0.6098,0.5462,0.5531,0.372


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.973,0.9976,0.973,0.9734,0.9729,0.9676,0.9678
1,0.9568,0.9976,0.9568,0.9567,0.9565,0.9482,0.9483
2,0.9712,0.9976,0.9712,0.9723,0.9712,0.9655,0.9657
3,0.9406,0.9969,0.9406,0.9413,0.9403,0.9288,0.929
4,0.9514,0.9962,0.9514,0.9544,0.9518,0.9417,0.9422
5,0.9532,0.9967,0.9532,0.9555,0.9531,0.9439,0.9444
6,0.9622,0.9967,0.9622,0.9622,0.9619,0.9546,0.9547
7,0.9586,0.9965,0.9586,0.9582,0.9581,0.9503,0.9504
8,0.9658,0.9972,0.9658,0.9659,0.9656,0.9589,0.959
9,0.9586,0.9972,0.9586,0.9584,0.9582,0.9503,0.9504


ExtraTreesClassifier(bootstrap=False, ccp_alpha=0.0, class_weight=None,
                     criterion='gini', max_depth=None, max_features='sqrt',
                     max_leaf_nodes=None, max_samples=None,
                     min_impurity_decrease=0.0, min_samples_leaf=1,
                     min_samples_split=2, min_weight_fraction_leaf=0.0,
                     monotonic_cst=None, n_estimators=100, n_jobs=-1,
                     oob_score=False, random_state=123, verbose=0,
                     warm_start=False)


In [None]:
# to export data to pickle file
#ratings_resampled.to_pickle('df.pkl')