In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Kabita//Input//kabita_dataset_labels.csv")

In [2]:
# Function of Train-test split, MinMax Scaling
def minmax_scaling(x_data, y_data):
    x_train,x_test,y_train,y_test = train_test_split(x_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    # MinMax scaling of train data
    minmax_model = MinMaxScaler(feature_range=(0,5))
    np.set_printoptions(precision=3)
    scaled_data_train = minmax_model.fit_transform(x_train)
    # MinMax scaling of test data
    scaled_data_test = minmax_model.fit_transform(x_test)
    return scaled_data_train, scaled_data_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after MinMax Scaling is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.7306122448979592
Confusion Matrix of Logistic Regression is:
 [[160   1   1   4  29  12   3]
 [  0 158  11  10  12  19   0]
 [  1   2 181  13   0  13   0]
 [  1  10  16 151  11  17   4]
 [ 25  13   9  17 139   2   5]
 [  5   9   5  30   4 119  38]
 [  1   1   0   4   0  38 166]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.83      0.76      0.79       210
           2       0.81      0.75      0.78       210
           3       0.81      0.86      0.84       210
           4       0.66      0.72      0.69       210
           5       0.71      0.66      0.69       210
           6       0.54      0.57      0.55       210
           7       0.77      0.79      0.78       210

    accuracy                           0.73      1470
   macro avg       0.73      0.73      0.73      1470
weighted avg       0.73      0.73      0.73      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.717687074829932
Confusion Matrix of SVM is:
 [[163   3   0   7  24  11   2]
 [  1 166   9  11   4  19   0]
 [  1   7 180   6   1  15   0]
 [  5  17  17 144   6  19   2]
 [ 30  16  14  15 125   4   6]
 [  7  15   5  24   3 121  35]
 [  2   2   0   6   3  41 156]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.78      0.78      0.78       210
           2       0.73      0.79      0.76       210
           3       0.80      0.86      0.83       210
           4       0.68      0.69      0.68       210
           5       0.75      0.60      0.66       210
           6       0.53      0.58      0.55       210
           7       0.78      0.74      0.76       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scalin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3122448979591837
Confusion Matrix of Decision Tree is:
 [[112   0   0   0  43  55   0]
 [  1   0   2   0   1 206   0]
 [  0   0  95   0   0 115   0]
 [  0   0   1   0   1 208   0]
 [ 78   0   2   0  42  88   0]
 [  0   0   0   0   0 210   0]
 [  2   0   0   0   0 208   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.53      0.56       210
           2       0.00      0.00      0.00       210
           3       0.95      0.45      0.61       210
           4       0.00      0.00      0.00       210
           5       0.48      0.20      0.28       210
           6       0.19      1.00      0.32       210
           7       0.00      0.00      0.00       210

    accuracy                           0.31      1470
   macro avg       0.32      0.31      0.25      1470
weighted avg       0.32      0.31      0.25      1470

Decision Tree with 4 max_depth
A

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3979591836734694
Confusion Matrix of Decision Tree is:
 [[109   2   0  53  46   0   0]
 [  0  54   2 152   2   0   0]
 [  0   0  95 115   0   0   0]
 [  1   6   1 202   0   0   0]
 [ 40   4   2  81  80   0   3]
 [  0   3   0 203   0   0   4]
 [  0   0   0 163   2   0  45]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.73      0.52      0.61       210
           2       0.78      0.26      0.39       210
           3       0.95      0.45      0.61       210
           4       0.21      0.96      0.34       210
           5       0.62      0.38      0.47       210
           6       0.00      0.00      0.00       210
           7       0.87      0.21      0.34       210

    accuracy                           0.40      1470
   macro avg       0.59      0.40      0.39      1470
weighted avg       0.59      0.40      0.39      1470

Decision Tree with 6 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.5503401360544218
Confusion Matrix of Decision Tree is:
 [[156   2   0  42  10   0   0]
 [  0 134   4  63   9   0   0]
 [  0  17 125  64   4   0   0]
 [  0  10   4 185   7   0   4]
 [ 53  12   6  43  95   1   0]
 [  0   4   1 179   2   1  23]
 [  2   1   0  93   0   1 113]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.74      0.74      0.74       210
           2       0.74      0.64      0.69       210
           3       0.89      0.60      0.71       210
           4       0.28      0.88      0.42       210
           5       0.75      0.45      0.56       210
           6       0.33      0.00      0.01       210
           7       0.81      0.54      0.65       210

    accuracy                           0.55      1470
   macro avg       0.65      0.55      0.54      1470
weighted avg       0.65      0.55      0.54      1470

Decision Tree with 14 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6061224489795919
Confusion Matrix of Random Forest is:
 [[155   3   0  19   5  19   9]
 [  1 111   7  13  35  43   0]
 [  0   0 151  23   0  35   1]
 [  0  11   8 125  10  49   7]
 [ 62  15   6  29  72  12  14]
 [  0   7   5  24   4 129  41]
 [  0   0   1  18   0  43 148]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.74      0.72       210
           2       0.76      0.53      0.62       210
           3       0.85      0.72      0.78       210
           4       0.50      0.60      0.54       210
           5       0.57      0.34      0.43       210
           6       0.39      0.61      0.48       210
           7       0.67      0.70      0.69       210

    accuracy                           0.61      1470
   macro avg       0.64      0.61      0.61      1470
weighted avg       0.64      0.61      0.61      1470

Random Forest with 2 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6727891156462585
Confusion Matrix of Random Forest is:
 [[159   2   0  16   8  24   1]
 [  1 140   7  14  12  36   0]
 [  0   0 162  13   0  35   0]
 [  0   6   9 145   3  43   4]
 [ 51  17   7  21  87  16  11]
 [  0   3   4  20   2 143  38]
 [  0   0   1   8   1  47 153]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.75      0.76      0.76       210
           2       0.83      0.67      0.74       210
           3       0.85      0.77      0.81       210
           4       0.61      0.69      0.65       210
           5       0.77      0.41      0.54       210
           6       0.42      0.68      0.52       210
           7       0.74      0.73      0.73       210

    accuracy                           0.67      1470
   macro avg       0.71      0.67      0.68      1470
weighted avg       0.71      0.67      0.68      1470

Random Forest with 10 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6891156462585034
Confusion Matrix of Random Forest is:
 [[153   2   0  10  16  28   1]
 [  0 148   7   9  14  32   0]
 [  0   5 161  10   0  34   0]
 [  0   9   9 142   5  41   4]
 [ 35  14   7  21 109  14  10]
 [  0   3   4  15   2 144  42]
 [  0   0   0   5   2  47 156]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.73      0.77       210
           2       0.82      0.70      0.76       210
           3       0.86      0.77      0.81       210
           4       0.67      0.68      0.67       210
           5       0.74      0.52      0.61       210
           6       0.42      0.69      0.52       210
           7       0.73      0.74      0.74       210

    accuracy                           0.69      1470
   macro avg       0.72      0.69      0.70      1470
weighted avg       0.72      0.69      0.70      1470

Random Forest with 18 max_depth


In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.6931972789115646
Confusion Matrix of Logistic Regression is:
 [[155   2   1   8  26  16   2]
 [  1 117  41  11  24  16   0]
 [  1   1 182   9   3  14   0]
 [  1   9  19 139  20  18   4]
 [ 23  11  10   7 150   6   3]
 [  4  12   5  22   3 119  45]
 [  1   3   0   5   2  42 157]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.83      0.74      0.78       210
           2       0.75      0.56      0.64       210
           3       0.71      0.87      0.78       210
           4       0.69      0.66      0.68       210
           5       0.66      0.71      0.68       210
           6       0.52      0.57      0.54       210
           7       0.74      0.75      0.75       210

    accuracy                           0.69      1470
   macro avg       0.70      0.69      0.69      1470
weighted avg       0.70      0.69      0.69      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.7170068027210884
Confusion Matrix of SVM is:
 [[157   2   2   7  22  18   2]
 [  2 155  18   8  11  16   0]
 [  2   1 185   6   1  15   0]
 [  5  13  22 131  15  20   4]
 [ 30  10  10   7 146   4   3]
 [  4  11   5  15   6 132  37]
 [  1   5   0  10   4  42 148]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.78      0.75      0.76       210
           2       0.79      0.74      0.76       210
           3       0.76      0.88      0.82       210
           4       0.71      0.62      0.66       210
           5       0.71      0.70      0.70       210
           6       0.53      0.63      0.58       210
           7       0.76      0.70      0.73       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.35034013605442177
Confusion Matrix of Decision Tree is:
 [[155   0   1   0   0  54   0]
 [  0   0  52   0   2 156   0]
 [  0   0 132   0   0  78   0]
 [  1   0  17   0   0 192   0]
 [100   0  26   0  20  64   0]
 [  0   0   2   0   0 208   0]
 [  2   0   0   0   0 208   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.74      0.66       210
           2       0.00      0.00      0.00       210
           3       0.57      0.63      0.60       210
           4       0.00      0.00      0.00       210
           5       0.91      0.10      0.17       210
           6       0.22      0.99      0.36       210
           7       0.00      0.00      0.00       210

    accuracy                           0.35      1470
   macro avg       0.33      0.35      0.26      1470
weighted avg       0.33      0.35      0.26      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4312925170068027
Confusion Matrix of Decision Tree is:
 [[137   0   1   0  18  54   0]
 [  0  70  17   0   2 121   0]
 [  0   0 132   0   0  78   0]
 [  0   5  13   0   1 191   0]
 [ 75  13  22   0  46  54   0]
 [  0   1   1   0   0 204   4]
 [  2   1   0   0   0 162  45]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.64      0.65      0.65       210
           2       0.78      0.33      0.47       210
           3       0.71      0.63      0.67       210
           4       0.00      0.00      0.00       210
           5       0.69      0.22      0.33       210
           6       0.24      0.97      0.38       210
           7       0.92      0.21      0.35       210

    accuracy                           0.43      1470
   macro avg       0.57      0.43      0.41      1470
weighted avg       0.57      0.43      0.41      1470

Decision Tree with 6 max_depth
A

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4816326530612245
Confusion Matrix of Decision Tree is:
 [[155   0   1   0   0  54   0]
 [  0 106  17   3   4  80   0]
 [  0   0 132   0   0  78   0]
 [  1   4   8   6   1 188   2]
 [ 75  18  22   0  50  45   0]
 [  0   2   1   1   0 188  18]
 [  2   1   0   0   0 136  71]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.67      0.74      0.70       210
           2       0.81      0.50      0.62       210
           3       0.73      0.63      0.68       210
           4       0.60      0.03      0.05       210
           5       0.91      0.24      0.38       210
           6       0.24      0.90      0.38       210
           7       0.78      0.34      0.47       210

    accuracy                           0.48      1470
   macro avg       0.68      0.48      0.47      1470
weighted avg       0.68      0.48      0.47      1470

Decision Tree with 8 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.5666666666666667
Confusion Matrix of Decision Tree is:
 [[159   0   1   0   6  44   0]
 [  0 115  15   4   7  69   0]
 [  0   0 150   0   0  60   0]
 [  0   5  18  37   2 144   4]
 [ 55  19  16   3  81  35   1]
 [  0   2   3   1   0 178  26]
 [  2   1   1   2   3  88 113]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.74      0.76      0.75       210
           2       0.81      0.55      0.65       210
           3       0.74      0.71      0.72       210
           4       0.79      0.18      0.29       210
           5       0.82      0.39      0.52       210
           6       0.29      0.85      0.43       210
           7       0.78      0.54      0.64       210

    accuracy                           0.57      1470
   macro avg       0.71      0.57      0.57      1470
weighted avg       0.71      0.57      0.57      1470

Decision Tree with 16 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.5714285714285714
Confusion Matrix of Random Forest is:
 [[153   5   1  10   6  29   6]
 [  1  98  48  12  16  35   0]
 [  0   0 135   6   0  69   0]
 [  0  11  15 103   4  73   4]
 [ 62  22  30  19  52  12  13]
 [  0   7   2  11   3 145  42]
 [  0   1   0   6   0  49 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.73      0.72       210
           2       0.68      0.47      0.55       210
           3       0.58      0.64      0.61       210
           4       0.62      0.49      0.55       210
           5       0.64      0.25      0.36       210
           6       0.35      0.69      0.47       210
           7       0.70      0.73      0.72       210

    accuracy                           0.57      1470
   macro avg       0.61      0.57      0.57      1470
weighted avg       0.61      0.57      0.57      1470

Random Forest with 4 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6401360544217687
Confusion Matrix of Random Forest is:
 [[155   5   2   8  10  30   0]
 [  0 138  21  10   8  33   0]
 [  0   0 165   6   0  39   0]
 [  0  13  19 113   4  58   3]
 [ 56  23  25  14  73  10   9]
 [  0   8   4  12   2 140  44]
 [  1   2   0   5   1  44 157]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.73      0.74      0.73       210
           2       0.73      0.66      0.69       210
           3       0.70      0.79      0.74       210
           4       0.67      0.54      0.60       210
           5       0.74      0.35      0.47       210
           6       0.40      0.67      0.50       210
           7       0.74      0.75      0.74       210

    accuracy                           0.64      1470
   macro avg       0.67      0.64      0.64      1470
weighted avg       0.67      0.64      0.64      1470

Random Forest with 12 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6585034013605442
Confusion Matrix of Random Forest is:
 [[159   5   2   9   6  29   0]
 [  0 149  16   5   6  34   0]
 [  0   0 165   8   0  37   0]
 [  0  12  19 118   5  53   3]
 [ 54  24  21  18  78   8   7]
 [  0   9   4  13   1 144  39]
 [  1   2   1   5   1  45 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.76      0.75       210
           2       0.74      0.71      0.73       210
           3       0.72      0.79      0.75       210
           4       0.67      0.56      0.61       210
           5       0.80      0.37      0.51       210
           6       0.41      0.69      0.51       210
           7       0.76      0.74      0.75       210

    accuracy                           0.66      1470
   macro avg       0.69      0.66      0.66      1470
weighted avg       0.69      0.66      0.66      1470

Random Forest with 20 max_depth


In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.7421768707482993
Confusion Matrix of Logistic Regression is:
 [[159   1   1   4  35   9   1]
 [  0 161  10  11   9  18   1]
 [  1   1 183  12   0  13   0]
 [  1   9  17 151  12  16   4]
 [ 21  11   9  15 148   0   6]
 [  5  10   5  24   6 122  38]
 [  2   1   0   5   2  33 167]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.84      0.76      0.80       210
           2       0.83      0.77      0.80       210
           3       0.81      0.87      0.84       210
           4       0.68      0.72      0.70       210
           5       0.70      0.70      0.70       210
           6       0.58      0.58      0.58       210
           7       0.77      0.80      0.78       210

    accuracy                           0.74      1470
   macro avg       0.74      0.74      0.74      1470
weighted avg       0.74      0.74      0.74      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.726530612244898
Confusion Matrix of SVM is:
 [[158   3   0   8  29   9   3]
 [  1 167  10   9   6  17   0]
 [  1   2 185   5   1  16   0]
 [  6  16  18 140  10  18   2]
 [ 27  13  12  16 135   3   4]
 [  6  12   4  20   6 124  38]
 [  4   3   0   7   2  35 159]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.78      0.75      0.77       210
           2       0.77      0.80      0.78       210
           3       0.81      0.88      0.84       210
           4       0.68      0.67      0.67       210
           5       0.71      0.64      0.68       210
           6       0.56      0.59      0.57       210
           7       0.77      0.76      0.76       210

    accuracy                           0.73      1470
   macro avg       0.73      0.73      0.73      1470
weighted avg       0.73      0.73      0.73      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scalin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3047619047619048
Confusion Matrix of Decision Tree is:
 [[100   0   0   0  55  55   0]
 [  1   0   2   0   1 206   0]
 [  0   0  86   0   0 124   0]
 [  0   0   1   0   1 208   0]
 [ 68   0   1   0  52  89   0]
 [  0   0   0   0   0 210   0]
 [  2   0   0   0   0 208   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.48      0.52       210
           2       0.00      0.00      0.00       210
           3       0.96      0.41      0.57       210
           4       0.00      0.00      0.00       210
           5       0.48      0.25      0.33       210
           6       0.19      1.00      0.32       210
           7       0.00      0.00      0.00       210

    accuracy                           0.30      1470
   macro avg       0.32      0.30      0.25      1470
weighted avg       0.32      0.30      0.25      1470

Decision Tree with 4 max_depth
A

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4061224489795918
Confusion Matrix of Decision Tree is:
 [[133   5   0   0  22  50   0]
 [  0  58   2   0   2 148   0]
 [  0   0  86   0   0 124   0]
 [  1   6   1   0   0 202   0]
 [ 49   5   1   0  72  81   2]
 [  0   3   0   0   0 203   4]
 [  0   0   0   0   2 163  45]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.73      0.63      0.68       210
           2       0.75      0.28      0.40       210
           3       0.96      0.41      0.57       210
           4       0.00      0.00      0.00       210
           5       0.73      0.34      0.47       210
           6       0.21      0.97      0.34       210
           7       0.88      0.21      0.34       210

    accuracy                           0.41      1470
   macro avg       0.61      0.41      0.40      1470
weighted avg       0.61      0.41      0.40      1470

Decision Tree with 6 max_depth
A

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4557823129251701
Confusion Matrix of Decision Tree is:
 [[119   5   0   0  36  50   0]
 [  0 131   2   0   2  75   0]
 [  0   0  86   0   0 124   0]
 [  0   6   1   0   1 202   0]
 [ 34  22   1   0  87  64   2]
 [  0   3   0   0   0 203   4]
 [  0   1   0   0   2 163  44]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.78      0.57      0.66       210
           2       0.78      0.62      0.69       210
           3       0.96      0.41      0.57       210
           4       0.00      0.00      0.00       210
           5       0.68      0.41      0.51       210
           6       0.23      0.97      0.37       210
           7       0.88      0.21      0.34       210

    accuracy                           0.46      1470
   macro avg       0.61      0.46      0.45      1470
weighted avg       0.61      0.46      0.45      1470

Decision Tree with 8 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.5517006802721088
Confusion Matrix of Decision Tree is:
 [[134   5   0  40  31   0   0]
 [  0 141   7  54   8   0   0]
 [  0  12 147  50   1   0   0]
 [  0  13   8 179   6   0   4]
 [ 33  22   6  40 107   1   1]
 [  0   4   3 177   0   1  25]
 [  0   1   0  94   5   8 102]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.80      0.64      0.71       210
           2       0.71      0.67      0.69       210
           3       0.86      0.70      0.77       210
           4       0.28      0.85      0.42       210
           5       0.68      0.51      0.58       210
           6       0.10      0.00      0.01       210
           7       0.77      0.49      0.60       210

    accuracy                           0.55      1470
   macro avg       0.60      0.55      0.54      1470
weighted avg       0.60      0.55      0.54      1470

Decision Tree with 16 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6197278911564625
Confusion Matrix of Random Forest is:
 [[151   4   2  15  11  22   5]
 [  0 128  17  13  15  36   1]
 [  0   0 161  13   0  36   0]
 [  1  12  11 131   6  45   4]
 [ 66  19  13  33  56   8  15]
 [  0   7   8  20   3 130  42]
 [  0   0   1   9   0  46 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.72      0.71       210
           2       0.75      0.61      0.67       210
           3       0.76      0.77      0.76       210
           4       0.56      0.62      0.59       210
           5       0.62      0.27      0.37       210
           6       0.40      0.62      0.49       210
           7       0.70      0.73      0.71       210

    accuracy                           0.62      1470
   macro avg       0.64      0.62      0.62      1470
weighted avg       0.64      0.62      0.62      1470

Random Forest with 4 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.672108843537415
Confusion Matrix of Random Forest is:
 [[146   5   1   9  21  28   0]
 [  0 140  17   9  14  30   0]
 [  0   0 173  13   0  24   0]
 [  0  11  10 135   9  42   3]
 [ 42  15  11  21 100  11  10]
 [  0   6   6  15   4 139  40]
 [  0   0   0   5   2  48 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.70      0.73       210
           2       0.79      0.67      0.72       210
           3       0.79      0.82      0.81       210
           4       0.65      0.64      0.65       210
           5       0.67      0.48      0.56       210
           6       0.43      0.66      0.52       210
           7       0.75      0.74      0.74       210

    accuracy                           0.67      1470
   macro avg       0.69      0.67      0.68      1470
weighted avg       0.69      0.67      0.68      1470

Random Forest with 12 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6863945578231293
Confusion Matrix of Random Forest is:
 [[142   5   1   9  26  27   0]
 [  0 143  13   9  14  31   0]
 [  0   4 175   6   1  24   0]
 [  0   7  10 137  10  43   3]
 [ 31  21   8  17 114  11   8]
 [  0   4   4  11   6 144  41]
 [  1   1   0   5   3  46 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.68      0.74       210
           2       0.77      0.68      0.72       210
           3       0.83      0.83      0.83       210
           4       0.71      0.65      0.68       210
           5       0.66      0.54      0.59       210
           6       0.44      0.69      0.54       210
           7       0.75      0.73      0.74       210

    accuracy                           0.69      1470
   macro avg       0.71      0.69      0.69      1470
weighted avg       0.71      0.69      0.69      1470

Random Forest with 20 max_depth


### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")

# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.6938775510204082
Confusion Matrix of Logistic Regression is:
 [[183   2   2   8   2  13   0]
 [  3 186   7   7   0   7   0]
 [  2   6 195   2   0   5   0]
 [  4  15   1 168   0  18   4]
 [ 95  43   8  25  29   8   2]
 [  5  12   3  18   0 153  19]
 [ 10   1   2   7   1  83 106]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.61      0.87      0.71       210
           2       0.70      0.89      0.78       210
           3       0.89      0.93      0.91       210
           4       0.71      0.80      0.76       210
           5       0.91      0.14      0.24       210
           6       0.53      0.73      0.62       210
           7       0.81      0.50      0.62       210

    accuracy                           0.69      1470
   macro avg       0.74      0.69      0.66      1470
weighted avg       0.74      0.69      0.66      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.7346938775510204
Confusion Matrix of SVM is:
 [[174   5   1   9  11  10   0]
 [  4 174  10  10   1  10   1]
 [  2   2 196   5   0   5   0]
 [  4  16   3 164   4  15   4]
 [ 55  37   6  18  88   5   1]
 [  6   9   2  20   1 131  41]
 [  4   1   0   3   2  47 153]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.70      0.83      0.76       210
           2       0.71      0.83      0.77       210
           3       0.90      0.93      0.92       210
           4       0.72      0.78      0.75       210
           5       0.82      0.42      0.56       210
           6       0.59      0.62      0.61       210
           7       0.77      0.73      0.75       210

    accuracy                           0.73      1470
   macro avg       0.74      0.73      0.73      1470
weighted avg       0.74      0.73      0.73      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.33741496598639453
Confusion Matrix of Decision Tree is:
 [[  0   0   0   0 172   0  38]
 [  0   0   0  11 173   0  26]
 [  0   0  98  26  65   0  21]
 [  0   0   3   7 142   0  58]
 [  0   0   0   2 194   0  14]
 [  0   0   3   2  52   0 153]
 [  0   0   0   0  13   0 197]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.94      0.47      0.62       210
           4       0.15      0.03      0.05       210
           5       0.24      0.92      0.38       210
           6       0.00      0.00      0.00       210
           7       0.39      0.94      0.55       210

    accuracy                           0.34      1470
   macro avg       0.25      0.34      0.23      1470
weighted avg       0.25      0.34      0.23      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4421768707482993
Confusion Matrix of Decision Tree is:
 [[143  29   0   0   0  26  12]
 [  8 168   0   8   0  23   3]
 [  3  65  98  23   0  21   0]
 [ 25 119   3   5   0  29  29]
 [ 41 154   0   1   0   4  10]
 [ 19  33   3   2   0  66  87]
 [  6   7   0   0   0  27 170]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.68      0.63       210
           2       0.29      0.80      0.43       210
           3       0.94      0.47      0.62       210
           4       0.13      0.02      0.04       210
           5       0.00      0.00      0.00       210
           6       0.34      0.31      0.33       210
           7       0.55      0.81      0.65       210

    accuracy                           0.44      1470
   macro avg       0.40      0.44      0.39      1470
weighted avg       0.40      0.44      0.39      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.49727891156462584
Confusion Matrix of Decision Tree is:
 [[143   7   2   0  22  30   6]
 [  8 137   3   8  29  22   3]
 [  3  60 105  23   2  17   0]
 [ 25  52   5   5  65  33  25]
 [ 41  40   1   1 113   5   9]
 [ 19  16   9   4  17  62  83]
 [  6   2   2   0   5  29 166]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.68      0.63       210
           2       0.44      0.65      0.52       210
           3       0.83      0.50      0.62       210
           4       0.12      0.02      0.04       210
           5       0.45      0.54      0.49       210
           6       0.31      0.30      0.30       210
           7       0.57      0.79      0.66       210

    accuracy                           0.50      1470
   macro avg       0.47      0.50      0.47      1470
weighted avg       0.47      0.50      0.47      1470

Decision Tree with 5 max_depth


Accuracy of Decision Tree after MinMax Scaling is: 0.48707482993197276
Confusion Matrix of Decision Tree is:
 [[105  11   6  26  38  14  10]
 [  9  98  32  33  26   4   8]
 [  7   3 152  36   3   7   2]
 [ 12  23  11  91  18  32  23]
 [ 26  39   9  41  77  11   7]
 [ 19  15  15  27  14  59  61]
 [  9   3   7   9  17  31 134]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.56      0.50      0.53       210
           2       0.51      0.47      0.49       210
           3       0.66      0.72      0.69       210
           4       0.35      0.43      0.38       210
           5       0.40      0.37      0.38       210
           6       0.37      0.28      0.32       210
           7       0.55      0.64      0.59       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.48      1470
weighted avg       0.48      0.49      0.48      1470

Decision Tree with 13 max_depth

Accuracy of Decision Tree after MinMax Scaling is: 0.46938775510204084
Confusion Matrix of Decision Tree is:
 [[113   4   7  28  32  16  10]
 [ 10  92  13  34  46  10   5]
 [  8  12 139  31  11   7   2]
 [  9  16  11  94  28  34  18]
 [ 28  38   7  40  74  14   9]
 [ 19  15  13  31  14  69  49]
 [  9   3   8  13  14  54 109]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.54      0.56       210
           2       0.51      0.44      0.47       210
           3       0.70      0.66      0.68       210
           4       0.35      0.45      0.39       210
           5       0.34      0.35      0.34       210
           6       0.34      0.33      0.33       210
           7       0.54      0.52      0.53       210

    accuracy                           0.47      1470
   macro avg       0.48      0.47      0.47      1470
weighted avg       0.48      0.47      0.47      1470

Random Forest with 1 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after MinMax Scaling is: 0.5591836734693878
Confusion Matrix of Random Forest is:
 [[126   2   0   0  44  16  22]
 [  2 156  11   1  17  17   6]
 [  3  16 173   0   4  14   0]
 [ 12  39  26   5  66  24  38]
 [ 18  49   6   0 118   2  17]
 [  9  15  12   0  12  43 119]
 [  0   0   0   0   3   6 201]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.60      0.66       210
           2       0.56      0.74      0.64       210
           3       0.76      0.82      0.79       210
           4       0.83      0.02      0.05       210
           5       0.45      0.56      0.50       210
           6       0.35      0.20      0.26       210
           7       0.50      0.96      0.66       210

    accuracy                           0.56      1470
   macro avg       0.60      0.56      0.51      1470
weighted avg       0.60      0.56      0.51      1470

Random Forest with 4 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6993197278911565
Confusion Matrix of Random Forest is:
 [[136   1   0   7  35  27   4]
 [  3 145   7  11  26  16   2]
 [  1   6 184   4   5  10   0]
 [  5   4   6 113  28  36  18]
 [ 20  16   0  16 148   4   6]
 [  2   4   0  11  11 120  62]
 [  0   0   0   1   3  24 182]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.65      0.72       210
           2       0.82      0.69      0.75       210
           3       0.93      0.88      0.90       210
           4       0.69      0.54      0.61       210
           5       0.58      0.70      0.64       210
           6       0.51      0.57      0.54       210
           7       0.66      0.87      0.75       210

    accuracy                           0.70      1470
   macro avg       0.72      0.70      0.70      1470
weighted avg       0.72      0.70      0.70      1470

Random Forest with 12 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6993197278911565
Confusion Matrix of Random Forest is:
 [[139   1   0   7  29  31   3]
 [  3 150   8  12  21  12   4]
 [  2   2 185   6   4  11   0]
 [  6   3   4 128  18  38  13]
 [ 20  21   2  22 133   5   7]
 [  5   6   1  14   4 117  63]
 [  0   1   0   0   4  29 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.66      0.72       210
           2       0.82      0.71      0.76       210
           3       0.93      0.88      0.90       210
           4       0.68      0.61      0.64       210
           5       0.62      0.63      0.63       210
           6       0.48      0.56      0.52       210
           7       0.66      0.84      0.74       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Random Forest with 20 max_depth


In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_gkb.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.3598639455782313
Confusion Matrix of Logistic Regression is:
 [[ 54  19  22   1  88  26   0]
 [  0  34 117   2  36  20   1]
 [  0  15 191   0   1   3   0]
 [  0  20  83   9  55  42   1]
 [  0  12  61   2 125   8   2]
 [  0  43  24   2  38 103   0]
 [  0  23   6   2  78  88  13]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       1.00      0.26      0.41       210
           2       0.20      0.16      0.18       210
           3       0.38      0.91      0.54       210
           4       0.50      0.04      0.08       210
           5       0.30      0.60      0.40       210
           6       0.36      0.49      0.41       210
           7       0.76      0.06      0.11       210

    accuracy                           0.36      1470
   macro avg       0.50      0.36      0.30      1470
weighted avg       0.50      0.36      0.30      1470

KNN with 3 Nei

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after MinMax Scaling is: 0.2673469387755102
Confusion Matrix of SVM is:
 [[ 52  20  86   1  10  40   1]
 [  1  17 154   1  15  21   1]
 [  0  14 196   0   0   0   0]
 [  1  38 122   7  10  31   1]
 [  8  21 140   2  23  14   2]
 [  1  47  65   5   8  83   1]
 [  2  26  15   1  22 129  15]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.80      0.25      0.38       210
           2       0.09      0.08      0.09       210
           3       0.25      0.93      0.40       210
           4       0.41      0.03      0.06       210
           5       0.26      0.11      0.15       210
           6       0.26      0.40      0.31       210
           7       0.71      0.07      0.13       210

    accuracy                           0.27      1470
   macro avg       0.40      0.27      0.22      1470
weighted avg       0.40      0.27      0.22      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.1510204081632653
Confusion Matrix of Decision Tree is:
 [[  0   0   1   0   0   0 209]
 [  0   0   3   0   0   0 207]
 [  0   0  12   0   0   0 198]
 [  0   0   0   0   0   0 210]
 [  0   0   0   0   0   0 210]
 [  0   0   0   0   0   0 210]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.75      0.06      0.11       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.14      1.00      0.25       210

    accuracy                           0.15      1470
   macro avg       0.13      0.15      0.05      1470
weighted avg       0.13      0.15      0.05      1470

Decision Tree with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.23469387755102042
Confusion Matrix of Decision Tree is:
 [[  0   1   0   0 169   0  40]
 [  0   3   0   0 153   0  54]
 [  0  12   0   0 182   0  16]
 [  0   0   0   0 177   0  33]
 [  0   0   0   0 179   0  31]
 [  0   0   0   0  81   0 129]
 [  0   0   0   0  47   0 163]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.19      0.01      0.03       210
           3       0.00      0.00      0.00       210
           4       0.00      0.00      0.00       210
           5       0.18      0.85      0.30       210
           6       0.00      0.00      0.00       210
           7       0.35      0.78      0.48       210

    accuracy                           0.23      1470
   macro avg       0.10      0.23      0.12      1470
weighted avg       0.10      0.23      0.12      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.2836734693877551
Confusion Matrix of Decision Tree is:
 [[136   0   1   0  33  32   8]
 [ 67   3   0   0  86  46   8]
 [101   8   4   0  81   4  12]
 [111   0   0   0  66  21  12]
 [ 68   0   0   0 111  17  14]
 [ 65   0   0   0  16  66  63]
 [ 46   0   0   0   1  66  97]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.65      0.34       210
           2       0.27      0.01      0.03       210
           3       0.80      0.02      0.04       210
           4       0.00      0.00      0.00       210
           5       0.28      0.53      0.37       210
           6       0.26      0.31      0.29       210
           7       0.45      0.46      0.46       210

    accuracy                           0.28      1470
   macro avg       0.33      0.28      0.22      1470
weighted avg       0.33      0.28      0.22      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.29183673469387755
Confusion Matrix of Decision Tree is:
 [[136  30   1   0  28   7   8]
 [ 67  73   0   0  59   3   8]
 [101  37  13   0  54   2   3]
 [111  27   0   0  57   3  12]
 [ 68  25   0   0 103   0  14]
 [ 65  62   0   0  13   7  63]
 [ 46  60   0   0   1   6  97]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.65      0.34       210
           2       0.23      0.35      0.28       210
           3       0.93      0.06      0.12       210
           4       0.00      0.00      0.00       210
           5       0.33      0.49      0.39       210
           6       0.25      0.03      0.06       210
           7       0.47      0.46      0.47       210

    accuracy                           0.29      1470
   macro avg       0.35      0.29      0.24      1470
weighted avg       0.35      0.29      0.24      1470

Decision Tree with 5 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.282312925170068
Confusion Matrix of Decision Tree is:
 [[135   7   2  27   3  28   8]
 [ 66  30   1  55   4  46   8]
 [ 92  34  22  55   0   4   3]
 [108  10   3  51   6  20  12]
 [ 68   8   0  88  17  15  14]
 [ 61   4   4  15   0  63  63]
 [ 39   2   7   1   0  64  97]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.24      0.64      0.35       210
           2       0.32      0.14      0.20       210
           3       0.56      0.10      0.18       210
           4       0.17      0.24      0.20       210
           5       0.57      0.08      0.14       210
           6       0.26      0.30      0.28       210
           7       0.47      0.46      0.47       210

    accuracy                           0.28      1470
   macro avg       0.37      0.28      0.26      1470
weighted avg       0.37      0.28      0.26      1470

Decision Tree with 6 max_depth
Ac

Accuracy of Decision Tree after MinMax Scaling is: 0.23197278911564626
Confusion Matrix of Decision Tree is:
 [[92  9 16 22 34 23 14]
 [47 22 16 33 51 36  5]
 [76  9 38 20 52  9  6]
 [69  9 27 16 39 35 15]
 [39 15 19 14 73 40 10]
 [49 22 12 31 15 42 39]
 [35 27  5 24 10 51 58]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.44      0.30       210
           2       0.19      0.10      0.14       210
           3       0.29      0.18      0.22       210
           4       0.10      0.08      0.09       210
           5       0.27      0.35      0.30       210
           6       0.18      0.20      0.19       210
           7       0.39      0.28      0.32       210

    accuracy                           0.23      1470
   macro avg       0.24      0.23      0.22      1470
weighted avg       0.24      0.23      0.22      1470

Decision Tree with 14 max_depth
Accuracy of Decision Tree after MinMax Scaling i

Accuracy of Random Forest after MinMax Scaling is: 0.2748299319727891
Confusion Matrix of Random Forest is:
 [[  3   0  66   4  58   0  79]
 [  1   0 123   1  29   0  56]
 [  0   0 176   3   4   0  27]
 [  1   1 106   1  56   0  45]
 [  3   1 127   0  43   0  36]
 [  3   1  28   2  28   0 148]
 [  7   1   7   0  14   0 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.17      0.01      0.03       210
           2       0.00      0.00      0.00       210
           3       0.28      0.84      0.42       210
           4       0.09      0.00      0.01       210
           5       0.19      0.20      0.19       210
           6       0.00      0.00      0.00       210
           7       0.32      0.86      0.46       210

    accuracy                           0.27      1470
   macro avg       0.15      0.27      0.16      1470
weighted avg       0.15      0.27      0.16      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after MinMax Scaling is: 0.2979591836734694
Confusion Matrix of Random Forest is:
 [[ 34   0  66  11  46   0  53]
 [  4   0 119   8  24   0  55]
 [  4   0 173   2   4   0  27]
 [  6   1 108  32  21   0  42]
 [  8   0 127  20  23   0  32]
 [ 18   0  27  12  18   0 135]
 [ 15   0   7   1  11   0 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.16      0.23       210
           2       0.00      0.00      0.00       210
           3       0.28      0.82      0.41       210
           4       0.37      0.15      0.22       210
           5       0.16      0.11      0.13       210
           6       0.00      0.00      0.00       210
           7       0.34      0.84      0.48       210

    accuracy                           0.30      1470
   macro avg       0.22      0.30      0.21      1470
weighted avg       0.22      0.30      0.21      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after MinMax Scaling is: 0.37482993197278913
Confusion Matrix of Random Forest is:
 [[102  24  18  22   7  13  24]
 [ 34  49  62   9   4   9  43]
 [ 21  10 158   5   0   3  13]
 [ 41  46  34  44   7   3  35]
 [ 35  85  17  24  17   5  27]
 [ 49  10  14   7   2  17 111]
 [ 32   3   4   1   0   6 164]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.32      0.49      0.39       210
           2       0.22      0.23      0.22       210
           3       0.51      0.75      0.61       210
           4       0.39      0.21      0.27       210
           5       0.46      0.08      0.14       210
           6       0.30      0.08      0.13       210
           7       0.39      0.78      0.52       210

    accuracy                           0.37      1470
   macro avg       0.37      0.37      0.33      1470
weighted avg       0.37      0.37      0.33      1470

Random Forest with 4 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.43333333333333335
Confusion Matrix of Random Forest is:
 [[ 84  10   9  47  30  19  11]
 [ 18  43  56  43  16  24  10]
 [  8  16 152  20   3  11   0]
 [ 14  15  24  85  30  20  22]
 [ 16  16  16  51  73  17  21]
 [ 24   7   6  27  10  75  61]
 [ 18   0   3  12   1  51 125]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.46      0.40      0.43       210
           2       0.40      0.20      0.27       210
           3       0.57      0.72      0.64       210
           4       0.30      0.40      0.34       210
           5       0.45      0.35      0.39       210
           6       0.35      0.36      0.35       210
           7       0.50      0.60      0.54       210

    accuracy                           0.43      1470
   macro avg       0.43      0.43      0.42      1470
weighted avg       0.43      0.43      0.42      1470

Random Forest with 12 max_depth

Accuracy of Random Forest after MinMax Scaling is: 0.4170068027210884
Confusion Matrix of Random Forest is:
 [[ 76  17   6  45  19  40   7]
 [ 11  65  28  51  17  34   4]
 [  6  23 156  11   7   7   0]
 [ 10  27  14  85  25  32  17]
 [ 13  47   9  49  48  31  13]
 [ 20   9  10  29   2 111  29]
 [ 12   2   3  12   4 105  72]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.36      0.42       210
           2       0.34      0.31      0.33       210
           3       0.69      0.74      0.72       210
           4       0.30      0.40      0.35       210
           5       0.39      0.23      0.29       210
           6       0.31      0.53      0.39       210
           7       0.51      0.34      0.41       210

    accuracy                           0.42      1470
   macro avg       0.44      0.42      0.41      1470
weighted avg       0.44      0.42      0.41      1470

Random Forest with 20 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.7231292517006803
Confusion Matrix of Logistic Regression is:
 [[175   6   1  10  12   4   2]
 [  4 156  11  22   9   5   3]
 [  0   2 191  16   1   0   0]
 [  5   5   7 175   2   6  10]
 [ 41  14  10  34 102   2   7]
 [  7  15   6  25   2  64  91]
 [  2   0   1   3   1   3 200]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.75      0.83      0.79       210
           2       0.79      0.74      0.76       210
           3       0.84      0.91      0.87       210
           4       0.61      0.83      0.71       210
           5       0.79      0.49      0.60       210
           6       0.76      0.30      0.44       210
           7       0.64      0.95      0.76       210

    accuracy                           0.72      1470
   macro avg       0.74      0.72      0.71      1470
weighted avg       0.74      0.72      0.71      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.7217687074829932
Confusion Matrix of SVM is:
 [[172   3   0   6  23   3   3]
 [  7 174   8   9   8   2   2]
 [  0  11 190   4   1   4   0]
 [  8  17  12 152   5   8   8]
 [ 40  14   8  32 107   1   8]
 [ 13  25   3  15   0  67  87]
 [  1   0   0   2   3   5 199]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.82      0.76       210
           2       0.71      0.83      0.77       210
           3       0.86      0.90      0.88       210
           4       0.69      0.72      0.71       210
           5       0.73      0.51      0.60       210
           6       0.74      0.32      0.45       210
           7       0.65      0.95      0.77       210

    accuracy                           0.72      1470
   macro avg       0.73      0.72      0.70      1470
weighted avg       0.73      0.72      0.70      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.2619047619047619
Confusion Matrix of Decision Tree is:
 [[  0   0   0   0 190   0  20]
 [  0   0   0   0 177   0  33]
 [  0   0   0   0 203   0   7]
 [  0   0   0   0 190   0  20]
 [  0   0   0   0 201   0   9]
 [  0   0   0   0  63   0 147]
 [  0   0   0   0  26   0 184]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.00      0.00      0.00       210
           4       0.00      0.00      0.00       210
           5       0.19      0.96      0.32       210
           6       0.00      0.00      0.00       210
           7       0.44      0.88      0.58       210

    accuracy                           0.26      1470
   macro avg       0.09      0.26      0.13      1470
weighted avg       0.09      0.26      0.13      1470

Decision Tree with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.37142857142857144
Confusion Matrix of Decision Tree is:
 [[  0   0  82   0 108  12   8]
 [  0   0  44   0 133  20  13]
 [  0   0 172   0  31   6   1]
 [  0   0  64   0 126  10  10]
 [  0   0  53   0 148   6   3]
 [  0   0  17   0  46  62  85]
 [  0   0   1   0  25  20 164]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.40      0.82      0.53       210
           4       0.00      0.00      0.00       210
           5       0.24      0.70      0.36       210
           6       0.46      0.30      0.36       210
           7       0.58      0.78      0.66       210

    accuracy                           0.37      1470
   macro avg       0.24      0.37      0.27      1470
weighted avg       0.24      0.37      0.27      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4013605442176871
Confusion Matrix of Decision Tree is:
 [[ 78   0  82   0  30  12   8]
 [ 18  11  44   0 115   9  13]
 [  5   0 172   0  26   6   1]
 [ 31   1  64   0  95   9  10]
 [ 41   0  53   0 107   6   3]
 [ 23   4  17   0  23  58  85]
 [ 24   2   1   0   1  18 164]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.37      0.36       210
           2       0.61      0.05      0.10       210
           3       0.40      0.82      0.53       210
           4       0.00      0.00      0.00       210
           5       0.27      0.51      0.35       210
           6       0.49      0.28      0.35       210
           7       0.58      0.78      0.66       210

    accuracy                           0.40      1470
   macro avg       0.39      0.40      0.34      1470
weighted avg       0.39      0.40      0.34      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4496598639455782
Confusion Matrix of Decision Tree is:
 [[ 78   0  81   1  30  13   7]
 [ 18  87  40   2  41   9  13]
 [  5  10 162   0  26   6   1]
 [ 31  14  50  10  86  11   8]
 [ 41  10  45   3 102   6   3]
 [ 23  11  16   2  15  67  76]
 [ 24   3   1   0   0  27 155]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.37      0.36       210
           2       0.64      0.41      0.50       210
           3       0.41      0.77      0.54       210
           4       0.56      0.05      0.09       210
           5       0.34      0.49      0.40       210
           6       0.48      0.32      0.38       210
           7       0.59      0.74      0.66       210

    accuracy                           0.45      1470
   macro avg       0.48      0.45      0.42      1470
weighted avg       0.48      0.45      0.42      1470

Decision Tree with 5 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.45918367346938777
Confusion Matrix of Decision Tree is:
 [[102  11  21  20  36  13   7]
 [ 10  94  23  22  41  17   3]
 [  7   4 172   6  15   6   0]
 [ 37  26  19  65  36  17  10]
 [ 49  26  19  41  61   9   5]
 [ 13  26  16  26   9  75  45]
 [  2   9   8   7  14  64 106]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.49      0.47       210
           2       0.48      0.45      0.46       210
           3       0.62      0.82      0.70       210
           4       0.35      0.31      0.33       210
           5       0.29      0.29      0.29       210
           6       0.37      0.36      0.36       210
           7       0.60      0.50      0.55       210

    accuracy                           0.46      1470
   macro avg       0.45      0.46      0.45      1470
weighted avg       0.45      0.46      0.45      1470

Decision Tree with 13 max_depth

Accuracy of Decision Tree after MinMax Scaling is: 0.45170068027210886
Confusion Matrix of Decision Tree is:
 [[115   7  14  20  28  18   8]
 [  9  76  30  32  31  28   4]
 [ 14   3 171   6  10   5   1]
 [ 43  24  16  79  16  24   8]
 [ 54  29  16  42  44  13  12]
 [ 17  31  11  16  14  74  47]
 [  4   8   8  11  10  64 105]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.55      0.49       210
           2       0.43      0.36      0.39       210
           3       0.64      0.81      0.72       210
           4       0.38      0.38      0.38       210
           5       0.29      0.21      0.24       210
           6       0.33      0.35      0.34       210
           7       0.57      0.50      0.53       210

    accuracy                           0.45      1470
   macro avg       0.44      0.45      0.44      1470
weighted avg       0.44      0.45      0.44      1470

Random Forest with 1 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6571428571428571
Confusion Matrix of Random Forest is:
 [[135   7   2  12  25  25   4]
 [  3 141   9  16  19  18   4]
 [  9   5 173  12   4   7   0]
 [  6  13  10 123  23  30   5]
 [ 28  20   5  34  98  20   5]
 [ 13   3   0  18   2 113  61]
 [  1   1   0   0   1  24 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.64      0.67       210
           2       0.74      0.67      0.70       210
           3       0.87      0.82      0.85       210
           4       0.57      0.59      0.58       210
           5       0.57      0.47      0.51       210
           6       0.48      0.54      0.51       210
           7       0.70      0.87      0.78       210

    accuracy                           0.66      1470
   macro avg       0.66      0.66      0.66      1470
weighted avg       0.66      0.66      0.66      1470

Random Forest with 9 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6693877551020408
Confusion Matrix of Random Forest is:
 [[143   6   1  12  22  22   4]
 [  2 143   7  17  17  21   3]
 [  9   3 177  11   3   7   0]
 [  7   8  13 136  14  29   3]
 [ 29  21   5  33  99  17   6]
 [ 17   3   0  17   2 124  47]
 [  1   1   0   0   2  44 162]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.68      0.68       210
           2       0.77      0.68      0.72       210
           3       0.87      0.84      0.86       210
           4       0.60      0.65      0.62       210
           5       0.62      0.47      0.54       210
           6       0.47      0.59      0.52       210
           7       0.72      0.77      0.74       210

    accuracy                           0.67      1470
   macro avg       0.68      0.67      0.67      1470
weighted avg       0.68      0.67      0.67      1470

Random Forest with 17 max_depth


In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_vbert.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.6387755102040816
Confusion Matrix of Logistic Regression is:
 [[187   2   0   2   1  18   0]
 [  5 164  10   5   1  24   1]
 [  0   1 194   3   0  12   0]
 [  9  11   8 109   1  72   0]
 [104  30   7  17  21  28   3]
 [  5   4   1   4   1 189   6]
 [  5   1   0   0   0 129  75]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.59      0.89      0.71       210
           2       0.77      0.78      0.78       210
           3       0.88      0.92      0.90       210
           4       0.78      0.52      0.62       210
           5       0.84      0.10      0.18       210
           6       0.40      0.90      0.55       210
           7       0.88      0.36      0.51       210

    accuracy                           0.64      1470
   macro avg       0.74      0.64      0.61      1470
weighted avg       0.74      0.64      0.61      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.6482993197278911
Confusion Matrix of SVM is:
 [[199   0   0   1   3   7   0]
 [  7 155  16   5   4  22   1]
 [  1   1 201   3   0   4   0]
 [ 18   9  15 131   1  36   0]
 [125  22   8   8  31  16   0]
 [ 14   5   6   5   1 175   4]
 [  5   1   0   0   1 142  61]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.54      0.95      0.69       210
           2       0.80      0.74      0.77       210
           3       0.82      0.96      0.88       210
           4       0.86      0.62      0.72       210
           5       0.76      0.15      0.25       210
           6       0.44      0.83      0.57       210
           7       0.92      0.29      0.44       210

    accuracy                           0.65      1470
   macro avg       0.73      0.65      0.62      1470
weighted avg       0.73      0.65      0.62      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3013605442176871
Confusion Matrix of Decision Tree is:
 [[201   0   2   0   0   0   7]
 [183   0   8   8   0   0  11]
 [ 81   0 115   6   0   0   8]
 [174   0  20  11   0   0   5]
 [203   0   4   0   0   0   3]
 [139   0   7   0   0   0  64]
 [ 94   0   0   0   0   0 116]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.19      0.96      0.31       210
           2       0.00      0.00      0.00       210
           3       0.74      0.55      0.63       210
           4       0.44      0.05      0.09       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.54      0.55      0.55       210

    accuracy                           0.30      1470
   macro avg       0.27      0.30      0.23      1470
weighted avg       0.27      0.30      0.23      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.34625850340136055
Confusion Matrix of Decision Tree is:
 [[159   0   2   0  42   5   2]
 [ 72   8   8   0 111  10   1]
 [ 73   2 115   4   8   8   0]
 [ 98   2  18  11  76   4   1]
 [106   0   4   0  97   1   2]
 [ 95   1   6   0  44  42  22]
 [ 54   0   0   0  40  39  77]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.24      0.76      0.37       210
           2       0.62      0.04      0.07       210
           3       0.75      0.55      0.63       210
           4       0.73      0.05      0.10       210
           5       0.23      0.46      0.31       210
           6       0.39      0.20      0.26       210
           7       0.73      0.37      0.49       210

    accuracy                           0.35      1470
   macro avg       0.53      0.35      0.32      1470
weighted avg       0.53      0.35      0.32      1470

Decision Tree with 4 max_depth


Accuracy of Decision Tree after MinMax Scaling is: 0.39727891156462586
Confusion Matrix of Decision Tree is:
 [[ 79  19   5  23  55  23   6]
 [ 11  90  46  14  17  24   8]
 [  7  37 122  15  10   5  14]
 [ 12  32  29  58  46  26   7]
 [ 23  19  22  31  93  14   8]
 [ 18  30  20  33  33  49  27]
 [ 10  13  12  24  21  37  93]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.38      0.43       210
           2       0.38      0.43      0.40       210
           3       0.48      0.58      0.52       210
           4       0.29      0.28      0.28       210
           5       0.34      0.44      0.38       210
           6       0.28      0.23      0.25       210
           7       0.57      0.44      0.50       210

    accuracy                           0.40      1470
   macro avg       0.40      0.40      0.40      1470
weighted avg       0.40      0.40      0.40      1470

Decision Tree with 12 max_depth

Accuracy of Decision Tree after MinMax Scaling is: 0.3727891156462585
Confusion Matrix of Decision Tree is:
 [[ 66  15  20  41  41  25   2]
 [ 11  80  21  19  26  49   4]
 [  4  50 118  16  10   7   5]
 [ 14  24  29  73  23  40   7]
 [ 33  27  15  46  68  20   1]
 [  9  21  19  46  27  60  28]
 [  8  10   1  25  19  64  83]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.31      0.37       210
           2       0.35      0.38      0.37       210
           3       0.53      0.56      0.55       210
           4       0.27      0.35      0.31       210
           5       0.32      0.32      0.32       210
           6       0.23      0.29      0.25       210
           7       0.64      0.40      0.49       210

    accuracy                           0.37      1470
   macro avg       0.40      0.37      0.38      1470
weighted avg       0.40      0.37      0.38      1470

Decision Tree with 20 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6659863945578232
Confusion Matrix of Random Forest is:
 [[139   3   0   4  41  20   3]
 [  4 143  11  13  21  16   2]
 [  8   5 166  12   5  14   0]
 [ 14   5   8 118  36  20   9]
 [ 27  14   0  12 134  13  10]
 [  8   6   8  20   9 112  47]
 [  2   1   0   2   9  29 167]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.66      0.67       210
           2       0.81      0.68      0.74       210
           3       0.86      0.79      0.82       210
           4       0.65      0.56      0.60       210
           5       0.53      0.64      0.58       210
           6       0.50      0.53      0.52       210
           7       0.70      0.80      0.75       210

    accuracy                           0.67      1470
   macro avg       0.68      0.67      0.67      1470
weighted avg       0.68      0.67      0.67      1470

Random Forest with 8 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6503401360544218
Confusion Matrix of Random Forest is:
 [[145   2   0   5  34  23   1]
 [  2 143   8  15  24  17   1]
 [  5   3 174  13   4  11   0]
 [ 17   4  10 121  31  25   2]
 [ 34  11   2  13 131  15   4]
 [ 13   3   8  16  11 133  26]
 [  5   2   0   3   8  83 109]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.66      0.69      0.67       210
           2       0.85      0.68      0.76       210
           3       0.86      0.83      0.84       210
           4       0.65      0.58      0.61       210
           5       0.54      0.62      0.58       210
           6       0.43      0.63      0.51       210
           7       0.76      0.52      0.62       210

    accuracy                           0.65      1470
   macro avg       0.68      0.65      0.66      1470
weighted avg       0.68      0.65      0.66      1470

Random Forest with 16 max_depth


In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//gpt_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")

# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.6829931972789116
Confusion Matrix of Logistic Regression is:
 [[119  13   0   5  32  36   5]
 [  0 185   3   5   4   9   4]
 [  0  50 143  11   1   5   0]
 [  0  13   2 162   2  24   7]
 [ 12  51   4  20  99  13  11]
 [  4  16   3  19   0 129  39]
 [  0   6   0   1   3  33 167]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.88      0.57      0.69       210
           2       0.55      0.88      0.68       210
           3       0.92      0.68      0.78       210
           4       0.73      0.77      0.75       210
           5       0.70      0.47      0.56       210
           6       0.52      0.61      0.56       210
           7       0.72      0.80      0.75       210

    accuracy                           0.68      1470
   macro avg       0.72      0.68      0.68      1470
weighted avg       0.72      0.68      0.68      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.6843537414965987
Confusion Matrix of SVM is:
 [[166   9   1   6   6  21   1]
 [  2 185   6   3   3   8   3]
 [  1   7 196   1   0   5   0]
 [  5  31  13 123   0  35   3]
 [ 62  39   7  21  58  19   4]
 [  8  26   6  11   0 119  40]
 [  4   8   1   1   4  33 159]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.67      0.79      0.72       210
           2       0.61      0.88      0.72       210
           3       0.85      0.93      0.89       210
           4       0.74      0.59      0.65       210
           5       0.82      0.28      0.41       210
           6       0.50      0.57      0.53       210
           7       0.76      0.76      0.76       210

    accuracy                           0.68      1470
   macro avg       0.71      0.68      0.67      1470
weighted avg       0.71      0.68      0.67      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.24625850340136055
Confusion Matrix of Decision Tree is:
 [[  0   0  38   0   0   0 172]
 [  0   0  54   0   0   0 156]
 [  0   0 175   0   0   0  35]
 [  0   0  46   0   0   0 164]
 [  0   0  26   0   0   0 184]
 [  0   0  40   0   0   0 170]
 [  0   0  23   0   0   0 187]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.44      0.83      0.57       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.18      0.89      0.29       210

    accuracy                           0.25      1470
   macro avg       0.09      0.25      0.12      1470
weighted avg       0.09      0.25      0.12      1470

Decision Tree with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3224489795918367
Confusion Matrix of Decision Tree is:
 [[  2   0  36   0  86   0  86]
 [  2   0  52   0 122   0  34]
 [  1   0 174   0  16   0  19]
 [  3   0  43   0  92   0  72]
 [  1   0  25   0 141   0  43]
 [  9   0  31   0  39   0 131]
 [ 15   0   8   0  30   0 157]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.06      0.01      0.02       210
           2       0.00      0.00      0.00       210
           3       0.47      0.83      0.60       210
           4       0.00      0.00      0.00       210
           5       0.27      0.67      0.38       210
           6       0.00      0.00      0.00       210
           7       0.29      0.75      0.42       210

    accuracy                           0.32      1470
   macro avg       0.16      0.32      0.20      1470
weighted avg       0.16      0.32      0.20      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3870748299319728
Confusion Matrix of Decision Tree is:
 [[ 49   7  32   0  79   5  38]
 [  3  76  50   0  46   4  31]
 [  4   3 167   0  13   8  15]
 [ 27  20  36   0  72   9  46]
 [ 14  20  17   0 121   9  29]
 [ 29   9  25   0  30  12 105]
 [ 18   4   5   0  26  13 144]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.23      0.28       210
           2       0.55      0.36      0.44       210
           3       0.50      0.80      0.62       210
           4       0.00      0.00      0.00       210
           5       0.31      0.58      0.41       210
           6       0.20      0.06      0.09       210
           7       0.35      0.69      0.47       210

    accuracy                           0.39      1470
   macro avg       0.32      0.39      0.33      1470
weighted avg       0.32      0.39      0.33      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3741496598639456
Confusion Matrix of Decision Tree is:
 [[ 90  11  12  49  17   1  30]
 [ 17  71  26  52  31   2  11]
 [ 15  12 106  67   6   1   3]
 [ 38   8   5  70  46   3  40]
 [ 61  11   5  33  71   2  27]
 [ 25  25   9  49  14   7  81]
 [ 13  12   3  15  17  15 135]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.43      0.38       210
           2       0.47      0.34      0.39       210
           3       0.64      0.50      0.56       210
           4       0.21      0.33      0.26       210
           5       0.35      0.34      0.34       210
           6       0.23      0.03      0.06       210
           7       0.41      0.64      0.50       210

    accuracy                           0.37      1470
   macro avg       0.38      0.37      0.36      1470
weighted avg       0.38      0.37      0.36      1470

Decision Tree with 5 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.3816326530612245
Confusion Matrix of Decision Tree is:
 [[90 17  8 24 24 35 12]
 [12 84 21 31 28 24 10]
 [10 49 94 33  6 16  2]
 [29 32 13 72 27 24 13]
 [30 30 10 40 65 21 14]
 [27 27 16 28 19 66 27]
 [18 19  6 25  9 43 90]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.42      0.43      0.42       210
           2       0.33      0.40      0.36       210
           3       0.56      0.45      0.50       210
           4       0.28      0.34      0.31       210
           5       0.37      0.31      0.34       210
           6       0.29      0.31      0.30       210
           7       0.54      0.43      0.48       210

    accuracy                           0.38      1470
   macro avg       0.40      0.38      0.39      1470
weighted avg       0.40      0.38      0.39      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after MinMax Scaling is

Accuracy of Decision Tree after MinMax Scaling is: 0.3877551020408163
Confusion Matrix of Decision Tree is:
 [[ 84  20   7  23  27  34  15]
 [ 11 101  18  15  32  22  11]
 [ 13  35 118  17   5  18   4]
 [ 30  33  10  62  34  25  16]
 [ 32  32  10  38  68  22   8]
 [ 25  40  14  22  25  51  33]
 [ 25  17   6  18  15  43  86]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.40      0.39       210
           2       0.36      0.48      0.41       210
           3       0.64      0.56      0.60       210
           4       0.32      0.30      0.31       210
           5       0.33      0.32      0.33       210
           6       0.24      0.24      0.24       210
           7       0.50      0.41      0.45       210

    accuracy                           0.39      1470
   macro avg       0.40      0.39      0.39      1470
weighted avg       0.40      0.39      0.39      1470

Random Forest with 1 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6659863945578232
Confusion Matrix of Random Forest is:
 [[117   1   0  15  31  39   7]
 [  0 156   7  12  12  21   2]
 [  1   2 174  11   1  21   0]
 [  2   5  10 140   6  34  13]
 [ 18  19   0  32  99  26  16]
 [  4   7   4  15   5 127  48]
 [  0   1   0   1   3  39 166]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.56      0.66       210
           2       0.82      0.74      0.78       210
           3       0.89      0.83      0.86       210
           4       0.62      0.67      0.64       210
           5       0.63      0.47      0.54       210
           6       0.41      0.60      0.49       210
           7       0.66      0.79      0.72       210

    accuracy                           0.67      1470
   macro avg       0.69      0.67      0.67      1470
weighted avg       0.69      0.67      0.67      1470

Random Forest with 9 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6598639455782312
Confusion Matrix of Random Forest is:
 [[115   1   0  15  29  38  12]
 [  0 158   6  15  15  13   3]
 [  2   1 183  10   1  12   1]
 [  3   8   6 133  17  33  10]
 [ 14  22   0  27 110  24  13]
 [  6   8   5  14   7 124  46]
 [  0   3   0   2   3  55 147]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.55      0.66       210
           2       0.79      0.75      0.77       210
           3       0.92      0.87      0.89       210
           4       0.62      0.63      0.62       210
           5       0.60      0.52      0.56       210
           6       0.41      0.59      0.49       210
           7       0.63      0.70      0.67       210

    accuracy                           0.66      1470
   macro avg       0.68      0.66      0.67      1470
weighted avg       0.68      0.66      0.67      1470

Random Forest with 17 max_depth


In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//xlm_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.7081632653061225
Confusion Matrix of Logistic Regression is:
 [[187   0   0   2  10   9   2]
 [  9 170  10   6   3  12   0]
 [  2   3 197   0   0   8   0]
 [ 25  17  12 106  12  29   9]
 [ 66  23   5   7  98   4   7]
 [ 22  11   6   5   0 102  64]
 [  6   1   0   0   1  21 181]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.59      0.89      0.71       210
           2       0.76      0.81      0.78       210
           3       0.86      0.94      0.90       210
           4       0.84      0.50      0.63       210
           5       0.79      0.47      0.59       210
           6       0.55      0.49      0.52       210
           7       0.69      0.86      0.77       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.70      1470
weighted avg       0.72      0.71      0.70      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.719047619047619
Confusion Matrix of SVM is:
 [[184   0   0   3  11  10   2]
 [  4 173   7  11  11   3   1]
 [  0   5 190   6   3   6   0]
 [ 20   9   8 143   8  18   4]
 [ 68  19   5  11 102   2   3]
 [ 22  18   5  12   2  99  52]
 [  4   4   0   3   2  31 166]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.61      0.88      0.72       210
           2       0.76      0.82      0.79       210
           3       0.88      0.90      0.89       210
           4       0.76      0.68      0.72       210
           5       0.73      0.49      0.58       210
           6       0.59      0.47      0.52       210
           7       0.73      0.79      0.76       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.71      1470
weighted avg       0.72      0.72      0.71      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scalin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.308843537414966
Confusion Matrix of Decision Tree is:
 [[  0   0   0   0 168   0  42]
 [  0   0   1   0 166   0  43]
 [  0   0  74   2 107   0  27]
 [  0   0   3   2 126   0  79]
 [  0   0   2   0 174   0  34]
 [  0   0   1   0  28   0 181]
 [  0   0   1   0   5   0 204]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.90      0.35      0.51       210
           4       0.50      0.01      0.02       210
           5       0.22      0.83      0.35       210
           6       0.00      0.00      0.00       210
           7       0.33      0.97      0.50       210

    accuracy                           0.31      1470
   macro avg       0.28      0.31      0.20      1470
weighted avg       0.28      0.31      0.20      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.4122448979591837
Confusion Matrix of Decision Tree is:
 [[144  24   0   0   0  34   8]
 [ 12 154   1   0   0  41   2]
 [ 13  94  40   2   0  61   0]
 [ 41  85   1   2   0  74   7]
 [108  66   2   0   0  20  14]
 [ 13  15   1   0   0 129  52]
 [  5   0   0   0   0  68 137]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.43      0.69      0.53       210
           2       0.35      0.73      0.48       210
           3       0.89      0.19      0.31       210
           4       0.50      0.01      0.02       210
           5       0.00      0.00      0.00       210
           6       0.30      0.61      0.41       210
           7       0.62      0.65      0.64       210

    accuracy                           0.41      1470
   macro avg       0.44      0.41      0.34      1470
weighted avg       0.44      0.41      0.34      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.44693877551020406
Confusion Matrix of Decision Tree is:
 [[ 85   1   0  23  61  34   6]
 [  3  92   1  62   9  41   2]
 [ 10  52  40  58   3  47   0]
 [  3  12   1  75  38  74   7]
 [ 17  30   0  36 103  20   4]
 [  0   6   1   9  14 129  51]
 [  0   0   0   0   9  68 133]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.72      0.40      0.52       210
           2       0.48      0.44      0.46       210
           3       0.93      0.19      0.32       210
           4       0.29      0.36      0.32       210
           5       0.43      0.49      0.46       210
           6       0.31      0.61      0.41       210
           7       0.66      0.63      0.64       210

    accuracy                           0.45      1470
   macro avg       0.54      0.45      0.45      1470
weighted avg       0.54      0.45      0.45      1470

Decision Tree with 5 max_depth


Accuracy of Decision Tree after MinMax Scaling is: 0.47346938775510206
Confusion Matrix of Decision Tree is:
 [[104  13  12  20  36  20   5]
 [  6 123  11  14  37  15   4]
 [ 10  10 119  46   6  18   1]
 [ 14  29   9  71  38  38  11]
 [ 36  29   7  40  80  12   6]
 [ 10  17   7  30   7  92  47]
 [  8  10   4  20   7  54 107]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.50      0.52       210
           2       0.53      0.59      0.56       210
           3       0.70      0.57      0.63       210
           4       0.29      0.34      0.31       210
           5       0.38      0.38      0.38       210
           6       0.37      0.44      0.40       210
           7       0.59      0.51      0.55       210

    accuracy                           0.47      1470
   macro avg       0.49      0.47      0.48      1470
weighted avg       0.49      0.47      0.48      1470

Decision Tree with 13 max_depth

Accuracy of Decision Tree after MinMax Scaling is: 0.46258503401360546
Confusion Matrix of Decision Tree is:
 [[101  21   5  22  33  24   4]
 [  7 126  17  15  27  12   6]
 [  4  11 124  47   5  18   1]
 [ 10  24  13  75  42  35  11]
 [ 37  29  10  42  73  12   7]
 [ 11  16  16  37   5  78  47]
 [  8   7   7  21   7  57 103]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.57      0.48      0.52       210
           2       0.54      0.60      0.57       210
           3       0.65      0.59      0.62       210
           4       0.29      0.36      0.32       210
           5       0.38      0.35      0.36       210
           6       0.33      0.37      0.35       210
           7       0.58      0.49      0.53       210

    accuracy                           0.46      1470
   macro avg       0.48      0.46      0.47      1470
weighted avg       0.48      0.46      0.47      1470

Random Forest with 1 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6836734693877551
Confusion Matrix of Random Forest is:
 [[125   0   0   3  43  31   8]
 [  1 146   5  20  16  20   2]
 [  1   5 172  16   1  15   0]
 [  1   4   3 130  16  41  15]
 [ 19  21   0  21 131   7  11]
 [  1   4   0  12   7 125  61]
 [  0   0   0   2   1  31 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.84      0.60      0.70       210
           2       0.81      0.70      0.75       210
           3       0.96      0.82      0.88       210
           4       0.64      0.62      0.63       210
           5       0.61      0.62      0.62       210
           6       0.46      0.60      0.52       210
           7       0.64      0.84      0.73       210

    accuracy                           0.68      1470
   macro avg       0.71      0.68      0.69      1470
weighted avg       0.71      0.68      0.69      1470

Random Forest with 9 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.7068027210884353
Confusion Matrix of Random Forest is:
 [[138   0   0   7  35  21   9]
 [  1 156   7  14  15  16   1]
 [  1   2 182  14   1  10   0]
 [  1   2   3 136  11  39  18]
 [ 21  23   2  22 128   5   9]
 [  1   3   1  14   5 119  67]
 [  0   0   0   0   0  30 180]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.85      0.66      0.74       210
           2       0.84      0.74      0.79       210
           3       0.93      0.87      0.90       210
           4       0.66      0.65      0.65       210
           5       0.66      0.61      0.63       210
           6       0.50      0.57      0.53       210
           7       0.63      0.86      0.73       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.71      1470
weighted avg       0.72      0.71      0.71      1470

Random Forest with 17 max_depth


### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//bert_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.5945578231292517
Confusion Matrix of Logistic Regression is:
 [[108   0   0   2  94   2   4]
 [  3 114   9   3  76   1   4]
 [  1   6 161   4  33   1   4]
 [  5   6   8 104  72   0  15]
 [  2   4   3   5 193   0   3]
 [ 10  13   9  22  56  27  73]
 [  2   0   3   6  32   0 167]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.82      0.51      0.63       210
           2       0.80      0.54      0.65       210
           3       0.83      0.77      0.80       210
           4       0.71      0.50      0.58       210
           5       0.35      0.92      0.50       210
           6       0.87      0.13      0.22       210
           7       0.62      0.80      0.70       210

    accuracy                           0.59      1470
   macro avg       0.71      0.59      0.58      1470
weighted avg       0.71      0.59      0.58      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.6503401360544218
Confusion Matrix of SVM is:
 [[133   5   1   3  58   4   6]
 [  5 154  12   5  26   7   1]
 [  3   4 187   2   7   4   3]
 [ 13  14  15 107  35  13  13]
 [ 17  15   9   8 155   1   5]
 [ 18  15   8  19  29  64  57]
 [  5   2   1   4  24  18 156]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.69      0.63      0.66       210
           2       0.74      0.73      0.74       210
           3       0.80      0.89      0.84       210
           4       0.72      0.51      0.60       210
           5       0.46      0.74      0.57       210
           6       0.58      0.30      0.40       210
           7       0.65      0.74      0.69       210

    accuracy                           0.65      1470
   macro avg       0.66      0.65      0.64      1470
weighted avg       0.66      0.65      0.64      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.27687074829931974
Confusion Matrix of Decision Tree is:
 [[ 13  32   0   0   0   0 165]
 [  2 120   9   0   0   0  79]
 [  3  50  81   0   0   0  76]
 [  0  49   2   0   0   0 159]
 [  1  52   0   0   0   0 157]
 [  4  37   5   0   0   0 164]
 [  1  16   0   0   0   0 193]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.54      0.06      0.11       210
           2       0.34      0.57      0.42       210
           3       0.84      0.39      0.53       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.19      0.92      0.32       210

    accuracy                           0.28      1470
   macro avg       0.27      0.28      0.20      1470
weighted avg       0.27      0.28      0.20      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3217687074829932
Confusion Matrix of Decision Tree is:
 [[127  34   0   0   0   0  49]
 [ 57 123   8   0   0   0  22]
 [ 64  53  81   0   0   0  12]
 [ 85  49   2   0   0   0  74]
 [103  53   0   0   0   0  54]
 [ 70  42   4   0   0   0  94]
 [ 51  17   0   0   0   0 142]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.60      0.33       210
           2       0.33      0.59      0.42       210
           3       0.85      0.39      0.53       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.32      0.68      0.43       210

    accuracy                           0.32      1470
   macro avg       0.25      0.32      0.25      1470
weighted avg       0.25      0.32      0.25      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3163265306122449
Confusion Matrix of Decision Tree is:
 [[125  11   2  28  23   0  21]
 [ 56  73   7  16  50   2   6]
 [ 49  25  95   9  25   4   3]
 [ 84  17   3  48  32   0  26]
 [103  19   0  27  34   0  27]
 [ 69  24   5  45  16   2  49]
 [ 51   5   0  54  11   1  88]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.60      0.33       210
           2       0.42      0.35      0.38       210
           3       0.85      0.45      0.59       210
           4       0.21      0.23      0.22       210
           5       0.18      0.16      0.17       210
           6       0.22      0.01      0.02       210
           7       0.40      0.42      0.41       210

    accuracy                           0.32      1470
   macro avg       0.36      0.32      0.30      1470
weighted avg       0.36      0.32      0.30      1470

Decision Tree with 5 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.36802721088435375
Confusion Matrix of Decision Tree is:
 [[ 66  22   6  24  31  54   7]
 [ 20  91  19  25  24  22   9]
 [  4  12 128  15  18  30   3]
 [ 29  20   7  58  28  50  18]
 [ 36  26   4  23  70  37  14]
 [ 14  20  13  38  29  73  23]
 [ 15  11   9  30  16  74  55]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.36      0.31      0.34       210
           2       0.45      0.43      0.44       210
           3       0.69      0.61      0.65       210
           4       0.27      0.28      0.27       210
           5       0.32      0.33      0.33       210
           6       0.21      0.35      0.27       210
           7       0.43      0.26      0.32       210

    accuracy                           0.37      1470
   macro avg       0.39      0.37      0.37      1470
weighted avg       0.39      0.37      0.37      1470

Decision Tree with 13 max_depth

Accuracy of Decision Tree after MinMax Scaling is: 0.34217687074829933
Confusion Matrix of Decision Tree is:
 [[ 58  13  13  38  33  40  15]
 [ 19  75  17  35  36  16  12]
 [ 15  18 136  10   8  19   4]
 [ 30  22  11  57  35  33  22]
 [ 37  23   8  30  67  24  21]
 [ 27  22  11  33  34  57  26]
 [ 22  14   9  22  32  58  53]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.28      0.28      0.28       210
           2       0.40      0.36      0.38       210
           3       0.66      0.65      0.66       210
           4       0.25      0.27      0.26       210
           5       0.27      0.32      0.29       210
           6       0.23      0.27      0.25       210
           7       0.35      0.25      0.29       210

    accuracy                           0.34      1470
   macro avg       0.35      0.34      0.34      1470
weighted avg       0.35      0.34      0.34      1470

Random Forest with 1 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.5340136054421769
Confusion Matrix of Random Forest is:
 [[ 86   6   0  15  59  16  28]
 [  2 134   3  24  35   5   7]
 [  1  22 160  13   3   9   2]
 [  3  22   5  61  40  29  50]
 [ 11  20   1  14 131   3  30]
 [  4  21   4  24  21  56  80]
 [  1   5   1  10  15  21 157]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.80      0.41      0.54       210
           2       0.58      0.64      0.61       210
           3       0.92      0.76      0.83       210
           4       0.38      0.29      0.33       210
           5       0.43      0.62      0.51       210
           6       0.40      0.27      0.32       210
           7       0.44      0.75      0.56       210

    accuracy                           0.53      1470
   macro avg       0.56      0.53      0.53      1470
weighted avg       0.56      0.53      0.53      1470

Random Forest with 9 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.5857142857142857
Confusion Matrix of Random Forest is:
 [[110   7   0  10  45  12  26]
 [  1 138   3  15  29  19   5]
 [  2  23 164   7   3   9   2]
 [  6  13   4  87  33  33  34]
 [ 13  22   2  12 127  10  24]
 [ 21   9   3  21   9  86  61]
 [  2   5   0  12  12  30 149]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.52      0.60       210
           2       0.64      0.66      0.65       210
           3       0.93      0.78      0.85       210
           4       0.53      0.41      0.47       210
           5       0.49      0.60      0.54       210
           6       0.43      0.41      0.42       210
           7       0.50      0.71      0.58       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.59      1470
weighted avg       0.60      0.59      0.59      1470

Random Forest with 17 max_depth


In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.6197278911564625
Confusion Matrix of Logistic Regression is:
 [[ 51   6   3  31  80  31   8]
 [  0 147   8  13  27  11   4]
 [  1   1 174  15   6  13   0]
 [  4   5  11 152  27  10   1]
 [  5   9   3  38 142  10   3]
 [  5  11  10  26  12 112  34]
 [  1   2   0  11  18  45 133]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.76      0.24      0.37       210
           2       0.81      0.70      0.75       210
           3       0.83      0.83      0.83       210
           4       0.53      0.72      0.61       210
           5       0.46      0.68      0.54       210
           6       0.48      0.53      0.51       210
           7       0.73      0.63      0.68       210

    accuracy                           0.62      1470
   macro avg       0.66      0.62      0.61      1470
weighted avg       0.66      0.62      0.61      1470

KNN with 3 Nei

Accuracy of SVM after MinMax Scaling is: 0.6489795918367347
Confusion Matrix of SVM is:
 [[101   8   6  26  50  16   3]
 [  2 163   3  10  16  14   2]
 [  4   6 178   9   5   8   0]
 [ 11  11  10 147  14  16   1]
 [ 23  27   3  34 117   4   2]
 [ 11  17  13  24   5  94  46]
 [  4   2   2   5   9  34 154]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.65      0.48      0.55       210
           2       0.70      0.78      0.73       210
           3       0.83      0.85      0.84       210
           4       0.58      0.70      0.63       210
           5       0.54      0.56      0.55       210
           6       0.51      0.45      0.47       210
           7       0.74      0.73      0.74       210

    accuracy                           0.65      1470
   macro avg       0.65      0.65      0.65      1470
weighted avg       0.65      0.65      0.65      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.21224489795918366
Confusion Matrix of Decision Tree is:
 [[  0   0   9   0   0   0 201]
 [  0   0   9   0   0   0 201]
 [  0   0 102   0   0   0 108]
 [  0   0   9   0   0   0 201]
 [  0   0   0   0   0   0 210]
 [  0   0   6   0   0   0 204]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.76      0.49      0.59       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.16      1.00      0.27       210

    accuracy                           0.21      1470
   macro avg       0.13      0.21      0.12      1470
weighted avg       0.13      0.21      0.12      1470

Decision Tree with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.28503401360544217
Confusion Matrix of Decision Tree is:
 [[  9   0   0   0 186   0  15]
 [  4   0   5   0 157   0  44]
 [ 50   0  52   0 103   0   5]
 [  6   0   3   0 183   0  18]
 [  0   0   0   0 190   0  20]
 [  5   0   1   0  95   0 109]
 [  0   0   0   0  42   0 168]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.12      0.04      0.06       210
           2       0.00      0.00      0.00       210
           3       0.85      0.25      0.38       210
           4       0.00      0.00      0.00       210
           5       0.20      0.90      0.33       210
           6       0.00      0.00      0.00       210
           7       0.44      0.80      0.57       210

    accuracy                           0.29      1470
   macro avg       0.23      0.29      0.19      1470
weighted avg       0.23      0.29      0.19      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3231292517006803
Confusion Matrix of Decision Tree is:
 [[  7  69   2   0 117   9   6]
 [  0 109   4   0  48  41   8]
 [ 46  30  14   0  73  47   0]
 [  6  51   2   0 132  16   3]
 [  0  24   0   0 166   6  14]
 [  5  59   1   0  36  57  52]
 [  0  28   0   0  14  46 122]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.11      0.03      0.05       210
           2       0.29      0.52      0.38       210
           3       0.61      0.07      0.12       210
           4       0.00      0.00      0.00       210
           5       0.28      0.79      0.42       210
           6       0.26      0.27      0.26       210
           7       0.60      0.58      0.59       210

    accuracy                           0.32      1470
   macro avg       0.31      0.32      0.26      1470
weighted avg       0.31      0.32      0.26      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.35578231292517004
Confusion Matrix of Decision Tree is:
 [[ 58  18   0  29  88  12   5]
 [ 28  95   2  10  43  27   5]
 [ 54  22  12  44  68  10   0]
 [ 47  11   2  39  95  13   3]
 [ 14  11   0  12 154  10   9]
 [ 43  32   1  18  18  54  44]
 [ 18  16   0   4  10  51 111]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.22      0.28      0.25       210
           2       0.46      0.45      0.46       210
           3       0.71      0.06      0.11       210
           4       0.25      0.19      0.21       210
           5       0.32      0.73      0.45       210
           6       0.31      0.26      0.28       210
           7       0.63      0.53      0.57       210

    accuracy                           0.36      1470
   macro avg       0.41      0.36      0.33      1470
weighted avg       0.41      0.36      0.33      1470

Decision Tree with 5 max_depth


Accuracy of Decision Tree after MinMax Scaling is: 0.3979591836734694
Confusion Matrix of Decision Tree is:
 [[ 60  13  16  34  53  26   8]
 [ 12 105   9  30  26  18  10]
 [ 61  35  67  13  11  19   4]
 [ 45  16  13  66  47  17   6]
 [ 19  13  12  37 101  14  14]
 [ 20  24   5  23  16  67  55]
 [ 19  12   1   8   9  42 119]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.25      0.29      0.27       210
           2       0.48      0.50      0.49       210
           3       0.54      0.32      0.40       210
           4       0.31      0.31      0.31       210
           5       0.38      0.48      0.43       210
           6       0.33      0.32      0.32       210
           7       0.55      0.57      0.56       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.40      1470
weighted avg       0.41      0.40      0.40      1470

Decision Tree with 13 max_depth


Accuracy of Decision Tree after MinMax Scaling is: 0.3625850340136054
Confusion Matrix of Decision Tree is:
 [[ 58  17  28  31  36  32   8]
 [ 15 104  11  24  19  28   9]
 [ 67  36  67  11   9  17   3]
 [ 42  22  21  55  40  21   9]
 [ 30  22  23  35  68  17  15]
 [ 25  20  11  22  18  69  45]
 [ 13  16   4  11  10  44 112]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.28      0.25       210
           2       0.44      0.50      0.47       210
           3       0.41      0.32      0.36       210
           4       0.29      0.26      0.28       210
           5       0.34      0.32      0.33       210
           6       0.30      0.33      0.32       210
           7       0.56      0.53      0.55       210

    accuracy                           0.36      1470
   macro avg       0.37      0.36      0.36      1470
weighted avg       0.37      0.36      0.36      1470

Random Forest with 1 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.5945578231292517
Confusion Matrix of Random Forest is:
 [[ 86  10   4  23  52  27   8]
 [  0 144   2  14  25  15  10]
 [ 22   4 157  12   8   6   1]
 [ 12   6   3 113  30  33  13]
 [  8  15   2  21 135  11  18]
 [ 10  19   7  19   4  61  90]
 [  1   5   0   0   3  23 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.41      0.49       210
           2       0.71      0.69      0.70       210
           3       0.90      0.75      0.82       210
           4       0.56      0.54      0.55       210
           5       0.53      0.64      0.58       210
           6       0.35      0.29      0.32       210
           7       0.56      0.85      0.67       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.59      1470
weighted avg       0.60      0.59      0.59      1470

Random Forest with 9 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6210884353741497
Confusion Matrix of Random Forest is:
 [[ 93  10   4  21  49  27   6]
 [  4 151   3  11  18  17   6]
 [  7   1 173  14   5  10   0]
 [ 14   5   9 121  30  26   5]
 [ 15  10   3  22 134  12  14]
 [ 12  16   7  22   7  73  73]
 [  2   4   0   2   1  33 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.63      0.44      0.52       210
           2       0.77      0.72      0.74       210
           3       0.87      0.82      0.85       210
           4       0.57      0.58      0.57       210
           5       0.55      0.64      0.59       210
           6       0.37      0.35      0.36       210
           7       0.62      0.80      0.70       210

    accuracy                           0.62      1470
   macro avg       0.62      0.62      0.62      1470
weighted avg       0.62      0.62      0.62      1470

Random Forest with 17 max_depth


In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.726530612244898
Confusion Matrix of Logistic Regression is:
 [[150   3   1   1  27  24   4]
 [  0 177   7   5   5  13   3]
 [  0   7 195   1   1   6   0]
 [  1  17   7 146   4  30   5]
 [ 27  30   9  14 106  17   7]
 [  1  13   6   5   1 155  29]
 [  1   5   0   0   1  64 139]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.83      0.71      0.77       210
           2       0.70      0.84      0.77       210
           3       0.87      0.93      0.90       210
           4       0.85      0.70      0.76       210
           5       0.73      0.50      0.60       210
           6       0.50      0.74      0.60       210
           7       0.74      0.66      0.70       210

    accuracy                           0.73      1470
   macro avg       0.75      0.73      0.73      1470
weighted avg       0.75      0.73      0.73      1470

KNN with 3 Neig

Accuracy of SVM after MinMax Scaling is: 0.7346938775510204
Confusion Matrix of SVM is:
 [[153   5   1   4  34  11   2]
 [  1 183   9   7   5   4   1]
 [  0   2 203   0   2   3   0]
 [  8  20  19 138   6  15   4]
 [ 30  22   6  15 125   5   7]
 [  9  21  12   9   1 120  38]
 [  3   4   1   0   2  42 158]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.75      0.73      0.74       210
           2       0.71      0.87      0.78       210
           3       0.81      0.97      0.88       210
           4       0.80      0.66      0.72       210
           5       0.71      0.60      0.65       210
           6       0.60      0.57      0.59       210
           7       0.75      0.75      0.75       210

    accuracy                           0.73      1470
   macro avg       0.73      0.73      0.73      1470
weighted avg       0.73      0.73      0.73      1470

Working on SVM Kernal: poly
Accuracy of SVM after MinMax Scali

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.2163265306122449
Confusion Matrix of Decision Tree is:
 [[208   0   2   0   0   0   0]
 [198   0  12   0   0   0   0]
 [100   0 110   0   0   0   0]
 [202   0   8   0   0   0   0]
 [209   0   1   0   0   0   0]
 [209   0   1   0   0   0   0]
 [205   0   5   0   0   0   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.16      0.99      0.27       210
           2       0.00      0.00      0.00       210
           3       0.79      0.52      0.63       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.00      0.00      0.00       210

    accuracy                           0.22      1470
   macro avg       0.14      0.22      0.13      1470
weighted avg       0.14      0.22      0.13      1470

Decision Tree with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.2829931972789116
Confusion Matrix of Decision Tree is:
 [[170   2   0   0   0   0  38]
 [176   6   6   0   0   0  22]
 [ 91  35  75   0   0   0   9]
 [128   4   4   0   0   0  74]
 [151   1   0   0   0   0  58]
 [103   1   0   0   0   0 106]
 [ 40   4   1   0   0   0 165]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.20      0.81      0.32       210
           2       0.11      0.03      0.05       210
           3       0.87      0.36      0.51       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.35      0.79      0.48       210

    accuracy                           0.28      1470
   macro avg       0.22      0.28      0.19      1470
weighted avg       0.22      0.28      0.19      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.354421768707483
Confusion Matrix of Decision Tree is:
 [[138  32   0  26   0   0  14]
 [ 48 129   4  21   0   0   8]
 [ 53  40  75  41   0   0   1]
 [ 97  31   4  69   0   0   9]
 [117  35   0  34   0   0  24]
 [ 76  28   0  43   0   0  63]
 [ 33   7   1  59   0   0 110]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.25      0.66      0.36       210
           2       0.43      0.61      0.50       210
           3       0.89      0.36      0.51       210
           4       0.24      0.33      0.27       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.48      0.52      0.50       210

    accuracy                           0.35      1470
   macro avg       0.33      0.35      0.31      1470
weighted avg       0.33      0.35      0.31      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.37891156462585035
Confusion Matrix of Decision Tree is:
 [[ 42  12  20 114   0   6  16]
 [  1 102  33  59   0   1  14]
 [  1   7 108  58   0   1  35]
 [  0  16  19 143   0   6  26]
 [  6  22  13 143   0   9  17]
 [  9  12  16  94   0  22  57]
 [  0   4   4  51   0  11 140]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.71      0.20      0.31       210
           2       0.58      0.49      0.53       210
           3       0.51      0.51      0.51       210
           4       0.22      0.68      0.33       210
           5       0.00      0.00      0.00       210
           6       0.39      0.10      0.17       210
           7       0.46      0.67      0.54       210

    accuracy                           0.38      1470
   macro avg       0.41      0.38      0.34      1470
weighted avg       0.41      0.38      0.34      1470

Decision Tree with 5 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3945578231292517
Confusion Matrix of Decision Tree is:
 [[ 24  12  11 109  19  24  11]
 [  1 120   5  52  22   1   9]
 [  1  15 127  57   8   1   1]
 [  6  24   7 130  25   0  18]
 [ 11  20   0 119  45   4  11]
 [ 22  23   4  92  11   9  49]
 [ 10   3   3  48  20   1 125]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.32      0.11      0.17       210
           2       0.55      0.57      0.56       210
           3       0.81      0.60      0.69       210
           4       0.21      0.62      0.32       210
           5       0.30      0.21      0.25       210
           6       0.23      0.04      0.07       210
           7       0.56      0.60      0.58       210

    accuracy                           0.39      1470
   macro avg       0.43      0.39      0.38      1470
weighted avg       0.43      0.39      0.38      1470

Decision Tree with 6 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.39727891156462586
Confusion Matrix of Decision Tree is:
 [[ 57  18  20  29  35  35  16]
 [  2 126  14  10  27  13  18]
 [  6  19 116  17  37  13   2]
 [ 27  26  15  59  20  32  31]
 [ 39  30  11  35  53  24  18]
 [ 25  28   9  24  14  59  51]
 [ 11   9   2  14  21  39 114]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.27      0.30       210
           2       0.49      0.60      0.54       210
           3       0.62      0.55      0.58       210
           4       0.31      0.28      0.30       210
           5       0.26      0.25      0.25       210
           6       0.27      0.28      0.28       210
           7       0.46      0.54      0.50       210

    accuracy                           0.40      1470
   macro avg       0.39      0.40      0.39      1470
weighted avg       0.39      0.40      0.39      1470

Decision Tree with 14 max_depth

Accuracy of Random Forest after MinMax Scaling is: 0.37482993197278913
Confusion Matrix of Random Forest is:
 [[ 16   1  51   1  77   1  63]
 [  0  28 110   1  41   0  30]
 [  0   1 185   6   7   0  11]
 [  1   4  41  16  25   0 123]
 [  1   5  17   1 109   0  77]
 [  0   2  34   4  28   1 141]
 [  0   2   8   2   2   0 196]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.89      0.08      0.14       210
           2       0.65      0.13      0.22       210
           3       0.41      0.88      0.56       210
           4       0.52      0.08      0.13       210
           5       0.38      0.52      0.44       210
           6       0.50      0.00      0.01       210
           7       0.31      0.93      0.46       210

    accuracy                           0.37      1470
   macro avg       0.52      0.37      0.28      1470
weighted avg       0.52      0.37      0.28      1470

Random Forest with 2 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6251700680272109
Confusion Matrix of Random Forest is:
 [[110   2   0  10  38  36  14]
 [  0 151   8  15  18  14   4]
 [  2   1 180  12   3  12   0]
 [  6   8  13 101  14  47  21]
 [ 23  13   0  15 113  13  33]
 [  8   9   7  14   8  92  72]
 [  0   4   0   2   3  29 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.52      0.61       210
           2       0.80      0.72      0.76       210
           3       0.87      0.86      0.86       210
           4       0.60      0.48      0.53       210
           5       0.57      0.54      0.56       210
           6       0.38      0.44      0.41       210
           7       0.54      0.82      0.65       210

    accuracy                           0.63      1470
   macro avg       0.64      0.63      0.63      1470
weighted avg       0.64      0.63      0.63      1470

Random Forest with 10 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.645578231292517
Confusion Matrix of Random Forest is:
 [[119   2   1  10  30  36  12]
 [  1 165   7  10  10  14   3]
 [  1   8 178   9   0  14   0]
 [  4  10   9 113  14  43  17]
 [ 28  12   0  21 101  18  30]
 [  8   9   6  13   2 119  53]
 [  0   4   0   6   5  41 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.57      0.64       210
           2       0.79      0.79      0.79       210
           3       0.89      0.85      0.87       210
           4       0.62      0.54      0.58       210
           5       0.62      0.48      0.54       210
           6       0.42      0.57      0.48       210
           7       0.57      0.73      0.64       210

    accuracy                           0.65      1470
   macro avg       0.66      0.65      0.65      1470
weighted avg       0.66      0.65      0.65      1470

Random Forest with 18 max_depth
A

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.7687074829931972
Confusion Matrix of Logistic Regression is:
 [[143   1   0   3  56   5   2]
 [  2 163   5   9  27   2   2]
 [  0   2 194   6   5   3   0]
 [  2   6   3 169  17  12   1]
 [ 11   4   3  15 169   3   5]
 [  5   7   6  24  14 122  32]
 [  1   1   0   0  10  28 170]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.87      0.68      0.76       210
           2       0.89      0.78      0.83       210
           3       0.92      0.92      0.92       210
           4       0.75      0.80      0.78       210
           5       0.57      0.80      0.67       210
           6       0.70      0.58      0.63       210
           7       0.80      0.81      0.81       210

    accuracy                           0.77      1470
   macro avg       0.78      0.77      0.77      1470
weighted avg       0.78      0.77      0.77      1470

KNN with 3 Nei

Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 30   7   2   9 155   4   3]
 [  7  38   0  10 137  10   8]
 [  5   7  57   7 111  22   1]
 [  6  12   4  23 151   7   7]
 [  8   4   0  10 176   7   5]
 [  6  10   3   8 148  21  14]
 [  2   5   0   3 170   9  21]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.47      0.14      0.22       210
           2       0.46      0.18      0.26       210
           3       0.86      0.27      0.41       210
           4       0.33      0.11      0.16       210
           5       0.17      0.84      0.28       210
           6       0.26      0.10      0.14       210
           7       0.36      0.10      0.16       210

    accuracy                           0.25      1470
   macro avg       0.42      0.25      0.23      1470
weighted avg       0.42      0.25      0.23      1470

Working on SVM Kernal: linear
Accuracy of SVM after MinMax Scaling is: 0.7571428571428

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.29523809523809524
Confusion Matrix of Decision Tree is:
 [[  0   0  21   0 179   0  10]
 [  0   0  17   0 187   0   6]
 [  0   0 153   0  55   0   2]
 [  0   0  14   0 176   0  20]
 [  0   0   8   0 198   0   4]
 [  0   0  16   0 152   0  42]
 [  0   0   2   0 125   0  83]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.66      0.73      0.69       210
           4       0.00      0.00      0.00       210
           5       0.18      0.94      0.31       210
           6       0.00      0.00      0.00       210
           7       0.50      0.40      0.44       210

    accuracy                           0.30      1470
   macro avg       0.19      0.30      0.21      1470
weighted avg       0.19      0.30      0.21      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.36394557823129253
Confusion Matrix of Decision Tree is:
 [[ 18  13   1   0 168   0  10]
 [  1 114  12   0  77   0   6]
 [  7  35 146   1  20   0   1]
 [  3  45  11   4 131   0  16]
 [  3  28   5   1 170   0   3]
 [  2  34  13   1 119   0  41]
 [  1  33   1   0  92   0  83]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.51      0.09      0.15       210
           2       0.38      0.54      0.45       210
           3       0.77      0.70      0.73       210
           4       0.57      0.02      0.04       210
           5       0.22      0.81      0.34       210
           6       0.00      0.00      0.00       210
           7       0.52      0.40      0.45       210

    accuracy                           0.36      1470
   macro avg       0.42      0.36      0.31      1470
weighted avg       0.42      0.36      0.31      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.3802721088435374
Confusion Matrix of Decision Tree is:
 [[107   2  10   0  80   9   2]
 [ 14  88  30   0  65  13   0]
 [  4  16 161   1  17  11   0]
 [ 19  14  39   2 114  18   4]
 [ 63  18  12   1 110   5   1]
 [ 16   4  34   0 105  37  14]
 [ 11   6  27   0  82  30  54]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.51      0.48       210
           2       0.59      0.42      0.49       210
           3       0.51      0.77      0.62       210
           4       0.50      0.01      0.02       210
           5       0.19      0.52      0.28       210
           6       0.30      0.18      0.22       210
           7       0.72      0.26      0.38       210

    accuracy                           0.38      1470
   macro avg       0.47      0.38      0.36      1470
weighted avg       0.47      0.38      0.36      1470

Decision Tree with 5 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.4238095238095238
Confusion Matrix of Decision Tree is:
 [[ 82  13   5  17  50  29  14]
 [ 10 105  13  10  45  24   3]
 [  6  10 156  19   4  14   1]
 [ 17  25  15  56  38  42  17]
 [ 47  30   6  17  74  23  13]
 [ 18  31  11  25  25  67  33]
 [ 13  22   3  17  28  44  83]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.42      0.39      0.41       210
           2       0.44      0.50      0.47       210
           3       0.75      0.74      0.74       210
           4       0.35      0.27      0.30       210
           5       0.28      0.35      0.31       210
           6       0.28      0.32      0.30       210
           7       0.51      0.40      0.44       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.43      1470
weighted avg       0.43      0.42      0.43      1470

Decision Tree with 13 max_depth


Accuracy of Decision Tree after MinMax Scaling is: 0.40476190476190477
Confusion Matrix of Decision Tree is:
 [[ 88  12   7  18  36  37  12]
 [ 12 104  14  23  38  17   2]
 [  6  22 146  13  10  11   2]
 [ 14  23  17  63  35  37  21]
 [ 54  31   4  28  56  21  16]
 [ 10  32  16  25  31  52  44]
 [ 13  23   5  14  26  43  86]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.42      0.43       210
           2       0.42      0.50      0.46       210
           3       0.70      0.70      0.70       210
           4       0.34      0.30      0.32       210
           5       0.24      0.27      0.25       210
           6       0.24      0.25      0.24       210
           7       0.47      0.41      0.44       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.41      1470
weighted avg       0.41      0.40      0.41      1470

Random Forest with 1 max_depth


Accuracy of Random Forest after MinMax Scaling is: 0.6340136054421769
Confusion Matrix of Random Forest is:
 [[114   1   0   7  50  24  14]
 [  1 150   3   9  32   9   6]
 [  2  12 164  11   6  14   1]
 [  2  11   3 114  28  28  24]
 [ 16  13   1  10 134   9  27]
 [  5   7   4  22  13  60  99]
 [  0   1   0   1   5   7 196]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.54      0.65       210
           2       0.77      0.71      0.74       210
           3       0.94      0.78      0.85       210
           4       0.66      0.54      0.59       210
           5       0.50      0.64      0.56       210
           6       0.40      0.29      0.33       210
           7       0.53      0.93      0.68       210

    accuracy                           0.63      1470
   macro avg       0.66      0.63      0.63      1470
weighted avg       0.66      0.63      0.63      1470

Random Forest with 9 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.6761904761904762
Confusion Matrix of Random Forest is:
 [[114   1   0   9  51  20  15]
 [  1 160   2   8  23  10   6]
 [  5   9 168  10   6  11   1]
 [  5   7   3 124  24  28  19]
 [ 16  12   1  16 133  12  20]
 [  4   5   7  12  14 101  67]
 [  0   0   0   0   4  12 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.54      0.64       210
           2       0.82      0.76      0.79       210
           3       0.93      0.80      0.86       210
           4       0.69      0.59      0.64       210
           5       0.52      0.63      0.57       210
           6       0.52      0.48      0.50       210
           7       0.60      0.92      0.73       210

    accuracy                           0.68      1470
   macro avg       0.70      0.68      0.68      1470
weighted avg       0.70      0.68      0.68      1470

Random Forest with 17 max_depth


In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//xlm_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['kabita_labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after MinMax Scaling is: 0.5122448979591837
Confusion Matrix of Logistic Regression is:
 [[ 97   5   1  10  70  11  16]
 [  4 116  12  10  50  12   6]
 [  4  11 164   9  18   4   0]
 [ 17  10   6  76  57  29  15]
 [ 19  15   3  26 133   9   5]
 [ 16  12   5  19  41  66  51]
 [ 16   2   1  15  38  37 101]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.56      0.46      0.51       210
           2       0.68      0.55      0.61       210
           3       0.85      0.78      0.82       210
           4       0.46      0.36      0.41       210
           5       0.33      0.63      0.43       210
           6       0.39      0.31      0.35       210
           7       0.52      0.48      0.50       210

    accuracy                           0.51      1470
   macro avg       0.54      0.51      0.52      1470
weighted avg       0.54      0.51      0.52      1470

KNN with 3 Nei

Accuracy of Bernoulli Naive Bayes after MinMax Scaling is: 0.25510204081632654
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 22  14   2  17  18  28 109]
 [ 13  60   0  18  22  16  81]
 [  6   4  80  10   8  60  42]
 [ 19  11   1  27  20  18 114]
 [ 14  14   0  13  29  14 126]
 [ 13  15   2  26  22  27 105]
 [ 14   9   1  19  13  24 130]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.22      0.10      0.14       210
           2       0.47      0.29      0.36       210
           3       0.93      0.38      0.54       210
           4       0.21      0.13      0.16       210
           5       0.22      0.14      0.17       210
           6       0.14      0.13      0.14       210
           7       0.18      0.62      0.28       210

    accuracy                           0.26      1470
   macro avg       0.34      0.26      0.26      1470
weighted avg       0.34      0.26      0.26      1470

Working

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.20136054421768707
Confusion Matrix of Decision Tree is:
 [[  5   5   1   0 199   0   0]
 [  3  39   2   0 166   0   0]
 [  0   2  49   0 159   0   0]
 [  0   7   4   0 199   0   0]
 [  1   5   1   0 203   0   0]
 [  2   6   3   0 199   0   0]
 [  0  10   0   0 200   0   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.02      0.05       210
           2       0.53      0.19      0.27       210
           3       0.82      0.23      0.36       210
           4       0.00      0.00      0.00       210
           5       0.15      0.97      0.26       210
           6       0.00      0.00      0.00       210
           7       0.00      0.00      0.00       210

    accuracy                           0.20      1470
   macro avg       0.28      0.20      0.14      1470
weighted avg       0.28      0.20      0.14      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.2217687074829932
Confusion Matrix of Decision Tree is:
 [[ 33   5   0   0   0   1 171]
 [  9  35   1   0   0   1 164]
 [  7   2  48   0   0   1 152]
 [  1   1   3   0   0   1 204]
 [  3   4   0   0   0   1 202]
 [  8   3   2   0   0   1 196]
 [  1   0   0   0   0   0 209]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.16      0.24       210
           2       0.70      0.17      0.27       210
           3       0.89      0.23      0.36       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.17      0.00      0.01       210
           7       0.16      1.00      0.28       210

    accuracy                           0.22      1470
   macro avg       0.35      0.22      0.17      1470
weighted avg       0.35      0.22      0.17      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.23605442176870747
Confusion Matrix of Decision Tree is:
 [[ 32   4   9   0   0   4 161]
 [  5  34  13   0   0   7 151]
 [  1   2  70   2   0   6 129]
 [  3   1  11   3   0   0 192]
 [  3   2   7   0   0   4 194]
 [  6   2   9   0   0   6 187]
 [  6   0   2   0   0   0 202]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.57      0.15      0.24       210
           2       0.76      0.16      0.27       210
           3       0.58      0.33      0.42       210
           4       0.60      0.01      0.03       210
           5       0.00      0.00      0.00       210
           6       0.22      0.03      0.05       210
           7       0.17      0.96      0.28       210

    accuracy                           0.24      1470
   macro avg       0.41      0.24      0.18      1470
weighted avg       0.41      0.24      0.18      1470

Decision Tree with 5 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.24897959183673468
Confusion Matrix of Decision Tree is:
 [[ 33  13   1 107   0   1  55]
 [  2  44   7 115   0   4  38]
 [  1  22  57 109   0   1  20]
 [  2   8   4 109   0   1  86]
 [  2  12   1 117   0   0  78]
 [  3   9   6 111   0   5  76]
 [  1   1   2  84   0   4 118]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.75      0.16      0.26       210
           2       0.40      0.21      0.28       210
           3       0.73      0.27      0.40       210
           4       0.14      0.52      0.23       210
           5       0.00      0.00      0.00       210
           6       0.31      0.02      0.04       210
           7       0.25      0.56      0.35       210

    accuracy                           0.25      1470
   macro avg       0.37      0.25      0.22      1470
weighted avg       0.37      0.25      0.22      1470

Decision Tree with 6 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after MinMax Scaling is: 0.2714285714285714
Confusion Matrix of Decision Tree is:
 [[ 32  12   2 106   1   2  55]
 [  4  71   3  88   2   3  39]
 [  1  18  66 103   1   1  20]
 [  2  11   2 106   0   4  85]
 [  0  14   2 113   2   1  78]
 [  5  12   3 110   0   5  75]
 [  0   4   1  83   0   5 117]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.73      0.15      0.25       210
           2       0.50      0.34      0.40       210
           3       0.84      0.31      0.46       210
           4       0.15      0.50      0.23       210
           5       0.33      0.01      0.02       210
           6       0.24      0.02      0.04       210
           7       0.25      0.56      0.34       210

    accuracy                           0.27      1470
   macro avg       0.43      0.27      0.25      1470
weighted avg       0.43      0.27      0.25      1470

Decision Tree with 7 max_depth
A

Accuracy of Decision Tree after MinMax Scaling is: 0.27414965986394557
Confusion Matrix of Decision Tree is:
 [[69 19  6 30 23 34 29]
 [15 75 15 25 34 27 19]
 [ 9 15 63 65 25 22 11]
 [28  8  3 44 36 38 53]
 [35 21  2 25 45 35 47]
 [24 22 10 38 26 37 53]
 [25  8  3 32 32 40 70]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.33      0.33       210
           2       0.45      0.36      0.40       210
           3       0.62      0.30      0.40       210
           4       0.17      0.21      0.19       210
           5       0.20      0.21      0.21       210
           6       0.16      0.18      0.17       210
           7       0.25      0.33      0.28       210

    accuracy                           0.27      1470
   macro avg       0.31      0.27      0.28      1470
weighted avg       0.31      0.27      0.28      1470

Decision Tree with 15 max_depth
Accuracy of Decision Tree after MinMax Scaling i

Accuracy of Random Forest after MinMax Scaling is: 0.3346938775510204
Confusion Matrix of Random Forest is:
 [[ 56  21   3  12   4   6 108]
 [ 26  95   9  10   5   9  56]
 [  9  37 144   6   1   3  10]
 [ 28  25  12  15  10  10 110]
 [ 18  26   7   7   8   2 142]
 [ 20  34   8   8  11  15 114]
 [ 19   8   2   7   5  10 159]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.32      0.27      0.29       210
           2       0.39      0.45      0.42       210
           3       0.78      0.69      0.73       210
           4       0.23      0.07      0.11       210
           5       0.18      0.04      0.06       210
           6       0.27      0.07      0.11       210
           7       0.23      0.76      0.35       210

    accuracy                           0.33      1470
   macro avg       0.34      0.33      0.30      1470
weighted avg       0.34      0.33      0.30      1470

Random Forest with 3 max_depth
A

Accuracy of Random Forest after MinMax Scaling is: 0.44693877551020406
Confusion Matrix of Random Forest is:
 [[ 73   8   0  39  42  16  32]
 [  6 116   5  23  29  19  12]
 [  3  26 153  16   7   1   4]
 [  9  18   2  69  40  28  44]
 [ 11  17   5  35  88  16  38]
 [  9  19   3  39  31  50  59]
 [  4   5   0  40  35  18 108]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.63      0.35      0.45       210
           2       0.56      0.55      0.55       210
           3       0.91      0.73      0.81       210
           4       0.26      0.33      0.29       210
           5       0.32      0.42      0.37       210
           6       0.34      0.24      0.28       210
           7       0.36      0.51      0.43       210

    accuracy                           0.45      1470
   macro avg       0.48      0.45      0.45      1470
weighted avg       0.48      0.45      0.45      1470

Random Forest with 11 max_depth

Accuracy of Random Forest after MinMax Scaling is: 0.4523809523809524
Confusion Matrix of Random Forest is:
 [[ 89   6   0  31  38  20  26]
 [ 15 115   4  24  23  19  10]
 [  8  18 157  12   4  11   0]
 [ 22  11   5  67  31  32  42]
 [ 19  22   2  28  81  23  35]
 [ 17  16   3  32  25  57  60]
 [  8   6   0  33  28  36  99]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.50      0.42      0.46       210
           2       0.59      0.55      0.57       210
           3       0.92      0.75      0.82       210
           4       0.30      0.32      0.31       210
           5       0.35      0.39      0.37       210
           6       0.29      0.27      0.28       210
           7       0.36      0.47      0.41       210

    accuracy                           0.45      1470
   macro avg       0.47      0.45      0.46      1470
weighted avg       0.47      0.45      0.46      1470

Random Forest with 19 max_depth
