In [1]:
try:
    # standard library
    import os,sys
    import re

    # third-party numerics / data handling
    import numpy as np
    import pandas as pd

    # importing algorithms
    from sklearn import svm
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.model_selection import KFold
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.tree import DecisionTreeClassifier
except Exception as e:
    # NOTE(review): a failed import is only printed here, so later cells would
    # fail with a NameError instead of the original import error.
    print("Error is due to",e)

In [2]:
# Resolve every dataset path relative to the directory the kernel was started in.
pwd = os.getcwd()

# Read the label file and flatten it into a 1-D numpy array for scikit-learn.
labels_df = pd.read_csv(pwd+"//Datasets//Kabita//Input//kabita_dataset_labels.csv")
labels = np.ravel(labels_df.to_numpy())

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_fold, y_fold, model_name, n_splits=10, random_state=7):
    """Cross-validate ``ml_model`` with shuffled K-Fold and report its scores.

    The per-fold scores and their mean (as a percentage) are printed, followed
    by a separator line. The scores are also returned so callers can collect
    them into a summary instead of reading them back from the printed output.

    Parameters
    ----------
    ml_model : estimator
        Unfitted scikit-learn estimator; handed to ``cross_val_score`` as is.
    x_fold : array-like
        Feature matrix passed to ``cross_val_score``.
    y_fold : array-like
        Target labels passed to ``cross_val_score``.
    model_name : str
        Label used in the printed messages (concatenated, so it must be a str).
    n_splits : int, default 10
        Number of folds for ``KFold``.
    random_state : int, default 7
        Seed for the fold shuffling, kept fixed so runs are comparable.

    Returns
    -------
    The per-fold scores exactly as returned by ``cross_val_score``. No
    ``scoring`` argument is given, so the estimator's own default scorer is
    used. When the call is the last statement of a notebook cell, end it with
    ``;`` to avoid echoing this value.
    """
    kfold = KFold(n_splits=n_splits, random_state=random_state, shuffle=True)
    results = cross_val_score(ml_model, x_fold, y_fold, cv=kfold)
    print("Accuracies for K-Fold for "+model_name+" :", results)
    print("Mean Accuracy of K-Fold for "+model_name+" :", results.mean()*100.0)
    print(70*"=")
    return results

### Bag-of-Words Models

In [5]:
# Features for this cell: the pre-computed TF-IDF vectors.
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tfidf_500_vectors.csv")

# Hyper-parameter grids swept further down in this cell.
neighbors_list = list(range(3, 9))
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# --- Logistic regression ---------------------------------------------------
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, model_name="Logistic Regression")

# --- K-nearest neighbours: one cross-validation run per neighbour count ----
for x in neighbors_list:
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    print("KNN with",x,"Neighbors")
    ml_training(tv_knn_model, x_df, labels, model_name="KNN Model")

# --- Naive Bayes, Gaussian and Bernoulli variants --------------------------
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, model_name="Gaussian Naive Bayes")

tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, model_name="Bernoulli Naive Bayes")

# --- Support vector classifier: one run per kernel -------------------------
for a_kernel in svm_kernels:
    tv_svm_model = svm.SVC(kernel=a_kernel)
    print("Working on SVM Kernal:", a_kernel)
    ml_training(tv_svm_model, x_df, labels, model_name="SVM")

# --- Decision tree: sweep max_depth over 1..20 -----------------------------
for x in range(1, 21):
    tv_dt_model = DecisionTreeClassifier(max_depth=x, random_state=3)
    print("Decision Tree with",x,"max_depth")
    ml_training(tv_dt_model, x_df, labels, model_name="Decision Tree")

# --- Random forest: same max_depth sweep -----------------------------------
for x in range(1, 21):
    tv_rf_model = RandomForestClassifier(random_state=3, max_depth=x)
    print("Random Forest with",x,"max_depth")
    ml_training(tv_rf_model, x_df, labels, model_name="Random Forest")

# --- Multinomial Naive Bayes -----------------------------------------------
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model, x_df, labels, model_name="Multinomial Naive Bayes")

Accuracies for K-Fold for Multinomial Naive Bayes : [0.718 0.78  0.733 0.686 0.716 0.712 0.71  0.704 0.729 0.69 ]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 71.77551020408163


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [5]:
# Features for this cell: the pre-computed CountVectorizer vectors.
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//cv_500_vectors.csv")

# Hyper-parameter grids swept further down in this cell.
neighbors_list = list(range(3, 9))
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# --- Logistic regression ---------------------------------------------------
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, model_name="Logistic Regression")

# --- K-nearest neighbours: one cross-validation run per neighbour count ----
for x in neighbors_list:
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    print("KNN with",x,"Neighbors")
    ml_training(tv_knn_model, x_df, labels, model_name="KNN Model")

# --- Naive Bayes, Gaussian and Bernoulli variants --------------------------
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, model_name="Gaussian Naive Bayes")

tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, model_name="Bernoulli Naive Bayes")

# --- Support vector classifier: one run per kernel -------------------------
for a_kernel in svm_kernels:
    tv_svm_model = svm.SVC(kernel=a_kernel)
    print("Working on SVM Kernal:", a_kernel)
    ml_training(tv_svm_model, x_df, labels, model_name="SVM")

# --- Decision tree: sweep max_depth over 1..20 -----------------------------
for x in range(1, 21):
    tv_dt_model = DecisionTreeClassifier(max_depth=x, random_state=3)
    print("Decision Tree with",x,"max_depth")
    ml_training(tv_dt_model, x_df, labels, model_name="Decision Tree")

# --- Random forest: same max_depth sweep -----------------------------------
for x in range(1, 21):
    tv_rf_model = RandomForestClassifier(random_state=3, max_depth=x)
    print("Random Forest with",x,"max_depth")
    ml_training(tv_rf_model, x_df, labels, model_name="Random Forest")

# --- Multinomial Naive Bayes -----------------------------------------------
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model, x_df, labels, model_name="Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7095238095238096
Confusion Matrix of Logistic Regression is:
 [[156   2   1   7  26  17   1]
 [  0 156  10  14   9  19   2]
 [  0   3 181  11   1  14   0]
 [  2  12  17 142  12  20   5]
 [ 26  17   9  15 133   3   7]
 [  6  11   6  18   4 120  45]
 [  2   3   0   5   3  42 155]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.81      0.74      0.78       210
           2       0.76      0.74      0.75       210
           3       0.81      0.86      0.83       210
           4       0.67      0.68      0.67       210
           5       0.71      0.63      0.67       210
           6       0.51      0.57      0.54       210
           7       0.72      0.74      0.73       210

    accuracy                           0.71      1470
   macro avg       0.71      0.71      0.71      1470
weighted avg       0.71      0.71      0.71      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7074829931972789
Confusion Matrix of SVM is:
 [[161   4   1   5  23  14   2]
 [  3 164  11  10   6  16   0]
 [  1   4 183   8   1  13   0]
 [ 10  15  18 134  10  19   4]
 [ 33  19  11  13 131   2   1]
 [ 12  18   5  20   3 118  34]
 [  7   8   0   5   2  39 149]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.77      0.74       210
           2       0.71      0.78      0.74       210
           3       0.80      0.87      0.83       210
           4       0.69      0.64      0.66       210
           5       0.74      0.62      0.68       210
           6       0.53      0.56      0.55       210
           7       0.78      0.71      0.74       210

    accuracy                           0.71      1470
   macro avg       0.71      0.71      0.71      1470
weighted avg       0.71      0.71      0.71      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.35034013605442177
Confusion Matrix of Decision Tree is:
 [[155   0   1   0   0  54   0]
 [  0   0  52   0   2 156   0]
 [  0   0 132   0   0  78   0]
 [  1   0  17   0   0 192   0]
 [100   0  26   0  20  64   0]
 [  0   0   2   0   0 208   0]
 [  2   0   0   0   0 208   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.74      0.66       210
           2       0.00      0.00      0.00       210
           3       0.57      0.63      0.60       210
           4       0.00      0.00      0.00       210
           5       0.91      0.10      0.17       210
           6       0.22      0.99      0.36       210
           7       0.00      0.00      0.00       210

    accuracy                           0.35      1470
   macro avg       0.33      0.35      0.26      1470
weighted avg       0.33      0.35      0.26      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.43673469387755104
Confusion Matrix of Decision Tree is:
 [[137   0   1   0  18  54   0]
 [  0  70  17   0   2 121   0]
 [  0   0 132   0   0  78   0]
 [  0   5  13   0   1 191   0]
 [ 67  13  22   0  54  54   0]
 [  0   1   1   0   0 204   4]
 [  2   1   0   0   0 162  45]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.67      0.65      0.66       210
           2       0.78      0.33      0.47       210
           3       0.71      0.63      0.67       210
           4       0.00      0.00      0.00       210
           5       0.72      0.26      0.38       210
           6       0.24      0.97      0.38       210
           7       0.92      0.21      0.35       210

    accuracy                           0.44      1470
   macro avg       0.58      0.44      0.41      1470
weighted avg       0.58      0.44      0.41      1470

Decision Tree with 6 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4891156462585034
Confusion Matrix of Decision Tree is:
 [[154   0   1   0   1  54   0]
 [  0 106  17   3   4  80   0]
 [  0   0 132   0   0  78   0]
 [  1   4   8   6   1 188   2]
 [ 63  18  22   0  62  45   0]
 [  0   2   1   1   0 188  18]
 [  2   1   0   0   0 136  71]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.73      0.72       210
           2       0.81      0.50      0.62       210
           3       0.73      0.63      0.68       210
           4       0.60      0.03      0.05       210
           5       0.91      0.30      0.45       210
           6       0.24      0.90      0.38       210
           7       0.78      0.34      0.47       210

    accuracy                           0.49      1470
   macro avg       0.68      0.49      0.48      1470
weighted avg       0.68      0.49      0.48      1470

Decision Tree with 8 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5761904761904761
Confusion Matrix of Decision Tree is:
 [[157   0   1   0   8  44   0]
 [  0 115  18   4   7  66   0]
 [  0   0 155   0   0  55   0]
 [  0   5  22  37   2 140   4]
 [ 44  19  22   3  92  29   1]
 [  0   2   3   1   0 179  25]
 [  2   1   1   2   4  88 112]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.77      0.75      0.76       210
           2       0.81      0.55      0.65       210
           3       0.70      0.74      0.72       210
           4       0.79      0.18      0.29       210
           5       0.81      0.44      0.57       210
           6       0.30      0.85      0.44       210
           7       0.79      0.53      0.64       210

    accuracy                           0.58      1470
   macro avg       0.71      0.58      0.58      1470
weighted avg       0.71      0.58      0.58      1470

Decision Tree with 16 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5748299319727891
Confusion Matrix of Random Forest is:
 [[155   4   1  10   8  27   5]
 [  1  98  48  12  16  35   0]
 [  0   0 135   6   0  69   0]
 [  0  11  15 109   4  67   4]
 [ 62  22  30  20  53  10  13]
 [  0   7   2  14   3 141  43]
 [  0   1   0   6   0  49 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.74      0.72       210
           2       0.69      0.47      0.56       210
           3       0.58      0.64      0.61       210
           4       0.62      0.52      0.56       210
           5       0.63      0.25      0.36       210
           6       0.35      0.67      0.46       210
           7       0.70      0.73      0.72       210

    accuracy                           0.57      1470
   macro avg       0.61      0.57      0.57      1470
weighted avg       0.61      0.57      0.57      1470

Random Forest with 4 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6523809523809524
Confusion Matrix of Random Forest is:
 [[157   4   2   8  11  28   0]
 [  0 138  21  10   8  33   0]
 [  0   0 170   6   0  34   0]
 [  0  13  21 118   5  50   3]
 [ 53  22  27  15  76   9   8]
 [  0   8   4  12   2 141  43]
 [  1   2   0   5   1  42 159]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.75      0.75       210
           2       0.74      0.66      0.70       210
           3       0.69      0.81      0.75       210
           4       0.68      0.56      0.61       210
           5       0.74      0.36      0.49       210
           6       0.42      0.67      0.52       210
           7       0.75      0.76      0.75       210

    accuracy                           0.65      1470
   macro avg       0.68      0.65      0.65      1470
weighted avg       0.68      0.65      0.65      1470

Random Forest with 12 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6741496598639456
Confusion Matrix of Random Forest is:
 [[161   4   2   9   7  27   0]
 [  0 150  16   5   6  33   0]
 [  0   0 171   8   0  31   0]
 [  0  12  21 129   4  41   3]
 [ 49  24  23  16  83   7   8]
 [  0   9   4  16   1 141  39]
 [  1   2   1   5   1  44 156]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.76      0.77      0.76       210
           2       0.75      0.71      0.73       210
           3       0.72      0.81      0.76       210
           4       0.69      0.61      0.65       210
           5       0.81      0.40      0.53       210
           6       0.44      0.67      0.53       210
           7       0.76      0.74      0.75       210

    accuracy                           0.67      1470
   macro avg       0.70      0.67      0.67      1470
weighted avg       0.70      0.67      0.67      1470

Random Forest with 20 max_dept

In [6]:
# Features for this cell: the pre-computed term-frequency vectors.
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tf_500_vectors.csv")

# Hyper-parameter grids swept further down in this cell.
neighbors_list = list(range(3, 9))
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']

# --- Logistic regression ---------------------------------------------------
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, model_name="Logistic Regression")

# --- K-nearest neighbours: one cross-validation run per neighbour count ----
for x in neighbors_list:
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    print("KNN with",x,"Neighbors")
    ml_training(tv_knn_model, x_df, labels, model_name="KNN Model")

# --- Naive Bayes, Gaussian and Bernoulli variants --------------------------
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, model_name="Gaussian Naive Bayes")

tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, model_name="Bernoulli Naive Bayes")

# --- Support vector classifier: one run per kernel -------------------------
for a_kernel in svm_kernels:
    tv_svm_model = svm.SVC(kernel=a_kernel)
    print("Working on SVM Kernal:", a_kernel)
    ml_training(tv_svm_model, x_df, labels, model_name="SVM")

# --- Decision tree: sweep max_depth over 1..20 -----------------------------
for x in range(1, 21):
    tv_dt_model = DecisionTreeClassifier(max_depth=x, random_state=3)
    print("Decision Tree with",x,"max_depth")
    ml_training(tv_dt_model, x_df, labels, model_name="Decision Tree")

# --- Random forest: same max_depth sweep -----------------------------------
for x in range(1, 21):
    tv_rf_model = RandomForestClassifier(random_state=3, max_depth=x)
    print("Random Forest with",x,"max_depth")
    ml_training(tv_rf_model, x_df, labels, model_name="Random Forest")

# --- Multinomial Naive Bayes -----------------------------------------------
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model, x_df, labels, model_name="Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7054421768707483
Confusion Matrix of Logistic Regression is:
 [[152   1   1   8  33  13   2]
 [  0 155   9  13  11  19   3]
 [  0   2 179  12   3  13   1]
 [  3  11  17 146  12  16   5]
 [ 25  15   8  19 134   2   7]
 [  6  14   5  26   3 119  37]
 [  2   3   0   4   3  46 152]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.81      0.72      0.76       210
           2       0.77      0.74      0.75       210
           3       0.82      0.85      0.83       210
           4       0.64      0.70      0.67       210
           5       0.67      0.64      0.66       210
           6       0.52      0.57      0.54       210
           7       0.73      0.72      0.73       210

    accuracy                           0.71      1470
   macro avg       0.71      0.71      0.71      1470
weighted avg       0.71      0.71      0.71      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.7142857142857143
Confusion Matrix of Bernoulli Naive Bayes is:
 [[162   1  11   2  30   4   0]
 [  1 161  24   7  13   4   0]
 [  0   1 198  10   0   1   0]
 [  7  10  33 144   6   7   3]
 [ 28  15  17  12 134   0   4]
 [  6  11  42  19   5  98  29]
 [  3   5   7   2   3  37 153]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.78      0.77      0.78       210
           2       0.79      0.77      0.78       210
           3       0.60      0.94      0.73       210
           4       0.73      0.69      0.71       210
           5       0.70      0.64      0.67       210
           6       0.65      0.47      0.54       210
           7       0.81      0.73      0.77       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.71      1470
weighted avg       0.72      0.71      0.71      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40476190476190477
Confusion Matrix of Decision Tree is:
 [[133   5   0   0  22  50   0]
 [  0  58   2   0   2 148   0]
 [  0   0  86   0   0 124   0]
 [  1   6   1   0   0 202   0]
 [ 51   5   1   0  70  81   2]
 [  0   3   0   0   0 203   4]
 [  0   0   0   0   2 163  45]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.72      0.63      0.67       210
           2       0.75      0.28      0.40       210
           3       0.96      0.41      0.57       210
           4       0.00      0.00      0.00       210
           5       0.73      0.33      0.46       210
           6       0.21      0.97      0.34       210
           7       0.88      0.21      0.34       210

    accuracy                           0.40      1470
   macro avg       0.61      0.40      0.40      1470
weighted avg       0.61      0.40      0.40      1470

Decision Tree with 6 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[123   5   1   0  32  49   0]
 [  0 129  21   1   2  57   0]
 [  0   0 132   0   0  78   0]
 [  0   6  17   0   1 186   0]
 [ 34  22  19   0  87  46   2]
 [  0   3   2   0   0 201   4]
 [  0   1   0   0   2 163  44]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.78      0.59      0.67       210
           2       0.78      0.61      0.69       210
           3       0.69      0.63      0.66       210
           4       0.00      0.00      0.00       210
           5       0.70      0.41      0.52       210
           6       0.26      0.96      0.41       210
           7       0.88      0.21      0.34       210

    accuracy                           0.49      1470
   macro avg       0.58      0.49      0.47      1470
weighted avg       0.58      0.49      0.47      1470

Decision Tree with 9 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.4863945578231292

Accuracy of Decision Tree after Standard Scaling is: 0.5469387755102041
Confusion Matrix of Decision Tree is:
 [[147   5   0  40  18   0   0]
 [  0 140   7  54   9   0   0]
 [  0  16 148  45   1   0   0]
 [  0  13  11 176   6   0   4]
 [ 50  26   5  42  85   1   1]
 [  0   4   2 177   1   1  25]
 [  1   1   0  95   3   3 107]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.74      0.70      0.72       210
           2       0.68      0.67      0.67       210
           3       0.86      0.70      0.77       210
           4       0.28      0.84      0.42       210
           5       0.69      0.40      0.51       210
           6       0.20      0.00      0.01       210
           7       0.78      0.51      0.62       210

    accuracy                           0.55      1470
   macro avg       0.60      0.55      0.53      1470
weighted avg       0.60      0.55      0.53      1470

Decision Tree with 17 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.638095238095238
Confusion Matrix of Random Forest is:
 [[153   4   0  10  12  28   3]
 [  0 131  15  13  13  37   1]
 [  0   0 158  13   0  39   0]
 [  2  11  11 121  10  51   4]
 [ 54  20   6  23  80  15  12]
 [  0   6   3  19   4 134  44]
 [  0   1   0   8   0  40 161]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.73      0.73      0.73       210
           2       0.76      0.62      0.68       210
           3       0.82      0.75      0.78       210
           4       0.58      0.58      0.58       210
           5       0.67      0.38      0.49       210
           6       0.39      0.64      0.48       210
           7       0.72      0.77      0.74       210

    accuracy                           0.64      1470
   macro avg       0.67      0.64      0.64      1470
weighted avg       0.67      0.64      0.64      1470

Random Forest with 5 max_depth


Accuracy of Random Forest after Standard Scaling is: 0.6768707482993197
Confusion Matrix of Random Forest is:
 [[151   4   0   9  17  29   0]
 [  0 142  12  11  14  31   0]
 [  0   1 164  11   0  34   0]
 [  0   9   8 133  10  47   3]
 [ 43  16   6  19 105  12   9]
 [  0   4   1  14   5 144  42]
 [  1   0   0   5   2  46 156]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.77      0.72      0.75       210
           2       0.81      0.68      0.74       210
           3       0.86      0.78      0.82       210
           4       0.66      0.63      0.65       210
           5       0.69      0.50      0.58       210
           6       0.42      0.69      0.52       210
           7       0.74      0.74      0.74       210

    accuracy                           0.68      1470
   macro avg       0.71      0.68      0.68      1470
weighted avg       0.71      0.68      0.68      1470

Random Forest with 13 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6870748299319728
Confusion Matrix of Random Forest is:
 [[150   4   0   9  18  29   0]
 [  0 146  13  11  11  29   0]
 [  0   2 167   9   0  32   0]
 [  0   8   9 131  14  45   3]
 [ 35  20   6  16 116   9   8]
 [  0   5   2  14   5 143  41]
 [  1   0   0   5   3  44 157]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.71      0.76       210
           2       0.79      0.70      0.74       210
           3       0.85      0.80      0.82       210
           4       0.67      0.62      0.65       210
           5       0.69      0.55      0.62       210
           6       0.43      0.68      0.53       210
           7       0.75      0.75      0.75       210

    accuracy                           0.69      1470
   macro avg       0.71      0.69      0.69      1470
weighted avg       0.71      0.69      0.69      1470

Accuracy of Multinomial Naive 

### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: one cross-validation run per neighbour count
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier: sweep max_depth over 1..20
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")

# Random Forest: same max_depth sweep
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")

# Multinomial Naive Bayes
# MultinomialNB needs non-negative features, so the vectors are min-max scaled
# into [0, 10] first. The scaler sits inside a pipeline so that cross-validation
# re-fits it on the training part of every fold; fitting it once on the full
# matrix would let the held-out rows influence the scaling (data leakage).
mms_scale=MinMaxScaler(feature_range=(0,10))
# Kernel-wide setting: numpy arrays print with 3 decimals from here on.
np.set_printoptions(precision=3)
tv_mnb_model = make_pipeline(mms_scale, MultinomialNB())
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7639455782312925
Confusion Matrix of Logistic Regression is:
 [[160   2   2  10  25  10   1]
 [  4 172   8   7  10   6   3]
 [  0   4 197   2   3   4   0]
 [  3  13   2 158  15  11   8]
 [ 24  15   5  15 146   3   2]
 [  5  10   3  17   5 134  36]
 [  4   1   1   3   8  37 156]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.80      0.76      0.78       210
           2       0.79      0.82      0.81       210
           3       0.90      0.94      0.92       210
           4       0.75      0.75      0.75       210
           5       0.69      0.70      0.69       210
           6       0.65      0.64      0.65       210
           7       0.76      0.74      0.75       210

    accuracy                           0.76      1470
   macro avg       0.76      0.76      0.76      1470
weighted avg       0.76      0.76      0.76      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5387755102040817
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 95   2  12   7  49  34  11]
 [  0 110  28   9  33  28   2]
 [  0  15 168   6   0  21   0]
 [  6  14  29  29  65  47  20]
 [ 14  34   7   6 129   5  15]
 [  5   7   7  12  12 110  57]
 [  0   0   0   1   4  54 151]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.79      0.45      0.58       210
           2       0.60      0.52      0.56       210
           3       0.67      0.80      0.73       210
           4       0.41      0.14      0.21       210
           5       0.44      0.61      0.51       210
           6       0.37      0.52      0.43       210
           7       0.59      0.72      0.65       210

    accuracy                           0.54      1470
   macro avg       0.55      0.54      0.52      1470
weighted avg       0.55      0.54      0.52      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3585034013605442
Confusion Matrix of Decision Tree is:
 [[  0   0   0   0 170   0  40]
 [  0   0   1  11 171   0  27]
 [  0   0 129   2  58   0  21]
 [  0   0   5   9 137   0  59]
 [  0   0   1   3 191   0  15]
 [  0   0   4   3  51   0 152]
 [  0   0   0   0  12   0 198]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.92      0.61      0.74       210
           4       0.32      0.04      0.08       210
           5       0.24      0.91      0.38       210
           6       0.00      0.00      0.00       210
           7       0.39      0.94      0.55       210

    accuracy                           0.36      1470
   macro avg       0.27      0.36      0.25      1470
weighted avg       0.27      0.36      0.25      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.46462585034013604
Confusion Matrix of Decision Tree is:
 [[130  40   0   0   0  28  12]
 [  5 177   1   0   0  24   3]
 [  2  56 129   2   0  21   0]
 [ 17 125   4   5   0  32  27]
 [ 29 165   1   0   0   4  11]
 [ 14  39   2   3   0  71  81]
 [  5   7   0   0   0  27 171]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.64      0.62      0.63       210
           2       0.29      0.84      0.43       210
           3       0.94      0.61      0.74       210
           4       0.50      0.02      0.05       210
           5       0.00      0.00      0.00       210
           6       0.34      0.34      0.34       210
           7       0.56      0.81      0.66       210

    accuracy                           0.46      1470
   macro avg       0.47      0.46      0.41      1470
weighted avg       0.47      0.46      0.41      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.5292517006802722
Confusion Matrix of Decision Tree is:
 [[130   6   2   0  34  34   4]
 [  5 137   3   0  38  24   3]
 [  2  51 132   3   5  17   0]
 [ 17  33   9   4  87  46  14]
 [ 29  26   3   0 137   8   7]
 [ 14  14   3   2  24  86  67]
 [  5   0   1   0   7  45 152]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.64      0.62      0.63       210
           2       0.51      0.65      0.57       210
           3       0.86      0.63      0.73       210
           4       0.44      0.02      0.04       210
           5       0.41      0.65      0.51       210
           6       0.33      0.41      0.37       210
           7       0.62      0.72      0.67       210

    accuracy                           0.53      1470
   macro avg       0.55      0.53      0.50      1470
weighted avg       0.55      0.53      0.50      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5775510204081633
Confusion Matrix of Decision Tree is:
 [[125  11   3  18  26  15  12]
 [ 10 131  11  21  24   9   4]
 [  7   3 181   8   6   5   0]
 [ 15  17  15  89  32  30  12]
 [ 21  23   5  36 108  12   5]
 [ 18  12   6  26  11  76  61]
 [  7   1   5   7  12  39 139]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.62      0.60      0.61       210
           2       0.66      0.62      0.64       210
           3       0.80      0.86      0.83       210
           4       0.43      0.42      0.43       210
           5       0.49      0.51      0.50       210
           6       0.41      0.36      0.38       210
           7       0.60      0.66      0.63       210

    accuracy                           0.58      1470
   macro avg       0.57      0.58      0.57      1470
weighted avg       0.57      0.58      0.57      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5578231292517006
Confusion Matrix of Decision Tree is:
 [[128   8   7  22  24  12   9]
 [  8 123  12  23  28  13   3]
 [  5   6 179  11   3   6   0]
 [  9  17  12  85  34  34  19]
 [ 28  30   4  26  98  15   9]
 [ 15  12   7  30  12  78  56]
 [  4   3   7   8  15  44 129]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.65      0.61      0.63       210
           2       0.62      0.59      0.60       210
           3       0.79      0.85      0.82       210
           4       0.41      0.40      0.41       210
           5       0.46      0.47      0.46       210
           6       0.39      0.37      0.38       210
           7       0.57      0.61      0.59       210

    accuracy                           0.56      1470
   macro avg       0.55      0.56      0.56      1470
weighted avg       0.55      0.56      0.56      1470

Random Forest with 1 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.5197278911564626
Confusion Matrix of Random Forest is:
 [[107   3   0   0  52   9  39]
 [  2 130  18   0  36   7  17]
 [  3  17 170   0   4  11   5]
 [  9  31  28   2  71   8  61]
 [ 11  39   7   0 131   0  22]
 [  4   8  14   0  19  17 148]
 [  0   0   0   0   3   0 207]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.51      0.62       210
           2       0.57      0.62      0.59       210
           3       0.72      0.81      0.76       210
           4       1.00      0.01      0.02       210
           5       0.41      0.62      0.50       210
           6       0.33      0.08      0.13       210
           7       0.41      0.99      0.58       210

    accuracy                           0.52      1470
   macro avg       0.60      0.52      0.46      1470
weighted avg       0.60      0.52      0.46      1470

Random Forest with 3 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7068027210884353
Confusion Matrix of Random Forest is:
 [[137   1   0   9  32  23   8]
 [  2 146   7   9  29  13   4]
 [  1   2 188   6   4   9   0]
 [  3   3   4 125  27  35  13]
 [ 17  18   0  10 152   6   7]
 [  4   2   1  12  10 113  68]
 [  2   0   0   2   3  25 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.83      0.65      0.73       210
           2       0.85      0.70      0.76       210
           3       0.94      0.90      0.92       210
           4       0.72      0.60      0.65       210
           5       0.59      0.72      0.65       210
           6       0.50      0.54      0.52       210
           7       0.64      0.85      0.73       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.71      1470
weighted avg       0.72      0.71      0.71      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.7108843537414966
Confusion Matrix of Random Forest is:
 [[143   2   0   6  29  23   7]
 [  3 151   7  13  23  11   2]
 [  1   8 185   3   5   8   0]
 [  5   4   4 133  23  32   9]
 [ 22  18   1  15 144   4   6]
 [  5   4   1  16   7 111  66]
 [  1   0   0   1   3  27 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.68      0.73       210
           2       0.81      0.72      0.76       210
           3       0.93      0.88      0.91       210
           4       0.71      0.63      0.67       210
           5       0.62      0.69      0.65       210
           6       0.51      0.53      0.52       210
           7       0.66      0.85      0.74       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.71      1470
weighted avg       0.72      0.71      0.71      1470

Random Forest with 19 max_dept

In [8]:
# GKB BERT vectorized data
# Benchmarks every classifier family on these sentence embeddings through
# `ml_training`, which runs 10-fold cross-validation and prints the per-fold
# and mean accuracies.
from sklearn.pipeline import make_pipeline  # local import: only the scaled Naive Bayes step needs it

x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_gkb.csv")

# Logistic regression (solver allowed up to 1000 iterations)
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: one cross-validated run per neighbourhood size
neighbors_list = [3, 4, 5, 6, 7, 8]
for n_neighbors in neighbors_list:
    print("KNN with",n_neighbors,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier: sweep max_depth from 1 to 20 with a fixed seed
for depth in range(1,21):
    print("Decision Tree with",depth,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=depth)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")

# Random Forest: same max_depth sweep and seed as the decision tree
for depth in range(1,21):
    print("Random Forest with",depth,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=depth, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")

# Multinomial Naive Bayes on MinMax-scaled features (range 0..10).
# The scaler lives inside the pipeline so that cross-validation re-fits it on
# the training part of every fold. Fitting it once on the full dataset before
# cross-validating would let each held-out fold's min/max influence the
# preprocessing (data leakage).
# NOTE(review): a held-out value below the training-fold minimum now scales to
# a slightly negative number; MinMaxScaler(clip=True) avoids that if the
# installed scikit-learn version supports it - confirm before enabling.
mms_scale=MinMaxScaler(feature_range=(0,10))
# Global NumPy print setting: also affects arrays printed by later cells.
np.set_printoptions(precision=3)
tv_mnb_model = make_pipeline(mms_scale, MultinomialNB())
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.5870748299319728
Confusion Matrix of Logistic Regression is:
 [[118  11   2  13  46  10  10]
 [  9 124  10  17  29  13   8]
 [  0  14 170   6   5   3  12]
 [  9  18  15  96  40  12  20]
 [ 12  15   4  18 135   6  20]
 [ 13  16   5  12  20  61  83]
 [  5   4   1   6   8  27 159]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.71      0.56      0.63       210
           2       0.61      0.59      0.60       210
           3       0.82      0.81      0.82       210
           4       0.57      0.46      0.51       210
           5       0.48      0.64      0.55       210
           6       0.46      0.29      0.36       210
           7       0.51      0.76      0.61       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.58      1470
weighted avg       0.60      0.59      0.58      1470

KNN with 3 N

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after Standard Scaling is: 0.5972789115646259
Confusion Matrix of SVM is:
 [[117   6   0  15  47  17   8]
 [  9 131  11  17  25  11   6]
 [  0  23 164   6   3  14   0]
 [  7  21  11 104  39  14  14]
 [ 14  16   3  16 138   9  14]
 [ 13  21   2  25  14  90  45]
 [  4   4   2  15  12  39 134]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.56      0.63       210
           2       0.59      0.62      0.61       210
           3       0.85      0.78      0.81       210
           4       0.53      0.50      0.51       210
           5       0.50      0.66      0.57       210
           6       0.46      0.43      0.45       210
           7       0.61      0.64      0.62       210

    accuracy                           0.60      1470
   macro avg       0.61      0.60      0.60      1470
weighted avg       0.61      0.60      0.60      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.32448979591836735
Confusion Matrix of Decision Tree is:
 [[  0  12   0   0 159   0  39]
 [  0  37  18   0 102   0  53]
 [  0  13 118   0  63   0  16]
 [  0  11   4   0 164   0  31]
 [  0  16   1   0 162   0  31]
 [  0   8   1   0  77   0 124]
 [  0   1   0   0  49   0 160]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.38      0.18      0.24       210
           3       0.83      0.56      0.67       210
           4       0.00      0.00      0.00       210
           5       0.21      0.77      0.33       210
           6       0.00      0.00      0.00       210
           7       0.35      0.76      0.48       210

    accuracy                           0.32      1470
   macro avg       0.25      0.32      0.25      1470
weighted avg       0.25      0.32      0.25      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.37142857142857144
Confusion Matrix of Decision Tree is:
 [[117   4   8   0  42  27  12]
 [ 45  15  40   0  57  41  12]
 [ 31   7 124   0  32   4  12]
 [ 83   3  12   0  81   9  22]
 [ 51  10   7   0 111  13  18]
 [ 60   3   6   0  17  50  74]
 [ 47   0   1   0   2  31 129]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.27      0.56      0.36       210
           2       0.36      0.07      0.12       210
           3       0.63      0.59      0.61       210
           4       0.00      0.00      0.00       210
           5       0.32      0.53      0.40       210
           6       0.29      0.24      0.26       210
           7       0.46      0.61      0.53       210

    accuracy                           0.37      1470
   macro avg       0.33      0.37      0.33      1470
weighted avg       0.33      0.37      0.33      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.38639455782312926
Confusion Matrix of Decision Tree is:
 [[117  16   0   0  40  25  12]
 [ 45  56  32   0  47  18  12]
 [ 31  25 121   0  18   3  12]
 [ 83  19   9   0  70   7  22]
 [ 51  25   2   0 106   8  18]
 [ 60  20   1   0  16  39  74]
 [ 47   9   1   0   2  22 129]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.27      0.56      0.36       210
           2       0.33      0.27      0.29       210
           3       0.73      0.58      0.64       210
           4       0.00      0.00      0.00       210
           5       0.35      0.50      0.42       210
           6       0.32      0.19      0.23       210
           7       0.46      0.61      0.53       210

    accuracy                           0.39      1470
   macro avg       0.35      0.39      0.35      1470
weighted avg       0.35      0.39      0.35      1470

Decision Tree with 5 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4095238095238095
Confusion Matrix of Decision Tree is:
 [[116  13   2  22  20  25  12]
 [ 43  49  34  16  36  20  12]
 [ 20  14 132  21   8   3  12]
 [ 77  10  15  46  32   8  22]
 [ 51  21   2  23  87   8  18]
 [ 54  15   7  12   5  43  74]
 [ 37   5  11   1   1  26 129]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.29      0.55      0.38       210
           2       0.39      0.23      0.29       210
           3       0.65      0.63      0.64       210
           4       0.33      0.22      0.26       210
           5       0.46      0.41      0.44       210
           6       0.32      0.20      0.25       210
           7       0.46      0.61      0.53       210

    accuracy                           0.41      1470
   macro avg       0.41      0.41      0.40      1470
weighted avg       0.41      0.41      0.40      1470

Decision Tree with 6 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.37482993197278913
Confusion Matrix of Decision Tree is:
 [[ 80  10   2  31  33  39  15]
 [ 18  58  39  32  33  19  11]
 [  1  20 145  16  21   5   2]
 [ 28  16  12  62  34  35  23]
 [ 43  24   2  42  69  20  10]
 [ 27  12   6  21  46  58  40]
 [ 22   7   6  17  30  49  79]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.38      0.37       210
           2       0.39      0.28      0.32       210
           3       0.68      0.69      0.69       210
           4       0.28      0.30      0.29       210
           5       0.26      0.33      0.29       210
           6       0.26      0.28      0.27       210
           7       0.44      0.38      0.41       210

    accuracy                           0.37      1470
   macro avg       0.38      0.37      0.38      1470
weighted avg       0.38      0.37      0.38      1470

Decision Tree with 14 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.29931972789115646
Confusion Matrix of Random Forest is:
 [[  4   3  41   0  83   0  79]
 [  1   1 110   1  41   0  56]
 [  1   0 169   2  11   0  27]
 [  2   1  80   1  81   0  45]
 [  3   1  86   0  84   0  36]
 [  3   0  24   0  35   0 148]
 [  7   0   5   0  17   0 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.19      0.02      0.03       210
           2       0.17      0.00      0.01       210
           3       0.33      0.80      0.47       210
           4       0.25      0.00      0.01       210
           5       0.24      0.40      0.30       210
           6       0.00      0.00      0.00       210
           7       0.32      0.86      0.46       210

    accuracy                           0.30      1470
   macro avg       0.21      0.30      0.18      1470
weighted avg       0.21      0.30      0.18      1470

Random Forest with 2 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.34829931972789113
Confusion Matrix of Random Forest is:
 [[ 45   7  16   3  98   0  41]
 [  4   5  79   4  63   0  55]
 [  4  12 152   1  14   0  27]
 [  8  14  32   8 108   0  40]
 [  8  12  26   4 128   0  32]
 [ 18   5  11   1  41   0 134]
 [ 14   2   2   0  18   0 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.45      0.21      0.29       210
           2       0.09      0.02      0.04       210
           3       0.48      0.72      0.58       210
           4       0.38      0.04      0.07       210
           5       0.27      0.61      0.38       210
           6       0.00      0.00      0.00       210
           7       0.35      0.83      0.49       210

    accuracy                           0.35      1470
   macro avg       0.29      0.35      0.26      1470
weighted avg       0.29      0.35      0.26      1470

Random Forest with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.3952380952380952
Confusion Matrix of Random Forest is:
 [[103  16   2  11  42  13  23]
 [ 39  59  39   5  16  24  28]
 [ 23  27 137   4   3   3  13]
 [ 46  40  11  16  60   2  35]
 [ 34  49   6   4  85   6  26]
 [ 51  11   3   3  15  17 110]
 [ 31   4   0   0   4   7 164]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.31      0.49      0.38       210
           2       0.29      0.28      0.28       210
           3       0.69      0.65      0.67       210
           4       0.37      0.08      0.13       210
           5       0.38      0.40      0.39       210
           6       0.24      0.08      0.12       210
           7       0.41      0.78      0.54       210

    accuracy                           0.40      1470
   macro avg       0.38      0.40      0.36      1470
weighted avg       0.38      0.40      0.36      1470

Random Forest with 4 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.508843537414966
Confusion Matrix of Random Forest is:
 [[109   7   1  23  36  22  12]
 [ 21  88  27  29  18  16  11]
 [  3   9 176  12   3   6   1]
 [ 24  15  11  79  40  18  23]
 [ 24  26   4  30  94   8  24]
 [ 30   4   1  25  12  81  57]
 [ 16   2   0  10   4  57 121]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.48      0.52      0.50       210
           2       0.58      0.42      0.49       210
           3       0.80      0.84      0.82       210
           4       0.38      0.38      0.38       210
           5       0.45      0.45      0.45       210
           6       0.39      0.39      0.39       210
           7       0.49      0.58      0.53       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Random Forest with 12 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.5238095238095238
Confusion Matrix of Random Forest is:
 [[105   3   2  31  38  21  10]
 [ 16 100  23  29  14  18  10]
 [  3  11 177  10   2   5   2]
 [ 18  16  11  92  34  14  25]
 [ 22  35   4  25  95   6  23]
 [ 21   3   2  30  14  73  67]
 [ 13   4   0   6   5  54 128]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.50      0.51       210
           2       0.58      0.48      0.52       210
           3       0.81      0.84      0.83       210
           4       0.41      0.44      0.42       210
           5       0.47      0.45      0.46       210
           6       0.38      0.35      0.36       210
           7       0.48      0.61      0.54       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.52      1470
weighted avg       0.52      0.52      0.52      1470

Random Forest with 20 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
# Benchmarks every classifier family on these sentence embeddings through
# `ml_training`, which runs 10-fold cross-validation and prints the per-fold
# and mean accuracies.
from sklearn.pipeline import make_pipeline  # local import: only the scaled Naive Bayes step needs it

x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_ndisbert.csv")

# Logistic regression (solver allowed up to 1000 iterations)
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: one cross-validated run per neighbourhood size
neighbors_list = [3, 4, 5, 6, 7, 8]
for n_neighbors in neighbors_list:
    print("KNN with",n_neighbors,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier: sweep max_depth from 1 to 20 with a fixed seed
for depth in range(1,21):
    print("Decision Tree with",depth,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=depth)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")

# Random Forest: same max_depth sweep and seed as the decision tree
for depth in range(1,21):
    print("Random Forest with",depth,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=depth, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")

# Multinomial Naive Bayes on MinMax-scaled features (range 0..10).
# The scaler lives inside the pipeline so that cross-validation re-fits it on
# the training part of every fold. Fitting it once on the full dataset before
# cross-validating would let each held-out fold's min/max influence the
# preprocessing (data leakage).
# NOTE(review): a held-out value below the training-fold minimum now scales to
# a slightly negative number; MinMaxScaler(clip=True) avoids that if the
# installed scikit-learn version supports it - confirm before enabling.
mms_scale=MinMaxScaler(feature_range=(0,10))
# Global NumPy print setting: also affects arrays printed by later cells.
np.set_printoptions(precision=3)
tv_mnb_model = make_pipeline(mms_scale, MultinomialNB())
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7510204081632653
Confusion Matrix of Logistic Regression is:
 [[173   6   1   4  20   5   1]
 [  3 159  10  11  15  12   0]
 [  0   4 190   9   2   5   0]
 [ 11   8   6 156  11  16   2]
 [ 29   9   5  11 148   5   3]
 [ 10  13   6  14   4 119  44]
 [  3   0   1   2   4  41 159]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.76      0.82      0.79       210
           2       0.80      0.76      0.78       210
           3       0.87      0.90      0.89       210
           4       0.75      0.74      0.75       210
           5       0.73      0.70      0.71       210
           6       0.59      0.57      0.58       210
           7       0.76      0.76      0.76       210

    accuracy                           0.75      1470
   macro avg       0.75      0.75      0.75      1470
weighted avg       0.75      0.75      0.75      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5122448979591837
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 93   8  22   4  41  35   7]
 [  5  91  29   8  28  38  11]
 [  9  19 159   6   4  13   0]
 [ 19  10  38  56  41  37   9]
 [ 13  16  35  15  93  29   9]
 [ 12  11   1  15   2  75  94]
 [  0   1   0   0   0  23 186]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.62      0.44      0.52       210
           2       0.58      0.43      0.50       210
           3       0.56      0.76      0.64       210
           4       0.54      0.27      0.36       210
           5       0.44      0.44      0.44       210
           6       0.30      0.36      0.33       210
           7       0.59      0.89      0.71       210

    accuracy                           0.51      1470
   macro avg       0.52      0.51      0.50      1470
weighted avg       0.52      0.51      0.50      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.373469387755102
Confusion Matrix of Decision Tree is:
 [[  0   0  13   0 176  15   6]
 [  0   0   7   0 170  20  13]
 [  0   0 130   0  73   7   0]
 [  0   0   7   0 183  11   9]
 [  0   0  12   0 189   6   3]
 [  0   0   4   0  59  69  78]
 [  0   0   0   0  23  26 161]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.75      0.62      0.68       210
           4       0.00      0.00      0.00       210
           5       0.22      0.90      0.35       210
           6       0.45      0.33      0.38       210
           7       0.60      0.77      0.67       210

    accuracy                           0.37      1470
   macro avg       0.29      0.37      0.30      1470
weighted avg       0.29      0.37      0.30      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.43673469387755104
Confusion Matrix of Decision Tree is:
 [[124   0  13   0  52  15   6]
 [ 20   9   7   0 150  11  13]
 [  9   0 130   0  64   7   0]
 [ 21   1   7   0 162  10   9]
 [ 38   0  12   0 151   6   3]
 [ 22   2   4   0  37  67  78]
 [ 20   2   0   0   3  24 161]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.59      0.53       210
           2       0.64      0.04      0.08       210
           3       0.75      0.62      0.68       210
           4       0.00      0.00      0.00       210
           5       0.24      0.72      0.36       210
           6       0.48      0.32      0.38       210
           7       0.60      0.77      0.67       210

    accuracy                           0.44      1470
   macro avg       0.46      0.44      0.39      1470
weighted avg       0.46      0.44      0.39      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4857142857142857
Confusion Matrix of Decision Tree is:
 [[124   1  12   0  52  16   5]
 [ 20  94   5   1  66  11  13]
 [  9  14 126   0  54   7   0]
 [ 21  14   4   1 151  10   9]
 [ 38  11   8   1 143   6   3]
 [ 22  10   3   1  29  72  73]
 [ 20   3   1   0   1  31 154]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.59      0.53       210
           2       0.64      0.45      0.53       210
           3       0.79      0.60      0.68       210
           4       0.25      0.00      0.01       210
           5       0.29      0.68      0.41       210
           6       0.47      0.34      0.40       210
           7       0.60      0.73      0.66       210

    accuracy                           0.49      1470
   macro avg       0.50      0.49      0.46      1470
weighted avg       0.50      0.49      0.46      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5380952380952381
Confusion Matrix of Decision Tree is:
 [[109   9   4  28  36  14  10]
 [  9 108   5  20  49  14   5]
 [  4   6 167  11  17   5   0]
 [ 26  16   9  88  50  14   7]
 [ 29  11   6  43 110   6   5]
 [ 13  19   7  19  16  81  55]
 [  3  10   3   8  11  47 128]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.56      0.52      0.54       210
           2       0.60      0.51      0.56       210
           3       0.83      0.80      0.81       210
           4       0.41      0.42      0.41       210
           5       0.38      0.52      0.44       210
           6       0.45      0.39      0.41       210
           7       0.61      0.61      0.61       210

    accuracy                           0.54      1470
   macro avg       0.55      0.54      0.54      1470
weighted avg       0.55      0.54      0.54      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5374149659863946
Confusion Matrix of Decision Tree is:
 [[115  14   4  24  32  14   7]
 [ 10 116  10  20  32  18   4]
 [  6   9 165  11  12   6   1]
 [ 26  25   7  93  32  22   5]
 [ 31  29   8  41  90   6   5]
 [ 14  19   8  19  13  90  47]
 [  8   8   4  10   9  50 121]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.55      0.55       210
           2       0.53      0.55      0.54       210
           3       0.80      0.79      0.79       210
           4       0.43      0.44      0.43       210
           5       0.41      0.43      0.42       210
           6       0.44      0.43      0.43       210
           7       0.64      0.58      0.60       210

    accuracy                           0.54      1470
   macro avg       0.54      0.54      0.54      1470
weighted avg       0.54      0.54      0.54      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6673469387755102
Confusion Matrix of Random Forest is:
 [[131   3   1  16  35  21   3]
 [  2 136   5  16  27  18   6]
 [  8   6 172  11   7   6   0]
 [  5   9   5 128  32  27   4]
 [ 21  11   2  23 129  18   6]
 [ 10   4   0  24   4  99  69]
 [  1   0   0   0   1  22 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.62      0.68       210
           2       0.80      0.65      0.72       210
           3       0.93      0.82      0.87       210
           4       0.59      0.61      0.60       210
           5       0.55      0.61      0.58       210
           6       0.47      0.47      0.47       210
           7       0.68      0.89      0.77       210

    accuracy                           0.67      1470
   macro avg       0.68      0.67      0.67      1470
weighted avg       0.68      0.67      0.67      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6965986394557823
Confusion Matrix of Random Forest is:
 [[142   3   1   9  30  22   3]
 [  2 146   5  17  20  16   4]
 [  7   6 176  11   5   5   0]
 [  6   8   5 140  25  23   3]
 [ 22  12   3  21 132  14   6]
 [ 18   7   0  15   3 105  62]
 [  2   1   0   0   2  22 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.68      0.69       210
           2       0.80      0.70      0.74       210
           3       0.93      0.84      0.88       210
           4       0.66      0.67      0.66       210
           5       0.61      0.63      0.62       210
           6       0.51      0.50      0.50       210
           7       0.70      0.87      0.78       210

    accuracy                           0.70      1470
   macro avg       0.70      0.70      0.70      1470
weighted avg       0.70      0.70      0.70      1470

Random Forest with 17 max_dept

In [10]:
# V BERT vectorized data
# Build the path from its components instead of gluing "//"-separated
# fragments together, so the separator is always the platform's own.
x_df = pd.read_csv(
    os.path.join(
        pwd,
        "Datasets",
        "Kabita",
        "SentenceTransformers",
        "bert_vectorized_kabita_dataset_vbert.csv",
    )
)

# Every model below is scored by ml_training (10-fold CV) against the same
# feature frame and the shared `labels` array loaded earlier in the notebook.

# Logistic regression
ml_training(LogisticRegression(max_iter=1000), x_df, labels, "Logistic Regression")

# KNN Model: sweep the neighbourhood size
for n_neighbors in (3, 4, 5, 6, 7, 8):
    print("KNN with", n_neighbors, "Neighbors")
    ml_training(KNeighborsClassifier(n_neighbors=n_neighbors), x_df, labels, "KNN Model")

# Gaussian Naive Bayes
ml_training(GaussianNB(), x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
ml_training(BernoulliNB(), x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
for kernel_name in ('linear', 'poly', 'rbf', 'sigmoid'):
    print("Working on SVM Kernal:", kernel_name)
    ml_training(svm.SVC(kernel=kernel_name), x_df, labels, "SVM")

# Depth sweep shared by both tree-based models (max_depth 1..20 inclusive)
max_depths = range(1, 21)

# Decision Tree Classifier
for depth in max_depths:
    print("Decision Tree with", depth, "max_depth")
    ml_training(DecisionTreeClassifier(random_state=3, max_depth=depth), x_df, labels, "Decision Tree")

# Random Forest
for depth in max_depths:
    print("Random Forest with", depth, "max_depth")
    ml_training(RandomForestClassifier(max_depth=depth, random_state=3), x_df, labels, "Random Forest")

# scaling using MinMax scaler
# Rescale every feature into [0, 10] before Multinomial NB -- presumably because
# that model needs non-negative inputs and the embeddings can be negative (confirm).
# NOTE(review): the scaler is fitted on ALL rows before ml_training splits the
# folds, so each held-out fold influences the min/max used for scaling. Consider
# wrapping scaler + model in a sklearn Pipeline if leakage-free numbers are needed.
m_df = MinMaxScaler(feature_range=(0, 10)).fit_transform(x_df)
# Global NumPy print setting: from here on (including later cells) the per-fold
# accuracy array printed by ml_training is shown with 3 decimals.
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
ml_training(MultinomialNB(), m_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7714285714285715
Confusion Matrix of Logistic Regression is:
 [[162   2   0   3  36   5   2]
 [  2 170  10   8  11   8   1]
 [  0   1 197   6   1   5   0]
 [  6  11   4 157  17  13   2]
 [ 35   9   1  10 143   8   4]
 [ 10  15   4  15   7 136  23]
 [  5   0   1   1   3  31 169]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.74      0.77      0.75       210
           2       0.82      0.81      0.81       210
           3       0.91      0.94      0.92       210
           4       0.79      0.75      0.77       210
           5       0.66      0.68      0.67       210
           6       0.66      0.65      0.65       210
           7       0.84      0.80      0.82       210

    accuracy                           0.77      1470
   macro avg       0.77      0.77      0.77      1470
weighted avg       0.77      0.77      0.77      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7795918367346939
Confusion Matrix of SVM is:
 [[171   1   0   1  30   6   1]
 [  4 178  10   3  12   2   1]
 [  0   3 199   6   0   2   0]
 [ 10  11   5 158  13  11   2]
 [ 39   8   2  13 139   6   3]
 [  6  16   8  12   6 135  27]
 [  2   1   0   1   4  36 166]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.74      0.81      0.77       210
           2       0.82      0.85      0.83       210
           3       0.89      0.95      0.92       210
           4       0.81      0.75      0.78       210
           5       0.68      0.66      0.67       210
           6       0.68      0.64      0.66       210
           7       0.83      0.79      0.81       210

    accuracy                           0.78      1470
   macro avg       0.78      0.78      0.78      1470
weighted avg       0.78      0.78      0.78      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3360544217687075
Confusion Matrix of Decision Tree is:
 [[182   0   2   1   0   0  25]
 [151   0   9   9   0   0  41]
 [ 61   0 125  10   0   0  14]
 [144   0  13  20   0   0  33]
 [190   0   3   2   0   0  15]
 [ 78   0   7   4   0   0 121]
 [ 41   0   0   2   0   0 167]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.21      0.87      0.34       210
           2       0.00      0.00      0.00       210
           3       0.79      0.60      0.68       210
           4       0.42      0.10      0.16       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.40      0.80      0.53       210

    accuracy                           0.34      1470
   macro avg       0.26      0.34      0.24      1470
weighted avg       0.26      0.34      0.24      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40476190476190477
Confusion Matrix of Decision Tree is:
 [[ 85   0   2   1  97  18   7]
 [ 11  13   4   1 140  39   2]
 [ 21   5 122   8  40  13   1]
 [ 22   8   6  19 122  23  10]
 [ 30   2   3   0 160   8   7]
 [ 22   4   3   4  56  76  45]
 [  4   1   0   1  37  47 120]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.44      0.40      0.42       210
           2       0.39      0.06      0.11       210
           3       0.87      0.58      0.70       210
           4       0.56      0.09      0.16       210
           5       0.25      0.76      0.37       210
           6       0.34      0.36      0.35       210
           7       0.62      0.57      0.60       210

    accuracy                           0.40      1470
   macro avg       0.50      0.40      0.39      1470
weighted avg       0.50      0.40      0.39      1470

Decision Tree with 4 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5319727891156463
Confusion Matrix of Decision Tree is:
 [[ 98   8   1  33  44  16  10]
 [ 11 133   9  22  22   9   4]
 [  1   8 160  15  13  10   3]
 [ 13  21  11  90  45  20  10]
 [ 30  20   5  20 109  17   9]
 [ 19  29   5  27  28  58  44]
 [  7   5   5  15  17  27 134]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.47      0.50       210
           2       0.59      0.63      0.61       210
           3       0.82      0.76      0.79       210
           4       0.41      0.43      0.42       210
           5       0.39      0.52      0.45       210
           6       0.37      0.28      0.32       210
           7       0.63      0.64      0.63       210

    accuracy                           0.53      1470
   macro avg       0.54      0.53      0.53      1470
weighted avg       0.54      0.53      0.53      1470

Decision Tree with 12 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5285714285714286
Confusion Matrix of Decision Tree is:
 [[105  12   4  28  31  18  12]
 [  8 136  10  16  20  16   4]
 [  1  11 162   8  16  11   1]
 [ 22  20  11  85  34  26  12]
 [ 30  29   7  21  89  25   9]
 [ 16  26   7  26  20  71  44]
 [  8  13   2  13  12  33 129]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.50      0.53       210
           2       0.55      0.65      0.60       210
           3       0.80      0.77      0.78       210
           4       0.43      0.40      0.42       210
           5       0.40      0.42      0.41       210
           6       0.35      0.34      0.35       210
           7       0.61      0.61      0.61       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Decision Tree with 20 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6761904761904762
Confusion Matrix of Random Forest is:
 [[118   3   0   9  52  16  12]
 [  0 152   7  10  26  10   5]
 [  1   7 176  12   9   4   1]
 [  6   5  11 122  39  14  13]
 [ 12  18   0  16 142   8  14]
 [  5   5  10  15   8  92  75]
 [  0   1   0   1   3  13 192]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.83      0.56      0.67       210
           2       0.80      0.72      0.76       210
           3       0.86      0.84      0.85       210
           4       0.66      0.58      0.62       210
           5       0.51      0.68      0.58       210
           6       0.59      0.44      0.50       210
           7       0.62      0.91      0.74       210

    accuracy                           0.68      1470
   macro avg       0.69      0.68      0.67      1470
weighted avg       0.69      0.68      0.67      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7136054421768707
Confusion Matrix of Random Forest is:
 [[134   4   0   2  46  17   7]
 [  0 154   7  12  21  14   2]
 [  1   6 183  10   5   5   0]
 [  5   7   7 135  29  19   8]
 [ 14  14   2  16 148   3  13]
 [  3   5   9  22   7 107  57]
 [  1   0   0   2   4  15 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.85      0.64      0.73       210
           2       0.81      0.73      0.77       210
           3       0.88      0.87      0.88       210
           4       0.68      0.64      0.66       210
           5       0.57      0.70      0.63       210
           6       0.59      0.51      0.55       210
           7       0.68      0.90      0.78       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.71      1470
weighted avg       0.72      0.71      0.71      1470

Random Forest with 16 max_dept

In [11]:
# GPT vectorized data
# Build the path from its components instead of gluing "//"-separated
# fragments together, so the separator is always the platform's own.
x_df = pd.read_csv(
    os.path.join(
        pwd,
        "Datasets",
        "Kabita",
        "SentenceTransformers",
        "gpt_vectorized_kabita_dataset.csv",
    )
)

# Every model below is scored by ml_training (10-fold CV) against the same
# feature frame and the shared `labels` array loaded earlier in the notebook.

# Logistic regression
ml_training(LogisticRegression(max_iter=1000), x_df, labels, "Logistic Regression")

# KNN Model: sweep the neighbourhood size
for n_neighbors in (3, 4, 5, 6, 7, 8):
    print("KNN with", n_neighbors, "Neighbors")
    ml_training(KNeighborsClassifier(n_neighbors=n_neighbors), x_df, labels, "KNN Model")

# Gaussian Naive Bayes
ml_training(GaussianNB(), x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
ml_training(BernoulliNB(), x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
for kernel_name in ('linear', 'poly', 'rbf', 'sigmoid'):
    print("Working on SVM Kernal:", kernel_name)
    ml_training(svm.SVC(kernel=kernel_name), x_df, labels, "SVM")

# Depth sweep shared by both tree-based models (max_depth 1..20 inclusive)
max_depths = range(1, 21)

# Decision Tree Classifier
for depth in max_depths:
    print("Decision Tree with", depth, "max_depth")
    ml_training(DecisionTreeClassifier(random_state=3, max_depth=depth), x_df, labels, "Decision Tree")

# Random Forest
for depth in max_depths:
    print("Random Forest with", depth, "max_depth")
    ml_training(RandomForestClassifier(max_depth=depth, random_state=3), x_df, labels, "Random Forest")

# scaling using MinMax scaler
# Rescale every feature into [0, 10] before Multinomial NB -- presumably because
# that model needs non-negative inputs and the embeddings can be negative (confirm).
# NOTE(review): the scaler is fitted on ALL rows before ml_training splits the
# folds, so each held-out fold influences the min/max used for scaling. Consider
# wrapping scaler + model in a sklearn Pipeline if leakage-free numbers are needed.
m_df = MinMaxScaler(feature_range=(0, 10)).fit_transform(x_df)
# Global NumPy print setting: from here on (including later cells) the per-fold
# accuracy array printed by ml_training is shown with 3 decimals.
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
ml_training(MultinomialNB(), m_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7578231292517007
Confusion Matrix of Logistic Regression is:
 [[162   1   1   5  33   7   1]
 [  1 169   6   6  16   8   4]
 [  0   2 192   7   4   5   0]
 [  4   5   3 165   9  18   6]
 [ 32   9   3  14 141   5   6]
 [  7   9   5  21   5 123  40]
 [  3   2   0   1   8  34 162]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.78      0.77      0.77       210
           2       0.86      0.80      0.83       210
           3       0.91      0.91      0.91       210
           4       0.75      0.79      0.77       210
           5       0.65      0.67      0.66       210
           6       0.61      0.59      0.60       210
           7       0.74      0.77      0.76       210

    accuracy                           0.76      1470
   macro avg       0.76      0.76      0.76      1470
weighted avg       0.76      0.76      0.76      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7360544217687075
Confusion Matrix of SVM is:
 [[164   2   0   8  26   9   1]
 [  1 168   7  11  16   4   3]
 [  2   7 192   2   2   5   0]
 [  9   9   6 161  10  14   1]
 [ 38  12   3  10 139   8   0]
 [ 15  22   6  21   3 112  31]
 [  9   3   0   6   5  41 146]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.69      0.78      0.73       210
           2       0.75      0.80      0.78       210
           3       0.90      0.91      0.91       210
           4       0.74      0.77      0.75       210
           5       0.69      0.66      0.68       210
           6       0.58      0.53      0.56       210
           7       0.80      0.70      0.74       210

    accuracy                           0.74      1470
   macro avg       0.74      0.74      0.73      1470
weighted avg       0.74      0.74      0.73      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3306122448979592
Confusion Matrix of Decision Tree is:
 [[ 18   0   1   0  92   0  99]
 [  7   0  11   0 140   0  52]
 [  5   0 139   0  32   0  34]
 [  7   0  19   0 102   0  82]
 [  2   0   4   0 158   0  46]
 [ 12   0   8   0  45   0 145]
 [  6   0   1   0  32   0 171]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.32      0.09      0.13       210
           2       0.00      0.00      0.00       210
           3       0.76      0.66      0.71       210
           4       0.00      0.00      0.00       210
           5       0.26      0.75      0.39       210
           6       0.00      0.00      0.00       210
           7       0.27      0.81      0.41       210

    accuracy                           0.33      1470
   macro avg       0.23      0.33      0.23      1470
weighted avg       0.23      0.33      0.23      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40748299319727893
Confusion Matrix of Decision Tree is:
 [[ 83   9   1   0  83   2  32]
 [ 15  96   9   0  44   8  38]
 [ 19  19 136   0  13   8  15]
 [ 48  35  13   0  67  11  36]
 [ 25  27   1   0 131   4  22]
 [ 62  12   5   0  33   5  93]
 [ 26   7   0   0  25   4 148]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.30      0.40      0.34       210
           2       0.47      0.46      0.46       210
           3       0.82      0.65      0.73       210
           4       0.00      0.00      0.00       210
           5       0.33      0.62      0.43       210
           6       0.12      0.02      0.04       210
           7       0.39      0.70      0.50       210

    accuracy                           0.41      1470
   macro avg       0.35      0.41      0.36      1470
weighted avg       0.35      0.41      0.36      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.45374149659863944
Confusion Matrix of Decision Tree is:
 [[ 95  13   1  25  49   4  23]
 [  6 102   7  36  41   7  11]
 [  4  12 135  37  12   7   3]
 [ 22  11   8  73  62   5  29]
 [ 36  20   0  21 111   2  20]
 [ 22  29   5  45  27  12  70]
 [ 15  15   0  14  23   4 139]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.45      0.46       210
           2       0.50      0.49      0.50       210
           3       0.87      0.64      0.74       210
           4       0.29      0.35      0.32       210
           5       0.34      0.53      0.41       210
           6       0.29      0.06      0.10       210
           7       0.47      0.66      0.55       210

    accuracy                           0.45      1470
   macro avg       0.46      0.45      0.44      1470
weighted avg       0.46      0.45      0.44      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.4925170068027211
Confusion Matrix of Decision Tree is:
 [[107   9   2  23  39  20  10]
 [  6 121  12  17  23  17  14]
 [ 13   4 159  12   4  16   2]
 [ 28  18  10  83  28  30  13]
 [ 40  28   3  30  74  14  21]
 [ 21  17  10  26  20  63  53]
 [ 15  16   3  13  10  36 117]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.51      0.49       210
           2       0.57      0.58      0.57       210
           3       0.80      0.76      0.78       210
           4       0.41      0.40      0.40       210
           5       0.37      0.35      0.36       210
           6       0.32      0.30      0.31       210
           7       0.51      0.56      0.53       210

    accuracy                           0.49      1470
   macro avg       0.49      0.49      0.49      1470
weighted avg       0.49      0.49      0.49      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.47891156462585033
Confusion Matrix of Decision Tree is:
 [[100  11   4  20  44  19  12]
 [  5 121   7  16  29  15  17]
 [  7   8 160  12  10  10   3]
 [ 26  16   7  81  31  34  15]
 [ 34  32   3  35  74  14  18]
 [ 24  25  10  28  19  59  45]
 [ 17   9   2  15  17  41 109]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.48      0.47       210
           2       0.55      0.58      0.56       210
           3       0.83      0.76      0.79       210
           4       0.39      0.39      0.39       210
           5       0.33      0.35      0.34       210
           6       0.31      0.28      0.29       210
           7       0.50      0.52      0.51       210

    accuracy                           0.48      1470
   macro avg       0.48      0.48      0.48      1470
weighted avg       0.48      0.48      0.48      1470

Random Forest with 1 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6591836734693878
Confusion Matrix of Random Forest is:
 [[121   2   0   8  40  23  16]
 [  0 147   7  12  28  13   3]
 [  3   4 179  13   1  10   0]
 [  4   6   8 136  14  21  21]
 [ 21  18   0  20 115  10  26]
 [  4  10   5  17   7  85  82]
 [  0   1   0   2   3  18 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.58      0.67       210
           2       0.78      0.70      0.74       210
           3       0.90      0.85      0.88       210
           4       0.65      0.65      0.65       210
           5       0.55      0.55      0.55       210
           6       0.47      0.40      0.44       210
           7       0.56      0.89      0.68       210

    accuracy                           0.66      1470
   macro avg       0.67      0.66      0.66      1470
weighted avg       0.67      0.66      0.66      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6952380952380952
Confusion Matrix of Random Forest is:
 [[124   1   0   8  41  22  14]
 [  0 160   7  10  21  10   2]
 [  4   3 183  10   2   7   1]
 [  3   6   6 151  10  14  20]
 [ 19  19   1  28 114  10  19]
 [  8   8   6  19   7 100  62]
 [  0   2   0   0   2  16 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.59      0.67       210
           2       0.80      0.76      0.78       210
           3       0.90      0.87      0.89       210
           4       0.67      0.72      0.69       210
           5       0.58      0.54      0.56       210
           6       0.56      0.48      0.51       210
           7       0.62      0.90      0.73       210

    accuracy                           0.70      1470
   macro avg       0.70      0.70      0.69      1470
weighted avg       0.70      0.70      0.69      1470

Random Forest with 17 max_dept

In [12]:
# XLM vectorized data
# Build the path from its components instead of gluing "//"-separated
# fragments together, so the separator is always the platform's own.
x_df = pd.read_csv(
    os.path.join(
        pwd,
        "Datasets",
        "Kabita",
        "SentenceTransformers",
        "xlm_vectorized_kabita_dataset.csv",
    )
)

# Every model below is scored by ml_training (10-fold CV) against the same
# feature frame and the shared `labels` array loaded earlier in the notebook.

# Logistic regression
ml_training(LogisticRegression(max_iter=1000), x_df, labels, "Logistic Regression")

# KNN Model: sweep the neighbourhood size
for n_neighbors in (3, 4, 5, 6, 7, 8):
    print("KNN with", n_neighbors, "Neighbors")
    ml_training(KNeighborsClassifier(n_neighbors=n_neighbors), x_df, labels, "KNN Model")

# Gaussian Naive Bayes
ml_training(GaussianNB(), x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
ml_training(BernoulliNB(), x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
for kernel_name in ('linear', 'poly', 'rbf', 'sigmoid'):
    print("Working on SVM Kernal:", kernel_name)
    ml_training(svm.SVC(kernel=kernel_name), x_df, labels, "SVM")

# Depth sweep shared by both tree-based models (max_depth 1..20 inclusive)
max_depths = range(1, 21)

# Decision Tree Classifier
for depth in max_depths:
    print("Decision Tree with", depth, "max_depth")
    ml_training(DecisionTreeClassifier(random_state=3, max_depth=depth), x_df, labels, "Decision Tree")

# Random Forest
for depth in max_depths:
    print("Random Forest with", depth, "max_depth")
    ml_training(RandomForestClassifier(max_depth=depth, random_state=3), x_df, labels, "Random Forest")

# scaling using MinMax scaler
# Rescale every feature into [0, 10] before Multinomial NB -- presumably because
# that model needs non-negative inputs and the embeddings can be negative (confirm).
# NOTE(review): the scaler is fitted on ALL rows before ml_training splits the
# folds, so each held-out fold influences the min/max used for scaling. Consider
# wrapping scaler + model in a sklearn Pipeline if leakage-free numbers are needed.
m_df = MinMaxScaler(feature_range=(0, 10)).fit_transform(x_df)
# Global NumPy print setting: from here on (including later cells) the per-fold
# accuracy array printed by ml_training is shown with 3 decimals.
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
ml_training(MultinomialNB(), m_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7578231292517007
Confusion Matrix of Logistic Regression is:
 [[170   0   0   4  25  10   1]
 [  1 173   8  10  13   5   0]
 [  1   7 192   6   0   4   0]
 [  6  10   4 157  15  15   3]
 [ 20  15   4  18 146   2   5]
 [  7  17   5  13   3 113  52]
 [  2   2   0   4   3  36 163]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.82      0.81      0.82       210
           2       0.77      0.82      0.80       210
           3       0.90      0.91      0.91       210
           4       0.74      0.75      0.74       210
           5       0.71      0.70      0.70       210
           6       0.61      0.54      0.57       210
           7       0.73      0.78      0.75       210

    accuracy                           0.76      1470
   macro avg       0.76      0.76      0.76      1470
weighted avg       0.76      0.76      0.76      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7210884353741497
Confusion Matrix of SVM is:
 [[166   1   0   5  26  10   2]
 [  1 170   6  11  17   5   0]
 [  0   6 193   5   3   3   0]
 [ 15  11  10 150  10  13   1]
 [ 30  19   4  17 138   1   1]
 [ 17  21   7  19   5  91  50]
 [  1   4   0   6   8  39 152]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.72      0.79      0.75       210
           2       0.73      0.81      0.77       210
           3       0.88      0.92      0.90       210
           4       0.70      0.71      0.71       210
           5       0.67      0.66      0.66       210
           6       0.56      0.43      0.49       210
           7       0.74      0.72      0.73       210

    accuracy                           0.72      1470
   macro avg       0.71      0.72      0.72      1470
weighted avg       0.71      0.72      0.72      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3564625850340136
Confusion Matrix of Decision Tree is:
 [[  0   0   0   0 175   0  35]
 [  0   0   3   0 177   0  30]
 [  0   0 128   4  56   0  22]
 [  0   0   5   7 144   0  54]
 [  0   0   3   0 190   0  17]
 [  0   0   2   0  42   0 166]
 [  0   0   1   0  10   0 199]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.90      0.61      0.73       210
           4       0.64      0.03      0.06       210
           5       0.24      0.90      0.38       210
           6       0.00      0.00      0.00       210
           7       0.38      0.95      0.54       210

    accuracy                           0.36      1470
   macro avg       0.31      0.36      0.24      1470
weighted avg       0.31      0.36      0.24      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.48299319727891155
Confusion Matrix of Decision Tree is:
 [[157  18   0   0   0  26   9]
 [ 15 162   3   0   0  24   6]
 [ 15  41 130   2   0  21   1]
 [ 56  88   4   6   0  45  11]
 [125  65   3   0   0   4  13]
 [ 18  24   1   0   0  96  71]
 [  8   2   0   0   0  41 159]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.75      0.52       210
           2       0.41      0.77      0.53       210
           3       0.92      0.62      0.74       210
           4       0.75      0.03      0.06       210
           5       0.00      0.00      0.00       210
           6       0.37      0.46      0.41       210
           7       0.59      0.76      0.66       210

    accuracy                           0.48      1470
   macro avg       0.49      0.48      0.42      1470
weighted avg       0.49      0.48      0.42      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.5319727891156463
Confusion Matrix of Decision Tree is:
 [[ 98   1   0  17  62  26   6]
 [  5 120   2  42  15  24   2]
 [ 11  24 130  19   4  21   1]
 [  5  14   4  82  52  43  10]
 [ 33  32   0  33 103   4   5]
 [  3   6   1  18  21  96  65]
 [  0   1   0   1  14  41 153]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.63      0.47      0.54       210
           2       0.61      0.57      0.59       210
           3       0.95      0.62      0.75       210
           4       0.39      0.39      0.39       210
           5       0.38      0.49      0.43       210
           6       0.38      0.46      0.41       210
           7       0.63      0.73      0.68       210

    accuracy                           0.53      1470
   macro avg       0.57      0.53      0.54      1470
weighted avg       0.57      0.53      0.54      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5571428571428572
Confusion Matrix of Decision Tree is:
 [[122   7   3  14  38  20   6]
 [  8 139   9  17  22  11   4]
 [  4   8 174   8   7   6   3]
 [ 17  18  10  93  40  26   6]
 [ 32  28   4  35  92  15   4]
 [ 18  16   8  26  10  73  59]
 [  8   7   2  18   5  44 126]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.58      0.58       210
           2       0.62      0.66      0.64       210
           3       0.83      0.83      0.83       210
           4       0.44      0.44      0.44       210
           5       0.43      0.44      0.43       210
           6       0.37      0.35      0.36       210
           7       0.61      0.60      0.60       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5591836734693878
Confusion Matrix of Decision Tree is:
 [[124   6   3  18  31  22   6]
 [  7 137  12  14  23  11   6]
 [  0   7 175  12   6   7   3]
 [  9  19  13  92  38  28  11]
 [ 35  28   7  33  91  10   6]
 [ 16  13  10  33   8  74  56]
 [  7   5   5  16   9  39 129]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.63      0.59      0.61       210
           2       0.64      0.65      0.64       210
           3       0.78      0.83      0.80       210
           4       0.42      0.44      0.43       210
           5       0.44      0.43      0.44       210
           6       0.39      0.35      0.37       210
           7       0.59      0.61      0.60       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6952380952380952
Confusion Matrix of Random Forest is:
 [[132   0   0   4  40  25   9]
 [  2 154   4  16  18  15   1]
 [  0   9 174   9   5  12   1]
 [  3   4   3 126  26  31  17]
 [ 21  23   0  15 140   3   8]
 [  2   4   0  14   7 110  73]
 [  0   0   0   0   2  22 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.63      0.71       210
           2       0.79      0.73      0.76       210
           3       0.96      0.83      0.89       210
           4       0.68      0.60      0.64       210
           5       0.59      0.67      0.62       210
           6       0.50      0.52      0.51       210
           7       0.63      0.89      0.74       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7292517006802721
Confusion Matrix of Random Forest is:
 [[142   1   0   5  33  20   9]
 [  1 160   6  15  17  11   0]
 [  0   2 188   9   3   8   0]
 [  2   5   2 137  20  34  10]
 [ 19  21   0  15 145   3   7]
 [  2   4   0  18   5 123  58]
 [  1   0   0   2   0  30 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.85      0.68      0.75       210
           2       0.83      0.76      0.79       210
           3       0.96      0.90      0.93       210
           4       0.68      0.65      0.67       210
           5       0.65      0.69      0.67       210
           6       0.54      0.59      0.56       210
           7       0.68      0.84      0.75       210

    accuracy                           0.73      1470
   macro avg       0.74      0.73      0.73      1470
weighted avg       0.74      0.73      0.73      1470

Random Forest with 17 max_dept

### Fine-Tuned Transformer Models

In [13]:
# BERT vectorized data
# Every model below is scored by ml_training (defined earlier in this notebook),
# which runs a shuffled 10-fold cross-validation and prints the per-fold and
# mean accuracies.
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//bert_base_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: sweep over the number of neighbours
neighbors_list = [3, 4, 5, 6, 7, 8]
for n_neighbors in neighbors_list:
    print("KNN with", n_neighbors, "Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
    ml_training(tv_knn_model, x_df, labels, "KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model, x_df, labels, "SVM")

# Decision Tree Classifier: sweep max_depth from 1 to 20 (seed fixed for repeatability)
for depth in range(1, 21):
    print("Decision Tree with", depth, "max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=depth)
    ml_training(tv_dt_model, x_df, labels, "Decision Tree")

# Random Forest: same max_depth sweep as the decision tree
for depth in range(1, 21):
    print("Random Forest with", depth, "max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=depth, random_state=3)
    ml_training(tv_rf_model, x_df, labels, "Random Forest")

# Multinomial Naive Bayes on MinMax-scaled features (range 0-10).
# Presumably the rescaling is there because MultinomialNB needs non-negative
# inputs -- TODO confirm against the feature files.
# The scaler is wrapped in a pipeline so that cross-validation re-fits it on
# each training fold only. Fitting it on the full dataset up front would leak
# the held-out fold's min/max into training. Held-out rows are scaled with the
# training-fold statistics, so they may fall slightly outside the 0-10 range.
from sklearn.pipeline import make_pipeline  # local import: not provided by the notebook's import cell
mms_scale = MinMaxScaler(feature_range=(0, 10))
# NOTE: global numpy print setting; it also affects arrays printed by later cells.
np.set_printoptions(precision=3)
tv_mnb_model = make_pipeline(mms_scale, MultinomialNB())
ml_training(tv_mnb_model, x_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6843537414965987
Confusion Matrix of Logistic Regression is:
 [[155   1   1   6  29  13   5]
 [  3 158   9  10  18   9   3]
 [  1   7 181   6   6   8   1]
 [ 11  15  10 132  14  21   7]
 [ 28  21   5  16 131   4   5]
 [ 10  16   8  24   6 102  44]
 [  6   2   2   4   6  43 147]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.72      0.74      0.73       210
           2       0.72      0.75      0.73       210
           3       0.84      0.86      0.85       210
           4       0.67      0.63      0.65       210
           5       0.62      0.62      0.62       210
           6       0.51      0.49      0.50       210
           7       0.69      0.70      0.70       210

    accuracy                           0.68      1470
   macro avg       0.68      0.68      0.68      1470
weighted avg       0.68      0.68      0.68      1470

KNN with 3 N

Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.62      0.34      0.44       210
           2       0.48      0.65      0.55       210
           3       0.68      0.65      0.67       210
           4       0.38      0.25      0.30       210
           5       0.38      0.57      0.46       210
           6       0.38      0.22      0.28       210
           7       0.38      0.55      0.45       210

    accuracy                           0.46      1470
   macro avg       0.47      0.46      0.45      1470
weighted avg       0.47      0.46      0.45      1470

Working on SVM Kernal: linear
Accuracy of SVM after Standard Scaling is: 0.6775510204081633
Confusion Matrix of SVM is:
 [[157   5   2   9  22  11   4]
 [  6 162  11   7  14   8   2]
 [  2   7 192   2   1   4   2]
 [ 10  16  12 134  15  18   5]
 [ 41  26   5  14 116   4   4]
 [ 18  21   7  25   9  97  33]
 [  5   3   0  10   4  50 138]]
Classificati

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.29183673469387755
Confusion Matrix of Decision Tree is:
 [[ 13  22   1   0   0   0 174]
 [  2 105  10   0   0   0  93]
 [  0  23 114   0   0   0  73]
 [  0  36   3   0   0   0 171]
 [  1  32   1   0   0   0 176]
 [  6  30   6   0   0   0 168]
 [  0  12   1   0   0   0 197]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.59      0.06      0.11       210
           2       0.40      0.50      0.45       210
           3       0.84      0.54      0.66       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.19      0.94      0.31       210

    accuracy                           0.29      1470
   macro avg       0.29      0.29      0.22      1470
weighted avg       0.29      0.29      0.22      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3448979591836735
Confusion Matrix of Decision Tree is:
 [[134  24   1   0   0   0  51]
 [ 70 113   4   0   0   0  23]
 [ 61  24 113   0   0   0  12]
 [ 92  36   3   0   0   0  79]
 [113  34   0   0   0   0  63]
 [ 70  38   4   0   0   0  98]
 [ 50  13   0   0   0   0 147]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.64      0.34       210
           2       0.40      0.54      0.46       210
           3       0.90      0.54      0.67       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.31      0.70      0.43       210

    accuracy                           0.34      1470
   macro avg       0.26      0.34      0.27      1470
weighted avg       0.26      0.34      0.27      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3401360544217687
Confusion Matrix of Decision Tree is:
 [[132   9   3  29  15   0  22]
 [ 70  80   2  17  33   2   6]
 [ 46   8 126  10  15   3   2]
 [ 92  15   3  52  21   0  27]
 [113  16   1  36  17   0  27]
 [ 70  24   5  49  10   3  49]
 [ 50   3   1  57   9   0  90]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.63      0.34       210
           2       0.52      0.38      0.44       210
           3       0.89      0.60      0.72       210
           4       0.21      0.25      0.23       210
           5       0.14      0.08      0.10       210
           6       0.38      0.01      0.03       210
           7       0.40      0.43      0.42       210

    accuracy                           0.34      1470
   macro avg       0.40      0.34      0.32      1470
weighted avg       0.40      0.34      0.32      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.40068027210884355
Confusion Matrix of Decision Tree is:
 [[ 67  21   7  24  41  39  11]
 [ 18 108  11  15  23  23  12]
 [  6   8 148  11  10  24   3]
 [ 27  19   9  46  34  56  19]
 [ 35  33   5  18  73  29  17]
 [ 21  14  11  35  14  69  46]
 [ 16  12   2  18  15  69  78]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.32      0.33       210
           2       0.50      0.51      0.51       210
           3       0.77      0.70      0.73       210
           4       0.28      0.22      0.24       210
           5       0.35      0.35      0.35       210
           6       0.22      0.33      0.27       210
           7       0.42      0.37      0.39       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.40      1470
weighted avg       0.41      0.40      0.40      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.4142857142857143
Confusion Matrix of Decision Tree is:
 [[ 74  17   7  23  46  26  17]
 [ 21 104  11  16  28  14  16]
 [  9   7 165  11   6   8   4]
 [ 28  19   8  59  37  38  21]
 [ 36  26   3  25  74  21  25]
 [ 21  23   9  40  20  57  40]
 [ 30   7   3  16  27  51  76]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.35      0.34       210
           2       0.51      0.50      0.50       210
           3       0.80      0.79      0.79       210
           4       0.31      0.28      0.30       210
           5       0.31      0.35      0.33       210
           6       0.27      0.27      0.27       210
           7       0.38      0.36      0.37       210

    accuracy                           0.41      1470
   macro avg       0.42      0.41      0.42      1470
weighted avg       0.42      0.41      0.42      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.5469387755102041
Confusion Matrix of Random Forest is:
 [[ 91   3   0  11  58  18  29]
 [  2 135   5  20  37   4   7]
 [  4  21 161   6   7   8   3]
 [ 10  17   5  61  41  29  47]
 [ 11  22   1  11 129   2  34]
 [  6  16   5  15  21  63  84]
 [  0   4   1  13   9  19 164]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.73      0.43      0.54       210
           2       0.62      0.64      0.63       210
           3       0.90      0.77      0.83       210
           4       0.45      0.29      0.35       210
           5       0.43      0.61      0.50       210
           6       0.44      0.30      0.36       210
           7       0.45      0.78      0.57       210

    accuracy                           0.55      1470
   macro avg       0.57      0.55      0.54      1470
weighted avg       0.57      0.55      0.54      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.5795918367346938
Confusion Matrix of Random Forest is:
 [[102   4   1  10  53  19  21]
 [  0 144   4  11  35  12   4]
 [  2  20 162   9   2  11   4]
 [  9  12   2  88  39  31  29]
 [ 16  19   2  17 128   2  26]
 [ 11  11   6  26  15  69  72]
 [  3   3   0   7   8  30 159]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.49      0.58       210
           2       0.68      0.69      0.68       210
           3       0.92      0.77      0.84       210
           4       0.52      0.42      0.47       210
           5       0.46      0.61      0.52       210
           6       0.40      0.33      0.36       210
           7       0.50      0.76      0.61       210

    accuracy                           0.58      1470
   macro avg       0.60      0.58      0.58      1470
weighted avg       0.60      0.58      0.58      1470

Random Forest with 17 max_dept

In [14]:
# Hinglish BERT vectorized data
# Every model below is scored by ml_training (defined earlier in this notebook),
# which runs a shuffled 10-fold cross-validation and prints the per-fold and
# mean accuracies.
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: sweep over the number of neighbours
neighbors_list = [3, 4, 5, 6, 7, 8]
for n_neighbors in neighbors_list:
    print("KNN with", n_neighbors, "Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
    ml_training(tv_knn_model, x_df, labels, "KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model, x_df, labels, "SVM")

# Decision Tree Classifier: sweep max_depth from 1 to 20 (seed fixed for repeatability)
for depth in range(1, 21):
    print("Decision Tree with", depth, "max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=depth)
    ml_training(tv_dt_model, x_df, labels, "Decision Tree")

# Random Forest: same max_depth sweep as the decision tree
for depth in range(1, 21):
    print("Random Forest with", depth, "max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=depth, random_state=3)
    ml_training(tv_rf_model, x_df, labels, "Random Forest")

# Multinomial Naive Bayes on MinMax-scaled features (range 0-10).
# Presumably the rescaling is there because MultinomialNB needs non-negative
# inputs -- TODO confirm against the feature files.
# The scaler is wrapped in a pipeline so that cross-validation re-fits it on
# each training fold only. Fitting it on the full dataset up front would leak
# the held-out fold's min/max into training. Held-out rows are scaled with the
# training-fold statistics, so they may fall slightly outside the 0-10 range.
from sklearn.pipeline import make_pipeline  # local import: not provided by the notebook's import cell
mms_scale = MinMaxScaler(feature_range=(0, 10))
# NOTE: global numpy print setting; it also affects arrays printed by later cells.
np.set_printoptions(precision=3)
tv_mnb_model = make_pipeline(mms_scale, MultinomialNB())
ml_training(tv_mnb_model, x_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6517006802721088
Confusion Matrix of Logistic Regression is:
 [[131   6   6  13  31  17   6]
 [  3 153  10  11  17  13   3]
 [  4   4 178   6   5  12   1]
 [ 17   8  16 134  18  12   5]
 [ 40  16  10  22 108  11   3]
 [ 22  13  10  14   6 111  34]
 [  2   3   0   6  10  46 143]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.60      0.62      0.61       210
           2       0.75      0.73      0.74       210
           3       0.77      0.85      0.81       210
           4       0.65      0.64      0.64       210
           5       0.55      0.51      0.53       210
           6       0.50      0.53      0.51       210
           7       0.73      0.68      0.71       210

    accuracy                           0.65      1470
   macro avg       0.65      0.65      0.65      1470
weighted avg       0.65      0.65      0.65      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6598639455782312
Confusion Matrix of SVM is:
 [[140   6   7  14  23  15   5]
 [  9 161   8   9  13   8   2]
 [  7   6 179   8   3   7   0]
 [ 22  11  10 134  16  15   2]
 [ 55  24   4  13 105   5   4]
 [ 26  15  11  10   6 106  36]
 [  7   2   2   4   8  42 145]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.53      0.67      0.59       210
           2       0.72      0.77      0.74       210
           3       0.81      0.85      0.83       210
           4       0.70      0.64      0.67       210
           5       0.60      0.50      0.55       210
           6       0.54      0.50      0.52       210
           7       0.75      0.69      0.72       210

    accuracy                           0.66      1470
   macro avg       0.66      0.66      0.66      1470
weighted avg       0.66      0.66      0.66      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.32857142857142857
Confusion Matrix of Decision Tree is:
 [[  5   0   5   0 188   0  12]
 [ 10   0   7   0 164   0  29]
 [  7   0 123   0  78   0   2]
 [  0   0   9   0 187   0  14]
 [  0   0   0   0 192   0  18]
 [  2   0   6   0  98   0 104]
 [  0   0   1   0  46   0 163]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.21      0.02      0.04       210
           2       0.00      0.00      0.00       210
           3       0.81      0.59      0.68       210
           4       0.00      0.00      0.00       210
           5       0.20      0.91      0.33       210
           6       0.00      0.00      0.00       210
           7       0.48      0.78      0.59       210

    accuracy                           0.33      1470
   macro avg       0.24      0.33      0.23      1470
weighted avg       0.24      0.33      0.23      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3727891156462585
Confusion Matrix of Decision Tree is:
 [[  4  70   3   0 118  10   5]
 [  0 117  10   0  47  29   7]
 [  2  24  81   0  54  49   0]
 [  0  50   3   0 137  18   2]
 [  0  23   0   0 169   4  14]
 [  2  60   2   0  38  60  48]
 [  0  30   1   0  16  46 117]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.02      0.04       210
           2       0.31      0.56      0.40       210
           3       0.81      0.39      0.52       210
           4       0.00      0.00      0.00       210
           5       0.29      0.80      0.43       210
           6       0.28      0.29      0.28       210
           7       0.61      0.56      0.58       210

    accuracy                           0.37      1470
   macro avg       0.40      0.37      0.32      1470
weighted avg       0.40      0.37      0.32      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40476190476190477
Confusion Matrix of Decision Tree is:
 [[ 61  14   2  72  49   7   5]
 [ 42  90   1  16  38  18   5]
 [ 12  13  80  74  29   2   0]
 [ 41  11   3  95  48  10   2]
 [ 14   9   0  55 114   7  11]
 [ 48  30   2  30  12  44  44]
 [ 20  19   1  10   6  43 111]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.26      0.29      0.27       210
           2       0.48      0.43      0.45       210
           3       0.90      0.38      0.54       210
           4       0.27      0.45      0.34       210
           5       0.39      0.54      0.45       210
           6       0.34      0.21      0.26       210
           7       0.62      0.53      0.57       210

    accuracy                           0.40      1470
   macro avg       0.46      0.40      0.41      1470
weighted avg       0.46      0.40      0.41      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.45170068027210886
Confusion Matrix of Decision Tree is:
 [[ 80   8   8  41  47  19   7]
 [ 23 108   8  25  22  11  13]
 [  5   2 127  55  11   8   2]
 [ 37  14  12  73  49  14  11]
 [ 32  10   5  45  91  10  17]
 [ 18  26   8  33  10  57  58]
 [ 13  10   1  10   7  41 128]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.38      0.38       210
           2       0.61      0.51      0.56       210
           3       0.75      0.60      0.67       210
           4       0.26      0.35      0.30       210
           5       0.38      0.43      0.41       210
           6       0.36      0.27      0.31       210
           7       0.54      0.61      0.57       210

    accuracy                           0.45      1470
   macro avg       0.47      0.45      0.46      1470
weighted avg       0.47      0.45      0.46      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.43945578231292515
Confusion Matrix of Decision Tree is:
 [[ 76  10  13  33  42  27   9]
 [ 21 108   7  16  26  18  14]
 [  9   6 126  53   7   9   0]
 [ 27  11  15  70  52  22  13]
 [ 32  10   9  37  86  18  18]
 [ 19  24  13  28  18  57  51]
 [  8  12   2  13  10  42 123]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.36      0.38       210
           2       0.60      0.51      0.55       210
           3       0.68      0.60      0.64       210
           4       0.28      0.33      0.30       210
           5       0.36      0.41      0.38       210
           6       0.30      0.27      0.28       210
           7       0.54      0.59      0.56       210

    accuracy                           0.44      1470
   macro avg       0.45      0.44      0.44      1470
weighted avg       0.45      0.44      0.44      1470

Random Forest with 1 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5761904761904761
Confusion Matrix of Random Forest is:
 [[ 93  10   0  18  55  25   9]
 [  2 143   3  14  22  16  10]
 [ 54   4 124  12   8   8   0]
 [ 17   7   2 102  40  32  10]
 [  8  16   2  16 137  14  17]
 [ 17  18   3  11   7  70  84]
 [  1   7   0   0   3  21 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.48      0.44      0.46       210
           2       0.70      0.68      0.69       210
           3       0.93      0.59      0.72       210
           4       0.59      0.49      0.53       210
           5       0.50      0.65      0.57       210
           6       0.38      0.33      0.35       210
           7       0.58      0.85      0.69       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.57      1470
weighted avg       0.59      0.58      0.57      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.5993197278911565
Confusion Matrix of Random Forest is:
 [[100   9   1  18  51  25   6]
 [  5 146   4  12  23  14   6]
 [ 54   3 128  12   4   9   0]
 [ 25   4   3 111  35  24   8]
 [ 15  11   4  16 136  12  16]
 [ 16  16   3   7  10  90  68]
 [  1   3   0   2   1  33 170]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.46      0.48      0.47       210
           2       0.76      0.70      0.73       210
           3       0.90      0.61      0.73       210
           4       0.62      0.53      0.57       210
           5       0.52      0.65      0.58       210
           6       0.43      0.43      0.43       210
           7       0.62      0.81      0.70       210

    accuracy                           0.60      1470
   macro avg       0.62      0.60      0.60      1470
weighted avg       0.62      0.60      0.60      1470

Random Forest with 17 max_dept

In [15]:
# GPT vectorized data
# Every model below is scored by ml_training (defined earlier in this notebook),
# which runs a shuffled 10-fold cross-validation and prints the per-fold and
# mean accuracies.
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_base_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: sweep over the number of neighbours
neighbors_list = [3, 4, 5, 6, 7, 8]
for n_neighbors in neighbors_list:
    print("KNN with", n_neighbors, "Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=n_neighbors)
    ml_training(tv_knn_model, x_df, labels, "KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model, x_df, labels, "SVM")

# Decision Tree Classifier: sweep max_depth from 1 to 20 (seed fixed for repeatability)
for depth in range(1, 21):
    print("Decision Tree with", depth, "max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=depth)
    ml_training(tv_dt_model, x_df, labels, "Decision Tree")

# Random Forest: same max_depth sweep as the decision tree
for depth in range(1, 21):
    print("Random Forest with", depth, "max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=depth, random_state=3)
    ml_training(tv_rf_model, x_df, labels, "Random Forest")

# Multinomial Naive Bayes on MinMax-scaled features (range 0-10).
# Presumably the rescaling is there because MultinomialNB needs non-negative
# inputs -- TODO confirm against the feature files.
# The scaler is wrapped in a pipeline so that cross-validation re-fits it on
# each training fold only. Fitting it on the full dataset up front would leak
# the held-out fold's min/max into training. Held-out rows are scaled with the
# training-fold statistics, so they may fall slightly outside the 0-10 range.
from sklearn.pipeline import make_pipeline  # local import: not provided by the notebook's import cell
mms_scale = MinMaxScaler(feature_range=(0, 10))
# NOTE: global numpy print setting; it also affects arrays printed by later cells.
np.set_printoptions(precision=3)
tv_mnb_model = make_pipeline(mms_scale, MultinomialNB())
ml_training(tv_mnb_model, x_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7428571428571429
Confusion Matrix of Logistic Regression is:
 [[152   4   0   2  37   9   6]
 [  2 167   6   7  12  10   6]
 [  0   7 191   5   2   5   0]
 [  4  11   4 162   9  15   5]
 [ 27  12   7  14 136   7   7]
 [  8  10   4  12   3 132  41]
 [  3   5   1   1   5  43 152]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.78      0.72      0.75       210
           2       0.77      0.80      0.78       210
           3       0.90      0.91      0.90       210
           4       0.80      0.77      0.78       210
           5       0.67      0.65      0.66       210
           6       0.60      0.63      0.61       210
           7       0.70      0.72      0.71       210

    accuracy                           0.74      1470
   macro avg       0.74      0.74      0.74      1470
weighted avg       0.74      0.74      0.74      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7421768707482993
Confusion Matrix of SVM is:
 [[162   0   0   3  31  13   1]
 [  2 170   9  10  12   7   0]
 [  1   5 194   5   2   3   0]
 [  8  12  10 156   7  15   2]
 [ 36  15   1  15 130   7   6]
 [ 13  12   6  14   2 129  34]
 [  5   1   0   3   6  45 150]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.77      0.74       210
           2       0.79      0.81      0.80       210
           3       0.88      0.92      0.90       210
           4       0.76      0.74      0.75       210
           5       0.68      0.62      0.65       210
           6       0.59      0.61      0.60       210
           7       0.78      0.71      0.74       210

    accuracy                           0.74      1470
   macro avg       0.74      0.74      0.74      1470
weighted avg       0.74      0.74      0.74      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.28435374149659864
Confusion Matrix of Decision Tree is:
 [[175   1   1   0   0   0  33]
 [172   9  11   0   0   0  18]
 [ 87  37  80   0   0   0   6]
 [134   5   5   0   0   0  66]
 [157   1   0   0   0   0  52]
 [110   1   1   0   0   0  98]
 [ 51   4   1   0   0   0 154]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.20      0.83      0.32       210
           2       0.16      0.04      0.07       210
           3       0.81      0.38      0.52       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.36      0.73      0.48       210

    accuracy                           0.28      1470
   macro avg       0.22      0.28      0.20      1470
weighted avg       0.22      0.28      0.20      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3442176870748299
Confusion Matrix of Decision Tree is:
 [[169   6   1  12   0   0  22]
 [ 89  86   9  17   0   0   9]
 [ 71  19  80  37   0   0   3]
 [122  13   2  53   0   0  20]
 [149   9   0  20   0   0  32]
 [ 97  14   1  29   0   0  69]
 [ 48   4   1  39   0   0 118]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.80      0.35       210
           2       0.57      0.41      0.48       210
           3       0.85      0.38      0.53       210
           4       0.26      0.25      0.25       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.43      0.56      0.49       210

    accuracy                           0.34      1470
   macro avg       0.33      0.34      0.30      1470
weighted avg       0.33      0.34      0.30      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3727891156462585
Confusion Matrix of Decision Tree is:
 [[ 60   4   2 121   0  12  11]
 [  2  77  20  97   0   1  13]
 [  4   3  96  70   0   2  35]
 [  1   6   9 169   0   9  16]
 [  7   8   1 162   0  11  21]
 [ 12   8   7 114   0  24  45]
 [  0   2   3  74   0   9 122]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.29      0.41       210
           2       0.71      0.37      0.48       210
           3       0.70      0.46      0.55       210
           4       0.21      0.80      0.33       210
           5       0.00      0.00      0.00       210
           6       0.35      0.11      0.17       210
           7       0.46      0.58      0.52       210

    accuracy                           0.37      1470
   macro avg       0.45      0.37      0.35      1470
weighted avg       0.45      0.37      0.35      1470

Decision Tree with 5 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4489795918367347
Confusion Matrix of Decision Tree is:
 [[ 60   5   0  61  64  11   9]
 [  1  87  10  57  47   2   6]
 [  2   6 126  58  13   4   1]
 [  7  12   1 123  52   3  12]
 [ 14   3   1  42 135   4  11]
 [ 16  12   3  94  24  20  41]
 [  5   3   1  61  27   4 109]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.57      0.29      0.38       210
           2       0.68      0.41      0.51       210
           3       0.89      0.60      0.72       210
           4       0.25      0.59      0.35       210
           5       0.37      0.64      0.47       210
           6       0.42      0.10      0.16       210
           7       0.58      0.52      0.55       210

    accuracy                           0.45      1470
   macro avg       0.54      0.45      0.45      1470
weighted avg       0.54      0.45      0.45      1470

Decision Tree with 6 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.46258503401360546
Confusion Matrix of Decision Tree is:
 [[ 99  12   7  20  30  31  11]
 [ 12 109  25  18  19  13  14]
 [  4  11 132  10  34  17   2]
 [ 18  21  13  71  29  37  21]
 [ 27  20   7  24  94  19  19]
 [ 21  23   9  26  14  62  55]
 [ 12   8   4  13  19  41 113]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.51      0.47      0.49       210
           2       0.53      0.52      0.53       210
           3       0.67      0.63      0.65       210
           4       0.39      0.34      0.36       210
           5       0.39      0.45      0.42       210
           6       0.28      0.30      0.29       210
           7       0.48      0.54      0.51       210

    accuracy                           0.46      1470
   macro avg       0.47      0.46      0.46      1470
weighted avg       0.47      0.46      0.46      1470

Decision Tree with 14 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.3795918367346939
Confusion Matrix of Random Forest is:
 [[ 16   2  39   3  86   0  64]
 [  0  27  98   2  56   0  27]
 [  0   4 178   4  14   0  10]
 [  0   4  34  13  48   0 111]
 [  1   6   9   0 129   0  65]
 [  0   5  26   6  37   0 136]
 [  0   2   2   2   9   0 195]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.94      0.08      0.14       210
           2       0.54      0.13      0.21       210
           3       0.46      0.85      0.60       210
           4       0.43      0.06      0.11       210
           5       0.34      0.61      0.44       210
           6       0.00      0.00      0.00       210
           7       0.32      0.93      0.48       210

    accuracy                           0.38      1470
   macro avg       0.43      0.38      0.28      1470
weighted avg       0.43      0.38      0.28      1470

Random Forest with 2 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.4714285714285714
Confusion Matrix of Random Forest is:
 [[ 56   4  13   4  90   0  43]
 [  0  99  33   3  60   0  15]
 [  1  11 173   1  15   1   8]
 [  3  18  22  29  67   0  71]
 [  3   9   3   1 149   0  45]
 [  6   8  21  15  46   4 110]
 [  0   4   0   9  14   0 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.27      0.40       210
           2       0.65      0.47      0.55       210
           3       0.65      0.82      0.73       210
           4       0.47      0.14      0.21       210
           5       0.34      0.71      0.46       210
           6       0.80      0.02      0.04       210
           7       0.39      0.87      0.53       210

    accuracy                           0.47      1470
   macro avg       0.59      0.47      0.42      1470
weighted avg       0.59      0.47      0.42      1470

Random Forest with 3 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6551020408163265
Confusion Matrix of Random Forest is:
 [[116   1   0   7  44  28  14]
 [  0 148   8   8  32  10   4]
 [  2   5 175  13   5  10   0]
 [  6   5   8 129  15  29  18]
 [ 16  10   1  12 138   5  28]
 [  9   8   7  21   8  80  77]
 [  0   4   0   7   5  17 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.55      0.65       210
           2       0.82      0.70      0.76       210
           3       0.88      0.83      0.86       210
           4       0.65      0.61      0.63       210
           5       0.56      0.66      0.60       210
           6       0.45      0.38      0.41       210
           7       0.56      0.84      0.67       210

    accuracy                           0.66      1470
   macro avg       0.67      0.66      0.65      1470
weighted avg       0.67      0.66      0.65      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6714285714285714
Confusion Matrix of Random Forest is:
 [[121   0   0  10  41  28  10]
 [  0 159   8   6  24  10   3]
 [  1   5 180  11   5   8   0]
 [  5   7   6 135  16  25  16]
 [ 22  13   0  15 131   6  23]
 [  6  13   6  24   6  89  66]
 [  1   4   0   3   7  23 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.58      0.66       210
           2       0.79      0.76      0.77       210
           3       0.90      0.86      0.88       210
           4       0.66      0.64      0.65       210
           5       0.57      0.62      0.60       210
           6       0.47      0.42      0.45       210
           7       0.59      0.82      0.69       210

    accuracy                           0.67      1470
   macro avg       0.68      0.67      0.67      1470
weighted avg       0.68      0.67      0.67      1470

Random Forest with 19 max_dept

In [16]:
# Hinglish GPT vectorized data
# Purpose of this cell: load the fine-tuned Hinglish GPT sentence vectors and
# evaluate every classifier family on them through ml_training (defined in an
# earlier cell), which prints the per-fold and mean K-Fold accuracy.
# os.path.join is used instead of hand-written "//" separators so the path is
# assembled correctly on every operating system.
x_df = pd.read_csv(os.path.join(
    pwd, "Datasets", "Kabita", "FineTunedTransformers",
    "gpt_hinglish_finetuned_vectorized_kabita_dataset.csv",
))

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: one cross-validated run per neighbourhood size
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model, x_df, labels, "KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one cross-validated run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model, x_df, labels, "SVM")

# Decision Tree Classifier: sweep max_depth from 1 to 20 with a fixed seed
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model, x_df, labels, "Decision Tree")

# Random Forest: sweep max_depth from 1 to 20 with a fixed seed
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model, x_df, labels, "Random Forest")

# Multinomial Naive Bayes on MinMax-scaled features.
# The scaler used to be fitted on the complete dataset before cross-validation,
# which leaks the min/max of every held-out fold into training. Wrapping the
# scaler and the classifier in a pipeline makes cross_val_score re-fit the
# scaler on the training part of each fold only.
# Imported here because the notebook's import cell does not provide it;
# NOTE(review): move this line to the top import cell when that cell is edited.
from sklearn.pipeline import make_pipeline

# scaling using MinMax scaler (fitted per fold inside the pipeline)
mms_scale = MinMaxScaler(feature_range=(0,10))
np.set_printoptions(precision=3)
tv_mnb_model = MultinomialNB()
# NOTE(review): values of a held-out fold that lie outside the training range
# are mapped slightly outside [0, 10]; this is presumed harmless at prediction
# time - confirm against the installed scikit-learn version.
tv_mnb_pipeline = make_pipeline(mms_scale, tv_mnb_model)
ml_training(tv_mnb_pipeline, x_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7816326530612245
Confusion Matrix of Logistic Regression is:
 [[162   1   0   1  36   7   3]
 [  2 168   6   9  20   4   1]
 [  0   2 195   5   2   6   0]
 [  2   7   2 164  12  21   2]
 [ 22   7   3  15 151   5   7]
 [  4   9   5  16   6 138  32]
 [  0   2   0   0   6  31 171]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.84      0.77      0.81       210
           2       0.86      0.80      0.83       210
           3       0.92      0.93      0.93       210
           4       0.78      0.78      0.78       210
           5       0.65      0.72      0.68       210
           6       0.65      0.66      0.65       210
           7       0.79      0.81      0.80       210

    accuracy                           0.78      1470
   macro avg       0.79      0.78      0.78      1470
weighted avg       0.79      0.78      0.78      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.49863945578231295
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 80   0  11   3  62  25  29]
 [  0 113  10   9  47  25   6]
 [  8  11 156   5   7  22   1]
 [  4  10  15  68  46  27  40]
 [ 18  10   1   8 126   8  39]
 [  5   9  13  25  32  44  82]
 [  0   4   0  20  23  17 146]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.70      0.38      0.49       210
           2       0.72      0.54      0.62       210
           3       0.76      0.74      0.75       210
           4       0.49      0.32      0.39       210
           5       0.37      0.60      0.46       210
           6       0.26      0.21      0.23       210
           7       0.43      0.70      0.53       210

    accuracy                           0.50      1470
   macro avg       0.53      0.50      0.50      1470
weighted avg       0.53      0.50      0.50      1470

Worki

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3272108843537415
Confusion Matrix of Decision Tree is:
 [[  0   0  21   0 144   0  45]
 [  0   0  20   0 159   0  31]
 [  0   0 156   0  33   0  21]
 [  0   0  14   0 125   0  71]
 [  0   0   8   0 162   0  40]
 [  0   0  16   0  71   0 123]
 [  0   0   2   0  45   0 163]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.66      0.74      0.70       210
           4       0.00      0.00      0.00       210
           5       0.22      0.77      0.34       210
           6       0.00      0.00      0.00       210
           7       0.33      0.78      0.46       210

    accuracy                           0.33      1470
   macro avg       0.17      0.33      0.21      1470
weighted avg       0.17      0.33      0.21      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.38299319727891157
Confusion Matrix of Decision Tree is:
 [[ 14   6   5  26 140   0  19]
 [  0  70  16  13  93   0  18]
 [  2  15 152  18  20   0   3]
 [  1   9  13  38 116   0  33]
 [  1   8   7  24 154   0  16]
 [  1   4  15  33  67   0  90]
 [  1   2   1  28  43   0 135]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.07      0.12       210
           2       0.61      0.33      0.43       210
           3       0.73      0.72      0.73       210
           4       0.21      0.18      0.19       210
           5       0.24      0.73      0.37       210
           6       0.00      0.00      0.00       210
           7       0.43      0.64      0.52       210

    accuracy                           0.38      1470
   macro avg       0.42      0.38      0.34      1470
weighted avg       0.42      0.38      0.34      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40476190476190477
Confusion Matrix of Decision Tree is:
 [[ 83   1   7   4  72  40   3]
 [ 11  62  14   3  84  32   4]
 [  3   4 158  10  18  16   1]
 [  9   4  12  18 107  44  16]
 [ 51   8   4   8 104  29   6]
 [  3   1   9   7  64  82  44]
 [  6   0   2   4  38  72  88]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.40      0.44       210
           2       0.78      0.30      0.43       210
           3       0.77      0.75      0.76       210
           4       0.33      0.09      0.14       210
           5       0.21      0.50      0.30       210
           6       0.26      0.39      0.31       210
           7       0.54      0.42      0.47       210

    accuracy                           0.40      1470
   macro avg       0.48      0.40      0.41      1470
weighted avg       0.48      0.40      0.41      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5272108843537415
Confusion Matrix of Decision Tree is:
 [[106  14   5  17  38  21   9]
 [ 11 118   9  19  28  20   5]
 [  7   6 169  13   3  11   1]
 [ 14  24   6  83  38  34  11]
 [ 20  24   7  24 110  17   8]
 [ 19  23  12  32  19  62  43]
 [ 11  12   5  12  10  33 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.56      0.50      0.53       210
           2       0.53      0.56      0.55       210
           3       0.79      0.80      0.80       210
           4       0.41      0.40      0.40       210
           5       0.45      0.52      0.48       210
           6       0.31      0.30      0.30       210
           7       0.62      0.60      0.61       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5129251700680272
Confusion Matrix of Decision Tree is:
 [[105  12   4  20  33  25  11]
 [ 11 123   6  22  25  17   6]
 [  6   7 167  15   7   6   2]
 [ 11  24   5  82  30  38  20]
 [ 34  34   6  25  84  14  13]
 [ 15  24  10  34  19  62  46]
 [  7   7   3  13  11  38 131]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.56      0.50      0.53       210
           2       0.53      0.59      0.56       210
           3       0.83      0.80      0.81       210
           4       0.39      0.39      0.39       210
           5       0.40      0.40      0.40       210
           6       0.31      0.30      0.30       210
           7       0.57      0.62      0.60       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6523809523809524
Confusion Matrix of Random Forest is:
 [[109   0   0   6  51  30  14]
 [  0 152   5  11  28  10   4]
 [  4  10 174   5   5  11   1]
 [  1   7   4 125  18  36  19]
 [ 12  12   1  11 136  12  26]
 [  3   7   5  18  15  73  89]
 [  0   2   0   1   5  12 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.84      0.52      0.64       210
           2       0.80      0.72      0.76       210
           3       0.92      0.83      0.87       210
           4       0.71      0.60      0.65       210
           5       0.53      0.65      0.58       210
           6       0.40      0.35      0.37       210
           7       0.55      0.90      0.69       210

    accuracy                           0.65      1470
   macro avg       0.68      0.65      0.65      1470
weighted avg       0.68      0.65      0.65      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6863945578231293
Confusion Matrix of Random Forest is:
 [[122   0   0  11  45  22  10]
 [  0 163   5  10  18   7   7]
 [  2   7 174  12   2  12   1]
 [  2   8   6 131  18  33  12]
 [ 14  11   2  18 139   9  17]
 [  4   7   4  19  10  90  76]
 [  0   1   0   0   3  16 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.85      0.58      0.69       210
           2       0.83      0.78      0.80       210
           3       0.91      0.83      0.87       210
           4       0.65      0.62      0.64       210
           5       0.59      0.66      0.62       210
           6       0.48      0.43      0.45       210
           7       0.61      0.90      0.73       210

    accuracy                           0.69      1470
   macro avg       0.70      0.69      0.69      1470
weighted avg       0.70      0.69      0.69      1470

Random Forest with 17 max_dept

In [17]:
# XLM vectorized data
# Purpose of this cell: load the fine-tuned XLM base sentence vectors and
# evaluate every classifier family on them through ml_training (defined in an
# earlier cell), which prints the per-fold and mean K-Fold accuracy.
# os.path.join is used instead of hand-written "//" separators so the path is
# assembled correctly on every operating system.
x_df = pd.read_csv(os.path.join(
    pwd, "Datasets", "Kabita", "FineTunedTransformers",
    "xlm_base_finetuned_vectorized_kabita_dataset.csv",
))

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model: one cross-validated run per neighbourhood size
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model, x_df, labels, "KNN Model")

# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model, x_df, labels, "Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model, x_df, labels, "Bernoulli Naive Bayes")

# Support Vector Machine Classifier: one cross-validated run per kernel
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model, x_df, labels, "SVM")

# Decision Tree Classifier: sweep max_depth from 1 to 20 with a fixed seed
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model, x_df, labels, "Decision Tree")

# Random Forest: sweep max_depth from 1 to 20 with a fixed seed
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model, x_df, labels, "Random Forest")

# Multinomial Naive Bayes on MinMax-scaled features.
# The scaler used to be fitted on the complete dataset before cross-validation,
# which leaks the min/max of every held-out fold into training. Wrapping the
# scaler and the classifier in a pipeline makes cross_val_score re-fit the
# scaler on the training part of each fold only.
# Imported here because the notebook's import cell does not provide it;
# NOTE(review): move this line to the top import cell when that cell is edited.
from sklearn.pipeline import make_pipeline

# scaling using MinMax scaler (fitted per fold inside the pipeline)
mms_scale = MinMaxScaler(feature_range=(0,10))
np.set_printoptions(precision=3)
tv_mnb_model = MultinomialNB()
# NOTE(review): values of a held-out fold that lie outside the training range
# are mapped slightly outside [0, 10]; this is presumed harmless at prediction
# time - confirm against the installed scikit-learn version.
tv_mnb_pipeline = make_pipeline(mms_scale, tv_mnb_model)
ml_training(tv_mnb_pipeline, x_df, labels, "Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.5129251700680272
Confusion Matrix of Logistic Regression is:
 [[105   9   2  14  43  17  20]
 [  5 123  14  13  33  14   8]
 [  6  11 168  10  10   5   0]
 [ 18  12   9  92  26  33  20]
 [ 31  25   6  35  80  20  13]
 [ 17  17   7  23  16  71  59]
 [ 19   3   3  21  12  37 115]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.52      0.50      0.51       210
           2       0.61      0.59      0.60       210
           3       0.80      0.80      0.80       210
           4       0.44      0.44      0.44       210
           5       0.36      0.38      0.37       210
           6       0.36      0.34      0.35       210
           7       0.49      0.55      0.52       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.3979591836734694
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 52  32   1  50  41   7  27]
 [ 16 119  10  27  20   3  15]
 [  7  40 140  18   2   1   2]
 [ 11  27   5  75  40  12  40]
 [ 12  40   3  44  72   3  36]
 [ 14  32   4  50  23  24  63]
 [  5   5   0  48  29  20 103]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.44      0.25      0.32       210
           2       0.40      0.57      0.47       210
           3       0.86      0.67      0.75       210
           4       0.24      0.36      0.29       210
           5       0.32      0.34      0.33       210
           6       0.34      0.11      0.17       210
           7       0.36      0.49      0.42       210

    accuracy                           0.40      1470
   macro avg       0.42      0.40      0.39      1470
weighted avg       0.42      0.40      0.39      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.24693877551020407
Confusion Matrix of Decision Tree is:
 [[  7   1   2   0 200   0   0]
 [  5  30   3   0 172   0   0]
 [  0   0 122   0  88   0   0]
 [  1   1   4   0 204   0   0]
 [  2   3   1   0 204   0   0]
 [  2   4   3   0 201   0   0]
 [  0   4   0   0 206   0   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.41      0.03      0.06       210
           2       0.70      0.14      0.24       210
           3       0.90      0.58      0.71       210
           4       0.00      0.00      0.00       210
           5       0.16      0.97      0.27       210
           6       0.00      0.00      0.00       210
           7       0.00      0.00      0.00       210

    accuracy                           0.25      1470
   macro avg       0.31      0.25      0.18      1470
weighted avg       0.31      0.25      0.18      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.27006802721088435
Confusion Matrix of Decision Tree is:
 [[ 31   2   0   0   0   2 175]
 [  4  34   1   0   0   2 169]
 [  3   0 120   0   0   2  85]
 [  0   1   3   0   0   1 205]
 [  1   4   0   0   0   1 204]
 [  2   3   1   0   0   2 202]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.76      0.15      0.25       210
           2       0.77      0.16      0.27       210
           3       0.96      0.57      0.72       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.20      0.01      0.02       210
           7       0.17      1.00      0.29       210

    accuracy                           0.27      1470
   macro avg       0.41      0.27      0.22      1470
weighted avg       0.41      0.27      0.22      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.2816326530612245
Confusion Matrix of Decision Tree is:
 [[ 30   1  12   0   0   3 164]
 [  3  33  20   0   0   2 152]
 [  0   0 138   0   0   4  68]
 [  0   0  11   3   0   1 195]
 [  2   1   6   0   0   3 198]
 [  4   2  13   0   0   3 188]
 [  1   0   2   0   0   0 207]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.75      0.14      0.24       210
           2       0.89      0.16      0.27       210
           3       0.68      0.66      0.67       210
           4       1.00      0.01      0.03       210
           5       0.00      0.00      0.00       210
           6       0.19      0.01      0.03       210
           7       0.18      0.99      0.30       210

    accuracy                           0.28      1470
   macro avg       0.53      0.28      0.22      1470
weighted avg       0.53      0.28      0.22      1470

Decision Tree with 5 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3040816326530612
Confusion Matrix of Decision Tree is:
 [[ 31  11   2 111   0   1  54]
 [  1  47   8 113   0   1  40]
 [  0  11 131  56   0   0  12]
 [  0   7   4 115   0   0  84]
 [  1   8   1 121   0   0  79]
 [  2  12   4 110   0   4  78]
 [  1   1   1  88   0   0 119]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.86      0.15      0.25       210
           2       0.48      0.22      0.31       210
           3       0.87      0.62      0.73       210
           4       0.16      0.55      0.25       210
           5       0.00      0.00      0.00       210
           6       0.67      0.02      0.04       210
           7       0.26      0.57      0.35       210

    accuracy                           0.30      1470
   macro avg       0.47      0.30      0.27      1470
weighted avg       0.47      0.30      0.27      1470

Decision Tree with 6 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.32108843537414966
Confusion Matrix of Decision Tree is:
 [[ 22  10   3 117   1   3  54]
 [  3  77   3  83   2   2  40]
 [  0  14 135  47   2   0  12]
 [  0  10   2 111   1   3  83]
 [  0  10   1 116   3   1  79]
 [  2  10   4 111   0   5  78]
 [  0   4   1  86   0   0 119]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.81      0.10      0.19       210
           2       0.57      0.37      0.45       210
           3       0.91      0.64      0.75       210
           4       0.17      0.53      0.25       210
           5       0.33      0.01      0.03       210
           6       0.36      0.02      0.04       210
           7       0.26      0.57      0.35       210

    accuracy                           0.32      1470
   macro avg       0.49      0.32      0.29      1470
weighted avg       0.49      0.32      0.29      1470

Decision Tree with 7 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.34625850340136055
Confusion Matrix of Decision Tree is:
 [[ 55  16   8  22  37  30  42]
 [ 11  85  10  17  28  33  26]
 [  7   7 149   9  14  20   4]
 [ 28  13   5  42  31  27  64]
 [ 19  21   0  27  57  28  58]
 [ 29  20   5  33  31  37  55]
 [ 20   8   5  28  26  39  84]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.33      0.26      0.29       210
           2       0.50      0.40      0.45       210
           3       0.82      0.71      0.76       210
           4       0.24      0.20      0.22       210
           5       0.25      0.27      0.26       210
           6       0.17      0.18      0.17       210
           7       0.25      0.40      0.31       210

    accuracy                           0.35      1470
   macro avg       0.37      0.35      0.35      1470
weighted avg       0.37      0.35      0.35      1470

Decision Tree with 15 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.336734693877551
Confusion Matrix of Random Forest is:
 [[ 61  23   4  10   7   2 103]
 [ 30 102   6  10   4   6  52]
 [  9  34 147   5   0   3  12]
 [ 29  32   7  14   3   7 118]
 [ 23  35   4   8   4   4 132]
 [ 20  39   8   9   6   6 122]
 [ 20  14   1   5   3   6 161]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.32      0.29      0.30       210
           2       0.37      0.49      0.42       210
           3       0.83      0.70      0.76       210
           4       0.23      0.07      0.10       210
           5       0.15      0.02      0.03       210
           6       0.18      0.03      0.05       210
           7       0.23      0.77      0.35       210

    accuracy                           0.34      1470
   macro avg       0.33      0.34      0.29      1470
weighted avg       0.33      0.34      0.29      1470

Random Forest with 3 max_depth


Accuracy of Random Forest after Standard Scaling is: 0.43945578231292515
Confusion Matrix of Random Forest is:
 [[ 67   7   1  44  47  17  27]
 [  5 107   2  39  30  14  13]
 [  1  22 156  18   5   7   1]
 [  5  10   4  79  45  28  39]
 [  8  18   2  41  90  15  36]
 [  3  23   0  43  31  40  70]
 [  8   2   0  34  33  26 107]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.32      0.44       210
           2       0.57      0.51      0.54       210
           3       0.95      0.74      0.83       210
           4       0.27      0.38      0.31       210
           5       0.32      0.43      0.37       210
           6       0.27      0.19      0.22       210
           7       0.37      0.51      0.43       210

    accuracy                           0.44      1470
   macro avg       0.49      0.44      0.45      1470
weighted avg       0.49      0.44      0.45      1470

Random Forest with 11 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.4530612244897959
Confusion Matrix of Random Forest is:
 [[ 79   8   0  31  48  15  29]
 [  7 114   3  21  37  20   8]
 [  5  20 158  12   4   8   3]
 [ 15  13   5  71  35  38  33]
 [ 17  17   4  31  91  15  35]
 [ 10  23   1  34  28  55  59]
 [  4   3   0  31  37  37  98]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.38      0.46       210
           2       0.58      0.54      0.56       210
           3       0.92      0.75      0.83       210
           4       0.31      0.34      0.32       210
           5       0.33      0.43      0.37       210
           6       0.29      0.26      0.28       210
           7       0.37      0.47      0.41       210

    accuracy                           0.45      1470
   macro avg       0.48      0.45      0.46      1470
weighted avg       0.48      0.45      0.46      1470

Random Forest with 19 max_dept