In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Nisha//Input//Nisha_dataset_labels.csv")

In [2]:
# Function of Train-test split, MinMax Scaling
def minmax_scaling(x_data, y_data):
    x_train,x_test,y_train,y_test = train_test_split(x_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    # MinMax scaling of train data
    minmax_model = MinMaxScaler(feature_range=(0,5))
    np.set_printoptions(precision=3)
    scaled_data_train = minmax_model.fit_transform(x_train)
    # MinMax scaling of test data
    scaled_data_test = minmax_model.fit_transform(x_test)
    return scaled_data_train, scaled_data_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after Standard Scaling is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.708843537414966
Confusion Matrix of Logistic Regression is:
 [[163   3   0   3  25  15   1]
 [  5 152   9  12  16  16   0]
 [  0  11 172  18   3   6   0]
 [  6  12  16 128  27  20   1]
 [ 21  17   3  19 138   7   5]
 [  7  19   7  21   2 119  35]
 [  4   1   0   2   1  32 170]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.79      0.78      0.78       210
           2       0.71      0.72      0.72       210
           3       0.83      0.82      0.82       210
           4       0.63      0.61      0.62       210
           5       0.65      0.66      0.65       210
           6       0.55      0.57      0.56       210
           7       0.80      0.81      0.81       210

    accuracy                           0.71      1470
   macro avg       0.71      0.71      0.71      1470
weighted avg       0.71      0.71      0.71      1470

KNN with 3 Ne

Accuracy of SVM after Standard Scaling is: 0.6904761904761905
Confusion Matrix of SVM is:
 [[166   2   0   2  25  15   0]
 [  6 150  16  12   8  18   0]
 [  0   7 173  20   2   8   0]
 [  6  15  19 127  21  20   2]
 [ 36  23   3  19 120   4   5]
 [ 10  16  14  24   4 116  26]
 [  7   2   1   2   1  34 163]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.72      0.79      0.75       210
           2       0.70      0.71      0.71       210
           3       0.77      0.82      0.79       210
           4       0.62      0.60      0.61       210
           5       0.66      0.57      0.61       210
           6       0.54      0.55      0.55       210
           7       0.83      0.78      0.80       210

    accuracy                           0.69      1470
   macro avg       0.69      0.69      0.69      1470
weighted avg       0.69      0.69      0.69      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40748299319727893
Confusion Matrix of Decision Tree is:
 [[ 98  44   0   0  68   0   0]
 [  1 204   3   0   1   0   1]
 [  0 127  83   0   0   0   0]
 [  0 201   2   0   6   0   1]
 [ 17  76   1   0 113   0   3]
 [  0 197   0   0   3   0  10]
 [  0 108   0   0   0   1 101]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.84      0.47      0.60       210
           2       0.21      0.97      0.35       210
           3       0.93      0.40      0.56       210
           4       0.00      0.00      0.00       210
           5       0.59      0.54      0.56       210
           6       0.00      0.00      0.00       210
           7       0.87      0.48      0.62       210

    accuracy                           0.41      1470
   macro avg       0.49      0.41      0.38      1470
weighted avg       0.49      0.41      0.38      1470

Decision Tree with 6 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.47006802721088436
Confusion Matrix of Decision Tree is:
 [[123   1   0   0  43  43   0]
 [  1  74  29   1   1 104   0]
 [  0   0 118   0   0  92   0]
 [  3   1  15   0   3 187   1]
 [ 45  19   9   3  82  49   3]
 [  1   0   2   0   4 195   8]
 [  0   1   0   0   1 109  99]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.71      0.59      0.64       210
           2       0.77      0.35      0.48       210
           3       0.68      0.56      0.62       210
           4       0.00      0.00      0.00       210
           5       0.61      0.39      0.48       210
           6       0.25      0.93      0.39       210
           7       0.89      0.47      0.62       210

    accuracy                           0.47      1470
   macro avg       0.56      0.47      0.46      1470
weighted avg       0.56      0.47      0.46      1470

Decision Tree with 9 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5571428571428572
Confusion Matrix of Decision Tree is:
 [[156   0   0   0  22  32   0]
 [  2 104  13   2   6  82   1]
 [  0   9 140   0   0  61   0]
 [  5  16  11  44   3 131   0]
 [ 55  22   5   7  87  31   3]
 [  2   5   2   1   3 189   8]
 [  0   1   0   0   2 108  99]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.71      0.74      0.73       210
           2       0.66      0.50      0.57       210
           3       0.82      0.67      0.73       210
           4       0.81      0.21      0.33       210
           5       0.71      0.41      0.52       210
           6       0.30      0.90      0.45       210
           7       0.89      0.47      0.62       210

    accuracy                           0.56      1470
   macro avg       0.70      0.56      0.56      1470
weighted avg       0.70      0.56      0.56      1470

Decision Tree with 18 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6299319727891156
Confusion Matrix of Random Forest is:
 [[150   1   1  11  18  28   1]
 [  1 118  20  24   8  38   1]
 [  0   0 154   6   0  50   0]
 [  3  13  11 130  11  41   1]
 [ 56  28   4  21  89   7   5]
 [  4  11   2  39   2 126  26]
 [  0   5   0   9   1  36 159]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.70      0.71      0.71       210
           2       0.67      0.56      0.61       210
           3       0.80      0.73      0.77       210
           4       0.54      0.62      0.58       210
           5       0.69      0.42      0.53       210
           6       0.39      0.60      0.47       210
           7       0.82      0.76      0.79       210

    accuracy                           0.63      1470
   macro avg       0.66      0.63      0.64      1470
weighted avg       0.66      0.63      0.64      1470

Random Forest with 6 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6632653061224489
Confusion Matrix of Random Forest is:
 [[157   1   1   3  21  27   0]
 [  1 137  13  16   5  37   1]
 [  0  10 146   5   0  49   0]
 [  4  15  11 122   8  48   2]
 [ 54  25   2  20  94   9   6]
 [  3  10   1  19   1 152  24]
 [  0   0   0   1   2  40 167]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.72      0.75      0.73       210
           2       0.69      0.65      0.67       210
           3       0.84      0.70      0.76       210
           4       0.66      0.58      0.62       210
           5       0.72      0.45      0.55       210
           6       0.42      0.72      0.53       210
           7       0.83      0.80      0.81       210

    accuracy                           0.66      1470
   macro avg       0.70      0.66      0.67      1470
weighted avg       0.70      0.66      0.67      1470

Random Forest with 14 max_dept

In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6877551020408164
Confusion Matrix of Logistic Regression is:
 [[159   2   0   5  27  17   0]
 [  5 143  11  15  18  18   0]
 [  0   6 179  13   6   6   0]
 [  5   7  18 123  32  22   3]
 [ 43  17   3  15 124   5   3]
 [  8  17   4  33   4 111  33]
 [  3   1   1   1   1  31 172]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.71      0.76      0.73       210
           2       0.74      0.68      0.71       210
           3       0.83      0.85      0.84       210
           4       0.60      0.59      0.59       210
           5       0.58      0.59      0.59       210
           6       0.53      0.53      0.53       210
           7       0.82      0.82      0.82       210

    accuracy                           0.69      1470
   macro avg       0.69      0.69      0.69      1470
weighted avg       0.69      0.69      0.69      1470

KNN with 3 N

Confusion Matrix of Bernoulli Naive Bayes is:
 [[163   4  12   6  22   2   1]
 [  1 150  31  19   5   4   0]
 [  0   8 197   5   0   0   0]
 [  6   9  36 131  18  10   0]
 [ 34  18   8  22 121   4   3]
 [  6  28  47  27   2  75  25]
 [  1   1   5   0   2  22 179]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.77      0.78      0.77       210
           2       0.69      0.71      0.70       210
           3       0.59      0.94      0.72       210
           4       0.62      0.62      0.62       210
           5       0.71      0.58      0.64       210
           6       0.64      0.36      0.46       210
           7       0.86      0.85      0.86       210

    accuracy                           0.69      1470
   macro avg       0.70      0.69      0.68      1470
weighted avg       0.70      0.69      0.68      1470

Working on SVM Kernal: linear
Accuracy of SVM after Standard Scaling is: 0.69387755102

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[148   1   0   0  18  43   0]
 [  1  50  33   0   1 124   1]
 [  0   0 123   0   0  87   0]
 [  5   3  17   0   1 183   1]
 [ 73  18  14   1  56  46   2]
 [  2   2   2   0   1 195   8]
 [  0   1   3   0   0 131  75]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.65      0.70      0.67       210
           2       0.67      0.24      0.35       210
           3       0.64      0.59      0.61       210
           4       0.00      0.00      0.00       210
           5       0.73      0.27      0.39       210
           6       0.24      0.93      0.38       210
           7       0.86      0.36      0.51       210

    accuracy                           0.44      1470
   macro avg       0.54      0.44      0.42      1470
weighted avg       0.54      0.44      0.42      1470

Decision Tree with 6 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.4557823129251701

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[145   1   0   0  21  43   0]
 [  1  83  27   2   4  92   1]
 [  0   0 123   0   0  87   0]
 [  4   6  17  22   2 158   1]
 [ 58  14  12   3  81  41   1]
 [  3   3   2   0   0 191  11]
 [  0   1   3   0   0 105 101]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.69      0.69      0.69       210
           2       0.77      0.40      0.52       210
           3       0.67      0.59      0.62       210
           4       0.81      0.10      0.19       210
           5       0.75      0.39      0.51       210
           6       0.27      0.91      0.41       210
           7       0.88      0.48      0.62       210

    accuracy                           0.51      1470
   macro avg       0.69      0.51      0.51      1470
weighted avg       0.69      0.51      0.51      1470

Decision Tree with 9 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.5142857142857142

Accuracy of Decision Tree after Standard Scaling is: 0.5517006802721088
Confusion Matrix of Decision Tree is:
 [[151   1   0   0  24  34   0]
 [  6  94  30   3   6  70   1]
 [  0   0 147   0   2  61   0]
 [  5   3  23  47   7 124   1]
 [ 57  18  12   8  85  27   3]
 [  3   0   8   3   1 182  13]
 [  0   1   5   0   0  99 105]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.68      0.72      0.70       210
           2       0.80      0.45      0.57       210
           3       0.65      0.70      0.68       210
           4       0.77      0.22      0.35       210
           5       0.68      0.40      0.51       210
           6       0.30      0.87      0.45       210
           7       0.85      0.50      0.63       210

    accuracy                           0.55      1470
   macro avg       0.68      0.55      0.56      1470
weighted avg       0.68      0.55      0.56      1470

Decision Tree with 18 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5863945578231292
Confusion Matrix of Random Forest is:
 [[155   2   4   4  19  25   1]
 [  1 104  37  23   3  41   1]
 [  0   0 147   5   0  58   0]
 [  4   9  18 106  13  58   2]
 [ 72  26  21  18  59   8   6]
 [  2   9   7  30   3 135  24]
 [  0   4   3   1   0  46 156]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.66      0.74      0.70       210
           2       0.68      0.50      0.57       210
           3       0.62      0.70      0.66       210
           4       0.57      0.50      0.53       210
           5       0.61      0.28      0.38       210
           6       0.36      0.64      0.46       210
           7       0.82      0.74      0.78       210

    accuracy                           0.59      1470
   macro avg       0.62      0.59      0.58      1470
weighted avg       0.62      0.59      0.58      1470

Random Forest with 6 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6183673469387755
Confusion Matrix of Random Forest is:
 [[157   2   0   3  21  26   1]
 [  1 106  38  24   2  38   1]
 [  0   1 153  12   0  44   0]
 [  3  13  19 115   5  54   1]
 [ 58  28  11  18  83   7   5]
 [  3   8   7  31   0 137  24]
 [  0   3   3   1   0  45 158]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.75      0.73       210
           2       0.66      0.50      0.57       210
           3       0.66      0.73      0.69       210
           4       0.56      0.55      0.56       210
           5       0.75      0.40      0.52       210
           6       0.39      0.65      0.49       210
           7       0.83      0.75      0.79       210

    accuracy                           0.62      1470
   macro avg       0.65      0.62      0.62      1470
weighted avg       0.65      0.62      0.62      1470

Random Forest with 14 max_dept

In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7020408163265306
Confusion Matrix of Logistic Regression is:
 [[157   3   0   5  28  17   0]
 [  5 153   8  12  15  17   0]
 [  0  11 173  19   2   5   0]
 [  5  12  17 127  28  20   1]
 [ 24  17   3  17 137   7   5]
 [  5  21   5  27   2 115  35]
 [  5   1   1   2   1  30 170]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.78      0.75      0.76       210
           2       0.70      0.73      0.71       210
           3       0.84      0.82      0.83       210
           4       0.61      0.60      0.61       210
           5       0.64      0.65      0.65       210
           6       0.55      0.55      0.55       210
           7       0.81      0.81      0.81       210

    accuracy                           0.70      1470
   macro avg       0.70      0.70      0.70      1470
weighted avg       0.70      0.70      0.70      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.691156462585034
Confusion Matrix of SVM is:
 [[159   3   0   3  27  18   0]
 [  6 153  12  12   7  20   0]
 [  0   9 178  14   1   8   0]
 [  9  19  21 118  19  23   1]
 [ 30  22   5  18 126   6   3]
 [ 13  21  10  15   6 117  28]
 [  8   2   2   2   1  30 165]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.76      0.73       210
           2       0.67      0.73      0.70       210
           3       0.78      0.85      0.81       210
           4       0.65      0.56      0.60       210
           5       0.67      0.60      0.63       210
           6       0.53      0.56      0.54       210
           7       0.84      0.79      0.81       210

    accuracy                           0.69      1470
   macro avg       0.69      0.69      0.69      1470
weighted avg       0.69      0.69      0.69      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard Sc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[128   1   0   0  37  44   0]
 [  1  24   3   0   1 180   1]
 [  0  15  93   0   0 102   0]
 [  3   6   2   0   3 195   1]
 [ 50   4   1   0  79  73   3]
 [  1   1   0   0   2 196  10]
 [  0   0   0   0   0 109 101]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.61      0.65       210
           2       0.47      0.11      0.18       210
           3       0.94      0.44      0.60       210
           4       0.00      0.00      0.00       210
           5       0.65      0.38      0.48       210
           6       0.22      0.93      0.35       210
           7       0.87      0.48      0.62       210

    accuracy                           0.42      1470
   macro avg       0.55      0.42      0.41      1470
weighted avg       0.55      0.42      0.41      1470

Decision Tree with 6 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.4421768707482993

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[123   1   0   2  40  44   0]
 [  1  75   5   0   1 127   1]
 [  0  12 110   0   0  88   0]
 [  3   6   2   0   3 195   1]
 [ 35  17   1   2  92  61   2]
 [  2   1   0   0   1 197   9]
 [  0   0   0   0   0 110 100]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.75      0.59      0.66       210
           2       0.67      0.36      0.47       210
           3       0.93      0.52      0.67       210
           4       0.00      0.00      0.00       210
           5       0.67      0.44      0.53       210
           6       0.24      0.94      0.38       210
           7       0.88      0.48      0.62       210

    accuracy                           0.47      1470
   macro avg       0.59      0.47      0.48      1470
weighted avg       0.59      0.47      0.48      1470

Decision Tree with 9 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.4904761904761904

Accuracy of Decision Tree after Standard Scaling is: 0.5639455782312925
Confusion Matrix of Decision Tree is:
 [[149   1   0   1  26  33   0]
 [  2 110  11   1   8  77   1]
 [  0  10 139   0   2  59   0]
 [  4  17  10  42   9 127   1]
 [ 46  21   5   4 102  28   4]
 [  1   6   1   2   3 189   8]
 [  0   1   0   0   3 108  98]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.74      0.71      0.72       210
           2       0.66      0.52      0.59       210
           3       0.84      0.66      0.74       210
           4       0.84      0.20      0.32       210
           5       0.67      0.49      0.56       210
           6       0.30      0.90      0.45       210
           7       0.88      0.47      0.61       210

    accuracy                           0.56      1470
   macro avg       0.70      0.56      0.57      1470
weighted avg       0.70      0.56      0.57      1470

Decision Tree with 18 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6340136054421769
Confusion Matrix of Random Forest is:
 [[155   1   2   3  24  24   1]
 [  1 113  30  23   2  40   1]
 [  0   2 150   6   0  52   0]
 [  4  12  13 118  11  50   2]
 [ 58  21   7  18  91   9   6]
 [  3   9   3  28   3 137  27]
 [  0   3   0   0   1  38 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.70      0.74      0.72       210
           2       0.70      0.54      0.61       210
           3       0.73      0.71      0.72       210
           4       0.60      0.56      0.58       210
           5       0.69      0.43      0.53       210
           6       0.39      0.65      0.49       210
           7       0.82      0.80      0.81       210

    accuracy                           0.63      1470
   macro avg       0.66      0.63      0.64      1470
weighted avg       0.66      0.63      0.64      1470

Random Forest with 6 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6564625850340136
Confusion Matrix of Random Forest is:
 [[152   1   1   4  27  24   1]
 [  1 118  26  23   3  38   1]
 [  0   2 159   5   0  44   0]
 [  3  14  13 116  11  51   2]
 [ 43  24   7  21 103   7   5]
 [  2   7   4  18   4 148  27]
 [  0   1   0   0   2  38 169]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.76      0.72      0.74       210
           2       0.71      0.56      0.63       210
           3       0.76      0.76      0.76       210
           4       0.62      0.55      0.58       210
           5       0.69      0.49      0.57       210
           6       0.42      0.70      0.53       210
           7       0.82      0.80      0.81       210

    accuracy                           0.66      1470
   macro avg       0.68      0.66      0.66      1470
weighted avg       0.68      0.66      0.66      1470

Random Forest with 14 max_dept

### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6482993197278911
Confusion Matrix of Logistic Regression is:
 [[ 99  18   3  41  36  13   0]
 [  1 184   1  18   5   0   1]
 [  0  23 171  15   0   1   0]
 [  0  29   2 176   2   1   0]
 [  6  35   1  63 102   1   2]
 [  1  62   1  60   6  55  25]
 [  1  10   0  17   4  12 166]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.92      0.47      0.62       210
           2       0.51      0.88      0.64       210
           3       0.96      0.81      0.88       210
           4       0.45      0.84      0.59       210
           5       0.66      0.49      0.56       210
           6       0.66      0.26      0.38       210
           7       0.86      0.79      0.82       210

    accuracy                           0.65      1470
   macro avg       0.72      0.65      0.64      1470
weighted avg       0.72      0.65      0.64      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.21564625850340136
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 10   1 165   9   7  13   5]
 [  4   9 170   9   5  13   0]
 [  2   4 185   1   2  14   2]
 [  3   8 146  20   9  15   9]
 [  8   4 158  12  11   9   8]
 [  8   7 118  15   8  39  15]
 [  3   2 129   6   2  25  43]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.26      0.05      0.08       210
           2       0.26      0.04      0.07       210
           3       0.17      0.88      0.29       210
           4       0.28      0.10      0.14       210
           5       0.25      0.05      0.09       210
           6       0.30      0.19      0.23       210
           7       0.52      0.20      0.29       210

    accuracy                           0.22      1470
   macro avg       0.29      0.22      0.17      1470
weighted avg       0.29      0.22      0.17      1470

Worki

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.391156462585034
Confusion Matrix of Decision Tree is:
 [[  0   1   3   0 159   0  47]
 [  0  40   7   0 136   0  27]
 [  0   1 158   0  36   0  15]
 [  0   1  23   0 140   0  46]
 [  0   3   3   0 176   0  28]
 [  0   3  14   0  51   0 142]
 [  0   0   0   0   9   0 201]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.82      0.19      0.31       210
           3       0.76      0.75      0.76       210
           4       0.00      0.00      0.00       210
           5       0.25      0.84      0.38       210
           6       0.00      0.00      0.00       210
           7       0.40      0.96      0.56       210

    accuracy                           0.39      1470
   macro avg       0.32      0.39      0.29      1470
weighted avg       0.32      0.39      0.29      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.47278911564625853
Confusion Matrix of Decision Tree is:
 [[132   0   4   0  27  29  18]
 [ 16  40   6   1 120  17  10]
 [  4   1 157   1  32  10   5]
 [ 26   1  19   4 114  19  27]
 [ 45   3   3   0 131   5  23]
 [ 33   1  15   1  18  70  72]
 [  6   0   0   0   3  40 161]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.63      0.56       210
           2       0.87      0.19      0.31       210
           3       0.77      0.75      0.76       210
           4       0.57      0.02      0.04       210
           5       0.29      0.62      0.40       210
           6       0.37      0.33      0.35       210
           7       0.51      0.77      0.61       210

    accuracy                           0.47      1470
   macro avg       0.56      0.47      0.43      1470
weighted avg       0.56      0.47      0.43      1470

Decision Tree with 4 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.48775510204081635
Confusion Matrix of Decision Tree is:
 [[113   4   4  35  26  20   8]
 [  7  92   8  40  39  19   5]
 [  5   5 166  21   9   4   0]
 [  9  24  11  78  43  30  15]
 [ 30  19   3  44  81  24   9]
 [ 17  23  10  46  10  73  31]
 [  7   4   1   7  10  67 114]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.54      0.57       210
           2       0.54      0.44      0.48       210
           3       0.82      0.79      0.80       210
           4       0.29      0.37      0.32       210
           5       0.37      0.39      0.38       210
           6       0.31      0.35      0.33       210
           7       0.63      0.54      0.58       210

    accuracy                           0.49      1470
   macro avg       0.51      0.49      0.50      1470
weighted avg       0.51      0.49      0.50      1470

Decision Tree with 12 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.47278911564625853
Confusion Matrix of Decision Tree is:
 [[120   9   6  16  28  22   9]
 [ 25  87   7  28  38  18   7]
 [  4   7 147  39   4   9   0]
 [ 21  28  15  80  30  26  10]
 [ 34  27   6  47  69  16  11]
 [ 22  21   4  39  19  72  33]
 [  9   1   1   5  11  63 120]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.51      0.57      0.54       210
           2       0.48      0.41      0.45       210
           3       0.79      0.70      0.74       210
           4       0.31      0.38      0.34       210
           5       0.35      0.33      0.34       210
           6       0.32      0.34      0.33       210
           7       0.63      0.57      0.60       210

    accuracy                           0.47      1470
   macro avg       0.49      0.47      0.48      1470
weighted avg       0.49      0.47      0.48      1470

Decision Tree with 20 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.6591836734693878
Confusion Matrix of Random Forest is:
 [[138   1   0   5  35  25   6]
 [  2 138   3  14  34  16   3]
 [  0   7 179  12   3   9   0]
 [  5   9   5  95  52  29  15]
 [ 17  16   1  13 137  11  15]
 [  8   4   2  27   9 104  56]
 [  0   1   0   1   2  28 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.66      0.73       210
           2       0.78      0.66      0.72       210
           3       0.94      0.85      0.89       210
           4       0.57      0.45      0.50       210
           5       0.50      0.65      0.57       210
           6       0.47      0.50      0.48       210
           7       0.65      0.85      0.74       210

    accuracy                           0.66      1470
   macro avg       0.68      0.66      0.66      1470
weighted avg       0.68      0.66      0.66      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6952380952380952
Confusion Matrix of Random Forest is:
 [[145   1   1   6  32  22   3]
 [  3 144   5  20  22  15   1]
 [  0   5 185  14   2   3   1]
 [  6   8   5 121  32  33   5]
 [ 20   9   1  22 136   9  13]
 [ 10   9   1  26   1 117  46]
 [  0   0   0   0   2  34 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.69      0.74       210
           2       0.82      0.69      0.75       210
           3       0.93      0.88      0.91       210
           4       0.58      0.58      0.58       210
           5       0.60      0.65      0.62       210
           6       0.50      0.56      0.53       210
           7       0.72      0.83      0.77       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Random Forest with 16 max_dept

In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_gkb.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.3047619047619048
Confusion Matrix of Logistic Regression is:
 [[ 61   1   0  71  62  15   0]
 [  1  21   0 135  35  18   0]
 [  0   1   1 194   1  13   0]
 [  1   0   0 168  21  19   1]
 [  2   1   0 100 101   6   0]
 [  2   2   0  90  27  88   1]
 [  1   3   0  26  33 139   8]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.90      0.29      0.44       210
           2       0.72      0.10      0.18       210
           3       1.00      0.00      0.01       210
           4       0.21      0.80      0.34       210
           5       0.36      0.48      0.41       210
           6       0.30      0.42      0.35       210
           7       0.80      0.04      0.07       210

    accuracy                           0.30      1470
   macro avg       0.61      0.30      0.26      1470
weighted avg       0.61      0.30      0.26      1470

KNN with 3 N

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Gaussian Naive Bayes is:
               precision    recall  f1-score   support

           1       0.39      0.21      0.27       210
           2       0.00      0.00      0.00       210
           3       0.55      0.03      0.05       210
           4       0.19      0.76      0.31       210
           5       0.00      0.00      0.00       210
           6       0.30      0.03      0.05       210
           7       0.38      0.90      0.53       210

    accuracy                           0.28      1470
   macro avg       0.26      0.28      0.17      1470
weighted avg       0.26      0.28      0.17      1470

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.18979591836734694
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 22   6   5   3 161   7   6]
 [  4   2   2   7 178  16   1]
 [  0   0   7   2 197   3   1]
 [  1   7   3   5 189   3   2]
 [  5   1   3   5 188   2   6]
 [  9   7   6  10 134  23  21]
 [  2   6   1   5 146  18  32]]
Classi

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after Standard Scaling is: 0.26598639455782314
Confusion Matrix of SVM is:
 [[  0  21  83  57   3   0  46]
 [  0   4 119  34   3   0  50]
 [  0   0 184  10   0   0  16]
 [  0   1 141  39   0   0  29]
 [  0   5 146  43   1   0  15]
 [  0  11  73  33   3   0  90]
 [  0   6  28  12   1   0 163]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.08      0.02      0.03       210
           3       0.24      0.88      0.37       210
           4       0.17      0.19      0.18       210
           5       0.09      0.00      0.01       210
           6       0.00      0.00      0.00       210
           7       0.40      0.78      0.53       210

    accuracy                           0.27      1470
   macro avg       0.14      0.27      0.16      1470
weighted avg       0.14      0.27      0.16      1470

Decision Tree with 1 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.22380952380952382
Confusion Matrix of Decision Tree is:
 [[  0   0   3   0   0   0 207]
 [  0   0  20   0   0   0 190]
 [  0   0 119   0   0   0  91]
 [  0   0   6   0   0   0 204]
 [  0   0   9   0   0   0 201]
 [  0   0  10   0   0   0 200]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.71      0.57      0.63       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.16      1.00      0.28       210

    accuracy                           0.22      1470
   macro avg       0.12      0.22      0.13      1470
weighted avg       0.12      0.22      0.13      1470

Decision Tree with 2 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3217687074829932
Confusion Matrix of Decision Tree is:
 [[  0   0   3 159   0   0  48]
 [  0   0  20 139   0   0  51]
 [  0   0 119  69   0   0  22]
 [  0   0   6 167   0   0  37]
 [  0   0   9 182   0   0  19]
 [  0   0  10  80   0   0 120]
 [  0   0   0  23   0   0 187]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.71      0.57      0.63       210
           4       0.20      0.80      0.32       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.39      0.89      0.54       210

    accuracy                           0.32      1470
   macro avg       0.19      0.32      0.21      1470
weighted avg       0.19      0.32      0.21      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3340136054421769
Confusion Matrix of Decision Tree is:
 [[ 57   3   0 116   0   0  34]
 [ 12  18   2 128   0   0  50]
 [  4  55  64  65   0   0  22]
 [  2   4   2 165   0   0  37]
 [  2   8   1 180   0   0  19]
 [ 11   8   2  69   0   0 120]
 [  5   0   0  18   0   0 187]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.61      0.27      0.38       210
           2       0.19      0.09      0.12       210
           3       0.90      0.30      0.46       210
           4       0.22      0.79      0.35       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.40      0.89      0.55       210

    accuracy                           0.33      1470
   macro avg       0.33      0.33      0.26      1470
weighted avg       0.33      0.33      0.26      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3340136054421769
Confusion Matrix of Decision Tree is:
 [[ 51  19   0 100   0   6  34]
 [  8  25   2 121   0   4  50]
 [  4  57  64  63   0   0  22]
 [  2  10   2 159   0   0  37]
 [  2  19   1 169   0   0  19]
 [  6  15   3  62   0   5 119]
 [  1   5   0  13   0   4 187]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.69      0.24      0.36       210
           2       0.17      0.12      0.14       210
           3       0.89      0.30      0.45       210
           4       0.23      0.76      0.35       210
           5       0.00      0.00      0.00       210
           6       0.26      0.02      0.04       210
           7       0.40      0.89      0.55       210

    accuracy                           0.33      1470
   macro avg       0.38      0.33      0.27      1470
weighted avg       0.38      0.33      0.27      1470

Decision Tree with 5 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.33945578231292517
Confusion Matrix of Decision Tree is:
 [[ 55  34   0  80  30   4   7]
 [ 10  66   2  83  42   2   5]
 [  4  69  64  50  14   0   9]
 [  2  22   2  77  87   0  20]
 [  3  18   1  65 112   2   9]
 [  8  66   2  41  26   5  62]
 [  1  68   0  11   6   4 120]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.66      0.26      0.38       210
           2       0.19      0.31      0.24       210
           3       0.90      0.30      0.46       210
           4       0.19      0.37      0.25       210
           5       0.35      0.53      0.43       210
           6       0.29      0.02      0.04       210
           7       0.52      0.57      0.54       210

    accuracy                           0.34      1470
   macro avg       0.44      0.34      0.33      1470
weighted avg       0.44      0.34      0.33      1470

Decision Tree with 6 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.29931972789115646
Confusion Matrix of Decision Tree is:
 [[39 42 20 32 33 34 10]
 [21 67 37 28 20 28  9]
 [ 7 38 99  2 11 49  4]
 [ 6 50 17 64 34 29 10]
 [ 5 41  6 91 34 28  5]
 [12 46 34 22 14 51 31]
 [12 35 21  8 10 38 86]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.19      0.25       210
           2       0.21      0.32      0.25       210
           3       0.42      0.47      0.45       210
           4       0.26      0.30      0.28       210
           5       0.22      0.16      0.19       210
           6       0.20      0.24      0.22       210
           7       0.55      0.41      0.47       210

    accuracy                           0.30      1470
   macro avg       0.32      0.30      0.30      1470
weighted avg       0.32      0.30      0.30      1470

Decision Tree with 14 max_depth
Accuracy of Decision Tree after Standard Scali

Accuracy of Random Forest after Standard Scaling is: 0.2857142857142857
Confusion Matrix of Random Forest is:
 [[ 20   0  67  70   2   0  51]
 [  3   0 136  17   1   0  53]
 [  0   0 191   6   0   0  13]
 [  0   0 150  21   0   0  39]
 [  0   0 158  30   0   0  22]
 [  6   0  57  24   1   0 122]
 [  4   0   7  11   0   0 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.10      0.16       210
           2       0.00      0.00      0.00       210
           3       0.25      0.91      0.39       210
           4       0.12      0.10      0.11       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.39      0.90      0.54       210

    accuracy                           0.29      1470
   macro avg       0.19      0.29      0.17      1470
weighted avg       0.19      0.29      0.17      1470

Random Forest with 2 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.3115646258503401
Confusion Matrix of Random Forest is:
 [[ 47   0  53  59  10   1  40]
 [  7   0 118  29   5   0  51]
 [  1   0 189   4   3   0  13]
 [  2   0 143  22   5   0  38]
 [  5   0 147  30   9   0  19]
 [ 12   0  51  22   8   4 113]
 [  5   0   7   8   3   0 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.22      0.33       210
           2       0.00      0.00      0.00       210
           3       0.27      0.90      0.41       210
           4       0.13      0.10      0.11       210
           5       0.21      0.04      0.07       210
           6       0.80      0.02      0.04       210
           7       0.41      0.89      0.56       210

    accuracy                           0.31      1470
   macro avg       0.34      0.31      0.22      1470
weighted avg       0.34      0.31      0.22      1470

Random Forest with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.36394557823129253
Confusion Matrix of Random Forest is:
 [[122   0  34  29   0   3  22]
 [ 31   0 108  21   0   5  45]
 [  8   0 180   9   0   1  12]
 [ 23   0 113  37   0   1  36]
 [ 36   0 121  35   0   4  14]
 [ 41   0  37  17   0  11 104]
 [ 16   0   4   3   0   2 185]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.44      0.58      0.50       210
           2       0.00      0.00      0.00       210
           3       0.30      0.86      0.45       210
           4       0.25      0.18      0.20       210
           5       0.00      0.00      0.00       210
           6       0.41      0.05      0.09       210
           7       0.44      0.88      0.59       210

    accuracy                           0.36      1470
   macro avg       0.26      0.36      0.26      1470
weighted avg       0.26      0.36      0.26      1470

Random Forest with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.4108843537414966
Confusion Matrix of Random Forest is:
 [[123   5  12  45   1  10  14]
 [ 31  10  69  50   0  22  28]
 [  8   5 169  15   0   5   8]
 [ 22   6  42 100   3   7  30]
 [ 38  10  31 112   2   5  12]
 [ 43   5  18  30   0  28  86]
 [ 15   1   0   7   0  15 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.44      0.59      0.50       210
           2       0.24      0.05      0.08       210
           3       0.50      0.80      0.61       210
           4       0.28      0.48      0.35       210
           5       0.33      0.01      0.02       210
           6       0.30      0.13      0.19       210
           7       0.49      0.82      0.61       210

    accuracy                           0.41      1470
   macro avg       0.37      0.41      0.34      1470
weighted avg       0.37      0.41      0.34      1470

Random Forest with 5 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.42448979591836733
Confusion Matrix of Random Forest is:
 [[126   2   1  57   0  12  12]
 [ 29  19   6 112   0  27  17]
 [  8   5 135  53   0   6   3]
 [ 22   3   5 141   1  19  19]
 [ 36   5   4 145   1   7  12]
 [ 35   3   9  43   1  56  63]
 [ 12   1   0   7   0  44 146]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.47      0.60      0.53       210
           2       0.50      0.09      0.15       210
           3       0.84      0.64      0.73       210
           4       0.25      0.67      0.37       210
           5       0.33      0.00      0.01       210
           6       0.33      0.27      0.29       210
           7       0.54      0.70      0.61       210

    accuracy                           0.42      1470
   macro avg       0.47      0.42      0.38      1470
weighted avg       0.47      0.42      0.38      1470

Random Forest with 8 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.427891156462585
Confusion Matrix of Random Forest is:
 [[100  12   4  51  13  23   7]
 [ 11  59  38  52  17  24   9]
 [  3  18 144  29   1  12   3]
 [  9  29  22  93  20  24  13]
 [ 14  30  13  88  46  10   9]
 [ 20   8  16  42   7  81  36]
 [  4   2   1   9   6  82 106]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.48      0.54       210
           2       0.37      0.28      0.32       210
           3       0.61      0.69      0.64       210
           4       0.26      0.44      0.32       210
           5       0.42      0.22      0.29       210
           6       0.32      0.39      0.35       210
           7       0.58      0.50      0.54       210

    accuracy                           0.43      1470
   macro avg       0.45      0.43      0.43      1470
weighted avg       0.45      0.43      0.43      1470

Random Forest with 16 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6646258503401361
Confusion Matrix of Logistic Regression is:
 [[162   0   1   1  31  15   0]
 [ 13  89  18  25  32  33   0]
 [  2   0 197   4   3   4   0]
 [ 11   2   7 125  39  25   1]
 [ 42   4   3  11 140   7   3]
 [ 21   3  13  16  11 125  21]
 [ 11   0   1   1   5  53 139]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.62      0.77      0.69       210
           2       0.91      0.42      0.58       210
           3       0.82      0.94      0.88       210
           4       0.68      0.60      0.64       210
           5       0.54      0.67      0.59       210
           6       0.48      0.60      0.53       210
           7       0.85      0.66      0.74       210

    accuracy                           0.66      1470
   macro avg       0.70      0.66      0.66      1470
weighted avg       0.70      0.66      0.66      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6761904761904762
Confusion Matrix of SVM is:
 [[164   1   0   2  38   5   0]
 [ 13 140   9  23  11  14   0]
 [  3   4 191   9   2   1   0]
 [ 12  13   8 130  28  18   1]
 [ 44  16   2  14 130   3   1]
 [ 34  13  10  20  11  97  25]
 [ 18   1   0   1   6  42 142]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.57      0.78      0.66       210
           2       0.74      0.67      0.70       210
           3       0.87      0.91      0.89       210
           4       0.65      0.62      0.64       210
           5       0.58      0.62      0.60       210
           6       0.54      0.46      0.50       210
           7       0.84      0.68      0.75       210

    accuracy                           0.68      1470
   macro avg       0.68      0.68      0.68      1470
weighted avg       0.68      0.68      0.68      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.2598639455782313
Confusion Matrix of Decision Tree is:
 [[  0   0 191   0   0   0  19]
 [  0   0 179   0   0   0  31]
 [  0   0 194   0   0   0  16]
 [  0   0 196   0   0   0  14]
 [  0   0 203   0   0   0   7]
 [  0   0  77   0   0   0 133]
 [  0   0  22   0   0   0 188]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.18      0.92      0.31       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.46      0.90      0.61       210

    accuracy                           0.26      1470
   macro avg       0.09      0.26      0.13      1470
weighted avg       0.09      0.26      0.13      1470

Decision Tree with 2 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.38299319727891157
Confusion Matrix of Decision Tree is:
 [[  0   0  15   0 176  10   9]
 [  0   0  13   0 166  22   9]
 [  0   0 138   0  56  15   1]
 [  0   0  22   0 174   9   5]
 [  0   0   5   0 198   3   4]
 [  0   0   5   0  72  60  73]
 [  0   0   0   0  22  21 167]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.70      0.66      0.68       210
           4       0.00      0.00      0.00       210
           5       0.23      0.94      0.37       210
           6       0.43      0.29      0.34       210
           7       0.62      0.80      0.70       210

    accuracy                           0.38      1470
   macro avg       0.28      0.38      0.30      1470
weighted avg       0.28      0.38      0.30      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4020408163265306
Confusion Matrix of Decision Tree is:
 [[169   9   1  14   0  14   3]
 [ 98  73   2  11   0  20   6]
 [ 37  20  98  40   0  15   0]
 [159  15   2  20   0  11   3]
 [183  15   0   5   0   3   4]
 [ 66   7   2   3   0  79  53]
 [ 22   1   0   0   0  35 152]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.80      0.36       210
           2       0.52      0.35      0.42       210
           3       0.93      0.47      0.62       210
           4       0.22      0.10      0.13       210
           5       0.00      0.00      0.00       210
           6       0.45      0.38      0.41       210
           7       0.69      0.72      0.71       210

    accuracy                           0.40      1470
   macro avg       0.43      0.40      0.38      1470
weighted avg       0.43      0.40      0.38      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.42448979591836733
Confusion Matrix of Decision Tree is:
 [[ 96   7   4  14  73  12   4]
 [ 11  70   9  11  87  17   5]
 [  1  11  66  86  36  10   0]
 [ 19  15   3  19 140  13   1]
 [ 47  15   1   5 136   3   3]
 [ 22   5   7   3  44  93  36]
 [ 16   1   0   0   6  43 144]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.46      0.45       210
           2       0.56      0.33      0.42       210
           3       0.73      0.31      0.44       210
           4       0.14      0.09      0.11       210
           5       0.26      0.65      0.37       210
           6       0.49      0.44      0.46       210
           7       0.75      0.69      0.71       210

    accuracy                           0.42      1470
   macro avg       0.48      0.42      0.42      1470
weighted avg       0.48      0.42      0.42      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.41768707482993195
Confusion Matrix of Decision Tree is:
 [[109  20  17  24  20  15   5]
 [ 22 101  10  31  23  20   3]
 [ 13  19  46  21  58  52   1]
 [ 19  28  17  88  46  10   2]
 [ 43  28   6  50  67  15   1]
 [ 20  29   4  21  15  82  39]
 [ 10   7   1   6   3  62 121]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.52      0.49       210
           2       0.44      0.48      0.46       210
           3       0.46      0.22      0.30       210
           4       0.37      0.42      0.39       210
           5       0.29      0.32      0.30       210
           6       0.32      0.39      0.35       210
           7       0.70      0.58      0.63       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.42      1470
weighted avg       0.43      0.42      0.42      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.37142857142857144
Confusion Matrix of Decision Tree is:
 [[103  33  10  25  21  12   6]
 [ 18  92  12  25  37  21   5]
 [ 11  22  40  72  55   8   2]
 [ 17  25  14  84  46  22   2]
 [ 49  21   7  41  65  23   4]
 [ 21  25   4  19  17  86  38]
 [  9   7   3   7   8 100  76]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.49      0.47       210
           2       0.41      0.44      0.42       210
           3       0.44      0.19      0.27       210
           4       0.31      0.40      0.35       210
           5       0.26      0.31      0.28       210
           6       0.32      0.41      0.36       210
           7       0.57      0.36      0.44       210

    accuracy                           0.37      1470
   macro avg       0.39      0.37      0.37      1470
weighted avg       0.39      0.37      0.37      1470

Random Forest with 1 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.636734693877551
Confusion Matrix of Random Forest is:
 [[131   6   0  10  35  24   4]
 [  3 121   4  27  32  17   6]
 [  4   9 159  16  13   9   0]
 [ 11  15   3 123  36  19   3]
 [ 21  13   0  42 121   9   4]
 [ 14  19   0  26   5  96  50]
 [  0   4   0   1   0  20 185]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.62      0.66       210
           2       0.65      0.58      0.61       210
           3       0.96      0.76      0.85       210
           4       0.50      0.59      0.54       210
           5       0.50      0.58      0.54       210
           6       0.49      0.46      0.48       210
           7       0.73      0.88      0.80       210

    accuracy                           0.64      1470
   macro avg       0.65      0.64      0.64      1470
weighted avg       0.65      0.64      0.64      1470

Random Forest with 9 max_depth


Accuracy of Random Forest after Standard Scaling is: 0.6653061224489796
Confusion Matrix of Random Forest is:
 [[137   4   1  11  35  19   3]
 [  5 135   5  23  21  19   2]
 [  2   6 176  10   8   7   1]
 [ 13   7   3 127  37  19   4]
 [ 31  18   1  37 114   4   5]
 [ 14  11   2  19   6 110  48]
 [  0   3   0   1   0  27 179]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.68      0.65      0.67       210
           2       0.73      0.64      0.69       210
           3       0.94      0.84      0.88       210
           4       0.56      0.60      0.58       210
           5       0.52      0.54      0.53       210
           6       0.54      0.52      0.53       210
           7       0.74      0.85      0.79       210

    accuracy                           0.67      1470
   macro avg       0.67      0.67      0.67      1470
weighted avg       0.67      0.67      0.67      1470

Random Forest with 17 max_dept

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_vbert.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6414965986394557
Confusion Matrix of Logistic Regression is:
 [[186   8   1   4   1  10   0]
 [  4 193   1   3   0   9   0]
 [  0  20 184   5   0   1   0]
 [ 11  50   4 131   2  11   1]
 [ 85  68   2  42   5   5   3]
 [ 14  55   4  27   0  95  15]
 [  8   7   1   2   1  42 149]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.60      0.89      0.72       210
           2       0.48      0.92      0.63       210
           3       0.93      0.88      0.90       210
           4       0.61      0.62      0.62       210
           5       0.56      0.02      0.05       210
           6       0.55      0.45      0.50       210
           7       0.89      0.71      0.79       210

    accuracy                           0.64      1470
   macro avg       0.66      0.64      0.60      1470
weighted avg       0.66      0.64      0.60      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6632653061224489
Confusion Matrix of SVM is:
 [[195   4   1   3   5   2   0]
 [ 13 180   2   4   2   9   0]
 [  2   8 193   7   0   0   0]
 [ 16  32   8 142   1   9   2]
 [102  34   2  39  31   0   2]
 [ 38  35   6  25   0  84  22]
 [ 16   3   0   3   0  38 150]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.51      0.93      0.66       210
           2       0.61      0.86      0.71       210
           3       0.91      0.92      0.91       210
           4       0.64      0.68      0.66       210
           5       0.79      0.15      0.25       210
           6       0.59      0.40      0.48       210
           7       0.85      0.71      0.78       210

    accuracy                           0.66      1470
   macro avg       0.70      0.66      0.63      1470
weighted avg       0.70      0.66      0.63      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3843537414965986
Confusion Matrix of Decision Tree is:
 [[  0   0  14  15 157   0  24]
 [  0   0  37  54  93   0  26]
 [  0   0 167  24  15   0   4]
 [  0   0  34  53 100   0  23]
 [  0   0   6  14 173   0  17]
 [  0   0  23  39  50   0  98]
 [  0   0   2   7  29   0 172]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.59      0.80      0.68       210
           4       0.26      0.25      0.25       210
           5       0.28      0.82      0.42       210
           6       0.00      0.00      0.00       210
           7       0.47      0.82      0.60       210

    accuracy                           0.38      1470
   macro avg       0.23      0.38      0.28      1470
weighted avg       0.23      0.38      0.28      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3578231292517007
Confusion Matrix of Decision Tree is:
 [[ 49  19   0  10 108  13  11]
 [  3  58   0  33  90  16  10]
 [  0 173   2  16  15   2   2]
 [  5  38   0  49  95   9  14]
 [  3   9   0  11 170   4  13]
 [  1  30   0  32  49  42  56]
 [  0   3   0   6  29  16 156]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.80      0.23      0.36       210
           2       0.18      0.28      0.21       210
           3       1.00      0.01      0.02       210
           4       0.31      0.23      0.27       210
           5       0.31      0.81      0.44       210
           6       0.41      0.20      0.27       210
           7       0.60      0.74      0.66       210

    accuracy                           0.36      1470
   macro avg       0.51      0.36      0.32      1470
weighted avg       0.51      0.36      0.32      1470

Decision Tree with 4 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.40680272108843535
Confusion Matrix of Decision Tree is:
 [[101   7  12  25  42  12  11]
 [ 20  56  27  41  47  13   6]
 [  2  33 126  35   6   6   2]
 [ 21  20  17  70  51  19  12]
 [ 51   6   7  22 103  10  11]
 [ 20  22  14  57  21  39  37]
 [ 12   9  15  16  21  34 103]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.44      0.48      0.46       210
           2       0.37      0.27      0.31       210
           3       0.58      0.60      0.59       210
           4       0.26      0.33      0.29       210
           5       0.35      0.49      0.41       210
           6       0.29      0.19      0.23       210
           7       0.57      0.49      0.53       210

    accuracy                           0.41      1470
   macro avg       0.41      0.41      0.40      1470
weighted avg       0.41      0.41      0.40      1470

Decision Tree with 12 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.3551020408163265
Confusion Matrix of Decision Tree is:
 [[81  6  9 29 49 22 14]
 [15 57 14 40 46 30  8]
 [ 1 24 97 44 11 30  3]
 [20 25 18 71 48 18 10]
 [44 14  4 28 91 19 10]
 [ 6 17 17 55 29 50 36]
 [ 4 11  3 17 38 62 75]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.39      0.43       210
           2       0.37      0.27      0.31       210
           3       0.60      0.46      0.52       210
           4       0.25      0.34      0.29       210
           5       0.29      0.43      0.35       210
           6       0.22      0.24      0.23       210
           7       0.48      0.36      0.41       210

    accuracy                           0.36      1470
   macro avg       0.38      0.36      0.36      1470
weighted avg       0.38      0.36      0.36      1470

Decision Tree with 20 max_depth
Accuracy of Decision Tree after Standard Scalin

Accuracy of Random Forest after Standard Scaling is: 0.6523809523809524
Confusion Matrix of Random Forest is:
 [[106   9   4   8  66  17   0]
 [  1 141   8  23  21  13   3]
 [  0  18 174   9   4   4   1]
 [  1  14   6 129  48   9   3]
 [  1  15   0  22 165   4   3]
 [  3  32   5  34  28  76  32]
 [  0   2   0   7   6  27 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.95      0.50      0.66       210
           2       0.61      0.67      0.64       210
           3       0.88      0.83      0.86       210
           4       0.56      0.61      0.58       210
           5       0.49      0.79      0.60       210
           6       0.51      0.36      0.42       210
           7       0.80      0.80      0.80       210

    accuracy                           0.65      1470
   macro avg       0.68      0.65      0.65      1470
weighted avg       0.68      0.65      0.65      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6775510204081633
Confusion Matrix of Random Forest is:
 [[133   7   2   9  46  11   2]
 [  4 141   6  21  22  14   2]
 [  0  17 177   9   3   3   1]
 [  3   5  10 140  40   9   3]
 [ 12  15   2  26 150   2   3]
 [  9  28   9  22  18 100  24]
 [  2   3   0  10   5  35 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.63      0.71       210
           2       0.65      0.67      0.66       210
           3       0.86      0.84      0.85       210
           4       0.59      0.67      0.63       210
           5       0.53      0.71      0.61       210
           6       0.57      0.48      0.52       210
           7       0.82      0.74      0.78       210

    accuracy                           0.68      1470
   macro avg       0.69      0.68      0.68      1470
weighted avg       0.69      0.68      0.68      1470

Random Forest with 16 max_dept

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//gpt_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.689795918367347
Confusion Matrix of Logistic Regression is:
 [[182   1   0   4   5   7  11]
 [ 12 112   6  30  17  25   8]
 [  1   0 197   2   2   7   1]
 [  6   3   7 158   9  12  15]
 [ 61   4   1  48  85   4   7]
 [ 16   7   9  22   4  75  77]
 [  2   0   0   0   1   2 205]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.65      0.87      0.74       210
           2       0.88      0.53      0.66       210
           3       0.90      0.94      0.92       210
           4       0.60      0.75      0.67       210
           5       0.69      0.40      0.51       210
           6       0.57      0.36      0.44       210
           7       0.63      0.98      0.77       210

    accuracy                           0.69      1470
   macro avg       0.70      0.69      0.67      1470
weighted avg       0.70      0.69      0.67      1470

KNN with 3 Ne

Accuracy of SVM after Standard Scaling is: 0.6755102040816326
Confusion Matrix of SVM is:
 [[183   3   0   1   7  11   5]
 [ 15 131   7  29  12  13   3]
 [  0   4 198   3   1   4   0]
 [ 27   5   5 150   4  12   7]
 [ 81   8   2  48  61   4   6]
 [ 21  13   9  32   3  87  45]
 [  1   1   0   3   1  21 183]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.56      0.87      0.68       210
           2       0.79      0.62      0.70       210
           3       0.90      0.94      0.92       210
           4       0.56      0.71      0.63       210
           5       0.69      0.29      0.41       210
           6       0.57      0.41      0.48       210
           7       0.73      0.87      0.80       210

    accuracy                           0.68      1470
   macro avg       0.69      0.68      0.66      1470
weighted avg       0.69      0.68      0.66      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3224489795918367
Confusion Matrix of Decision Tree is:
 [[  0   0  10   0 154   0  46]
 [  0   0  10   0 160   0  40]
 [  0   0 129   0  64   0  17]
 [  0   0  15   0 131   0  64]
 [  0   0   2   0 177   0  31]
 [  0   0  11   0  96   0 103]
 [  0   0   9   0  33   0 168]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.69      0.61      0.65       210
           4       0.00      0.00      0.00       210
           5       0.22      0.84      0.35       210
           6       0.00      0.00      0.00       210
           7       0.36      0.80      0.49       210

    accuracy                           0.32      1470
   macro avg       0.18      0.32      0.21      1470
weighted avg       0.18      0.32      0.21      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3619047619047619
Confusion Matrix of Decision Tree is:
 [[ 61  96   7   0   0  22  24]
 [  0 160   6   4   0  27  13]
 [  1  63 105  24   0  15   2]
 [  6 125  11   4   0  37  27]
 [ 13 164   1   1   0  12  19]
 [  8  88  11   0   0  63  40]
 [  8  31   3   0   0  29 139]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.63      0.29      0.40       210
           2       0.22      0.76      0.34       210
           3       0.73      0.50      0.59       210
           4       0.12      0.02      0.03       210
           5       0.00      0.00      0.00       210
           6       0.31      0.30      0.30       210
           7       0.53      0.66      0.59       210

    accuracy                           0.36      1470
   macro avg       0.36      0.36      0.32      1470
weighted avg       0.36      0.36      0.32      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4306122448979592
Confusion Matrix of Decision Tree is:
 [[ 61  25   3   9  74  19  19]
 [  0 104   6   2  67  19  12]
 [  3  43  98  20  38   7   1]
 [  6  27   7  17 107  30  16]
 [ 14  16   0   4 149  12  15]
 [  8  18   7   7  71  62  37]
 [  2   2   2   4  29  29 142]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.65      0.29      0.40       210
           2       0.44      0.50      0.47       210
           3       0.80      0.47      0.59       210
           4       0.27      0.08      0.12       210
           5       0.28      0.71      0.40       210
           6       0.35      0.30      0.32       210
           7       0.59      0.68      0.63       210

    accuracy                           0.43      1470
   macro avg       0.48      0.43      0.42      1470
weighted avg       0.48      0.43      0.42      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.42108843537414964
Confusion Matrix of Decision Tree is:
 [[ 75  23  17  27  21  29  18]
 [ 28  88  13  22  19  26  14]
 [ 19  14 122  25  14   8   8]
 [ 17  25  11  74  37  32  14]
 [ 33  37   4  33  72  23   8]
 [ 14  20  11  33  20  65  47]
 [ 15   5   4  16  13  34 123]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.36      0.36       210
           2       0.42      0.42      0.42       210
           3       0.67      0.58      0.62       210
           4       0.32      0.35      0.34       210
           5       0.37      0.34      0.35       210
           6       0.30      0.31      0.30       210
           7       0.53      0.59      0.56       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.42      1470
weighted avg       0.43      0.42      0.42      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.40272108843537413
Confusion Matrix of Decision Tree is:
 [[ 80  16  19  25  20  32  18]
 [ 27  84  11  31  17  23  17]
 [  6  17 121  26  16  10  14]
 [ 18  26   7  85  29  26  19]
 [ 49  25   8  43  47  31   7]
 [ 25  18  11  31  18  65  42]
 [ 12   2   4  20  24  38 110]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.38      0.37       210
           2       0.45      0.40      0.42       210
           3       0.67      0.58      0.62       210
           4       0.33      0.40      0.36       210
           5       0.27      0.22      0.25       210
           6       0.29      0.31      0.30       210
           7       0.48      0.52      0.50       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.40      1470
weighted avg       0.41      0.40      0.40      1470

Random Forest with 1 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6646258503401361
Confusion Matrix of Random Forest is:
 [[134   4   1   6  24  24  17]
 [  1 143   5  12  17  28   4]
 [  3   2 185  12   1   7   0]
 [  2   7   4 117  28  37  15]
 [ 17  14   1  25 117  21  15]
 [ 11  12   3  25   5  90  64]
 [  0   0   0   2   2  15 191]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.80      0.64      0.71       210
           2       0.79      0.68      0.73       210
           3       0.93      0.88      0.90       210
           4       0.59      0.56      0.57       210
           5       0.60      0.56      0.58       210
           6       0.41      0.43      0.42       210
           7       0.62      0.91      0.74       210

    accuracy                           0.66      1470
   macro avg       0.68      0.66      0.66      1470
weighted avg       0.68      0.66      0.66      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6789115646258503
Confusion Matrix of Random Forest is:
 [[142   3   0  10  25  20  10]
 [  2 141   5  13  15  30   4]
 [  2   2 187   8   5   6   0]
 [  2  11   4 126  17  32  18]
 [ 26  12   0  23 117  18  14]
 [  9  12   4  18   4 110  53]
 [  0   1   0   0   3  31 175]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.68      0.72       210
           2       0.77      0.67      0.72       210
           3       0.94      0.89      0.91       210
           4       0.64      0.60      0.62       210
           5       0.63      0.56      0.59       210
           6       0.45      0.52      0.48       210
           7       0.64      0.83      0.72       210

    accuracy                           0.68      1470
   macro avg       0.69      0.68      0.68      1470
weighted avg       0.69      0.68      0.68      1470

Random Forest with 17 max_dept

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//xlm_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6040816326530613
Confusion Matrix of Logistic Regression is:
 [[135   1   0   0  52  21   1]
 [  2 125   1   0  56  26   0]
 [  2   3 161   0  21  23   0]
 [  5   5   1  20 132  46   1]
 [ 12   3   0   1 180  13   1]
 [  7   4   1   1  25 167   5]
 [  1   0   0   0   9 100 100]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.82      0.64      0.72       210
           2       0.89      0.60      0.71       210
           3       0.98      0.77      0.86       210
           4       0.91      0.10      0.17       210
           5       0.38      0.86      0.53       210
           6       0.42      0.80      0.55       210
           7       0.93      0.48      0.63       210

    accuracy                           0.60      1470
   macro avg       0.76      0.60      0.60      1470
weighted avg       0.76      0.60      0.60      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6551020408163265
Confusion Matrix of SVM is:
 [[159   3   1   1  37   8   1]
 [  6 178   2   1  11  12   0]
 [  0   9 190   0   0  11   0]
 [ 14  40  16  39  61  37   3]
 [ 43  18   0   4 133   9   3]
 [ 24  23   7   4  11 131  10]
 [  2   0   0   1   3  71 133]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.64      0.76      0.69       210
           2       0.66      0.85      0.74       210
           3       0.88      0.90      0.89       210
           4       0.78      0.19      0.30       210
           5       0.52      0.63      0.57       210
           6       0.47      0.62      0.54       210
           7       0.89      0.63      0.74       210

    accuracy                           0.66      1470
   macro avg       0.69      0.66      0.64      1470
weighted avg       0.69      0.66      0.64      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3476190476190476
Confusion Matrix of Decision Tree is:
 [[  0   0   0   0 178   0  32]
 [  0   0   2   0 176   0  32]
 [  0   0 115   5  71   0  19]
 [  0   0   2   2 160   0  46]
 [  0   0   0   0 192   0  18]
 [  0   0   2   0  58   0 150]
 [  0   0   2   0   6   0 202]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.93      0.55      0.69       210
           4       0.29      0.01      0.02       210
           5       0.23      0.91      0.37       210
           6       0.00      0.00      0.00       210
           7       0.40      0.96      0.57       210

    accuracy                           0.35      1470
   macro avg       0.26      0.35      0.23      1470
weighted avg       0.26      0.35      0.23      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4605442176870748
Confusion Matrix of Decision Tree is:
 [[164  14   0   0   0  11  21]
 [ 24 152   2   0   0  21  11]
 [  8  63 115   5   0  18   1]
 [ 63  97   1   2   0  24  23]
 [137  55   0   0   0   3  15]
 [ 32  26   2   0   0  58  92]
 [  6   0   0   0   0  18 186]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.78      0.51       210
           2       0.37      0.72      0.49       210
           3       0.96      0.55      0.70       210
           4       0.29      0.01      0.02       210
           5       0.00      0.00      0.00       210
           6       0.38      0.28      0.32       210
           7       0.53      0.89      0.67       210

    accuracy                           0.46      1470
   macro avg       0.42      0.46      0.39      1470
weighted avg       0.42      0.46      0.39      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.5108843537414965
Confusion Matrix of Decision Tree is:
 [[106   3   0  12  58  21  10]
 [  2 103   1  65  22  11   6]
 [  6  10 115  60   7  11   1]
 [  3  22   1  80  60  34  10]
 [ 24  19   0  36 113  14   4]
 [  3  22   0  18  29  80  58]
 [  0   1   0   1   6  48 154]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.74      0.50      0.60       210
           2       0.57      0.49      0.53       210
           3       0.98      0.55      0.70       210
           4       0.29      0.38      0.33       210
           5       0.38      0.54      0.45       210
           6       0.37      0.38      0.37       210
           7       0.63      0.73      0.68       210

    accuracy                           0.51      1470
   macro avg       0.57      0.51      0.52      1470
weighted avg       0.57      0.51      0.52      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5340136054421769
Confusion Matrix of Decision Tree is:
 [[145   4   2  12  26  16   5]
 [ 10 103  19  29  26  17   6]
 [  4   4 165  13   9  14   1]
 [ 20  21   9  80  34  33  13]
 [ 61  29   6  23  68  15   8]
 [ 17  19   9  29  11  84  41]
 [  7   4   3   7   2  47 140]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.69      0.61       210
           2       0.56      0.49      0.52       210
           3       0.77      0.79      0.78       210
           4       0.41      0.38      0.40       210
           5       0.39      0.32      0.35       210
           6       0.37      0.40      0.39       210
           7       0.65      0.67      0.66       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5163265306122449
Confusion Matrix of Decision Tree is:
 [[130   6   2  15  28  24   5]
 [ 10  99  14  24  39  18   6]
 [  2   4 169  13  10  11   1]
 [ 23  19  19  68  30  34  17]
 [ 56  24   5  23  72  23   7]
 [ 18  12   4  33  13  86  44]
 [  5   3   3   8   3  53 135]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.62      0.57       210
           2       0.59      0.47      0.53       210
           3       0.78      0.80      0.79       210
           4       0.37      0.32      0.35       210
           5       0.37      0.34      0.36       210
           6       0.35      0.41      0.37       210
           7       0.63      0.64      0.64       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.51      1470
weighted avg       0.52      0.52      0.51      1470

Random Forest with 1 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.5380952380952381
Confusion Matrix of Random Forest is:
 [[141   2   1   0  25   5  36]
 [  2 133  16   0  29   3  27]
 [  1  12 181   0   4   6   6]
 [  9  43  26   3  57  11  61]
 [ 46  18   5   0 114   1  26]
 [ 14  10  13   1  12  10 150]
 [  0   0   0   0   1   0 209]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.66      0.67      0.67       210
           2       0.61      0.63      0.62       210
           3       0.75      0.86      0.80       210
           4       0.75      0.01      0.03       210
           5       0.47      0.54      0.50       210
           6       0.28      0.05      0.08       210
           7       0.41      1.00      0.58       210

    accuracy                           0.54      1470
   macro avg       0.56      0.54      0.47      1470
weighted avg       0.56      0.54      0.47      1470

Random Forest with 3 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6952380952380952
Confusion Matrix of Random Forest is:
 [[141   2   1   4  34  26   2]
 [  1 164   1  12  12  17   3]
 [  0   5 186   6   4   9   0]
 [  0  13   3 110  30  40  14]
 [ 23  22   0  14 133   6  12]
 [  7   7   1  17   6 117  55]
 [  0   0   0   0   1  38 171]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.67      0.74       210
           2       0.77      0.78      0.78       210
           3       0.97      0.89      0.93       210
           4       0.67      0.52      0.59       210
           5       0.60      0.63      0.62       210
           6       0.46      0.56      0.51       210
           7       0.67      0.81      0.73       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.7136054421768707
Confusion Matrix of Random Forest is:
 [[153   2   1   5  24  22   3]
 [  3 163   2  11  12  17   2]
 [  2   3 190   5   2   8   0]
 [  1   7   4 120  31  38   9]
 [ 34  15   1  21 123   7   9]
 [  7   7   3  19   4 131  39]
 [  0   0   0   0   1  40 169]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.77      0.73      0.75       210
           2       0.83      0.78      0.80       210
           3       0.95      0.90      0.92       210
           4       0.66      0.57      0.61       210
           5       0.62      0.59      0.60       210
           6       0.50      0.62      0.55       210
           7       0.73      0.80      0.77       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.72      1470
weighted avg       0.72      0.71      0.72      1470

Random Forest with 19 max_dept

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//bert_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.5374149659863946
Confusion Matrix of Logistic Regression is:
 [[125   1   2   2  14  66   0]
 [  4 105  17   6   5  72   1]
 [  1   4 180   1   0  23   1]
 [  4   9   8  44  15 128   2]
 [ 37  19   5   7  76  64   2]
 [  5   5   5   5   1 184   5]
 [  0   0   0   0   0 134  76]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.71      0.60      0.65       210
           2       0.73      0.50      0.59       210
           3       0.83      0.86      0.84       210
           4       0.68      0.21      0.32       210
           5       0.68      0.36      0.47       210
           6       0.27      0.88      0.42       210
           7       0.87      0.36      0.51       210

    accuracy                           0.54      1470
   macro avg       0.68      0.54      0.54      1470
weighted avg       0.68      0.54      0.54      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6027210884353742
Confusion Matrix of SVM is:
 [[126  12   6  11  35  15   5]
 [  3 141  34  12   4  14   2]
 [  1   5 195   4   1   3   1]
 [  9  40  13  78  14  45  11]
 [ 32  46   9  20  83  12   8]
 [ 11  21  14  16  11  95  42]
 [  3   6   0   0   7  26 168]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.68      0.60      0.64       210
           2       0.52      0.67      0.59       210
           3       0.72      0.93      0.81       210
           4       0.55      0.37      0.44       210
           5       0.54      0.40      0.45       210
           6       0.45      0.45      0.45       210
           7       0.71      0.80      0.75       210

    accuracy                           0.60      1470
   macro avg       0.60      0.60      0.59      1470
weighted avg       0.60      0.60      0.59      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.27687074829931974
Confusion Matrix of Decision Tree is:
 [[ 15 168   0   0   0   0  27]
 [  6 179   1   0   0   0  24]
 [  0 145  61   0   0   0   4]
 [  1 152   0   0   0   0  57]
 [  0 158   0   0   0   0  52]
 [  3 136   0   0   0   0  71]
 [  0  58   0   0   0   0 152]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.07      0.13       210
           2       0.18      0.85      0.30       210
           3       0.98      0.29      0.45       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.39      0.72      0.51       210

    accuracy                           0.28      1470
   macro avg       0.31      0.28      0.20      1470
weighted avg       0.31      0.28      0.20      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.2707482993197279
Confusion Matrix of Decision Tree is:
 [[ 14  22   0   0 146   1  27]
 [  3  32   0   0 148   3  24]
 [  0  61  46   0  99   0   4]
 [  0  12   0   0 140   1  57]
 [  0   7   0   0 151   0  52]
 [  0  24   0   0 112   3  71]
 [  0   5   0   0  53   0 152]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.82      0.07      0.12       210
           2       0.20      0.15      0.17       210
           3       1.00      0.22      0.36       210
           4       0.00      0.00      0.00       210
           5       0.18      0.72      0.29       210
           6       0.38      0.01      0.03       210
           7       0.39      0.72      0.51       210

    accuracy                           0.27      1470
   macro avg       0.42      0.27      0.21      1470
weighted avg       0.42      0.27      0.21      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.28435374149659864
Confusion Matrix of Decision Tree is:
 [[ 29   8   0  91  66   0  16]
 [  6  32   0  82  84   0   6]
 [ 15  46  46  27  74   0   2]
 [  5   8   0  88  89   0  20]
 [  3   4   0  67 114   0  22]
 [  5  20   0 109  39   2  35]
 [  0   5   0  85  13   0 107]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.14      0.21       210
           2       0.26      0.15      0.19       210
           3       1.00      0.22      0.36       210
           4       0.16      0.42      0.23       210
           5       0.24      0.54      0.33       210
           6       1.00      0.01      0.02       210
           7       0.51      0.51      0.51       210

    accuracy                           0.28      1470
   macro avg       0.52      0.28      0.27      1470
weighted avg       0.52      0.28      0.27      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.32448979591836735
Confusion Matrix of Decision Tree is:
 [[70 23 13 13 39 43  9]
 [27 68 20 22 35 30  8]
 [37 41 88 10 27  5  2]
 [30 29  6 34 54 35 22]
 [41 30  1 23 77 23 15]
 [26 29  9 32 34 46 34]
 [ 9  8  1 20 35 43 94]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.29      0.33      0.31       210
           2       0.30      0.32      0.31       210
           3       0.64      0.42      0.51       210
           4       0.22      0.16      0.19       210
           5       0.26      0.37      0.30       210
           6       0.20      0.22      0.21       210
           7       0.51      0.45      0.48       210

    accuracy                           0.32      1470
   macro avg       0.35      0.32      0.33      1470
weighted avg       0.35      0.32      0.33      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after Standard Scali

Accuracy of Decision Tree after Standard Scaling is: 0.3142857142857143
Confusion Matrix of Decision Tree is:
 [[79 13 13 19 40 37  9]
 [33 57 22 20 31 33 14]
 [44 31 93 11 23  8  0]
 [28 19  7 38 65 33 20]
 [45 16  5 26 71 30 17]
 [32 32 12 22 38 40 34]
 [21  8  4 24 31 38 84]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.28      0.38      0.32       210
           2       0.32      0.27      0.30       210
           3       0.60      0.44      0.51       210
           4       0.24      0.18      0.21       210
           5       0.24      0.34      0.28       210
           6       0.18      0.19      0.19       210
           7       0.47      0.40      0.43       210

    accuracy                           0.31      1470
   macro avg       0.33      0.31      0.32      1470
weighted avg       0.33      0.31      0.32      1470

Random Forest with 1 max_depth
Accuracy of Random Forest after Standard Scaling

Accuracy of Random Forest after Standard Scaling is: 0.5482993197278911
Confusion Matrix of Random Forest is:
 [[ 97  14   8  13  45   8  25]
 [  1 135  10  18  26  10  10]
 [  5  45 148   5   1   4   2]
 [  7  31   6  73  44  10  39]
 [  6  23   1  17 132   6  25]
 [ 10  29   4  31  12  52  72]
 [  3   8   0  10  10  10 169]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.75      0.46      0.57       210
           2       0.47      0.64      0.55       210
           3       0.84      0.70      0.76       210
           4       0.44      0.35      0.39       210
           5       0.49      0.63      0.55       210
           6       0.52      0.25      0.34       210
           7       0.49      0.80      0.61       210

    accuracy                           0.55      1470
   macro avg       0.57      0.55      0.54      1470
weighted avg       0.57      0.55      0.54      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.5591836734693878
Confusion Matrix of Random Forest is:
 [[106  13   6  17  37  18  13]
 [  6 128  11  22  17  23   3]
 [  2  29 162   6   5   5   1]
 [ 11  26   6  78  39  30  20]
 [ 15  22   2  21 119  16  15]
 [ 14  26   7  31   8  77  47]
 [  1   5   0  10   2  40 152]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.68      0.50      0.58       210
           2       0.51      0.61      0.56       210
           3       0.84      0.77      0.80       210
           4       0.42      0.37      0.39       210
           5       0.52      0.57      0.54       210
           6       0.37      0.37      0.37       210
           7       0.61      0.72      0.66       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Random Forest with 17 max_dept

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.5469387755102041
Confusion Matrix of Logistic Regression is:
 [[138   5  13   2   4  34  14]
 [ 19 114  18   2   5  37  15]
 [  3   8 188   2   0   7   2]
 [ 33  15  15  78   2  57  10]
 [103  12   8  19  10  37  21]
 [ 18   9  17   4   2  84  76]
 [  2   2   0   0   0  14 192]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.44      0.66      0.52       210
           2       0.69      0.54      0.61       210
           3       0.73      0.90      0.80       210
           4       0.73      0.37      0.49       210
           5       0.43      0.05      0.09       210
           6       0.31      0.40      0.35       210
           7       0.58      0.91      0.71       210

    accuracy                           0.55      1470
   macro avg       0.56      0.55      0.51      1470
weighted avg       0.56      0.55      0.51      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6034013605442177
Confusion Matrix of SVM is:
 [[120  14   8   5  12  47   4]
 [  6 167   8   2   6  18   3]
 [  3  15 187   1   1   3   0]
 [ 16  30  11  94  18  32   9]
 [ 76  41   2  18  48  13  12]
 [ 13  26  24   5   3  80  59]
 [  3   4   1   0   0  11 191]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.51      0.57      0.54       210
           2       0.56      0.80      0.66       210
           3       0.78      0.89      0.83       210
           4       0.75      0.45      0.56       210
           5       0.55      0.23      0.32       210
           6       0.39      0.38      0.39       210
           7       0.69      0.91      0.78       210

    accuracy                           0.60      1470
   macro avg       0.60      0.60      0.58      1470
weighted avg       0.60      0.60      0.58      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.21972789115646257
Confusion Matrix of Decision Tree is:
 [[  0   0   8   0 202   0   0]
 [  0   0  13   0 197   0   0]
 [  0   0 113   0  97   0   0]
 [  0   0   5   0 205   0   0]
 [  0   0   0   0 210   0   0]
 [  0   0   7   0 203   0   0]
 [  0   0   0   0 210   0   0]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.77      0.54      0.63       210
           4       0.00      0.00      0.00       210
           5       0.16      1.00      0.27       210
           6       0.00      0.00      0.00       210
           7       0.00      0.00      0.00       210

    accuracy                           0.22      1470
   macro avg       0.13      0.22      0.13      1470
weighted avg       0.13      0.22      0.13      1470

Decision Tree with 2 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.29931972789115646
Confusion Matrix of Decision Tree is:
 [[  0   3   5   0 197   0   5]
 [  0   7   6   0 192   0   5]
 [  0   1 112   0  97   0   0]
 [  0   0   5   0 196   0   9]
 [  0   0   0   0 206   0   4]
 [  0   1   6   0 160   0  43]
 [  0   0   0   0  95   0 115]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.58      0.03      0.06       210
           3       0.84      0.53      0.65       210
           4       0.00      0.00      0.00       210
           5       0.18      0.98      0.30       210
           6       0.00      0.00      0.00       210
           7       0.64      0.55      0.59       210

    accuracy                           0.30      1470
   macro avg       0.32      0.30      0.23      1470
weighted avg       0.32      0.30      0.23      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3469387755102041
Confusion Matrix of Decision Tree is:
 [[  2  82   2   0 116   3   5]
 [  1 130   3   0  68   3   5]
 [  0  44  83   0  54  29   0]
 [  0  39   3   0 157   2   9]
 [  0  29   0   0 177   0   4]
 [  0 121   3   0  40   3  43]
 [  0  71   0   0  24   0 115]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.67      0.01      0.02       210
           2       0.25      0.62      0.36       210
           3       0.88      0.40      0.55       210
           4       0.00      0.00      0.00       210
           5       0.28      0.84      0.42       210
           6       0.07      0.01      0.02       210
           7       0.64      0.55      0.59       210

    accuracy                           0.35      1470
   macro avg       0.40      0.35      0.28      1470
weighted avg       0.40      0.35      0.28      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.37891156462585035
Confusion Matrix of Decision Tree is:
 [[  2  45   2  53  66  37   5]
 [  0  98   3  35  36  33   5]
 [  0  27  83  68  15  17   0]
 [  0  11   3  84  76  29   7]
 [  0  11   0  80  98  18   3]
 [  0  42   3  20  29  84  32]
 [  0  17   0   3  24  58 108]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       1.00      0.01      0.02       210
           2       0.39      0.47      0.43       210
           3       0.88      0.40      0.55       210
           4       0.24      0.40      0.30       210
           5       0.28      0.47      0.35       210
           6       0.30      0.40      0.35       210
           7       0.68      0.51      0.58       210

    accuracy                           0.38      1470
   macro avg       0.54      0.38      0.37      1470
weighted avg       0.54      0.38      0.37      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.3653061224489796
Confusion Matrix of Decision Tree is:
 [[ 69  31  22  36  24  19   9]
 [ 27  92  18  30  15  20   8]
 [ 21  24  85  36  10  32   2]
 [ 21  23  21  64  55  19   7]
 [ 38  33  16  33  70  15   5]
 [ 30  22  16  27  25  52  38]
 [  9   8   0  12  18  58 105]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.32      0.33      0.32       210
           2       0.39      0.44      0.42       210
           3       0.48      0.40      0.44       210
           4       0.27      0.30      0.29       210
           5       0.32      0.33      0.33       210
           6       0.24      0.25      0.24       210
           7       0.60      0.50      0.55       210

    accuracy                           0.37      1470
   macro avg       0.38      0.37      0.37      1470
weighted avg       0.38      0.37      0.37      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.3707482993197279
Confusion Matrix of Decision Tree is:
 [[ 76  30  22  27  25  21   9]
 [ 26  81  20  18  28  24  13]
 [ 16  26  91  37  10  28   2]
 [ 23  23  20  72  35  24  13]
 [ 26  39  20  39  62  17   7]
 [ 25  21  12  27  27  52  46]
 [  9   7   1   6  18  58 111]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.36      0.37       210
           2       0.36      0.39      0.37       210
           3       0.49      0.43      0.46       210
           4       0.32      0.34      0.33       210
           5       0.30      0.30      0.30       210
           6       0.23      0.25      0.24       210
           7       0.55      0.53      0.54       210

    accuracy                           0.37      1470
   macro avg       0.38      0.37      0.37      1470
weighted avg       0.38      0.37      0.37      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6054421768707483
Confusion Matrix of Random Forest is:
 [[ 96  12   7  21  37  27  10]
 [  2 138   9  16  19  17   9]
 [  7  11 160  13   3  15   1]
 [ 11  12   2 106  48  22   9]
 [ 14  10   2  22 130  20  12]
 [ 10  16  12  18  11  76  67]
 [  2   2   0   0   3  19 184]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.68      0.46      0.55       210
           2       0.69      0.66      0.67       210
           3       0.83      0.76      0.80       210
           4       0.54      0.50      0.52       210
           5       0.52      0.62      0.56       210
           6       0.39      0.36      0.37       210
           7       0.63      0.88      0.73       210

    accuracy                           0.61      1470
   macro avg       0.61      0.61      0.60      1470
weighted avg       0.61      0.61      0.60      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6231292517006802
Confusion Matrix of Random Forest is:
 [[ 99   8  10  19  37  27  10]
 [  4 135  10  10  21  24   6]
 [  4   6 173  11   3  13   0]
 [ 13  13   5 110  41  20   8]
 [ 15  15   4  21 128  17  10]
 [ 12  14  13   9  11  90  61]
 [  3   1   0   1   3  21 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.66      0.47      0.55       210
           2       0.70      0.64      0.67       210
           3       0.80      0.82      0.81       210
           4       0.61      0.52      0.56       210
           5       0.52      0.61      0.56       210
           6       0.42      0.43      0.43       210
           7       0.66      0.86      0.74       210

    accuracy                           0.62      1470
   macro avg       0.63      0.62      0.62      1470
weighted avg       0.63      0.62      0.62      1470

Random Forest with 17 max_dept

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.5741496598639456
Confusion Matrix of Logistic Regression is:
 [[208   0   0   1   0   1   0]
 [ 64  98   6  16   9  16   1]
 [ 21   1 180   1   1   6   0]
 [ 82   4   2 101  10   9   2]
 [150   4   0  16  37   1   2]
 [ 94   3   2  13   4  73  21]
 [ 42   0   0   2   0  19 147]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.31      0.99      0.48       210
           2       0.89      0.47      0.61       210
           3       0.95      0.86      0.90       210
           4       0.67      0.48      0.56       210
           5       0.61      0.18      0.27       210
           6       0.58      0.35      0.44       210
           7       0.85      0.70      0.77       210

    accuracy                           0.57      1470
   macro avg       0.70      0.57      0.58      1470
weighted avg       0.70      0.57      0.58      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.617687074829932
Confusion Matrix of SVM is:
 [[201   0   1   1   0   5   2]
 [ 38 119  12  13  10  17   1]
 [  4   3 195   1   2   5   0]
 [ 65   8   3 109   7  17   1]
 [150   7   1  18  22   7   5]
 [ 51   9   5  11   2 110  22]
 [ 16   1   0   1   0  40 152]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.38      0.96      0.55       210
           2       0.81      0.57      0.67       210
           3       0.90      0.93      0.91       210
           4       0.71      0.52      0.60       210
           5       0.51      0.10      0.17       210
           6       0.55      0.52      0.54       210
           7       0.83      0.72      0.77       210

    accuracy                           0.62      1470
   macro avg       0.67      0.62      0.60      1470
weighted avg       0.67      0.62      0.60      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard Sc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.24897959183673468
Confusion Matrix of Decision Tree is:
 [[  0   0  49   0   0   0 161]
 [  0   0  62   0   0   0 148]
 [  0   0 159   0   0   0  51]
 [  0   0  11   0   0   0 199]
 [  0   0   4   0   0   0 206]
 [  0   0  25   0   0   0 185]
 [  0   0   3   0   0   0 207]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.51      0.76      0.61       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.18      0.99      0.30       210

    accuracy                           0.25      1470
   macro avg       0.10      0.25      0.13      1470
weighted avg       0.10      0.25      0.13      1470

Decision Tree with 2 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3408163265306122
Confusion Matrix of Decision Tree is:
 [[  0  14  35   0 120   0  41]
 [  0  24  38   0  68   0  80]
 [  0  10 149   0  43   0   8]
 [  0   4   7   0 127   0  72]
 [  0   2   2   0 143   0  63]
 [  0   7  18   0  71   0 114]
 [  0   3   0   0  22   0 185]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.38      0.11      0.18       210
           3       0.60      0.71      0.65       210
           4       0.00      0.00      0.00       210
           5       0.24      0.68      0.36       210
           6       0.00      0.00      0.00       210
           7       0.33      0.88      0.48       210

    accuracy                           0.34      1470
   macro avg       0.22      0.34      0.24      1470
weighted avg       0.22      0.34      0.24      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.37891156462585035
Confusion Matrix of Decision Tree is:
 [[ 96  14  26  33   0  18  23]
 [ 10  34  26  60   0  53  27]
 [ 12  14 144  32   0   5   3]
 [ 40   3   7  88   0  44  28]
 [ 83   2   2  60   0  28  35]
 [ 26  12  13  45   0  38  76]
 [ 11   3   0  11   0  28 157]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.46      0.39       210
           2       0.41      0.16      0.23       210
           3       0.66      0.69      0.67       210
           4       0.27      0.42      0.33       210
           5       0.00      0.00      0.00       210
           6       0.18      0.18      0.18       210
           7       0.45      0.75      0.56       210

    accuracy                           0.38      1470
   macro avg       0.33      0.38      0.34      1470
weighted avg       0.33      0.38      0.34      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4054421768707483
Confusion Matrix of Decision Tree is:
 [[ 51  24  24  22  48  33   8]
 [  3  63  21  44   6  52  21]
 [  3  14 141  35  10   5   2]
 [  9  17   5  79  31  60   9]
 [ 14  33   2  32  69  43  17]
 [  1  14  13  43  25  72  42]
 [  0  15   0   3  11  60 121]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.63      0.24      0.35       210
           2       0.35      0.30      0.32       210
           3       0.68      0.67      0.68       210
           4       0.31      0.38      0.34       210
           5       0.34      0.33      0.34       210
           6       0.22      0.34      0.27       210
           7       0.55      0.58      0.56       210

    accuracy                           0.41      1470
   macro avg       0.44      0.41      0.41      1470
weighted avg       0.44      0.41      0.41      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.41496598639455784
Confusion Matrix of Decision Tree is:
 [[ 76  19  26  21  37  23   8]
 [ 29  70  19  23  21  37  11]
 [ 10   9 142  15  10  19   5]
 [ 32  19  11  68  38  24  18]
 [ 40  19   4  25  76  24  22]
 [ 28  13  10  25  21  71  42]
 [ 18   3   3   7  11  61 107]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.33      0.36      0.34       210
           2       0.46      0.33      0.39       210
           3       0.66      0.68      0.67       210
           4       0.37      0.32      0.35       210
           5       0.36      0.36      0.36       210
           6       0.27      0.34      0.30       210
           7       0.50      0.51      0.51       210

    accuracy                           0.41      1470
   macro avg       0.42      0.41      0.42      1470
weighted avg       0.42      0.41      0.42      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.4054421768707483
Confusion Matrix of Decision Tree is:
 [[ 75  14  30  22  45  16   8]
 [ 32  69  25  17  25  30  12]
 [  8  17 151   7   9  14   4]
 [ 26  17  20  64  48  20  15]
 [ 42  14   9  23  80  20  22]
 [ 26  23  22  19  33  51  36]
 [ 18   4  26  18  14  24 106]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.33      0.36      0.34       210
           2       0.44      0.33      0.37       210
           3       0.53      0.72      0.61       210
           4       0.38      0.30      0.34       210
           5       0.31      0.38      0.34       210
           6       0.29      0.24      0.26       210
           7       0.52      0.50      0.51       210

    accuracy                           0.41      1470
   macro avg       0.40      0.41      0.40      1470
weighted avg       0.40      0.41      0.40      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6394557823129252
Confusion Matrix of Random Forest is:
 [[125   7   5   8  34  10  21]
 [  2 138   7  17  19  20   7]
 [  0  16 182   7   1   4   0]
 [  0  13   6 106  36  31  18]
 [ 17  15   1  21 123  14  19]
 [  7  15  11  27   8  79  63]
 [  0   1   0   2   3  17 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.83      0.60      0.69       210
           2       0.67      0.66      0.67       210
           3       0.86      0.87      0.86       210
           4       0.56      0.50      0.53       210
           5       0.55      0.59      0.57       210
           6       0.45      0.38      0.41       210
           7       0.59      0.89      0.71       210

    accuracy                           0.64      1470
   macro avg       0.65      0.64      0.63      1470
weighted avg       0.65      0.64      0.63      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.64421768707483
Confusion Matrix of Random Forest is:
 [[130   5   6   8  33  17  11]
 [  4 139   6  22  12  19   8]
 [  2  12 182   9   2   2   1]
 [  5  15   6 111  31  35   7]
 [ 19  19   1  23 124  16   8]
 [ 14  18   9  27   6  85  51]
 [  1   2   0   1   4  26 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.62      0.68       210
           2       0.66      0.66      0.66       210
           3       0.87      0.87      0.87       210
           4       0.55      0.53      0.54       210
           5       0.58      0.59      0.59       210
           6       0.42      0.40      0.41       210
           7       0.67      0.84      0.75       210

    accuracy                           0.64      1470
   macro avg       0.64      0.64      0.64      1470
weighted avg       0.64      0.64      0.64      1470

Random Forest with 17 max_depth


In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7204081632653061
Confusion Matrix of Logistic Regression is:
 [[146  11   2   4  32  14   1]
 [  1 185   3   9   8   4   0]
 [  1  15 186   3   3   2   0]
 [  3  33   2 144  12  15   1]
 [ 27  25   2  33 111  10   2]
 [  5  45   1  18   6 121  14]
 [  0   5   0   4   2  33 166]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.80      0.70      0.74       210
           2       0.58      0.88      0.70       210
           3       0.95      0.89      0.92       210
           4       0.67      0.69      0.68       210
           5       0.64      0.53      0.58       210
           6       0.61      0.58      0.59       210
           7       0.90      0.79      0.84       210

    accuracy                           0.72      1470
   macro avg       0.73      0.72      0.72      1470
weighted avg       0.73      0.72      0.72      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7
Confusion Matrix of SVM is:
 [[150   7   1   7  33  11   1]
 [  3 179   3  11   7   7   0]
 [  0   8 195   2   3   2   0]
 [  6  36   5 140  16   6   1]
 [ 32  26   1  24 120   4   3]
 [  7  40  10  25   5 105  18]
 [  0   2   0   6   2  60 140]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.76      0.71      0.74       210
           2       0.60      0.85      0.70       210
           3       0.91      0.93      0.92       210
           4       0.65      0.67      0.66       210
           5       0.65      0.57      0.61       210
           6       0.54      0.50      0.52       210
           7       0.86      0.67      0.75       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard Scaling is: 0.71

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3414965986394558
Confusion Matrix of Decision Tree is:
 [[ 12   0  12   0 133   0  53]
 [ 14   0   7   0 111   0  78]
 [  7   0 131   0  37   0  35]
 [ 10   0  10   0 135   0  55]
 [  1   0   0   0 182   0  27]
 [  2   0   9   0  73   0 126]
 [  1   0   0   0  32   0 177]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.26      0.06      0.09       210
           2       0.00      0.00      0.00       210
           3       0.78      0.62      0.69       210
           4       0.00      0.00      0.00       210
           5       0.26      0.87      0.40       210
           6       0.00      0.00      0.00       210
           7       0.32      0.84      0.47       210

    accuracy                           0.34      1470
   macro avg       0.23      0.34      0.24      1470
weighted avg       0.23      0.34      0.24      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.38571428571428573
Confusion Matrix of Decision Tree is:
 [[ 12  36  10   2 130   0  20]
 [  0  91   6   1  75   0  37]
 [  4  30 111  20  27   0  18]
 [  3  59   2   8 108   0  30]
 [  0  21   0   0 171   0  18]
 [  1  49   1   8  47   0 104]
 [  0   8   0   0  28   0 174]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.06      0.10       210
           2       0.31      0.43      0.36       210
           3       0.85      0.53      0.65       210
           4       0.21      0.04      0.06       210
           5       0.29      0.81      0.43       210
           6       0.00      0.00      0.00       210
           7       0.43      0.83      0.57       210

    accuracy                           0.39      1470
   macro avg       0.38      0.39      0.31      1470
weighted avg       0.38      0.39      0.31      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40816326530612246
Confusion Matrix of Decision Tree is:
 [[ 60   0   0  48  82  20   0]
 [  5  57   5  36  70  30   7]
 [  8  22 111  28  23   9   9]
 [  5   8   2  59 106  25   5]
 [  7   1   0  20 164  13   5]
 [  6  15   1  42  42  81  23]
 [  1   0   0   8  27 106  68]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.65      0.29      0.40       210
           2       0.55      0.27      0.36       210
           3       0.93      0.53      0.67       210
           4       0.24      0.28      0.26       210
           5       0.32      0.78      0.45       210
           6       0.29      0.39      0.33       210
           7       0.58      0.32      0.42       210

    accuracy                           0.41      1470
   macro avg       0.51      0.41      0.41      1470
weighted avg       0.51      0.41      0.41      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.40748299319727893
Confusion Matrix of Decision Tree is:
 [[103   8   3  21  33  30  12]
 [ 28  91  13  21  21  22  14]
 [  9  20 123  25  17  11   5]
 [ 37  26   7  67  31  29  13]
 [ 62  19   1  44  60  15   9]
 [ 22  25   8  34  24  47  50]
 [ 12   6   5  16  17  46 108]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.49      0.43       210
           2       0.47      0.43      0.45       210
           3       0.77      0.59      0.66       210
           4       0.29      0.32      0.31       210
           5       0.30      0.29      0.29       210
           6       0.23      0.22      0.23       210
           7       0.51      0.51      0.51       210

    accuracy                           0.41      1470
   macro avg       0.42      0.41      0.41      1470
weighted avg       0.42      0.41      0.41      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.3707482993197279
Confusion Matrix of Decision Tree is:
 [[ 60  11  28  16  51  28  16]
 [ 19  83  16  24  23  28  17]
 [  7  21 125  20  21  14   2]
 [ 23  27   8  63  42  28  19]
 [ 39  23   6  44  68  22   8]
 [ 17  20  11  34  35  43  50]
 [  9  10   4  18  30  36 103]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.29      0.31       210
           2       0.43      0.40      0.41       210
           3       0.63      0.60      0.61       210
           4       0.29      0.30      0.29       210
           5       0.25      0.32      0.28       210
           6       0.22      0.20      0.21       210
           7       0.48      0.49      0.48       210

    accuracy                           0.37      1470
   macro avg       0.38      0.37      0.37      1470
weighted avg       0.38      0.37      0.37      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6374149659863946
Confusion Matrix of Random Forest is:
 [[120   7   2   8  45  20   8]
 [  3 147   2  15  18  19   6]
 [  1  32 163   8   3   3   0]
 [  0  14   2 114  34  33  13]
 [ 13  10   0  22 140  12  13]
 [  7  23   4  27  12  59  78]
 [  0   0   0   1   3  12 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.83      0.57      0.68       210
           2       0.63      0.70      0.66       210
           3       0.94      0.78      0.85       210
           4       0.58      0.54      0.56       210
           5       0.55      0.67      0.60       210
           6       0.37      0.28      0.32       210
           7       0.62      0.92      0.74       210

    accuracy                           0.64      1470
   macro avg       0.65      0.64      0.63      1470
weighted avg       0.65      0.64      0.63      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6571428571428571
Confusion Matrix of Random Forest is:
 [[129   6   5  10  35  20   5]
 [  4 151   1  19  10  21   4]
 [  2  25 164   8   2   9   0]
 [  2  19   3 109  30  38   9]
 [ 17  20   0  21 125  13  14]
 [ 12  18   4  24   4 101  47]
 [  0   0   0   1   2  20 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.61      0.69       210
           2       0.63      0.72      0.67       210
           3       0.93      0.78      0.85       210
           4       0.57      0.52      0.54       210
           5       0.60      0.60      0.60       210
           6       0.45      0.48      0.47       210
           7       0.70      0.89      0.79       210

    accuracy                           0.66      1470
   macro avg       0.67      0.66      0.66      1470
weighted avg       0.67      0.66      0.66      1470

Random Forest with 17 max_dept

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//xlm_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = minmax_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=5000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.482312925170068
Confusion Matrix of Logistic Regression is:
 [[ 97  12   5   4  21  45  26]
 [ 11 111   4   9  22  42  11]
 [  3  18 163   7   2  13   4]
 [ 12  16   8  39  22  88  25]
 [ 29  29   2  21  51  51  27]
 [ 10  18   3  16   7 110  46]
 [  4   3   0   3   2  60 138]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.58      0.46      0.52       210
           2       0.54      0.53      0.53       210
           3       0.88      0.78      0.83       210
           4       0.39      0.19      0.25       210
           5       0.40      0.24      0.30       210
           6       0.27      0.52      0.36       210
           7       0.50      0.66      0.57       210

    accuracy                           0.48      1470
   macro avg       0.51      0.48      0.48      1470
weighted avg       0.51      0.48      0.48      1470

KNN with 3 Ne

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.2680272108843537
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 45   7   3  16  14  11 114]
 [  9  47   1  17  21  26  89]
 [ 12  18 109   7   9  11  44]
 [  8  13   0  23  18  22 126]
 [ 17  10   0  18  16  22 127]
 [ 14  19   1  14  15  25 122]
 [ 10   7   0  25  17  22 129]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.39      0.21      0.28       210
           2       0.39      0.22      0.28       210
           3       0.96      0.52      0.67       210
           4       0.19      0.11      0.14       210
           5       0.15      0.08      0.10       210
           6       0.18      0.12      0.14       210
           7       0.17      0.61      0.27       210

    accuracy                           0.27      1470
   macro avg       0.35      0.27      0.27      1470
weighted avg       0.35      0.27      0.27      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.2653061224489796
Confusion Matrix of Decision Tree is:
 [[  0  79  13   0   0   0 118]
 [  0  88  16   0   0   0 106]
 [  0  51 123   0   0   0  36]
 [  0  57   3   0   0   0 150]
 [  0  55   3   0   0   0 152]
 [  0  57   5   0   0   0 148]
 [  0  29   2   0   0   0 179]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.21      0.42      0.28       210
           3       0.75      0.59      0.66       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.20      0.85      0.33       210

    accuracy                           0.27      1470
   macro avg       0.17      0.27      0.18      1470
weighted avg       0.17      0.27      0.18      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.30680272108843537
Confusion Matrix of Decision Tree is:
 [[ 42  45   5  96   0   0  22]
 [  8  80  16  96   0   0  10]
 [  4  47 123  30   0   0   6]
 [ 11  46   3 129   0   0  21]
 [ 19  36   3 118   0   0  34]
 [ 16  42   4 113   0   0  35]
 [ 12  17   2 102   0   0  77]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.20      0.26       210
           2       0.26      0.38      0.31       210
           3       0.79      0.59      0.67       210
           4       0.19      0.61      0.29       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.38      0.37      0.37       210

    accuracy                           0.31      1470
   macro avg       0.28      0.31      0.27      1470
weighted avg       0.28      0.31      0.27      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.32108843537414966
Confusion Matrix of Decision Tree is:
 [[ 50  46   4  79   9   0  22]
 [  3  86   8  95   8   0  10]
 [  0  52 118  30   4   0   6]
 [  5  49   0 127   8   0  21]
 [  7  39   0 116  14   0  34]
 [  9  43   3 111   9   0  35]
 [  0  19   0 102  12   0  77]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.68      0.24      0.35       210
           2       0.26      0.41      0.32       210
           3       0.89      0.56      0.69       210
           4       0.19      0.60      0.29       210
           5       0.22      0.07      0.10       210
           6       0.00      0.00      0.00       210
           7       0.38      0.37      0.37       210

    accuracy                           0.32      1470
   macro avg       0.37      0.32      0.30      1470
weighted avg       0.37      0.32      0.30      1470

Decision Tree with 5 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.32789115646258504
Confusion Matrix of Decision Tree is:
 [[ 50  43   6  57  38   0  16]
 [  6  81   7  65  40   1  10]
 [  0  39 122  29  10   4   6]
 [  4  48   0  89  52   0  17]
 [  6  38   0  64  74   0  28]
 [  5  48   3  78  46   0  30]
 [  0  19   0  74  51   0  66]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.24      0.36       210
           2       0.26      0.39      0.31       210
           3       0.88      0.58      0.70       210
           4       0.20      0.42      0.27       210
           5       0.24      0.35      0.28       210
           6       0.00      0.00      0.00       210
           7       0.38      0.31      0.34       210

    accuracy                           0.33      1470
   macro avg       0.38      0.33      0.32      1470
weighted avg       0.38      0.33      0.32      1470

Decision Tree with 6 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.32857142857142857
Confusion Matrix of Decision Tree is:
 [[ 74  21  11  28  25  33  18]
 [ 19  57  28  33  23  36  14]
 [ 18  23 137  14   8   7   3]
 [ 20  28  13  57  22  41  29]
 [ 23  24   9  39  52  41  22]
 [ 17  29   8  37  31  45  43]
 [ 24  17   3  38  33  34  61]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.35      0.37       210
           2       0.29      0.27      0.28       210
           3       0.66      0.65      0.65       210
           4       0.23      0.27      0.25       210
           5       0.27      0.25      0.26       210
           6       0.19      0.21      0.20       210
           7       0.32      0.29      0.31       210

    accuracy                           0.33      1470
   macro avg       0.33      0.33      0.33      1470
weighted avg       0.33      0.33      0.33      1470

Decision Tree with 14 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.2755102040816326
Confusion Matrix of Random Forest is:
 [[  6  18  37   2  21   1 125]
 [  3  25  61   9  12   2  98]
 [  1  14 168   3   2   0  22]
 [  4  12  26   8  11   3 146]
 [  2   8  14   8  12   0 166]
 [  1  18  29   6   7   6 143]
 [  2   4   2   5  11   6 180]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.32      0.03      0.05       210
           2       0.25      0.12      0.16       210
           3       0.50      0.80      0.61       210
           4       0.20      0.04      0.06       210
           5       0.16      0.06      0.08       210
           6       0.33      0.03      0.05       210
           7       0.20      0.86      0.33       210

    accuracy                           0.28      1470
   macro avg       0.28      0.28      0.19      1470
weighted avg       0.28      0.28      0.19      1470

Random Forest with 2 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.4326530612244898
Confusion Matrix of Random Forest is:
 [[ 85  29   9  25  31  12  19]
 [  8 119   7  24  26  13  13]
 [  8  35 152   7   2   5   1]
 [  6  42   5  57  37  32  31]
 [ 18  38   4  30  74  16  30]
 [ 11  45   1  36  30  35  52]
 [  4  10   1  31  18  32 114]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.40      0.49       210
           2       0.37      0.57      0.45       210
           3       0.85      0.72      0.78       210
           4       0.27      0.27      0.27       210
           5       0.34      0.35      0.35       210
           6       0.24      0.17      0.20       210
           7       0.44      0.54      0.49       210

    accuracy                           0.43      1470
   macro avg       0.45      0.43      0.43      1470
weighted avg       0.45      0.43      0.43      1470

Random Forest with 10 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.4421768707482993
Confusion Matrix of Random Forest is:
 [[ 89  36   6  26  25  15  13]
 [ 14 123   8  15  25  20   5]
 [  7  29 157  10   3   3   1]
 [ 20  26   6  66  31  35  26]
 [ 29  35   3  30  60  28  25]
 [ 19  36   6  37  25  46  41]
 [ 13   9   0  27  18  34 109]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.47      0.42      0.44       210
           2       0.42      0.59      0.49       210
           3       0.84      0.75      0.79       210
           4       0.31      0.31      0.31       210
           5       0.32      0.29      0.30       210
           6       0.25      0.22      0.24       210
           7       0.50      0.52      0.51       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.44      1470
weighted avg       0.44      0.44      0.44      1470

Random Forest with 18 max_dept