In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import Normalizer
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Nisha//Input//Nisha_dataset_labels.csv")

In [2]:
# Function of Train-test split, Normalize Scaling
def normalize_scaling(x_data, y_data):
    x_train,x_test,y_train,y_test = train_test_split(x_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    # Normalize scaling of train data
    normalize_model = Normalizer()
    np.set_printoptions(precision=3)
    scaled_data_train = normalize_model.fit_transform(x_train)
    # Normalize scaling of test data
    scaled_data_test = normalize_model.fit_transform(x_test)
    return scaled_data_train, scaled_data_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after Standard Scaling is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7326530612244898
Confusion Matrix of Logistic Regression is:
 [[170   3   0   5  17  15   0]
 [  1 151  11  18   7  22   0]
 [  0   9 176  17   1   7   0]
 [  2  14  15 143  10  25   1]
 [ 23  18   5  26 124   8   6]
 [  1  15   2  30   0 134  28]
 [  1   1   0   0   1  28 179]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.86      0.81      0.83       210
           2       0.72      0.72      0.72       210
           3       0.84      0.84      0.84       210
           4       0.60      0.68      0.64       210
           5       0.78      0.59      0.67       210
           6       0.56      0.64      0.60       210
           7       0.84      0.85      0.84       210

    accuracy                           0.73      1470
   macro avg       0.74      0.73      0.73      1470
weighted avg       0.74      0.73      0.73      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7360544217687075
Confusion Matrix of SVM is:
 [[164   1   0   1  25  19   0]
 [  1 153  12  16   4  24   0]
 [  0   8 178  17   1   6   0]
 [  2  17  15 134  11  30   1]
 [ 20  20   6  20 136   3   5]
 [  1  11   5  19   1 144  29]
 [  0   1   0   0   1  35 173]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.87      0.78      0.82       210
           2       0.73      0.73      0.73       210
           3       0.82      0.85      0.84       210
           4       0.65      0.64      0.64       210
           5       0.76      0.65      0.70       210
           6       0.55      0.69      0.61       210
           7       0.83      0.82      0.83       210

    accuracy                           0.74      1470
   macro avg       0.74      0.74      0.74      1470
weighted avg       0.74      0.74      0.74      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.31564625850340133
Confusion Matrix of Decision Tree is:
 [[110   0   0   0  56   0  44]
 [  2   0   3   0   0   0 205]
 [  0   0  83   0   0   0 127]
 [  3   0   2   0   3   0 202]
 [ 69   0   1   0  61   0  79]
 [  2   0   0   0   1   0 207]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.59      0.52      0.56       210
           2       0.00      0.00      0.00       210
           3       0.93      0.40      0.56       210
           4       0.00      0.00      0.00       210
           5       0.50      0.29      0.37       210
           6       0.00      0.00      0.00       210
           7       0.20      1.00      0.33       210

    accuracy                           0.32      1470
   macro avg       0.32      0.32      0.26      1470
weighted avg       0.32      0.32      0.26      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4061224489795918
Confusion Matrix of Decision Tree is:
 [[ 98  44   0   0  68   0   0]
 [  1 204   3   0   1   0   1]
 [  0 127  83   0   0   0   0]
 [  0 201   2   0   6   0   1]
 [ 17  76   1   0 113   0   3]
 [  0 197   0   0   3   0  10]
 [  0 110   0   0   0   1  99]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.84      0.47      0.60       210
           2       0.21      0.97      0.35       210
           3       0.93      0.40      0.56       210
           4       0.00      0.00      0.00       210
           5       0.59      0.54      0.56       210
           6       0.00      0.00      0.00       210
           7       0.87      0.47      0.61       210

    accuracy                           0.41      1470
   macro avg       0.49      0.41      0.38      1470
weighted avg       0.49      0.41      0.38      1470

Decision Tree with 6 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.87      0.46      0.60       210
           2       0.68      0.13      0.22       210
           3       0.93      0.40      0.56       210
           4       0.00      0.00      0.00       210
           5       0.58      0.56      0.57       210
           6       0.21      0.94      0.35       210
           7       0.86      0.44      0.58       210

    accuracy                           0.42      1470
   macro avg       0.59      0.42      0.41      1470
weighted avg       0.59      0.42      0.41      1470

Decision Tree with 7 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.44625850340136053
Confusion Matrix of Decision Tree is:
 [[124   1   0   1  41  43   0]
 [  1  75   3   0   1 129   1]
 [  0  35  83   0   0  92   0]
 [  3  14   2   0   3 187   1]
 [ 46  25   1   1  83  51   3]
 [  1   2   0   0   2 195  10]
 [  0   1   0   0   3 110  96]

Accuracy of Decision Tree after Standard Scaling is: 0.5421768707482993
Confusion Matrix of Decision Tree is:
 [[155   0   0   0  18  37   0]
 [  2  92  11   4   4  97   0]
 [  0   9 131   0   0  70   0]
 [  5  11  10  42   3 139   0]
 [ 51  19   4   7  88  39   2]
 [  2   1   2   1   1 194   9]
 [  0   1   0   0   3 111  95]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.72      0.74      0.73       210
           2       0.69      0.44      0.54       210
           3       0.83      0.62      0.71       210
           4       0.78      0.20      0.32       210
           5       0.75      0.42      0.54       210
           6       0.28      0.92      0.43       210
           7       0.90      0.45      0.60       210

    accuracy                           0.54      1470
   macro avg       0.71      0.54      0.55      1470
weighted avg       0.71      0.54      0.55      1470

Decision Tree with 15 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5986394557823129
Confusion Matrix of Random Forest is:
 [[151   2   3  10  18  25   1]
 [  1 119   7  23   4  55   1]
 [  1   0 134   5   0  70   0]
 [  5  15   9 114  10  55   2]
 [ 65  22   2  22  81  13   5]
 [  3  12   4  41   3 119  28]
 [  0   5   1   9   1  32 162]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.72      0.69       210
           2       0.68      0.57      0.62       210
           3       0.84      0.64      0.72       210
           4       0.51      0.54      0.53       210
           5       0.69      0.39      0.50       210
           6       0.32      0.57      0.41       210
           7       0.81      0.77      0.79       210

    accuracy                           0.60      1470
   macro avg       0.65      0.60      0.61      1470
weighted avg       0.65      0.60      0.61      1470

Random Forest with 3 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6598639455782312
Confusion Matrix of Random Forest is:
 [[154   2   2   4  23  25   0]
 [  1 133  11  19   3  42   1]
 [  0   9 144   4   0  53   0]
 [  3  15   9 119  11  51   2]
 [ 46  26   2  19 103   9   5]
 [  4  10   1  22   2 149  22]
 [  0   5   0   0   1  36 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.73      0.74       210
           2       0.67      0.63      0.65       210
           3       0.85      0.69      0.76       210
           4       0.64      0.57      0.60       210
           5       0.72      0.49      0.58       210
           6       0.41      0.71      0.52       210
           7       0.85      0.80      0.82       210

    accuracy                           0.66      1470
   macro avg       0.70      0.66      0.67      1470
weighted avg       0.70      0.66      0.67      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6673469387755102
Confusion Matrix of Random Forest is:
 [[155   2   2   3  25  22   1]
 [  1 138  12  15   7  36   1]
 [  0  11 146   5   0  48   0]
 [  3  13  11 119  13  49   2]
 [ 47  22   1  20 105   9   6]
 [  3  11   2  20   3 149  22]
 [  0   1   0   0   3  37 169]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.74      0.74       210
           2       0.70      0.66      0.68       210
           3       0.84      0.70      0.76       210
           4       0.65      0.57      0.61       210
           5       0.67      0.50      0.57       210
           6       0.43      0.71      0.53       210
           7       0.84      0.80      0.82       210

    accuracy                           0.67      1470
   macro avg       0.70      0.67      0.67      1470
weighted avg       0.70      0.67      0.67      1470

Random Forest with 19 max_dept

In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7163265306122449
Confusion Matrix of Logistic Regression is:
 [[169   1   0   5  16  17   2]
 [  1 141  22  19   4  22   1]
 [  0   7 178  17   0   8   0]
 [  2  14  20 140   9  19   6]
 [ 29  21   6  23 122   4   5]
 [  2  12   6  33   1 123  33]
 [  0   1   2   0   1  26 180]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.83      0.80      0.82       210
           2       0.72      0.67      0.69       210
           3       0.76      0.85      0.80       210
           4       0.59      0.67      0.63       210
           5       0.80      0.58      0.67       210
           6       0.56      0.59      0.57       210
           7       0.79      0.86      0.82       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7217687074829932
Confusion Matrix of SVM is:
 [[161   0   0   4  23  22   0]
 [  1 143  17  22   2  24   1]
 [  0   8 178  17   0   7   0]
 [  1  16  20 137   7  28   1]
 [ 21  20   9  20 130   5   5]
 [  2  10   6  25   1 137  29]
 [  0   1   1   0   1  32 175]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.87      0.77      0.81       210
           2       0.72      0.68      0.70       210
           3       0.77      0.85      0.81       210
           4       0.61      0.65      0.63       210
           5       0.79      0.62      0.70       210
           6       0.54      0.65      0.59       210
           7       0.83      0.83      0.83       210

    accuracy                           0.72      1470
   macro avg       0.73      0.72      0.72      1470
weighted avg       0.73      0.72      0.72      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.34625850340136055
Confusion Matrix of Decision Tree is:
 [[124   0   1   0  41   0  44]
 [  2   9  18   0   0   0 181]
 [  0   0 108   0   0   0 102]
 [  3   0   8   0   3   0 196]
 [ 71   0   5   0  58   0  76]
 [  2   0   1   0   1   0 206]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.61      0.59      0.60       210
           2       1.00      0.04      0.08       210
           3       0.77      0.51      0.62       210
           4       0.00      0.00      0.00       210
           5       0.56      0.28      0.37       210
           6       0.00      0.00      0.00       210
           7       0.21      1.00      0.34       210

    accuracy                           0.35      1470
   macro avg       0.45      0.35      0.29      1470
weighted avg       0.45      0.35      0.29      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4217687074829932
Confusion Matrix of Decision Tree is:
 [[128   1   0   0  37  44   0]
 [  1  24   3   0   1 180   1]
 [  0  15  93   0   0 102   0]
 [  3   6   2   0   3 195   1]
 [ 50   4   1   0  79  73   3]
 [  1   1   0   0   2 196  10]
 [  0   0   0   0   0 110 100]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.61      0.65       210
           2       0.47      0.11      0.18       210
           3       0.94      0.44      0.60       210
           4       0.00      0.00      0.00       210
           5       0.65      0.38      0.48       210
           6       0.22      0.93      0.35       210
           7       0.87      0.48      0.62       210

    accuracy                           0.42      1470
   macro avg       0.55      0.42      0.41      1470
weighted avg       0.55      0.42      0.41      1470

Decision Tree with 6 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5306122448979592
Confusion Matrix of Decision Tree is:
 [[144   2   0   0  21  43   0]
 [  2  91   8   4   9  96   0]
 [  0  10 120   0  12  68   0]
 [  5   7   7  42  11 137   1]
 [ 52  18   1   5  91  40   3]
 [  3   1   1   2   1 193   9]
 [  0   1   0   0   2 108  99]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.69      0.69       210
           2       0.70      0.43      0.54       210
           3       0.88      0.57      0.69       210
           4       0.79      0.20      0.32       210
           5       0.62      0.43      0.51       210
           6       0.28      0.92      0.43       210
           7       0.88      0.47      0.61       210

    accuracy                           0.53      1470
   macro avg       0.69      0.53      0.54      1470
weighted avg       0.69      0.53      0.54      1470

Decision Tree with 14 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5918367346938775
Confusion Matrix of Random Forest is:
 [[163   3   2   6  15  19   2]
 [  1  84  40  27   6  49   3]
 [  0   0 139   9   0  62   0]
 [  3  11  11 122  12  47   4]
 [ 65  20  11  20  79  10   5]
 [  5   6   1  49   2 120  27]
 [  0   1   0  13   2  31 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.78      0.73       210
           2       0.67      0.40      0.50       210
           3       0.68      0.66      0.67       210
           4       0.50      0.58      0.54       210
           5       0.68      0.38      0.48       210
           6       0.36      0.57      0.44       210
           7       0.80      0.78      0.79       210

    accuracy                           0.59      1470
   macro avg       0.62      0.59      0.59      1470
weighted avg       0.62      0.59      0.59      1470

Random Forest with 2 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.645578231292517
Confusion Matrix of Random Forest is:
 [[157   2   1   3  23  23   1]
 [  1 113  31  20   4  40   1]
 [  0   3 151   3   0  53   0]
 [  3  12  13 114  13  53   2]
 [ 50  19   4  20 102  10   5]
 [  2   9   2  24   4 145  24]
 [  0   3   0   0   1  39 167]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.75      0.74       210
           2       0.70      0.54      0.61       210
           3       0.75      0.72      0.73       210
           4       0.62      0.54      0.58       210
           5       0.69      0.49      0.57       210
           6       0.40      0.69      0.51       210
           7       0.83      0.80      0.81       210

    accuracy                           0.65      1470
   macro avg       0.68      0.65      0.65      1470
weighted avg       0.68      0.65      0.65      1470

Random Forest with 10 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6693877551020408
Confusion Matrix of Random Forest is:
 [[151   2   1   5  28  22   1]
 [  1 125  25  18   4  36   1]
 [  0   5 158  12   0  35   0]
 [  2  13  13 124  11  45   2]
 [ 38  22   4  18 114   9   5]
 [  2   8   3  24   3 144  26]
 [  0   1   0   0   1  40 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.72      0.75       210
           2       0.71      0.60      0.65       210
           3       0.77      0.75      0.76       210
           4       0.62      0.59      0.60       210
           5       0.71      0.54      0.61       210
           6       0.44      0.69      0.53       210
           7       0.83      0.80      0.81       210

    accuracy                           0.67      1470
   macro avg       0.69      0.67      0.67      1470
weighted avg       0.69      0.67      0.67      1470

Random Forest with 18 max_dept

In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7163265306122449
Confusion Matrix of Logistic Regression is:
 [[169   1   0   5  16  17   2]
 [  1 141  22  19   4  22   1]
 [  0   7 178  17   0   8   0]
 [  2  14  20 140   9  19   6]
 [ 29  21   6  23 122   4   5]
 [  2  12   6  33   1 123  33]
 [  0   1   2   0   1  26 180]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.83      0.80      0.82       210
           2       0.72      0.67      0.69       210
           3       0.76      0.85      0.80       210
           4       0.59      0.67      0.63       210
           5       0.80      0.58      0.67       210
           6       0.56      0.59      0.57       210
           7       0.79      0.86      0.82       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7217687074829932
Confusion Matrix of SVM is:
 [[161   0   0   4  23  22   0]
 [  1 143  17  22   2  24   1]
 [  0   8 178  17   0   7   0]
 [  1  16  20 137   7  28   1]
 [ 21  20   9  20 130   5   5]
 [  2  10   6  25   1 137  29]
 [  0   1   1   0   1  32 175]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.87      0.77      0.81       210
           2       0.72      0.68      0.70       210
           3       0.77      0.85      0.81       210
           4       0.61      0.65      0.63       210
           5       0.79      0.62      0.70       210
           6       0.54      0.65      0.59       210
           7       0.83      0.83      0.83       210

    accuracy                           0.72      1470
   macro avg       0.73      0.72      0.72      1470
weighted avg       0.73      0.72      0.72      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.34625850340136055
Confusion Matrix of Decision Tree is:
 [[124   0   1   0  41   0  44]
 [  2   9  18   0   0   0 181]
 [  0   0 108   0   0   0 102]
 [  3   0   8   0   3   0 196]
 [ 71   0   5   0  58   0  76]
 [  2   0   1   0   1   0 206]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.61      0.59      0.60       210
           2       1.00      0.04      0.08       210
           3       0.77      0.51      0.62       210
           4       0.00      0.00      0.00       210
           5       0.56      0.28      0.37       210
           6       0.00      0.00      0.00       210
           7       0.21      1.00      0.34       210

    accuracy                           0.35      1470
   macro avg       0.45      0.35      0.29      1470
weighted avg       0.45      0.35      0.29      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4217687074829932
Confusion Matrix of Decision Tree is:
 [[128   1   0   0  37  44   0]
 [  1  24   3   0   1 180   1]
 [  0  15  93   0   0 102   0]
 [  3   6   2   0   3 195   1]
 [ 50   4   1   0  79  73   3]
 [  1   1   0   0   2 196  10]
 [  0   0   0   0   0 110 100]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.61      0.65       210
           2       0.47      0.11      0.18       210
           3       0.94      0.44      0.60       210
           4       0.00      0.00      0.00       210
           5       0.65      0.38      0.48       210
           6       0.22      0.93      0.35       210
           7       0.87      0.48      0.62       210

    accuracy                           0.42      1470
   macro avg       0.55      0.42      0.41      1470
weighted avg       0.55      0.42      0.41      1470

Decision Tree with 6 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5306122448979592
Confusion Matrix of Decision Tree is:
 [[144   2   0   0  21  43   0]
 [  2  91   8   4   9  96   0]
 [  0  10 120   0  12  68   0]
 [  5   7   7  42  11 137   1]
 [ 52  18   1   5  91  40   3]
 [  3   1   1   2   1 193   9]
 [  0   1   0   0   2 108  99]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.69      0.69       210
           2       0.70      0.43      0.54       210
           3       0.88      0.57      0.69       210
           4       0.79      0.20      0.32       210
           5       0.62      0.43      0.51       210
           6       0.28      0.92      0.43       210
           7       0.88      0.47      0.61       210

    accuracy                           0.53      1470
   macro avg       0.69      0.53      0.54      1470
weighted avg       0.69      0.53      0.54      1470

Decision Tree with 14 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5918367346938775
Confusion Matrix of Random Forest is:
 [[163   3   2   6  15  19   2]
 [  1  84  40  27   6  49   3]
 [  0   0 139   9   0  62   0]
 [  3  11  11 122  12  47   4]
 [ 65  20  11  20  79  10   5]
 [  5   6   1  49   2 120  27]
 [  0   1   0  13   2  31 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.78      0.73       210
           2       0.67      0.40      0.50       210
           3       0.68      0.66      0.67       210
           4       0.50      0.58      0.54       210
           5       0.68      0.38      0.48       210
           6       0.36      0.57      0.44       210
           7       0.80      0.78      0.79       210

    accuracy                           0.59      1470
   macro avg       0.62      0.59      0.59      1470
weighted avg       0.62      0.59      0.59      1470

Random Forest with 2 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.645578231292517
Confusion Matrix of Random Forest is:
 [[157   2   1   3  23  23   1]
 [  1 113  31  20   4  40   1]
 [  0   3 151   3   0  53   0]
 [  3  12  13 114  13  53   2]
 [ 50  19   4  20 102  10   5]
 [  2   9   2  24   4 145  24]
 [  0   3   0   0   1  39 167]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.75      0.74       210
           2       0.70      0.54      0.61       210
           3       0.75      0.72      0.73       210
           4       0.62      0.54      0.58       210
           5       0.69      0.49      0.57       210
           6       0.40      0.69      0.51       210
           7       0.83      0.80      0.81       210

    accuracy                           0.65      1470
   macro avg       0.68      0.65      0.65      1470
weighted avg       0.68      0.65      0.65      1470

Random Forest with 10 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6693877551020408
Confusion Matrix of Random Forest is:
 [[151   2   1   5  28  22   1]
 [  1 125  25  18   4  36   1]
 [  0   5 158  12   0  35   0]
 [  2  13  13 124  11  45   2]
 [ 38  22   4  18 114   9   5]
 [  2   8   3  24   3 144  26]
 [  0   1   0   0   1  40 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.72      0.75       210
           2       0.71      0.60      0.65       210
           3       0.77      0.75      0.76       210
           4       0.62      0.59      0.60       210
           5       0.71      0.54      0.61       210
           6       0.44      0.69      0.53       210
           7       0.83      0.80      0.81       210

    accuracy                           0.67      1470
   macro avg       0.69      0.67      0.67      1470
weighted avg       0.69      0.67      0.67      1470

Random Forest with 18 max_dept

### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7122448979591837
Confusion Matrix of Logistic Regression is:
 [[150   3   3   1  30  20   3]
 [  3 150  13  10  20  12   2]
 [  0   4 192   7   4   3   0]
 [  4  18   9 124  28  17  10]
 [ 28  13   2  14 139   4  10]
 [ 10   7  11  24   6 106  46]
 [  0   1   0   0   4  19 186]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.77      0.71      0.74       210
           2       0.77      0.71      0.74       210
           3       0.83      0.91      0.87       210
           4       0.69      0.59      0.64       210
           5       0.60      0.66      0.63       210
           6       0.59      0.50      0.54       210
           7       0.72      0.89      0.80       210

    accuracy                           0.71      1470
   macro avg       0.71      0.71      0.71      1470
weighted avg       0.71      0.71      0.71      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5537414965986395
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 97   2  37   4  37  22  11]
 [  0 108  32   9  34  24   3]
 [  0   3 174   6   6  21   0]
 [  6  13  29  54  66  30  12]
 [ 23  19   5   5 132  10  16]
 [ 12   6  18   7  11  94  62]
 [  0   1   0   0   3  51 155]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.70      0.46      0.56       210
           2       0.71      0.51      0.60       210
           3       0.59      0.83      0.69       210
           4       0.64      0.26      0.37       210
           5       0.46      0.63      0.53       210
           6       0.37      0.45      0.41       210
           7       0.60      0.74      0.66       210

    accuracy                           0.55      1470
   macro avg       0.58      0.55      0.54      1470
weighted avg       0.58      0.55      0.54      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.39931972789115644
Confusion Matrix of Decision Tree is:
 [[  0   1   3   0 175   0  31]
 [  0  46  10   0 130   0  24]
 [  0   2 167   0  26   0  15]
 [  0   4  23   0 148   0  35]
 [  0   3   3   0 186   0  18]
 [  0   5  16   0  62   0 127]
 [  0   0   0   0  22   0 188]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.75      0.22      0.34       210
           3       0.75      0.80      0.77       210
           4       0.00      0.00      0.00       210
           5       0.25      0.89      0.39       210
           6       0.00      0.00      0.00       210
           7       0.43      0.90      0.58       210

    accuracy                           0.40      1470
   macro avg       0.31      0.40      0.30      1470
weighted avg       0.31      0.40      0.30      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4993197278911565
Confusion Matrix of Decision Tree is:
 [[139   0   4   0  36  20  11]
 [ 13  45  10   1 117  19   5]
 [  1   0 164   5  25  15   0]
 [ 17   4  13  10 131  19  16]
 [ 32   3   2   1 154   9   9]
 [ 27   1  16   4  35  64  63]
 [ 10   0   0   0  12  30 158]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.66      0.62       210
           2       0.85      0.21      0.34       210
           3       0.78      0.78      0.78       210
           4       0.48      0.05      0.09       210
           5       0.30      0.73      0.43       210
           6       0.36      0.30      0.33       210
           7       0.60      0.75      0.67       210

    accuracy                           0.50      1470
   macro avg       0.57      0.50      0.47      1470
weighted avg       0.57      0.50      0.47      1470

Decision Tree with 4 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5612244897959183
Confusion Matrix of Decision Tree is:
 [[133   8   2  13  32  13   9]
 [  7 125   6  27  30   5  10]
 [  1   7 175  14   7   4   2]
 [  9  32   8  73  54  21  13]
 [ 32  23   6  23 104  10  12]
 [ 16  22  13  25  13  75  46]
 [  6   1   1   9  11  42 140]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.65      0.63      0.64       210
           2       0.57      0.60      0.58       210
           3       0.83      0.83      0.83       210
           4       0.40      0.35      0.37       210
           5       0.41      0.50      0.45       210
           6       0.44      0.36      0.39       210
           7       0.60      0.67      0.63       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Decision Tree with 12 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5591836734693878
Confusion Matrix of Decision Tree is:
 [[128   6   3   9  33  18  13]
 [  7 121  11  19  35  10   7]
 [  2   6 178  14   5   3   2]
 [ 13  28  12  78  44  22  13]
 [ 41  22   4  29  91  17   6]
 [ 21  17  13  20  16  90  33]
 [  3   2   0  19  13  37 136]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.61      0.60       210
           2       0.60      0.58      0.59       210
           3       0.81      0.85      0.83       210
           4       0.41      0.37      0.39       210
           5       0.38      0.43      0.41       210
           6       0.46      0.43      0.44       210
           7       0.65      0.65      0.65       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Decision Tree with 20 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.5333333333333333
Confusion Matrix of Random Forest is:
 [[119   1   3   0  56   3  28]
 [  3 120  24   0  45   3  15]
 [  2   4 181   0   8  10   5]
 [ 17  15  32   1  98   3  44]
 [ 15  20   2   0 146   0  27]
 [ 12   6  18   0  23  11 140]
 [  0   2   0   0   2   0 206]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.57      0.63       210
           2       0.71      0.57      0.63       210
           3       0.70      0.86      0.77       210
           4       1.00      0.00      0.01       210
           5       0.39      0.70      0.50       210
           6       0.37      0.05      0.09       210
           7       0.44      0.98      0.61       210

    accuracy                           0.53      1470
   macro avg       0.62      0.53      0.46      1470
weighted avg       0.62      0.53      0.46      1470

Random Forest with 3 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7
Confusion Matrix of Random Forest is:
 [[138   2   0   4  42  19   5]
 [  4 150   4  16  22  12   2]
 [  0   7 185   9   3   6   0]
 [  6  13   4 111  40  22  14]
 [ 16  12   0  16 146  10  10]
 [ 10  11   1  16   8 116  48]
 [  0   1   0   0   2  24 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.66      0.72       210
           2       0.77      0.71      0.74       210
           3       0.95      0.88      0.92       210
           4       0.65      0.53      0.58       210
           5       0.56      0.70      0.62       210
           6       0.56      0.55      0.55       210
           7       0.70      0.87      0.78       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Random Forest with 11 max_depth
Accuracy of R

Accuracy of Random Forest after Standard Scaling is: 0.7047619047619048
Confusion Matrix of Random Forest is:
 [[143   2   1   7  36  17   4]
 [  1 149   7  14  22  15   2]
 [  0   7 185   7   3   8   0]
 [  4   9   4 120  37  27   9]
 [ 18  12   0  19 142  10   9]
 [ 13  12   1  19   3 110  52]
 [  0   0   0   1   1  21 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.80      0.68      0.74       210
           2       0.78      0.71      0.74       210
           3       0.93      0.88      0.91       210
           4       0.64      0.57      0.60       210
           5       0.58      0.68      0.63       210
           6       0.53      0.52      0.53       210
           7       0.71      0.89      0.79       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Random Forest with 19 max_dept

In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_gkb.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.35034013605442177
Confusion Matrix of Logistic Regression is:
 [[102   0  41  27   0   9  31]
 [ 25   2 108  25   0  14  36]
 [  7   0 179  11   0   4   9]
 [ 19   0 120  33   0   8  30]
 [ 30   0 124  38   0   3  15]
 [ 34   0  35  24   0  20  97]
 [ 14   1   4   4   0   8 179]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.44      0.49      0.46       210
           2       0.67      0.01      0.02       210
           3       0.29      0.85      0.44       210
           4       0.20      0.16      0.18       210
           5       0.00      0.00      0.00       210
           6       0.30      0.10      0.14       210
           7       0.45      0.85      0.59       210

    accuracy                           0.35      1470
   macro avg       0.34      0.35      0.26      1470
weighted avg       0.34      0.35      0.26      1470

KNN with 3 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after Standard Scaling is: 0.4489795918367347
Confusion Matrix of KNN Model is:
 [[129  19   5  16  23   9   9]
 [ 40 108   8  11  23  11   9]
 [ 12  14 169  11   2   1   1]
 [ 43  45  13  51  37   7  14]
 [ 60  43   8  25  61   7   6]
 [ 39  32  16  17  20  44  42]
 [ 22  19   4  13  15  39  98]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.37      0.61      0.46       210
           2       0.39      0.51      0.44       210
           3       0.76      0.80      0.78       210
           4       0.35      0.24      0.29       210
           5       0.34      0.29      0.31       210
           6       0.37      0.21      0.27       210
           7       0.55      0.47      0.50       210

    accuracy                           0.45      1470
   macro avg       0.45      0.45      0.44      1470
weighted avg       0.45      0.45      0.44      1470

KNN with 4 Neighbors
Accuracy of KNN Model

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.3047619047619048
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 44   0  32  64   0  13  57]
 [ 16   0  97  41   0   3  53]
 [  5   0 166  17   0   9  13]
 [ 10   0 111  50   0   5  34]
 [ 18   0 122  48   0   1  21]
 [ 16   0  31  38   0  11 114]
 [  6   0   4   9   0  14 177]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.38      0.21      0.27       210
           2       0.00      0.00      0.00       210
           3       0.29      0.79      0.43       210
           4       0.19      0.24      0.21       210
           5       0.00      0.00      0.00       210
           6       0.20      0.05      0.08       210
           7       0.38      0.84      0.52       210

    accuracy                           0.30      1470
   macro avg       0.21      0.30      0.22      1470
weighted avg       0.21      0.30      0.22      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after Standard Scaling is: 0.33197278911564626
Confusion Matrix of SVM is:
 [[ 99   0  64   8   0   0  39]
 [ 27   3 124   7   0   0  49]
 [  7   0 190   0   0   1  12]
 [ 18   0 150   3   0   1  38]
 [ 31   0 153   9   0   0  17]
 [ 29   0  55   7   0   5 114]
 [  9   0   7   4   0   2 188]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.45      0.47      0.46       210
           2       1.00      0.01      0.03       210
           3       0.26      0.90      0.40       210
           4       0.08      0.01      0.02       210
           5       0.00      0.00      0.00       210
           6       0.56      0.02      0.05       210
           7       0.41      0.90      0.56       210

    accuracy                           0.33      1470
   macro avg       0.39      0.33      0.22      1470
weighted avg       0.39      0.33      0.22      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after Standard Scaling is: 0.34625850340136055
Confusion Matrix of SVM is:
 [[113   0  51  21   0   5  20]
 [ 24   5 117  15   0  15  34]
 [  9   1 189   1   0   4   6]
 [ 18   1 141  14   0   6  30]
 [ 31   1 145  17   0   2  14]
 [ 31   1  50  14   0  17  97]
 [ 21   0   6   5   0   7 171]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.46      0.54      0.49       210
           2       0.56      0.02      0.05       210
           3       0.27      0.90      0.42       210
           4       0.16      0.07      0.09       210
           5       0.00      0.00      0.00       210
           6       0.30      0.08      0.13       210
           7       0.46      0.81      0.59       210

    accuracy                           0.35      1470
   macro avg       0.32      0.35      0.25      1470
weighted avg       0.32      0.35      0.25      1470

Working on SVM Kernal: rbf


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after Standard Scaling is: 0.33945578231292517
Confusion Matrix of SVM is:
 [[113   0  53  19   0   3  22]
 [ 28   3 117  14   0   4  44]
 [ 10   0 189   1   0   3   7]
 [ 19   1 143  11   0   4  32]
 [ 32   1 148  14   0   1  14]
 [ 34   1  51  12   0  14  98]
 [ 23   0   6   5   0   7 169]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.44      0.54      0.48       210
           2       0.50      0.01      0.03       210
           3       0.27      0.90      0.41       210
           4       0.14      0.05      0.08       210
           5       0.00      0.00      0.00       210
           6       0.39      0.07      0.11       210
           7       0.44      0.80      0.57       210

    accuracy                           0.34      1470
   macro avg       0.31      0.34      0.24      1470
weighted avg       0.31      0.34      0.24      1470

Working on SVM Kernal: sigmoid


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after Standard Scaling is: 0.29183673469387755
Confusion Matrix of SVM is:
 [[ 62   0  70  13   6   0  59]
 [ 21   0 134   3   2   6  44]
 [  5   0 190   2   0   1  12]
 [ 15   0 153   4   0   5  33]
 [ 25   0 160   6   1   0  18]
 [ 25   0  60   3   2  11 109]
 [ 12   0  14   1   3  19 161]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.38      0.30      0.33       210
           2       0.00      0.00      0.00       210
           3       0.24      0.90      0.38       210
           4       0.12      0.02      0.03       210
           5       0.07      0.00      0.01       210
           6       0.26      0.05      0.09       210
           7       0.37      0.77      0.50       210

    accuracy                           0.29      1470
   macro avg       0.21      0.29      0.19      1470
weighted avg       0.21      0.29      0.19      1470

Decision Tree with 1 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.22517006802721087
Confusion Matrix of Decision Tree is:
 [[  0   0   3   0   0   0 207]
 [  0   0  17   0   0   0 193]
 [  0   0 121   0   0   0  89]
 [  0   0   4   0   0   0 206]
 [  0   0   6   0   0   0 204]
 [  0   0   8   0   0   0 202]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.76      0.58      0.66       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.16      1.00      0.28       210

    accuracy                           0.23      1470
   macro avg       0.13      0.23      0.13      1470
weighted avg       0.13      0.23      0.13      1470

Decision Tree with 2 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3197278911564626
Confusion Matrix of Decision Tree is:
 [[  0   2   1 172   0   0  35]
 [  0   8   9 148   0   0  45]
 [  0  13 108  67   0   0  22]
 [  0   1   3 170   0   0  36]
 [  0   4   2 187   0   0  17]
 [  0   3   5  93   0   0 109]
 [  0   0   0  26   0   0 184]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.26      0.04      0.07       210
           3       0.84      0.51      0.64       210
           4       0.20      0.81      0.32       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.41      0.88      0.56       210

    accuracy                           0.32      1470
   macro avg       0.24      0.32      0.23      1470
weighted avg       0.24      0.32      0.23      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3904761904761905
Confusion Matrix of Decision Tree is:
 [[ 91   0   3  81   0   0  35]
 [ 24  11   9 124   0   0  42]
 [  4   3 132  63   0   0   8]
 [ 12   1   4 158   0   0  35]
 [ 21   2   5 166   0   0  16]
 [ 30   0  13  63   0   0 104]
 [ 12   0   2  14   0   0 182]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.43      0.45       210
           2       0.65      0.05      0.10       210
           3       0.79      0.63      0.70       210
           4       0.24      0.75      0.36       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.43      0.87      0.58       210

    accuracy                           0.39      1470
   macro avg       0.37      0.39      0.31      1470
weighted avg       0.37      0.39      0.31      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.41904761904761906
Confusion Matrix of Decision Tree is:
 [[ 97   7   1  76   0   8  21]
 [ 15  54   5  82   0  13  41]
 [  4  36 125  35   0   2   8]
 [  9  20   3 139   0   4  35]
 [ 18  15   3 155   0   3  16]
 [ 16   5   5  61   0  20 103]
 [  7   2   0  12   0   8 181]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.46      0.52       210
           2       0.39      0.26      0.31       210
           3       0.88      0.60      0.71       210
           4       0.25      0.66      0.36       210
           5       0.00      0.00      0.00       210
           6       0.34      0.10      0.15       210
           7       0.45      0.86      0.59       210

    accuracy                           0.42      1470
   macro avg       0.41      0.42      0.38      1470
weighted avg       0.41      0.42      0.38      1470

Decision Tree with 5 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.43537414965986393
Confusion Matrix of Decision Tree is:
 [[ 97   3   1  52  28  17  12]
 [ 15  45   5  42  52  37  14]
 [  4  20 125  32  19   5   5]
 [  9  13   3  67  79  17  22]
 [ 18   5   2  52 114   8  11]
 [ 16   5   5  38  23  51  72]
 [  7   2   0   6  10  44 141]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.46      0.52       210
           2       0.48      0.21      0.30       210
           3       0.89      0.60      0.71       210
           4       0.23      0.32      0.27       210
           5       0.35      0.54      0.43       210
           6       0.28      0.24      0.26       210
           7       0.51      0.67      0.58       210

    accuracy                           0.44      1470
   macro avg       0.48      0.44      0.44      1470
weighted avg       0.48      0.44      0.44      1470

Decision Tree with 6 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.46870748299319726
Confusion Matrix of Decision Tree is:
 [[129  13   3  19  19  14  13]
 [ 19  84  12  33  28  24  10]
 [  5  17 164  13   3   5   3]
 [ 29  31   9  68  32  18  23]
 [ 49  23   5  40  64  18  11]
 [ 30  19  13  20  16  43  69]
 [ 14   8   1   8  11  31 137]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.61      0.53       210
           2       0.43      0.40      0.41       210
           3       0.79      0.78      0.79       210
           4       0.34      0.32      0.33       210
           5       0.37      0.30      0.33       210
           6       0.28      0.20      0.24       210
           7       0.52      0.65      0.58       210

    accuracy                           0.47      1470
   macro avg       0.46      0.47      0.46      1470
weighted avg       0.46      0.47      0.46      1470

Decision Tree with 14 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.3510204081632653
Confusion Matrix of Random Forest is:
 [[ 32   0   8 129   0   0  41]
 [  5   0  34 118   0   0  53]
 [  0   0 142  55   0   0  13]
 [  1   0  17 154   0   0  38]
 [  0   0  16 172   0   0  22]
 [  8   0  14  67   0   0 121]
 [  4   0   0  18   0   0 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.64      0.15      0.25       210
           2       0.00      0.00      0.00       210
           3       0.61      0.68      0.64       210
           4       0.22      0.73      0.33       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.39      0.90      0.55       210

    accuracy                           0.35      1470
   macro avg       0.27      0.35      0.25      1470
weighted avg       0.27      0.35      0.25      1470

Random Forest with 2 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.37142857142857144
Confusion Matrix of Random Forest is:
 [[ 63   0   4 116   0   2  25]
 [  9   0  36 109   5   1  50]
 [  1   0 150  46   0   0  13]
 [  4   0  21 141   6   0  38]
 [  9   0  11 170   1   0  19]
 [ 12   0  13  64   3   5 113]
 [  6   0   1  16   0   1 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.30      0.40       210
           2       0.00      0.00      0.00       210
           3       0.64      0.71      0.67       210
           4       0.21      0.67      0.32       210
           5       0.07      0.00      0.01       210
           6       0.56      0.02      0.05       210
           7       0.42      0.89      0.57       210

    accuracy                           0.37      1470
   macro avg       0.36      0.37      0.29      1470
weighted avg       0.36      0.37      0.29      1470

Random Forest with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.44421768707482995
Confusion Matrix of Random Forest is:
 [[132   2   2  35  14   6  19]
 [ 42  26  17  42  33  18  32]
 [  8   8 142  33   6   3  10]
 [ 30   8   7  83  45   4  33]
 [ 43   4   4  67  75   2  15]
 [ 47   2  11  25  12  14  99]
 [ 18   0   0   4   2   5 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.63      0.50       210
           2       0.52      0.12      0.20       210
           3       0.78      0.68      0.72       210
           4       0.29      0.40      0.33       210
           5       0.40      0.36      0.38       210
           6       0.27      0.07      0.11       210
           7       0.47      0.86      0.60       210

    accuracy                           0.44      1470
   macro avg       0.45      0.44      0.41      1470
weighted avg       0.45      0.44      0.41      1470

Random Forest with 4 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5517006802721088
Confusion Matrix of Random Forest is:
 [[138   4   1  17  27   8  15]
 [ 13  93   6  39  26  15  18]
 [  2  13 170  15   3   3   4]
 [ 18  13   8  83  52  10  26]
 [ 36   2   3  40 108   9  12]
 [ 20  18   7  25  18  57  65]
 [ 10   4   0   3   8  23 162]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.66      0.62       210
           2       0.63      0.44      0.52       210
           3       0.87      0.81      0.84       210
           4       0.37      0.40      0.38       210
           5       0.45      0.51      0.48       210
           6       0.46      0.27      0.34       210
           7       0.54      0.77      0.63       210

    accuracy                           0.55      1470
   macro avg       0.56      0.55      0.54      1470
weighted avg       0.56      0.55      0.54      1470

Random Forest with 12 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5530612244897959
Confusion Matrix of Random Forest is:
 [[130   4   0  17  31  11  17]
 [ 14 103   5  39  20  13  16]
 [  0  12 176  13   4   3   2]
 [ 15  20   8  77  53  16  21]
 [ 30  11   3  46 103   7  10]
 [ 18  20   7  26  12  69  58]
 [  4   3   0   8   9  31 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.62      0.62       210
           2       0.60      0.49      0.54       210
           3       0.88      0.84      0.86       210
           4       0.34      0.37      0.35       210
           5       0.44      0.49      0.47       210
           6       0.46      0.33      0.38       210
           7       0.56      0.74      0.63       210

    accuracy                           0.55      1470
   macro avg       0.56      0.55      0.55      1470
weighted avg       0.56      0.55      0.55      1470

Random Forest with 20 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.680952380952381
Confusion Matrix of Logistic Regression is:
 [[141   4   4   8  31  17   5]
 [  3 124  20  17  26  15   5]
 [  1   4 185  10   1   9   0]
 [  7   4  12 139  27  18   3]
 [ 26  11   3  31 131   2   6]
 [ 18  10   9  25   9  95  44]
 [  0   5   0   1   1  17 186]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.72      0.67      0.69       210
           2       0.77      0.59      0.67       210
           3       0.79      0.88      0.84       210
           4       0.60      0.66      0.63       210
           5       0.58      0.62      0.60       210
           6       0.55      0.45      0.50       210
           7       0.75      0.89      0.81       210

    accuracy                           0.68      1470
   macro avg       0.68      0.68      0.68      1470
weighted avg       0.68      0.68      0.68      1470

KNN with 3 Ne

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5306122448979592
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 88  12  24  11  47  23   5]
 [  2  91  16  11  54  28   8]
 [  9  15 142  16  12  15   1]
 [ 11  10  18  67  76  26   2]
 [ 12  11   7  24 141  11   4]
 [ 20  15   3  16  10  81  65]
 [  0   5   0   1   0  34 170]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.62      0.42      0.50       210
           2       0.57      0.43      0.49       210
           3       0.68      0.68      0.68       210
           4       0.46      0.32      0.38       210
           5       0.41      0.67      0.51       210
           6       0.37      0.39      0.38       210
           7       0.67      0.81      0.73       210

    accuracy                           0.53      1470
   macro avg       0.54      0.53      0.52      1470
weighted avg       0.54      0.53      0.52      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.36054421768707484
Confusion Matrix of Decision Tree is:
 [[  0   0   9   0 177  21   3]
 [  0   0   6   0 173  23   8]
 [  0   0 113   0  81  14   2]
 [  0   0   8   0 188  10   4]
 [  0   0   5   0 197   5   3]
 [  0   0   4   0  73  68  65]
 [  0   0   0   0  23  35 152]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.78      0.54      0.64       210
           4       0.00      0.00      0.00       210
           5       0.22      0.94      0.35       210
           6       0.39      0.32      0.35       210
           7       0.64      0.72      0.68       210

    accuracy                           0.36      1470
   macro avg       0.29      0.36      0.29      1470
weighted avg       0.29      0.36      0.29      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.42857142857142855
Confusion Matrix of Decision Tree is:
 [[112   0   5  69   0  21   3]
 [ 21   9   4 154   0  18   4]
 [  5   0 107  82   0  16   0]
 [ 23   0   4 169   0  13   1]
 [ 48   0   2 152   0   5   3]
 [ 23   0   3  51   0  90  43]
 [ 16   0   0   7   0  44 143]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.53      0.49       210
           2       1.00      0.04      0.08       210
           3       0.86      0.51      0.64       210
           4       0.25      0.80      0.38       210
           5       0.00      0.00      0.00       210
           6       0.43      0.43      0.43       210
           7       0.73      0.68      0.70       210

    accuracy                           0.43      1470
   macro avg       0.53      0.43      0.39      1470
weighted avg       0.53      0.43      0.39      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.45034013605442175
Confusion Matrix of Decision Tree is:
 [[101  17   3  65   0  20   4]
 [  9  74   4 101   0  16   6]
 [  3   7 107  77   0  15   1]
 [ 13  29   4 150   0  13   1]
 [ 33  26   2 141   0   5   3]
 [  9  24   2  42   0  75  58]
 [  0  17   0   6   0  32 155]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.48      0.53       210
           2       0.38      0.35      0.37       210
           3       0.88      0.51      0.64       210
           4       0.26      0.71      0.38       210
           5       0.00      0.00      0.00       210
           6       0.43      0.36      0.39       210
           7       0.68      0.74      0.71       210

    accuracy                           0.45      1470
   macro avg       0.46      0.45      0.43      1470
weighted avg       0.46      0.45      0.43      1470

Decision Tree with 5 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.47210884353741495
Confusion Matrix of Decision Tree is:
 [[115   4   6  64   0  19   2]
 [ 11  71  21  86   1  15   5]
 [  7   6 135  49   0  12   1]
 [ 24  19   4 148   0  15   0]
 [ 49  11   6 135   0   6   3]
 [ 22  11   9  39   0  83  46]
 [ 16   1   3   4   0  44 142]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.55      0.51       210
           2       0.58      0.34      0.43       210
           3       0.73      0.64      0.69       210
           4       0.28      0.70      0.40       210
           5       0.00      0.00      0.00       210
           6       0.43      0.40      0.41       210
           7       0.71      0.68      0.69       210

    accuracy                           0.47      1470
   macro avg       0.46      0.47      0.45      1470
weighted avg       0.46      0.47      0.45      1470

Decision Tree with 6 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5408163265306123
Confusion Matrix of Decision Tree is:
 [[118  22   8  11  35  10   6]
 [ 16 108  14  19  31  16   6]
 [  3  13 163  12   7   9   3]
 [ 16  23  11  89  40  30   1]
 [ 31  24   4  40  94  12   5]
 [ 14  29   7  26  19  77  38]
 [  5   9   2   6   3  39 146]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.56      0.57       210
           2       0.47      0.51      0.49       210
           3       0.78      0.78      0.78       210
           4       0.44      0.42      0.43       210
           5       0.41      0.45      0.43       210
           6       0.40      0.37      0.38       210
           7       0.71      0.70      0.70       210

    accuracy                           0.54      1470
   macro avg       0.54      0.54      0.54      1470
weighted avg       0.54      0.54      0.54      1470

Decision Tree with 14 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.2836734693877551
Confusion Matrix of Random Forest is:
 [[  1   0 177   1   2   3  26]
 [  0   5 143   0   4   0  58]
 [  0   0 194   0   0   0  16]
 [  0   0 176   3   6   1  24]
 [  0   2 190   0   5   1  12]
 [  0   5  60   1   1   3 140]
 [  0   0   4   0   0   0 206]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       1.00      0.00      0.01       210
           2       0.42      0.02      0.05       210
           3       0.21      0.92      0.34       210
           4       0.60      0.01      0.03       210
           5       0.28      0.02      0.04       210
           6       0.38      0.01      0.03       210
           7       0.43      0.98      0.60       210

    accuracy                           0.28      1470
   macro avg       0.47      0.28      0.16      1470
weighted avg       0.47      0.28      0.16      1470

Random Forest with 2 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6619047619047619
Confusion Matrix of Random Forest is:
 [[129   5   0  12  36  24   4]
 [  2 132   3  22  30  17   4]
 [  2   6 171  13   8  10   0]
 [  9  10   3 132  33  22   1]
 [ 17  15   0  40 128   6   4]
 [ 12  18   0  22   8  98  52]
 [  0   4   0   1   1  21 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.75      0.61      0.68       210
           2       0.69      0.63      0.66       210
           3       0.97      0.81      0.88       210
           4       0.55      0.63      0.58       210
           5       0.52      0.61      0.56       210
           6       0.49      0.47      0.48       210
           7       0.74      0.87      0.80       210

    accuracy                           0.66      1470
   macro avg       0.67      0.66      0.66      1470
weighted avg       0.67      0.66      0.66      1470

Random Forest with 10 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6782312925170068
Confusion Matrix of Random Forest is:
 [[134   4   3   8  38  20   3]
 [  5 141   4  20  22  14   4]
 [  4   9 176   9   3   9   0]
 [ 11   8   5 130  32  22   2]
 [ 26  16   0  38 117  10   3]
 [ 10  15   2  23   6 119  35]
 [  0   4   0   1   1  24 180]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.64      0.67       210
           2       0.72      0.67      0.69       210
           3       0.93      0.84      0.88       210
           4       0.57      0.62      0.59       210
           5       0.53      0.56      0.55       210
           6       0.55      0.57      0.56       210
           7       0.79      0.86      0.82       210

    accuracy                           0.68      1470
   macro avg       0.68      0.68      0.68      1470
weighted avg       0.68      0.68      0.68      1470

Random Forest with 18 max_dept

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_vbert.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7408163265306122
Confusion Matrix of Logistic Regression is:
 [[156   1   3   2  34  11   3]
 [  2 146   8  17  16  18   3]
 [  1   7 190   4   4   4   0]
 [  2   8  11 141  32  14   2]
 [ 16   9   2  18 152   3  10]
 [ 10  12   8  17   6 116  41]
 [  0   0   0   0   3  19 188]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.83      0.74      0.79       210
           2       0.80      0.70      0.74       210
           3       0.86      0.90      0.88       210
           4       0.71      0.67      0.69       210
           5       0.62      0.72      0.67       210
           6       0.63      0.55      0.59       210
           7       0.76      0.90      0.82       210

    accuracy                           0.74      1470
   macro avg       0.74      0.74      0.74      1470
weighted avg       0.74      0.74      0.74      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5755102040816327
Confusion Matrix of Bernoulli Naive Bayes is:
 [[109   7  16   1  44  10  23]
 [  1 124  15  17  31  13   9]
 [  4  22 168   5   7   3   1]
 [  6  14  16  83  50  16  25]
 [ 16  16   0   6 137   3  32]
 [  8  20  23  20  18  57  64]
 [  0   6   0   5   9  22 168]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.76      0.52      0.62       210
           2       0.59      0.59      0.59       210
           3       0.71      0.80      0.75       210
           4       0.61      0.40      0.48       210
           5       0.46      0.65      0.54       210
           6       0.46      0.27      0.34       210
           7       0.52      0.80      0.63       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.56      1470
weighted avg       0.59      0.58      0.56      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3653061224489796
Confusion Matrix of Decision Tree is:
 [[  0   0   4   3 185   0  18]
 [  0   0  16  37 135   0  22]
 [  0   0 145  30  30   0   5]
 [  0   0   9  36 140   0  25]
 [  0   0   2   3 187   0  18]
 [  0   0   8  23  85   0  94]
 [  0   0   1   2  38   0 169]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.78      0.69      0.73       210
           4       0.27      0.17      0.21       210
           5       0.23      0.89      0.37       210
           6       0.00      0.00      0.00       210
           7       0.48      0.80      0.60       210

    accuracy                           0.37      1470
   macro avg       0.25      0.37      0.27      1470
weighted avg       0.25      0.37      0.27      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4306122448979592
Confusion Matrix of Decision Tree is:
 [[ 78   6   0   1 107  14   4]
 [  4  40  10   3 131  16   6]
 [  0  31 140   4  30   5   0]
 [  9  13   6  26 131  19   6]
 [ 12   1   2   2 175  10   8]
 [  6  14   5  12  79  54  40]
 [  0   2   1   0  38  49 120]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.72      0.37      0.49       210
           2       0.37      0.19      0.25       210
           3       0.85      0.67      0.75       210
           4       0.54      0.12      0.20       210
           5       0.25      0.83      0.39       210
           6       0.32      0.26      0.29       210
           7       0.65      0.57      0.61       210

    accuracy                           0.43      1470
   macro avg       0.53      0.43      0.43      1470
weighted avg       0.53      0.43      0.43      1470

Decision Tree with 4 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5217687074829932
Confusion Matrix of Decision Tree is:
 [[114  11   1  13  45  14  12]
 [ 12 100  19  18  33  20   8]
 [  4  10 174  13   5   4   0]
 [ 19  15   8  91  43  25   9]
 [ 32  20   3  31  96  17  11]
 [ 21  28   9  31  14  66  41]
 [  5   7   1  19   6  46 126]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.54      0.55       210
           2       0.52      0.48      0.50       210
           3       0.81      0.83      0.82       210
           4       0.42      0.43      0.43       210
           5       0.40      0.46      0.42       210
           6       0.34      0.31      0.33       210
           7       0.61      0.60      0.60       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.52      1470
weighted avg       0.52      0.52      0.52      1470

Decision Tree with 12 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5006802721088436
Confusion Matrix of Decision Tree is:
 [[113  13   3  14  39  17  11]
 [ 11 100  11  16  40  25   7]
 [  2  13 170   9   5  11   0]
 [ 21  23   9  91  31  30   5]
 [ 39  25   8  32  80  16  10]
 [ 23  17  12  34  14  62  48]
 [  6   5   0  15   7  57 120]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.54      0.53       210
           2       0.51      0.48      0.49       210
           3       0.80      0.81      0.80       210
           4       0.43      0.43      0.43       210
           5       0.37      0.38      0.38       210
           6       0.28      0.30      0.29       210
           7       0.60      0.57      0.58       210

    accuracy                           0.50      1470
   macro avg       0.50      0.50      0.50      1470
weighted avg       0.50      0.50      0.50      1470

Decision Tree with 20 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.49727891156462584
Confusion Matrix of Random Forest is:
 [[ 78   8  13   0  76   0  35]
 [  1  97  24   0  60   0  28]
 [  0  15 175   0  17   0   3]
 [  2  14  19  16 106   0  53]
 [  0   9   2   0 160   0  39]
 [  4  16  35   1  22   3 129]
 [  0   1   1   1   5   0 202]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.92      0.37      0.53       210
           2       0.61      0.46      0.52       210
           3       0.65      0.83      0.73       210
           4       0.89      0.08      0.14       210
           5       0.36      0.76      0.49       210
           6       1.00      0.01      0.03       210
           7       0.41      0.96      0.58       210

    accuracy                           0.50      1470
   macro avg       0.69      0.50      0.43      1470
weighted avg       0.69      0.50      0.43      1470

Random Forest with 3 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6925170068027211
Confusion Matrix of Random Forest is:
 [[131   3   2   3  47  20   4]
 [  1 143   7  19  20  13   7]
 [  0  13 180   7   5   5   0]
 [  1  11   7 136  35  13   7]
 [ 10  15   0  21 148   7   9]
 [  5  18  11  18  12  92  54]
 [  1   1   0   0   2  18 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.88      0.62      0.73       210
           2       0.70      0.68      0.69       210
           3       0.87      0.86      0.86       210
           4       0.67      0.65      0.66       210
           5       0.55      0.70      0.62       210
           6       0.55      0.44      0.49       210
           7       0.70      0.90      0.78       210

    accuracy                           0.69      1470
   macro avg       0.70      0.69      0.69      1470
weighted avg       0.70      0.69      0.69      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.7068027210884353
Confusion Matrix of Random Forest is:
 [[138   5   2   4  36  20   5]
 [  2 150   5  16  17  16   4]
 [  1  11 185   6   3   4   0]
 [  2  11   6 131  40  15   5]
 [ 21  14   2  22 140   5   6]
 [ 11  10   8  16   5 110  50]
 [  1   1   0   1   2  20 185]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.66      0.72       210
           2       0.74      0.71      0.73       210
           3       0.89      0.88      0.89       210
           4       0.67      0.62      0.65       210
           5       0.58      0.67      0.62       210
           6       0.58      0.52      0.55       210
           7       0.73      0.88      0.80       210

    accuracy                           0.71      1470
   macro avg       0.71      0.71      0.71      1470
weighted avg       0.71      0.71      0.71      1470

Random Forest with 19 max_dept

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//gpt_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7238095238095238
Confusion Matrix of Logistic Regression is:
 [[160   3   0   2  26  13   6]
 [  2 160   7  10  13  17   1]
 [  0   4 194   7   2   3   0]
 [  1  14  10 134  24  20   7]
 [ 20  12   3  25 137   7   6]
 [ 13  19  11  24   6  95  42]
 [  0   1   0   3   2  20 184]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.82      0.76      0.79       210
           2       0.75      0.76      0.76       210
           3       0.86      0.92      0.89       210
           4       0.65      0.64      0.65       210
           5       0.65      0.65      0.65       210
           6       0.54      0.45      0.49       210
           7       0.75      0.88      0.81       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5795918367346938
Confusion Matrix of Bernoulli Naive Bayes is:
 [[112   4  13   3  30  28  20]
 [  0 121  19  12  25  23  10]
 [ 20  11 147  16   3  13   0]
 [  6  13   8  97  31  23  32]
 [ 21  10   1  16 122  13  27]
 [ 16  15   6  24   6  79  64]
 [  0   2   0   1   3  30 174]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.64      0.53      0.58       210
           2       0.69      0.58      0.63       210
           3       0.76      0.70      0.73       210
           4       0.57      0.46      0.51       210
           5       0.55      0.58      0.57       210
           6       0.38      0.38      0.38       210
           7       0.53      0.83      0.65       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.58      1470
weighted avg       0.59      0.58      0.58      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3380952380952381
Confusion Matrix of Decision Tree is:
 [[  0   0  21   0 150   0  39]
 [  0   0  19   0 151   0  40]
 [  0   0 162   0  36   0  12]
 [  0   0  23   0 124   0  63]
 [  0   0   4   0 172   0  34]
 [  0   0  25   0  89   0  96]
 [  0   0  15   0  32   0 163]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.60      0.77      0.68       210
           4       0.00      0.00      0.00       210
           5       0.23      0.82      0.36       210
           6       0.00      0.00      0.00       210
           7       0.36      0.78      0.50       210

    accuracy                           0.34      1470
   macro avg       0.17      0.34      0.22      1470
weighted avg       0.17      0.34      0.22      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.38639455782312926
Confusion Matrix of Decision Tree is:
 [[124  46   1   0   0   0  39]
 [ 20 144   5   1   0   0  40]
 [ 26  34 131   7   0   0  12]
 [ 31 105   5   6   0   0  63]
 [ 36 138   1   1   0   0  34]
 [ 35  73   4   2   0   0  96]
 [ 19  26   1   1   0   0 163]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.43      0.59      0.50       210
           2       0.25      0.69      0.37       210
           3       0.89      0.62      0.73       210
           4       0.33      0.03      0.05       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.36      0.78      0.50       210

    accuracy                           0.39      1470
   macro avg       0.32      0.39      0.31      1470
weighted avg       0.32      0.39      0.31      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.44081632653061226
Confusion Matrix of Decision Tree is:
 [[110   7   1  14  39  29  10]
 [  7  79   6  13  65  27  13]
 [  3   9 135  25  25  13   0]
 [ 20  12   5  15  93  51  14]
 [ 34  13   1   3 125  23  11]
 [ 16  13   5  20  60  54  42]
 [  7   0   1  13  26  33 130]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.56      0.52      0.54       210
           2       0.59      0.38      0.46       210
           3       0.88      0.64      0.74       210
           4       0.15      0.07      0.10       210
           5       0.29      0.60      0.39       210
           6       0.23      0.26      0.25       210
           7       0.59      0.62      0.60       210

    accuracy                           0.44      1470
   macro avg       0.47      0.44      0.44      1470
weighted avg       0.47      0.44      0.44      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.48707482993197276
Confusion Matrix of Decision Tree is:
 [[115  13   7  18  22  18  17]
 [ 20 107   9  20  24  21   9]
 [  8  11 154  17   7  11   2]
 [ 21  26   9  79  29  25  21]
 [ 28  29   3  27  97  16  10]
 [ 22  25  22  27  13  53  48]
 [ 12  14   8  14   6  45 111]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.51      0.55      0.53       210
           2       0.48      0.51      0.49       210
           3       0.73      0.73      0.73       210
           4       0.39      0.38      0.38       210
           5       0.49      0.46      0.48       210
           6       0.28      0.25      0.27       210
           7       0.51      0.53      0.52       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.48      1470
weighted avg       0.48      0.49      0.48      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.49387755102040815
Confusion Matrix of Decision Tree is:
 [[113  13   7  13  29  26   9]
 [ 16  98   9  24  31  27   5]
 [  8  13 159  13   5   9   3]
 [ 21  19  11  87  23  31  18]
 [ 31  23   3  33  96  16   8]
 [ 24  29  20  28  17  55  37]
 [ 14  10   6  11  15  36 118]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.54      0.52       210
           2       0.48      0.47      0.47       210
           3       0.74      0.76      0.75       210
           4       0.42      0.41      0.42       210
           5       0.44      0.46      0.45       210
           6       0.28      0.26      0.27       210
           7       0.60      0.56      0.58       210

    accuracy                           0.49      1470
   macro avg       0.49      0.49      0.49      1470
weighted avg       0.49      0.49      0.49      1470

Random Forest with 1 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6666666666666666
Confusion Matrix of Random Forest is:
 [[133   5   0   5  28  30   9]
 [  1 139   4  12  28  22   4]
 [  2   7 180  10   4   7   0]
 [  5  11   4 119  29  28  14]
 [ 13  13   1  23 129  16  15]
 [  7  17   8  18   7  90  63]
 [  0   1   0   1   4  14 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.83      0.63      0.72       210
           2       0.72      0.66      0.69       210
           3       0.91      0.86      0.88       210
           4       0.63      0.57      0.60       210
           5       0.56      0.61      0.59       210
           6       0.43      0.43      0.43       210
           7       0.64      0.90      0.75       210

    accuracy                           0.67      1470
   macro avg       0.68      0.67      0.67      1470
weighted avg       0.68      0.67      0.67      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6843537414965987
Confusion Matrix of Random Forest is:
 [[136   4   1   2  37  22   8]
 [  1 149   6  17  14  20   3]
 [  0   4 188   8   5   5   0]
 [  1  10   3 127  29  28  12]
 [ 22  18   0  23 128   9  10]
 [ 10  17   4  23   6  91  59]
 [  0   1   0   1   3  18 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.80      0.65      0.72       210
           2       0.73      0.71      0.72       210
           3       0.93      0.90      0.91       210
           4       0.63      0.60      0.62       210
           5       0.58      0.61      0.59       210
           6       0.47      0.43      0.45       210
           7       0.67      0.89      0.76       210

    accuracy                           0.68      1470
   macro avg       0.69      0.68      0.68      1470
weighted avg       0.69      0.68      0.68      1470

Random Forest with 17 max_dept

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//xlm_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7476190476190476
Confusion Matrix of Logistic Regression is:
 [[155   2   3   2  28  18   2]
 [  3 169   5  10  11  10   2]
 [  0   4 195   3   2   6   0]
 [  1  14   8 130  28  24   5]
 [ 22  16   1  19 142   2   8]
 [  8  10   7  23   4 121  37]
 [  0   0   0   1   1  21 187]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.82      0.74      0.78       210
           2       0.79      0.80      0.80       210
           3       0.89      0.93      0.91       210
           4       0.69      0.62      0.65       210
           5       0.66      0.68      0.67       210
           6       0.60      0.58      0.59       210
           7       0.78      0.89      0.83       210

    accuracy                           0.75      1470
   macro avg       0.75      0.75      0.75      1470
weighted avg       0.75      0.75      0.75      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.6149659863945578
Confusion Matrix of Bernoulli Naive Bayes is:
 [[135   1   1   7  38  21   7]
 [  4 137  14   7  22  21   5]
 [ 21   5 153  16   3  12   0]
 [  4  18  14  79  45  34  16]
 [ 31  14   2  11 130   6  16]
 [ 13  14  11  13   7  96  56]
 [  0   0   0   1   3  32 174]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.65      0.64      0.65       210
           2       0.72      0.65      0.69       210
           3       0.78      0.73      0.76       210
           4       0.59      0.38      0.46       210
           5       0.52      0.62      0.57       210
           6       0.43      0.46      0.44       210
           7       0.64      0.83      0.72       210

    accuracy                           0.61      1470
   macro avg       0.62      0.61      0.61      1470
weighted avg       0.62      0.61      0.61      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3435374149659864
Confusion Matrix of Decision Tree is:
 [[  0   0   2   0 180   8  20]
 [  0   0   6   0 176   6  22]
 [  0   0 120   0  67  15   8]
 [  0   0   8   0 171   3  28]
 [  0   0   3   0 192   2  13]
 [  0   0   3   0  75  19 113]
 [  0   0   0   0  32   4 174]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.85      0.57      0.68       210
           4       0.00      0.00      0.00       210
           5       0.22      0.91      0.35       210
           6       0.33      0.09      0.14       210
           7       0.46      0.83      0.59       210

    accuracy                           0.34      1470
   macro avg       0.26      0.34      0.25      1470
weighted avg       0.26      0.34      0.25      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.45102040816326533
Confusion Matrix of Decision Tree is:
 [[162  19   8   0   0   9  12]
 [ 17 161   9   0   0  15   8]
 [  4  65 130   0   0  11   0]
 [ 55 117  10   0   0  13  15]
 [127  67   3   0   0   2  11]
 [ 42  33  15   0   0  46  74]
 [ 25   7   3   0   0  11 164]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.77      0.50       210
           2       0.34      0.77      0.47       210
           3       0.73      0.62      0.67       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.43      0.22      0.29       210
           7       0.58      0.78      0.66       210

    accuracy                           0.45      1470
   macro avg       0.35      0.45      0.37      1470
weighted avg       0.35      0.45      0.37      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.5115646258503401
Confusion Matrix of Decision Tree is:
 [[ 96   5   2  13  67  15  12]
 [  1 127   3  35  16  20   8]
 [  0  19 124  51   5  11   0]
 [  1  47   7  70  55  15  15]
 [ 13  34   0  32 116   4  11]
 [  2  19   4  14  42  55  74]
 [  0   5   0   2  25  14 164]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.85      0.46      0.59       210
           2       0.50      0.60      0.55       210
           3       0.89      0.59      0.71       210
           4       0.32      0.33      0.33       210
           5       0.36      0.55      0.43       210
           6       0.41      0.26      0.32       210
           7       0.58      0.78      0.66       210

    accuracy                           0.51      1470
   macro avg       0.56      0.51      0.51      1470
weighted avg       0.56      0.51      0.51      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5693877551020409
Confusion Matrix of Decision Tree is:
 [[116   5   2  14  49  19   5]
 [  5 132   9  29  19  12   4]
 [  6   9 175  11   4   5   0]
 [ 10  18  10  98  35  26  13]
 [ 30  29   1  25 106  11   8]
 [ 20  21   8  30  18  68  45]
 [  7   2   3  10   4  42 142]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.55      0.57       210
           2       0.61      0.63      0.62       210
           3       0.84      0.83      0.84       210
           4       0.45      0.47      0.46       210
           5       0.45      0.50      0.48       210
           6       0.37      0.32      0.35       210
           7       0.65      0.68      0.67       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.57      1470
weighted avg       0.57      0.57      0.57      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5510204081632653
Confusion Matrix of Decision Tree is:
 [[122   6   6   7  44  15  10]
 [  4 119  12  33  21  17   4]
 [  3   9 175  14   4   3   2]
 [  8  14  14  97  34  29  14]
 [ 45  22   5  32  87  13   6]
 [ 19  22  13  27  18  66  45]
 [  3   4   2  12   1  44 144]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.60      0.58      0.59       210
           2       0.61      0.57      0.59       210
           3       0.77      0.83      0.80       210
           4       0.44      0.46      0.45       210
           5       0.42      0.41      0.42       210
           6       0.35      0.31      0.33       210
           7       0.64      0.69      0.66       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Random Forest with 1 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7040816326530612
Confusion Matrix of Random Forest is:
 [[134   2   1   7  39  18   9]
 [  2 164   1  10  13  18   2]
 [  0   5 181  13   5   6   0]
 [  1  11   4 115  36  33  10]
 [  9  20   0  13 151   8   9]
 [  6   7   1  29   5 109  53]
 [  0   0   0   1   2  26 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.88      0.64      0.74       210
           2       0.78      0.78      0.78       210
           3       0.96      0.86      0.91       210
           4       0.61      0.55      0.58       210
           5       0.60      0.72      0.66       210
           6       0.50      0.52      0.51       210
           7       0.69      0.86      0.76       210

    accuracy                           0.70      1470
   macro avg       0.72      0.70      0.71      1470
weighted avg       0.72      0.70      0.71      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7210884353741497
Confusion Matrix of Random Forest is:
 [[142   2   1   5  35  20   5]
 [  3 165   1  12  11  16   2]
 [  0   5 185  12   3   5   0]
 [  2   9   5 126  34  26   8]
 [ 17  15   0  21 143   3  11]
 [  4  12   2  26   4 116  46]
 [  1   0   0   1   1  24 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.84      0.68      0.75       210
           2       0.79      0.79      0.79       210
           3       0.95      0.88      0.92       210
           4       0.62      0.60      0.61       210
           5       0.62      0.68      0.65       210
           6       0.55      0.55      0.55       210
           7       0.72      0.87      0.79       210

    accuracy                           0.72      1470
   macro avg       0.73      0.72      0.72      1470
weighted avg       0.73      0.72      0.72      1470

Random Forest with 17 max_dept

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//bert_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6469387755102041
Confusion Matrix of Logistic Regression is:
 [[138   4   3   7  39   8  11]
 [  7 132  12  12  20  20   7]
 [  5  22 170   4   1   6   2]
 [  5  18   7 107  27  24  22]
 [ 22  18   2   9 136  10  13]
 [ 17  17   5  16   5  86  64]
 [  1   1   0   1   4  21 182]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.71      0.66      0.68       210
           2       0.62      0.63      0.63       210
           3       0.85      0.81      0.83       210
           4       0.69      0.51      0.58       210
           5       0.59      0.65      0.62       210
           6       0.49      0.41      0.45       210
           7       0.60      0.87      0.71       210

    accuracy                           0.65      1470
   macro avg       0.65      0.65      0.64      1470
weighted avg       0.65      0.65      0.64      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.46258503401360546
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 71  17  35  11  36   4  36]
 [  2 118  22  12  24  17  15]
 [  2  68 125   4   5   5   1]
 [  2  33   9  60  49   8  49]
 [  9  22   3  10 123   6  37]
 [ 14  32  19  21  20  33  71]
 [ 12   9   2  15  11  11 150]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.63      0.34      0.44       210
           2       0.39      0.56      0.46       210
           3       0.58      0.60      0.59       210
           4       0.45      0.29      0.35       210
           5       0.46      0.59      0.51       210
           6       0.39      0.16      0.22       210
           7       0.42      0.71      0.53       210

    accuracy                           0.46      1470
   macro avg       0.48      0.46      0.44      1470
weighted avg       0.48      0.46      0.44      1470

Worki

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.29591836734693877
Confusion Matrix of Decision Tree is:
 [[ 14 166   1   0   0   0  29]
 [  6 175   2   0   0   0  27]
 [  1 114  91   0   0   0   4]
 [  1 147   0   0   0   0  62]
 [  0 156   0   0   0   0  54]
 [  3 132   1   0   0   0  74]
 [  0  55   0   0   0   0 155]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.56      0.07      0.12       210
           2       0.19      0.83      0.30       210
           3       0.96      0.43      0.60       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.38      0.74      0.50       210

    accuracy                           0.30      1470
   macro avg       0.30      0.30      0.22      1470
weighted avg       0.30      0.30      0.22      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.32653061224489793
Confusion Matrix of Decision Tree is:
 [[ 13  64   0   0 103   1  29]
 [  1  84   2   0  91   5  27]
 [  0  61  91   0  53   1   4]
 [  0  40   0   0 107   1  62]
 [  0  22   0   0 134   0  54]
 [  0  48   0   0  85   3  74]
 [  0  18   0   0  37   0 155]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.93      0.06      0.12       210
           2       0.25      0.40      0.31       210
           3       0.98      0.43      0.60       210
           4       0.00      0.00      0.00       210
           5       0.22      0.64      0.33       210
           6       0.27      0.01      0.03       210
           7       0.38      0.74      0.50       210

    accuracy                           0.33      1470
   macro avg       0.43      0.33      0.27      1470
weighted avg       0.43      0.33      0.27      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.35306122448979593
Confusion Matrix of Decision Tree is:
 [[ 60  16   0  59  47   2  26]
 [ 13  74   2  64  46   4   7]
 [ 19  42  90  23  32   2   2]
 [ 13  28   0  67  66   6  30]
 [  7  15   0  60  96   3  29]
 [  9  40   0  74  29   5  53]
 [  6  12   0  45  10  10 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.29      0.36       210
           2       0.33      0.35      0.34       210
           3       0.98      0.43      0.60       210
           4       0.17      0.32      0.22       210
           5       0.29      0.46      0.36       210
           6       0.16      0.02      0.04       210
           7       0.46      0.60      0.52       210

    accuracy                           0.35      1470
   macro avg       0.41      0.35      0.35      1470
weighted avg       0.41      0.35      0.35      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.41904761904761906
Confusion Matrix of Decision Tree is:
 [[ 93  17  11  18  32  26  13]
 [ 27  76  17  27  27  29   7]
 [  8  20 149  12  11   8   2]
 [ 22  27   8  68  31  32  22]
 [ 40  22   8  43  69  15  13]
 [ 30  18  14  30  30  55  33]
 [ 15   9   1  27  12  40 106]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.44      0.42       210
           2       0.40      0.36      0.38       210
           3       0.72      0.71      0.71       210
           4       0.30      0.32      0.31       210
           5       0.33      0.33      0.33       210
           6       0.27      0.26      0.27       210
           7       0.54      0.50      0.52       210

    accuracy                           0.42      1470
   macro avg       0.42      0.42      0.42      1470
weighted avg       0.42      0.42      0.42      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.41564625850340137
Confusion Matrix of Decision Tree is:
 [[ 95  17  12  15  29  26  16]
 [ 31  82  12  19  28  25  13]
 [  7  15 153  15  11   5   4]
 [ 21  21  11  69  33  34  21]
 [ 37  22  12  39  62  25  13]
 [ 32  24   9  33  25  48  39]
 [ 16  14   4  26  15  33 102]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.45      0.42       210
           2       0.42      0.39      0.40       210
           3       0.72      0.73      0.72       210
           4       0.32      0.33      0.32       210
           5       0.31      0.30      0.30       210
           6       0.24      0.23      0.24       210
           7       0.49      0.49      0.49       210

    accuracy                           0.42      1470
   macro avg       0.41      0.42      0.41      1470
weighted avg       0.41      0.42      0.41      1470

Random Forest with 1 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.41836734693877553
Confusion Matrix of Random Forest is:
 [[ 59  45  15   0  39   0  52]
 [  5 109  27   1  38   0  30]
 [  1  55 149   1   1   0   3]
 [  9  36  13   7  63   0  82]
 [  4  27   6   0 117   0  56]
 [  8  41  20   2  32   1 106]
 [  2  15   3   0  14   3 173]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.28      0.40       210
           2       0.33      0.52      0.41       210
           3       0.64      0.71      0.67       210
           4       0.64      0.03      0.06       210
           5       0.38      0.56      0.46       210
           6       0.25      0.00      0.01       210
           7       0.34      0.82      0.49       210

    accuracy                           0.42      1470
   macro avg       0.47      0.42      0.36      1470
weighted avg       0.47      0.42      0.36      1470

Random Forest with 3 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5748299319727891
Confusion Matrix of Random Forest is:
 [[112  10   4  11  43  12  18]
 [  5 132  11  11  24  19   8]
 [  2  39 156   4   2   6   1]
 [  2  21   7  79  47  22  32]
 [ 10  20   1   7 142   8  22]
 [ 12  31   5  24  11  52  75]
 [  2   6   1   5   6  18 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.77      0.53      0.63       210
           2       0.51      0.63      0.56       210
           3       0.84      0.74      0.79       210
           4       0.56      0.38      0.45       210
           5       0.52      0.68      0.59       210
           6       0.38      0.25      0.30       210
           7       0.52      0.82      0.64       210

    accuracy                           0.57      1470
   macro avg       0.59      0.57      0.57      1470
weighted avg       0.59      0.57      0.57      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5741496598639456
Confusion Matrix of Random Forest is:
 [[121   6   7  12  37  12  15]
 [  5 119   9  31  19  19   8]
 [  5  42 156   3   0   3   1]
 [  5  25   7  72  51  23  27]
 [ 20  16   1  15 130   7  21]
 [ 17  23   6  22  17  74  51]
 [  1   2   0   1   9  25 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.70      0.58      0.63       210
           2       0.51      0.57      0.54       210
           3       0.84      0.74      0.79       210
           4       0.46      0.34      0.39       210
           5       0.49      0.62      0.55       210
           6       0.45      0.35      0.40       210
           7       0.58      0.82      0.68       210

    accuracy                           0.57      1470
   macro avg       0.58      0.57      0.57      1470
weighted avg       0.58      0.57      0.57      1470

Random Forest with 19 max_dept

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6421768707482993
Confusion Matrix of Logistic Regression is:
 [[111   5   9  17  41  22   5]
 [  7 142  10  14  22  11   4]
 [  4  15 174   8   2   7   0]
 [ 14  15   4 112  48  11   6]
 [ 19  10   1  18 150   4   8]
 [ 14  24  13  25   9  74  51]
 [  4   1   1   3   3  17 181]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.64      0.53      0.58       210
           2       0.67      0.68      0.67       210
           3       0.82      0.83      0.82       210
           4       0.57      0.53      0.55       210
           5       0.55      0.71      0.62       210
           6       0.51      0.35      0.42       210
           7       0.71      0.86      0.78       210

    accuracy                           0.64      1470
   macro avg       0.64      0.64      0.63      1470
weighted avg       0.64      0.64      0.63      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.4448979591836735
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 68  13   2  11  68  36  12]
 [  5  93   2   6  54  39  11]
 [ 16   8  64  15  74  33   0]
 [  9  11   1  36 116  27  10]
 [  6  20   0   5 155  16   8]
 [ 12  21   3  15  21  72  66]
 [  2   8   0   1   2  31 166]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.58      0.32      0.41       210
           2       0.53      0.44      0.48       210
           3       0.89      0.30      0.45       210
           4       0.40      0.17      0.24       210
           5       0.32      0.74      0.44       210
           6       0.28      0.34      0.31       210
           7       0.61      0.79      0.69       210

    accuracy                           0.44      1470
   macro avg       0.52      0.44      0.43      1470
weighted avg       0.52      0.44      0.43      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3258503401360544
Confusion Matrix of Decision Tree is:
 [[  0   4   4   0 174   0  28]
 [  0   9   4   0 178   0  19]
 [  0   7 108   0  93   0   2]
 [  0   0   4   0 182   0  24]
 [  0   0   0   0 189   0  21]
 [  0   6   6   0 111   0  87]
 [  0   0   0   0  37   0 173]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.35      0.04      0.08       210
           3       0.86      0.51      0.64       210
           4       0.00      0.00      0.00       210
           5       0.20      0.90      0.32       210
           6       0.00      0.00      0.00       210
           7       0.49      0.82      0.61       210

    accuracy                           0.33      1470
   macro avg       0.27      0.33      0.24      1470
weighted avg       0.27      0.33      0.24      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.38503401360544215
Confusion Matrix of Decision Tree is:
 [[  4  32   4   0 142  20   8]
 [  1 114   4   0  72  12   7]
 [  4  45 108   0  51   0   2]
 [  0  18   4   0 164  18   6]
 [  0  27   0   0 162  15   6]
 [  4  44   6   0  69  38  49]
 [  0  20   0   0  17  33 140]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.31      0.02      0.04       210
           2       0.38      0.54      0.45       210
           3       0.86      0.51      0.64       210
           4       0.00      0.00      0.00       210
           5       0.24      0.77      0.37       210
           6       0.28      0.18      0.22       210
           7       0.64      0.67      0.65       210

    accuracy                           0.39      1470
   macro avg       0.39      0.39      0.34      1470
weighted avg       0.39      0.39      0.34      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.42585034013605444
Confusion Matrix of Decision Tree is:
 [[ 66  16   3  10  78  29   8]
 [  9  92   4   2  63  33   7]
 [ 10  30 110   4  41  13   2]
 [ 14  11   2   8 150  19   6]
 [  9  19   0   7 153  16   6]
 [ 25  20   7   8  44  57  49]
 [ 13   6   0   5   4  42 140]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.31      0.37       210
           2       0.47      0.44      0.46       210
           3       0.87      0.52      0.65       210
           4       0.18      0.04      0.06       210
           5       0.29      0.73      0.41       210
           6       0.27      0.27      0.27       210
           7       0.64      0.67      0.65       210

    accuracy                           0.43      1470
   macro avg       0.45      0.43      0.41      1470
weighted avg       0.45      0.43      0.41      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.48435374149659866
Confusion Matrix of Decision Tree is:
 [[ 92  18  11  19  45  19   6]
 [ 18 105  12  24  21  21   9]
 [  7  15 151  15   8  10   4]
 [ 21  17  12  77  57  16  10]
 [ 24  24   6  29 104  14   9]
 [ 29  27  10  27  13  59  45]
 [  9  12   0   6   6  53 124]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.44      0.45       210
           2       0.48      0.50      0.49       210
           3       0.75      0.72      0.73       210
           4       0.39      0.37      0.38       210
           5       0.41      0.50      0.45       210
           6       0.31      0.28      0.29       210
           7       0.60      0.59      0.59       210

    accuracy                           0.48      1470
   macro avg       0.49      0.48      0.48      1470
weighted avg       0.49      0.48      0.48      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.46802721088435373
Confusion Matrix of Decision Tree is:
 [[101  16   9  15  38  22   9]
 [ 26 103  11  15  18  29   8]
 [ 12  10 155   9  10  10   4]
 [ 33  20  15  66  38  24  14]
 [ 42  21   7  33  79  21   7]
 [ 28  24  14  22  12  63  47]
 [ 13  11   3   6   8  48 121]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.48      0.43       210
           2       0.50      0.49      0.50       210
           3       0.72      0.74      0.73       210
           4       0.40      0.31      0.35       210
           5       0.39      0.38      0.38       210
           6       0.29      0.30      0.30       210
           7       0.58      0.58      0.58       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Random Forest with 1 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5972789115646259
Confusion Matrix of Random Forest is:
 [[ 94  15   7  24  37  22  11]
 [  5 140   8   9  23  17   8]
 [ 10  11 160   9   4  16   0]
 [ 10  16   2  99  56  17  10]
 [  8  20   0  17 138  14  13]
 [ 12  21   7  15  15  68  72]
 [  1   5   1   0   3  21 179]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.45      0.54       210
           2       0.61      0.67      0.64       210
           3       0.86      0.76      0.81       210
           4       0.57      0.47      0.52       210
           5       0.50      0.66      0.57       210
           6       0.39      0.32      0.35       210
           7       0.61      0.85      0.71       210

    accuracy                           0.60      1470
   macro avg       0.60      0.60      0.59      1470
weighted avg       0.60      0.60      0.59      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6190476190476191
Confusion Matrix of Random Forest is:
 [[107  14   4  17  40  17  11]
 [  4 133   7  12  22  26   6]
 [  4  10 170   9   5  12   0]
 [ 10  12   2 111  49  17   9]
 [ 13  15   2  24 135  10  11]
 [ 14  22   9  17  10  70  68]
 [  4   2   0   1   2  17 184]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.51      0.58       210
           2       0.64      0.63      0.64       210
           3       0.88      0.81      0.84       210
           4       0.58      0.53      0.55       210
           5       0.51      0.64      0.57       210
           6       0.41      0.33      0.37       210
           7       0.64      0.88      0.74       210

    accuracy                           0.62      1470
   macro avg       0.62      0.62      0.61      1470
weighted avg       0.62      0.62      0.61      1470

Random Forest with 17 max_dept

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.37142857142857144
Confusion Matrix of Logistic Regression is:
 [[ 89   1  30  12  31  10  37]
 [ 18  51  30  22  21   7  61]
 [ 25  20 114  25   9   4  13]
 [ 25  10   9  44  52   8  62]
 [ 26   6   3  28  80   2  65]
 [ 27   9  12  30  27   6  99]
 [  1   2   0  22  20   3 162]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.42      0.42      0.42       210
           2       0.52      0.24      0.33       210
           3       0.58      0.54      0.56       210
           4       0.24      0.21      0.22       210
           5       0.33      0.38      0.36       210
           6       0.15      0.03      0.05       210
           7       0.32      0.77      0.46       210

    accuracy                           0.37      1470
   macro avg       0.37      0.37      0.34      1470
weighted avg       0.37      0.37      0.34      1470

KNN with 3 

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5285714285714286
Confusion Matrix of Bernoulli Naive Bayes is:
 [[113   7  16   5  29   8  32]
 [  1 135  15  12  18  15  14]
 [  3  23 162  15   3   3   1]
 [  4  20  11  81  33  15  46]
 [ 21  26   1  10 107   6  39]
 [ 11  42   9  22  16  32  78]
 [  2  20   0  12   6  23 147]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.73      0.54      0.62       210
           2       0.49      0.64      0.56       210
           3       0.76      0.77      0.76       210
           4       0.52      0.39      0.44       210
           5       0.50      0.51      0.51       210
           6       0.31      0.15      0.21       210
           7       0.41      0.70      0.52       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.52      1470
weighted avg       0.53      0.53      0.52      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.23469387755102042
Confusion Matrix of Decision Tree is:
 [[  0   0  22   0   0   0 188]
 [  0   0  10   0   0   0 200]
 [  0   0 135   0   0   0  75]
 [  0   0   6   0   0   0 204]
 [  0   0   2   0   0   0 208]
 [  0   0   5   0   0   0 205]
 [  0   0   0   0   0   0 210]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.75      0.64      0.69       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.16      1.00      0.28       210

    accuracy                           0.23      1470
   macro avg       0.13      0.23      0.14      1470
weighted avg       0.13      0.23      0.14      1470

Decision Tree with 2 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.319047619047619
Confusion Matrix of Decision Tree is:
 [[ 19 170   3   0   0   0  18]
 [  4 185   6   0   0   0  15]
 [  1  70 134   0   0   0   5]
 [  1 156   5   0   0   0  48]
 [  2 178   0   0   0   0  30]
 [  2 135   3   0   0   0  70]
 [  0  79   0   0   0   0 131]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.66      0.09      0.16       210
           2       0.19      0.88      0.31       210
           3       0.89      0.64      0.74       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.41      0.62      0.50       210

    accuracy                           0.32      1470
   macro avg       0.31      0.32      0.24      1470
weighted avg       0.31      0.32      0.24      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.354421768707483
Confusion Matrix of Decision Tree is:
 [[ 18   8   3   9 162   0  10]
 [  1  67   6  14 118   0   4]
 [  1  17 134   5  53   0   0]
 [  0  11   5  41 145   0   8]
 [  0  10   0  23 168   0   9]
 [  0  12   3  44 123   0  28]
 [  0   4   0  38  75   0  93]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.90      0.09      0.16       210
           2       0.52      0.32      0.40       210
           3       0.89      0.64      0.74       210
           4       0.24      0.20      0.21       210
           5       0.20      0.80      0.32       210
           6       0.00      0.00      0.00       210
           7       0.61      0.44      0.51       210

    accuracy                           0.35      1470
   macro avg       0.48      0.35      0.33      1470
weighted avg       0.48      0.35      0.33      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.37755102040816324
Confusion Matrix of Decision Tree is:
 [[ 63   8   3   6 117   7   6]
 [  2  67   6  10 117   1   7]
 [  3  17 129   1  51   5   4]
 [  5  11   4  31 140   7  12]
 [ 10  10   0  22 158   5   5]
 [  1  12   1  22 122  14  38]
 [  0   4   0  14  75  24  93]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.75      0.30      0.43       210
           2       0.52      0.32      0.40       210
           3       0.90      0.61      0.73       210
           4       0.29      0.15      0.20       210
           5       0.20      0.75      0.32       210
           6       0.22      0.07      0.10       210
           7       0.56      0.44      0.50       210

    accuracy                           0.38      1470
   macro avg       0.49      0.38      0.38      1470
weighted avg       0.49      0.38      0.38      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.4666666666666667
Confusion Matrix of Decision Tree is:
 [[100  20  10  16  43  15   6]
 [ 20  96  18  26  27  15   8]
 [  2  10 166  11  12   8   1]
 [ 19  24   8  81  46  22  10]
 [ 36  27   3  30  95  12   7]
 [ 28  29   4  47  28  41  33]
 [ 12  11   0  22  19  39 107]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.48      0.47       210
           2       0.44      0.46      0.45       210
           3       0.79      0.79      0.79       210
           4       0.35      0.39      0.37       210
           5       0.35      0.45      0.40       210
           6       0.27      0.20      0.23       210
           7       0.62      0.51      0.56       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.47346938775510206
Confusion Matrix of Decision Tree is:
 [[108  17  10  16  30  21   8]
 [ 18  91  27  28  14  21  11]
 [  7   4 166  10   9  11   3]
 [ 25  22  11  76  32  31  13]
 [ 44  24   5  25  82  20  10]
 [ 25  19  13  43  20  56  34]
 [ 11   9   3  17  14  39 117]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.51      0.48       210
           2       0.49      0.43      0.46       210
           3       0.71      0.79      0.75       210
           4       0.35      0.36      0.36       210
           5       0.41      0.39      0.40       210
           6       0.28      0.27      0.27       210
           7       0.60      0.56      0.58       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Random Forest with 1 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6401360544217687
Confusion Matrix of Random Forest is:
 [[118   9   3   9  43  14  14]
 [  1 140   2  17  28  14   8]
 [  0  11 185   6   3   5   0]
 [  0  17   7 105  39  30  12]
 [ 14  18   0  20 131  10  17]
 [  6  22   7  29  11  68  67]
 [  0   4   0   1   3   8 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.85      0.56      0.68       210
           2       0.63      0.67      0.65       210
           3       0.91      0.88      0.89       210
           4       0.56      0.50      0.53       210
           5       0.51      0.62      0.56       210
           6       0.46      0.32      0.38       210
           7       0.62      0.92      0.74       210

    accuracy                           0.64      1470
   macro avg       0.65      0.64      0.63      1470
weighted avg       0.65      0.64      0.63      1470

Random Forest with 9 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6680272108843538
Confusion Matrix of Random Forest is:
 [[132   6   4   9  36  12  11]
 [  3 144   5  12  22  19   5]
 [  1  16 182   6   4   1   0]
 [  2  16   6 111  36  34   5]
 [ 14  16   1  22 141   6  10]
 [  9  17  11  28   7  82  56]
 [  0   1   0   3   2  14 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.63      0.71       210
           2       0.67      0.69      0.68       210
           3       0.87      0.87      0.87       210
           4       0.58      0.53      0.55       210
           5       0.57      0.67      0.62       210
           6       0.49      0.39      0.43       210
           7       0.69      0.90      0.78       210

    accuracy                           0.67      1470
   macro avg       0.67      0.67      0.66      1470
weighted avg       0.67      0.67      0.66      1470

Random Forest with 17 max_dept

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.3380952380952381
Confusion Matrix of Logistic Regression is:
 [[ 57   6  63   2  55  17  10]
 [  2  26 100   8  59   8   7]
 [  1   1 179   8  21   0   0]
 [  3   6  98  13  66  11  13]
 [ 20  11  37  11  97  12  22]
 [  7   7  77   5  51  27  36]
 [  0   0  34   6  48  24  98]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.63      0.27      0.38       210
           2       0.46      0.12      0.19       210
           3       0.30      0.85      0.45       210
           4       0.25      0.06      0.10       210
           5       0.24      0.46      0.32       210
           6       0.27      0.13      0.17       210
           7       0.53      0.47      0.49       210

    accuracy                           0.34      1470
   macro avg       0.38      0.34      0.30      1470
weighted avg       0.38      0.34      0.30      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.535374149659864
Confusion Matrix of Bernoulli Naive Bayes is:
 [[106   6  15   3  32  19  29]
 [  5 106  18  13  27  29  12]
 [ 16  27 140   7   5  14   1]
 [  5  20   5  86  30  29  35]
 [ 16  12   0   6 125  16  35]
 [ 12  20   6  34  10  52  76]
 [  1   3   0  12   6  16 172]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.66      0.50      0.57       210
           2       0.55      0.50      0.52       210
           3       0.76      0.67      0.71       210
           4       0.53      0.41      0.46       210
           5       0.53      0.60      0.56       210
           6       0.30      0.25      0.27       210
           7       0.48      0.82      0.60       210

    accuracy                           0.54      1470
   macro avg       0.54      0.54      0.53      1470
weighted avg       0.54      0.54      0.53      1470

Working

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.31700680272108844
Confusion Matrix of Decision Tree is:
 [[162   0   1   0   0   0  47]
 [ 43   0   4   0   0   0 163]
 [ 46   0 102   0   0   0  62]
 [104   0   6   0   0   0 100]
 [137   0   0   0   0   0  73]
 [ 54   0   1   0   0   0 155]
 [  8   0   0   0   0   0 202]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.29      0.77      0.42       210
           2       0.00      0.00      0.00       210
           3       0.89      0.49      0.63       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.25      0.96      0.40       210

    accuracy                           0.32      1470
   macro avg       0.21      0.32      0.21      1470
weighted avg       0.21      0.32      0.21      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40816326530612246
Confusion Matrix of Decision Tree is:
 [[ 69  19   0   2  92   0  28]
 [  2 116   3   1  41   0  47]
 [  9  51  96  10  33   0  11]
 [  1  45   3   5 101   0  55]
 [  4  23   0   0 133   0  50]
 [  4  34   1   2  48   0 121]
 [  0  21   0   0   8   0 181]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.78      0.33      0.46       210
           2       0.38      0.55      0.45       210
           3       0.93      0.46      0.61       210
           4       0.25      0.02      0.04       210
           5       0.29      0.63      0.40       210
           6       0.00      0.00      0.00       210
           7       0.37      0.86      0.51       210

    accuracy                           0.41      1470
   macro avg       0.43      0.41      0.35      1470
weighted avg       0.43      0.41      0.35      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4496598639455782
Confusion Matrix of Decision Tree is:
 [[ 66  19   3  29  85   1   7]
 [  1 116   3  32  30   0  28]
 [  0  51 109  30  10   4   6]
 [  1  45   1  75  56   1  31]
 [  2  23   2  37 130   0  16]
 [  3  34   1  34  45   3  90]
 [  0  21   0   6  21   0 162]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.90      0.31      0.47       210
           2       0.38      0.55      0.45       210
           3       0.92      0.52      0.66       210
           4       0.31      0.36      0.33       210
           5       0.34      0.62      0.44       210
           6       0.33      0.01      0.03       210
           7       0.48      0.77      0.59       210

    accuracy                           0.45      1470
   macro avg       0.52      0.45      0.42      1470
weighted avg       0.52      0.45      0.42      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5047619047619047
Confusion Matrix of Decision Tree is:
 [[107  17   5  25  33  22   1]
 [ 18 101  11  27  18  29   6]
 [  5  12 154  18   5  13   3]
 [ 14  25   7  92  29  32  11]
 [ 44  21   4  28  83  21   9]
 [ 19  28  11  36   8  78  30]
 [  7   4   3  21   6  42 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.51      0.50       210
           2       0.49      0.48      0.48       210
           3       0.79      0.73      0.76       210
           4       0.37      0.44      0.40       210
           5       0.46      0.40      0.42       210
           6       0.33      0.37      0.35       210
           7       0.68      0.60      0.64       210

    accuracy                           0.50      1470
   macro avg       0.52      0.50      0.51      1470
weighted avg       0.52      0.50      0.51      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5047619047619047
Confusion Matrix of Decision Tree is:
 [[107  15   5  20  37  21   5]
 [ 11 105  13  31  17  23  10]
 [  3  11 154  16   8  16   2]
 [ 13  22  10  88  31  34  12]
 [ 39  18   6  34  83  20  10]
 [ 18  27  11  35  13  75  31]
 [  7   7   0  14  12  40 130]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.54      0.51      0.52       210
           2       0.51      0.50      0.51       210
           3       0.77      0.73      0.75       210
           4       0.37      0.42      0.39       210
           5       0.41      0.40      0.40       210
           6       0.33      0.36      0.34       210
           7       0.65      0.62      0.63       210

    accuracy                           0.50      1470
   macro avg       0.51      0.50      0.51      1470
weighted avg       0.51      0.50      0.51      1470

Random Forest with 1 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.49183673469387756
Confusion Matrix of Random Forest is:
 [[ 95   8   8   0  67   2  30]
 [  5  89  24   2  58   3  29]
 [ 10  24 164   0   8   1   3]
 [  4  17  14  20  99   1  55]
 [  8  10   1   1 148   0  42]
 [ 12  22  13   7  29   2 125]
 [  0   0   0   0   5   0 205]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.45      0.55       210
           2       0.52      0.42      0.47       210
           3       0.73      0.78      0.76       210
           4       0.67      0.10      0.17       210
           5       0.36      0.70      0.47       210
           6       0.22      0.01      0.02       210
           7       0.42      0.98      0.59       210

    accuracy                           0.49      1470
   macro avg       0.52      0.49      0.43      1470
weighted avg       0.52      0.49      0.43      1470

Random Forest with 3 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6523809523809524
Confusion Matrix of Random Forest is:
 [[123   5   3   5  47  21   6]
 [  1 146   1  18  21  18   5]
 [  1  25 173   6   2   3   0]
 [  1  22   6 105  31  37   8]
 [ 10  15   0  26 134  14  11]
 [  7  18   6  27  11  80  61]
 [  0   1   0   1   2   8 198]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.86      0.59      0.70       210
           2       0.63      0.70      0.66       210
           3       0.92      0.82      0.87       210
           4       0.56      0.50      0.53       210
           5       0.54      0.64      0.59       210
           6       0.44      0.38      0.41       210
           7       0.69      0.94      0.79       210

    accuracy                           0.65      1470
   macro avg       0.66      0.65      0.65      1470
weighted avg       0.66      0.65      0.65      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6775510204081633
Confusion Matrix of Random Forest is:
 [[133   5   4   6  40  18   4]
 [  1 153   2  18  16  17   3]
 [  0  20 180   7   1   2   0]
 [  2  17   7 111  30  32  11]
 [ 14  13   0  21 139  12  11]
 [  8  19   4  26   6  92  55]
 [  1   2   0   0   2  17 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.84      0.63      0.72       210
           2       0.67      0.73      0.70       210
           3       0.91      0.86      0.88       210
           4       0.59      0.53      0.56       210
           5       0.59      0.66      0.63       210
           6       0.48      0.44      0.46       210
           7       0.69      0.90      0.78       210

    accuracy                           0.68      1470
   macro avg       0.68      0.68      0.68      1470
weighted avg       0.68      0.68      0.68      1470

Random Forest with 19 max_dept

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//xlm_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = normalize_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.5258503401360545
Confusion Matrix of Logistic Regression is:
 [[108  19   7  11  36  12  17]
 [ 10 111  15  20  30  17   7]
 [  5  21 168   7   4   4   1]
 [ 14  23   9  90  30  23  21]
 [ 27  24   5  23 103  11  17]
 [ 15  32  10  30  21  52  50]
 [  3   5   0  20  11  30 141]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.59      0.51      0.55       210
           2       0.47      0.53      0.50       210
           3       0.79      0.80      0.79       210
           4       0.45      0.43      0.44       210
           5       0.44      0.49      0.46       210
           6       0.35      0.25      0.29       210
           7       0.56      0.67      0.61       210

    accuracy                           0.53      1470
   macro avg       0.52      0.53      0.52      1470
weighted avg       0.52      0.53      0.52      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.3986394557823129
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 84  52   6  15  27   5  21]
 [ 14 112  24  18  20   6  16]
 [ 18  52 135   2   2   0   1]
 [ 18  52   5  49  32  19  35]
 [ 21  47   3  20  80   6  33]
 [ 25  67   0  26  25  17  50]
 [ 14  22   0  24  22  19 109]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.43      0.40      0.42       210
           2       0.28      0.53      0.36       210
           3       0.78      0.64      0.70       210
           4       0.32      0.23      0.27       210
           5       0.38      0.38      0.38       210
           6       0.24      0.08      0.12       210
           7       0.41      0.52      0.46       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.39      1470
weighted avg       0.41      0.40      0.39      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.2612244897959184
Confusion Matrix of Decision Tree is:
 [[  9  75   4   0   0   0 122]
 [  6  88   6   0   0   0 110]
 [  1  63 107   0   0   0  39]
 [  2  54   1   0   0   0 153]
 [  1  49   2   0   0   0 158]
 [  3  54   2   0   0   0 151]
 [  1  28   1   0   0   0 180]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.39      0.04      0.08       210
           2       0.21      0.42      0.28       210
           3       0.87      0.51      0.64       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.20      0.86      0.32       210

    accuracy                           0.26      1470
   macro avg       0.24      0.26      0.19      1470
weighted avg       0.24      0.26      0.19      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.30680272108843537
Confusion Matrix of Decision Tree is:
 [[ 46  36   6  55   0   0  67]
 [ 12  78  10  69   0   0  41]
 [  6  57 108  23   0   0  16]
 [ 11  43   3  92   0   0  61]
 [ 20  29   3  79   0   0  79]
 [ 19  35   5  74   0   0  77]
 [ 13  15   2  53   0   0 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.36      0.22      0.27       210
           2       0.27      0.37      0.31       210
           3       0.79      0.51      0.62       210
           4       0.21      0.44      0.28       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.27      0.60      0.37       210

    accuracy                           0.31      1470
   macro avg       0.27      0.31      0.27      1470
weighted avg       0.27      0.31      0.27      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3346938775510204
Confusion Matrix of Decision Tree is:
 [[ 47   8  30  55  16   0  54]
 [  3  53  35  69  10   0  40]
 [  1   9 159  20   5   0  16]
 [  1  11  36  91  10   0  61]
 [  4  12  20  79  16   0  79]
 [  5  12  28  74  14   0  77]
 [  1   1  16  53  13   0 126]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.76      0.22      0.35       210
           2       0.50      0.25      0.34       210
           3       0.49      0.76      0.60       210
           4       0.21      0.43      0.28       210
           5       0.19      0.08      0.11       210
           6       0.00      0.00      0.00       210
           7       0.28      0.60      0.38       210

    accuracy                           0.33      1470
   macro avg       0.35      0.33      0.29      1470
weighted avg       0.35      0.33      0.29      1470

Decision Tree with 5 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.33877551020408164
Confusion Matrix of Decision Tree is:
 [[ 60   8   6  42   4  23  67]
 [ 13  54  14  58   4  16  51]
 [  5   9 147  20   2  11  16]
 [  8  11   5  64   4  29  89]
 [ 12  12   5  57   9  14 101]
 [ 12  13   9  58   8  17  93]
 [ 11   1   3  32   3  13 147]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.29      0.36       210
           2       0.50      0.26      0.34       210
           3       0.78      0.70      0.74       210
           4       0.19      0.30      0.24       210
           5       0.26      0.04      0.07       210
           6       0.14      0.08      0.10       210
           7       0.26      0.70      0.38       210

    accuracy                           0.34      1470
   macro avg       0.38      0.34      0.32      1470
weighted avg       0.38      0.34      0.32      1470

Decision Tree with 6 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.36054421768707484
Confusion Matrix of Decision Tree is:
 [[ 95  23   6  20  23  24  19]
 [ 15  79  16  34  25  25  16]
 [ 10  16 143  13  10  15   3]
 [ 46  34  10  42  34  18  26]
 [ 33  37  10  21  49  39  21]
 [ 26  27   9  29  34  46  39]
 [ 24  23   5  20  26  36  76]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.45      0.41       210
           2       0.33      0.38      0.35       210
           3       0.72      0.68      0.70       210
           4       0.23      0.20      0.22       210
           5       0.24      0.23      0.24       210
           6       0.23      0.22      0.22       210
           7       0.38      0.36      0.37       210

    accuracy                           0.36      1470
   macro avg       0.36      0.36      0.36      1470
weighted avg       0.36      0.36      0.36      1470

Decision Tree with 14 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.29863945578231293
Confusion Matrix of Random Forest is:
 [[ 34  16  21  14   4   2 119]
 [ 10  26  46   9  15   4 100]
 [  4  11 161   9   2   1  22]
 [  4   8  14  13   8   3 160]
 [  3   5  13   6  10   1 172]
 [  8   4  25  16   6   4 147]
 [  7   1   0   3   7   1 191]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.49      0.16      0.24       210
           2       0.37      0.12      0.19       210
           3       0.57      0.77      0.66       210
           4       0.19      0.06      0.09       210
           5       0.19      0.05      0.08       210
           6       0.25      0.02      0.04       210
           7       0.21      0.91      0.34       210

    accuracy                           0.30      1470
   macro avg       0.32      0.30      0.23      1470
weighted avg       0.32      0.30      0.23      1470

Random Forest with 2 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.45034013605442175
Confusion Matrix of Random Forest is:
 [[ 88  19   6  33  35   7  22]
 [  6 115   9  33  25  10  12]
 [  6  38 145  12   2   3   4]
 [ 11  29   5  60  39  29  37]
 [ 17  28   1  29  92   8  35]
 [ 15  40   0  41  26  35  53]
 [  3   9   0  23  23  25 127]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.60      0.42      0.49       210
           2       0.41      0.55      0.47       210
           3       0.87      0.69      0.77       210
           4       0.26      0.29      0.27       210
           5       0.38      0.44      0.41       210
           6       0.30      0.17      0.21       210
           7       0.44      0.60      0.51       210

    accuracy                           0.45      1470
   macro avg       0.47      0.45      0.45      1470
weighted avg       0.47      0.45      0.45      1470

Random Forest with 10 max_dep

Accuracy of Random Forest after Standard Scaling is: 0.44421768707482995
Confusion Matrix of Random Forest is:
 [[ 93  24   7  19  35  13  19]
 [  9 110   9  22  29  25   6]
 [ 10  28 154   7   4   6   1]
 [ 16  28   3  63  38  34  28]
 [ 25  24   3  33  78  19  28]
 [ 12  36   2  34  33  37  56]
 [  9   6   0  22  23  32 118]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.44      0.48       210
           2       0.43      0.52      0.47       210
           3       0.87      0.73      0.79       210
           4       0.32      0.30      0.31       210
           5       0.33      0.37      0.35       210
           6       0.22      0.18      0.20       210
           7       0.46      0.56      0.51       210

    accuracy                           0.44      1470
   macro avg       0.45      0.44      0.44      1470
weighted avg       0.45      0.44      0.44      1470

Random Forest with 18 max_dep