In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import StandardScaler
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Nisha//Input//Nisha_dataset_labels.csv")

In [2]:
# Function of Train-test split, Standard Scaling
def standard_scaling(x_data, y_data):
    x_train,x_test,y_train,y_test = train_test_split(x_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    # Standard scaling of train data
    scaler_model = StandardScaler()
    scaled_data_train = scaler_model.fit_transform(x_train)
    # Standard scaling of test data
    scaled_data_test = scaler_model.fit_transform(x_test)
    return scaled_data_train, scaled_data_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after Standard Scaling is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6693877551020408
Confusion Matrix of Logistic Regression is:
 [[158   1   1   3  26  17   4]
 [  6 146  13  11  19  15   0]
 [  0  12 170  19   3   6   0]
 [  6  11  17 124  28  21   3]
 [ 28  22   3  22 119  10   6]
 [  8  19   9  26   3 110  35]
 [  5   2   0   2   6  38 157]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.75      0.75      0.75       210
           2       0.69      0.70      0.69       210
           3       0.80      0.81      0.80       210
           4       0.60      0.59      0.59       210
           5       0.58      0.57      0.57       210
           6       0.51      0.52      0.52       210
           7       0.77      0.75      0.76       210

    accuracy                           0.67      1470
   macro avg       0.67      0.67      0.67      1470
weighted avg       0.67      0.67      0.67      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6680272108843538
Confusion Matrix of SVM is:
 [[166   1   0   4  21  17   1]
 [  4 152  13  14  11  16   0]
 [  0  10 171  19   1   9   0]
 [  9  20  21 119  19  20   2]
 [ 38  22   8  24 112   2   4]
 [ 13  22  15  20   3 117  20]
 [  4   7   2   2   1  49 145]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.79      0.75       210
           2       0.65      0.72      0.68       210
           3       0.74      0.81      0.78       210
           4       0.59      0.57      0.58       210
           5       0.67      0.53      0.59       210
           6       0.51      0.56      0.53       210
           7       0.84      0.69      0.76       210

    accuracy                           0.67      1470
   macro avg       0.67      0.67      0.67      1470
weighted avg       0.67      0.67      0.67      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40408163265306124
Confusion Matrix of Decision Tree is:
 [[ 94  44   0   0  72   0   0]
 [  1 204   3   0   1   0   1]
 [  0 127  83   0   0   0   0]
 [  0 201   2   0   6   0   1]
 [ 16  76   1   0 114   0   3]
 [  0 197   0   0   3   0  10]
 [  0 110   0   0   0   1  99]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.85      0.45      0.59       210
           2       0.21      0.97      0.35       210
           3       0.93      0.40      0.56       210
           4       0.00      0.00      0.00       210
           5       0.58      0.54      0.56       210
           6       0.00      0.00      0.00       210
           7       0.87      0.47      0.61       210

    accuracy                           0.40      1470
   macro avg       0.49      0.40      0.38      1470
weighted avg       0.49      0.40      0.38      1470

Decision Tree with 6 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4639455782312925
Confusion Matrix of Decision Tree is:
 [[119   1   0   0  47  43   0]
 [  1  74  29   1   1 104   0]
 [  0   0 119   0   0  91   0]
 [  3   2  16   0   3 185   1]
 [ 46  19   9   3  81  49   3]
 [  1   0   2   0   2 195  10]
 [  0   1   0   0   4 111  94]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.57      0.63       210
           2       0.76      0.35      0.48       210
           3       0.68      0.57      0.62       210
           4       0.00      0.00      0.00       210
           5       0.59      0.39      0.47       210
           6       0.25      0.93      0.39       210
           7       0.87      0.45      0.59       210

    accuracy                           0.46      1470
   macro avg       0.55      0.46      0.45      1470
weighted avg       0.55      0.46      0.45      1470

Decision Tree with 9 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5523809523809524
Confusion Matrix of Decision Tree is:
 [[154   0   0   2  21  33   0]
 [  2 109  11   2   4  81   1]
 [  0   7 134   0   0  69   0]
 [  5  21  12  39   4 129   0]
 [ 48  21   5   6  95  32   3]
 [  2   5   2   2   1 189   9]
 [  0   1   0   0   6 111  92]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.73      0.73      0.73       210
           2       0.66      0.52      0.58       210
           3       0.82      0.64      0.72       210
           4       0.76      0.19      0.30       210
           5       0.73      0.45      0.56       210
           6       0.29      0.90      0.44       210
           7       0.88      0.44      0.58       210

    accuracy                           0.55      1470
   macro avg       0.70      0.55      0.56      1470
weighted avg       0.70      0.55      0.56      1470

Decision Tree with 17 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6108843537414966
Confusion Matrix of Random Forest is:
 [[147   2   1   4  22  33   1]
 [  1 114   8  20   7  59   1]
 [  0   0 135   3   1  71   0]
 [  4  15   5 110  10  64   2]
 [ 52  26   2  21  92  12   5]
 [  2  15   1  29   2 136  25]
 [  0   4   0   1   1  40 164]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.70      0.71       210
           2       0.65      0.54      0.59       210
           3       0.89      0.64      0.75       210
           4       0.59      0.52      0.55       210
           5       0.68      0.44      0.53       210
           6       0.33      0.65      0.44       210
           7       0.83      0.78      0.80       210

    accuracy                           0.61      1470
   macro avg       0.67      0.61      0.62      1470
weighted avg       0.67      0.61      0.62      1470

Random Forest with 4 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6632653061224489
Confusion Matrix of Random Forest is:
 [[156   1   0   4  23  26   0]
 [  1 130  11  21   2  44   1]
 [  0   8 146   2   0  54   0]
 [  3  15   9 122  11  48   2]
 [ 45  23   3  18 106  10   5]
 [  2  13   1  23   4 143  24]
 [  0   3   0   0   1  34 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.75      0.74      0.75       210
           2       0.67      0.62      0.65       210
           3       0.86      0.70      0.77       210
           4       0.64      0.58      0.61       210
           5       0.72      0.50      0.59       210
           6       0.40      0.68      0.50       210
           7       0.84      0.82      0.83       210

    accuracy                           0.66      1470
   macro avg       0.70      0.66      0.67      1470
weighted avg       0.70      0.66      0.67      1470

Random Forest with 12 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6707482993197279
Confusion Matrix of Random Forest is:
 [[153   2   1   5  24  25   0]
 [  1 141   9  17   4  37   1]
 [  0  11 147   4   0  48   0]
 [  2  13  11 126  11  45   2]
 [ 45  24   2  21 107   6   5]
 [  4  14   1  21   0 144  26]
 [  0   1   0   2   3  36 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.75      0.73      0.74       210
           2       0.68      0.67      0.68       210
           3       0.86      0.70      0.77       210
           4       0.64      0.60      0.62       210
           5       0.72      0.51      0.60       210
           6       0.42      0.69      0.52       210
           7       0.83      0.80      0.82       210

    accuracy                           0.67      1470
   macro avg       0.70      0.67      0.68      1470
weighted avg       0.70      0.67      0.68      1470

Random Forest with 20 max_dept

In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6836734693877551
Confusion Matrix of Logistic Regression is:
 [[153   2   0   6  28  19   2]
 [  4 149  10  14  15  18   0]
 [  0  11 178  15   1   5   0]
 [  5   9  16 125  30  22   3]
 [ 25  20   2  19 127  10   7]
 [  5  20   4  27   4 115  35]
 [  5   2   1   3   6  35 158]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.78      0.73      0.75       210
           2       0.70      0.71      0.70       210
           3       0.84      0.85      0.85       210
           4       0.60      0.60      0.60       210
           5       0.60      0.60      0.60       210
           6       0.51      0.55      0.53       210
           7       0.77      0.75      0.76       210

    accuracy                           0.68      1470
   macro avg       0.69      0.68      0.68      1470
weighted avg       0.69      0.68      0.68      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.689795918367347
Confusion Matrix of SVM is:
 [[162   1   1   2  24  19   1]
 [  6 154  11  15   6  18   0]
 [  0   8 191   5   0   6   0]
 [  7  15  27 116  17  23   5]
 [ 41  20   8  14 119   4   4]
 [  7  21  14  19   1 124  24]
 [  9   3   6   5   2  37 148]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.70      0.77      0.73       210
           2       0.69      0.73      0.71       210
           3       0.74      0.91      0.82       210
           4       0.66      0.55      0.60       210
           5       0.70      0.57      0.63       210
           6       0.54      0.59      0.56       210
           7       0.81      0.70      0.76       210

    accuracy                           0.69      1470
   macro avg       0.69      0.69      0.69      1470
weighted avg       0.69      0.69      0.69      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard Sc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[148   1   0   0  18  43   0]
 [  1  57  26   0   1 124   1]
 [  0   0 123   0   0  87   0]
 [  5   3  17   0   1 183   1]
 [ 68  18  14   1  61  46   2]
 [  2   2   2   0   1 195   8]
 [  0   1   3   0   0 131  75]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.66      0.70      0.68       210
           2       0.70      0.27      0.39       210
           3       0.66      0.59      0.62       210
           4       0.00      0.00      0.00       210
           5       0.74      0.29      0.42       210
           6       0.24      0.93      0.38       210
           7       0.86      0.36      0.51       210

    accuracy                           0.45      1470
   macro avg       0.55      0.45      0.43      1470
weighted avg       0.55      0.45      0.43      1470

Decision Tree with 6 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.463265306122449


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.5197278911564626
Confusion Matrix of Decision Tree is:
 [[142   1   0   0  21  46   0]
 [  1  91  19   5   4  89   1]
 [  0   0 123   0   0  87   0]
 [  5   6  16  38   2 142   1]
 [ 56  16  11   2  78  44   3]
 [  3   3   2   0   0 191  11]
 [  0   1   3   0   0 105 101]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.69      0.68      0.68       210
           2       0.77      0.43      0.55       210
           3       0.71      0.59      0.64       210
           4       0.84      0.18      0.30       210
           5       0.74      0.37      0.50       210
           6       0.27      0.91      0.42       210
           7       0.86      0.48      0.62       210

    accuracy                           0.52      1470
   macro avg       0.70      0.52      0.53      1470
weighted avg       0.70      0.52      0.53      1470

Decision Tree with 10 max_dept

Confusion Matrix of Decision Tree is:
 [[152   1   0   0  23  34   0]
 [  6 103  25   1   6  68   1]
 [  0   0 155   1   2  52   0]
 [  5   3  25  47   7 122   1]
 [ 52  19  11   6  92  27   3]
 [  3   0   8   3   1 182  13]
 [  0   1   4   1   0  99 105]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.72      0.71       210
           2       0.81      0.49      0.61       210
           3       0.68      0.74      0.71       210
           4       0.80      0.22      0.35       210
           5       0.70      0.44      0.54       210
           6       0.31      0.87      0.46       210
           7       0.85      0.50      0.63       210

    accuracy                           0.57      1470
   macro avg       0.69      0.57      0.57      1470
weighted avg       0.69      0.57      0.57      1470

Decision Tree with 18 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.566666666666666

Accuracy of Random Forest after Standard Scaling is: 0.6170068027210884
Confusion Matrix of Random Forest is:
 [[158   2   1   6  21  21   1]
 [  1 105  35  25   3  40   1]
 [  0   0 146   5   0  59   0]
 [  4  12  17 124  10  41   2]
 [ 58  24  13  17  86   5   7]
 [  3  10   6  43   2 120  26]
 [  0   5   2   6   0  29 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.75      0.73       210
           2       0.66      0.50      0.57       210
           3       0.66      0.70      0.68       210
           4       0.55      0.59      0.57       210
           5       0.70      0.41      0.52       210
           6       0.38      0.57      0.46       210
           7       0.82      0.80      0.81       210

    accuracy                           0.62      1470
   macro avg       0.64      0.62      0.62      1470
weighted avg       0.64      0.62      0.62      1470

Random Forest with 5 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.636734693877551
Confusion Matrix of Random Forest is:
 [[157   2   0   4  22  24   1]
 [  1 112  36  23   3  34   1]
 [  0   0 162  13   0  35   0]
 [  3  13  20 116  11  45   2]
 [ 55  26   9  19  90   6   5]
 [  3   9   7  29   1 136  25]
 [  0   4   2   0   0  41 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.72      0.75      0.73       210
           2       0.67      0.53      0.60       210
           3       0.69      0.77      0.73       210
           4       0.57      0.55      0.56       210
           5       0.71      0.43      0.53       210
           6       0.42      0.65      0.51       210
           7       0.83      0.78      0.80       210

    accuracy                           0.64      1470
   macro avg       0.66      0.64      0.64      1470
weighted avg       0.66      0.64      0.64      1470

Random Forest with 13 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6537414965986394
Confusion Matrix of Random Forest is:
 [[157   1   1   4  22  25   0]
 [  1 122  29  20   3  34   1]
 [  0   3 167  10   0  30   0]
 [  3  15  21 122   7  40   2]
 [ 54  27   9  17  91   8   4]
 [  3   9   9  28   0 137  24]
 [  0   4   2   0   0  39 165]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.72      0.75      0.73       210
           2       0.67      0.58      0.62       210
           3       0.70      0.80      0.75       210
           4       0.61      0.58      0.59       210
           5       0.74      0.43      0.55       210
           6       0.44      0.65      0.52       210
           7       0.84      0.79      0.81       210

    accuracy                           0.65      1470
   macro avg       0.67      0.65      0.65      1470
weighted avg       0.67      0.65      0.65      1470

Accuracy of Random Forest afte

In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6727891156462585
Confusion Matrix of Logistic Regression is:
 [[153   2   2   5  24  20   4]
 [  6 150  11  13  16  14   0]
 [  0  11 174  16   3   6   0]
 [  7  10  17 124  28  22   2]
 [ 25  21   3  22 121  10   8]
 [  8  18   9  26   6 109  34]
 [  5   1   1   2   6  37 158]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.75      0.73      0.74       210
           2       0.70      0.71      0.71       210
           3       0.80      0.83      0.81       210
           4       0.60      0.59      0.59       210
           5       0.59      0.58      0.58       210
           6       0.50      0.52      0.51       210
           7       0.77      0.75      0.76       210

    accuracy                           0.67      1470
   macro avg       0.67      0.67      0.67      1470
weighted avg       0.67      0.67      0.67      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6782312925170068
Confusion Matrix of SVM is:
 [[160   2   0   4  22  20   2]
 [  5 155  11  14   7  18   0]
 [  1   9 178  13   1   8   0]
 [  7  20  27 115  14  25   2]
 [ 38  21   7  23 114   3   4]
 [  9  22  12  22   1 123  21]
 [  7   5   6   4   2  34 152]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.70      0.76      0.73       210
           2       0.66      0.74      0.70       210
           3       0.74      0.85      0.79       210
           4       0.59      0.55      0.57       210
           5       0.71      0.54      0.61       210
           6       0.53      0.59      0.56       210
           7       0.84      0.72      0.78       210

    accuracy                           0.68      1470
   macro avg       0.68      0.68      0.68      1470
weighted avg       0.68      0.68      0.68      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[128   1   0   0  37  44   0]
 [  1  24   3   0   1 180   1]
 [  0  15  93   0   0 102   0]
 [  3   6   2   0   3 195   1]
 [ 50   4   1   0  79  73   3]
 [  1   1   0   0   2 196  10]
 [  0   0   0   0   0 110 100]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.61      0.65       210
           2       0.47      0.11      0.18       210
           3       0.94      0.44      0.60       210
           4       0.00      0.00      0.00       210
           5       0.65      0.38      0.48       210
           6       0.22      0.93      0.35       210
           7       0.87      0.48      0.62       210

    accuracy                           0.42      1470
   macro avg       0.55      0.42      0.41      1470
weighted avg       0.55      0.42      0.41      1470

Decision Tree with 6 max_depth
Accuracy of Decision Tree after Standard Scaling is: 0.4414965986394558

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.47346938775510206
Confusion Matrix of Decision Tree is:
 [[123   1   0   2  40  44   0]
 [  1  75   5   0   1 127   1]
 [  0  12 110   0   0  88   0]
 [  3   7   2   0   3 194   1]
 [ 34  18   1   2  93  60   2]
 [  2   2   0   0   1 196   9]
 [  0   0   0   0   0 111  99]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.75      0.59      0.66       210
           2       0.65      0.36      0.46       210
           3       0.93      0.52      0.67       210
           4       0.00      0.00      0.00       210
           5       0.67      0.44      0.53       210
           6       0.24      0.93      0.38       210
           7       0.88      0.47      0.61       210

    accuracy                           0.47      1470
   macro avg       0.59      0.47      0.47      1470
weighted avg       0.59      0.47      0.47      1470

Decision Tree with 9 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5612244897959183
Confusion Matrix of Decision Tree is:
 [[147   1   0   1  29  32   0]
 [  2 111  11   1   7  77   1]
 [  0  10 139   0   2  59   0]
 [  4  18  10  42   9 126   1]
 [ 44  22   5   4 102  29   4]
 [  1   6   1   2   4 187   9]
 [  0   1   0   0   3 109  97]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.74      0.70      0.72       210
           2       0.66      0.53      0.59       210
           3       0.84      0.66      0.74       210
           4       0.84      0.20      0.32       210
           5       0.65      0.49      0.56       210
           6       0.30      0.89      0.45       210
           7       0.87      0.46      0.60       210

    accuracy                           0.56      1470
   macro avg       0.70      0.56      0.57      1470
weighted avg       0.70      0.56      0.57      1470

Decision Tree with 18 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6258503401360545
Confusion Matrix of Random Forest is:
 [[157   2   2   6  20  21   2]
 [  1 101  31  26   4  46   1]
 [  0   0 146   8   0  56   0]
 [  3  13  13 126  11  43   1]
 [ 58  23   6  20  88   9   6]
 [  2  10   2  31   4 135  26]
 [  0   1   0   2   4  36 167]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.75      0.73       210
           2       0.67      0.48      0.56       210
           3       0.73      0.70      0.71       210
           4       0.58      0.60      0.59       210
           5       0.67      0.42      0.52       210
           6       0.39      0.64      0.49       210
           7       0.82      0.80      0.81       210

    accuracy                           0.63      1470
   macro avg       0.65      0.63      0.63      1470
weighted avg       0.65      0.63      0.63      1470

Random Forest with 5 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6578231292517007
Confusion Matrix of Random Forest is:
 [[153   1   1   4  26  24   1]
 [  1 116  27  20   4  41   1]
 [  0   1 155   5   0  49   0]
 [  3  12  13 121  11  48   2]
 [ 39  22   4  22 110   8   5]
 [  4   9   2  22   1 143  29]
 [  0   2   0   0   2  37 169]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.77      0.73      0.75       210
           2       0.71      0.55      0.62       210
           3       0.77      0.74      0.75       210
           4       0.62      0.58      0.60       210
           5       0.71      0.52      0.60       210
           6       0.41      0.68      0.51       210
           7       0.82      0.80      0.81       210

    accuracy                           0.66      1470
   macro avg       0.69      0.66      0.66      1470
weighted avg       0.69      0.66      0.66      1470

Random Forest with 13 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.673469387755102
Confusion Matrix of Random Forest is:
 [[156   2   1   4  24  22   1]
 [  1 122  23  23   4  36   1]
 [  0   3 159  11   0  37   0]
 [  2  13  12 125  11  45   2]
 [ 36  21   4  21 116   7   5]
 [  2   8   3  23   3 145  26]
 [  0   2   0   0   0  41 167]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.74      0.77       210
           2       0.71      0.58      0.64       210
           3       0.79      0.76      0.77       210
           4       0.60      0.60      0.60       210
           5       0.73      0.55      0.63       210
           6       0.44      0.69      0.53       210
           7       0.83      0.80      0.81       210

    accuracy                           0.67      1470
   macro avg       0.70      0.67      0.68      1470
weighted avg       0.70      0.67      0.68      1470

Accuracy of Random Forest after

### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7387755102040816
Confusion Matrix of Logistic Regression is:
 [[157   2   6   7  25  13   0]
 [  2 168   4  14  15   5   2]
 [  0   3 200   3   0   4   0]
 [  3  18   5 145  20  11   8]
 [ 36  13   3  20 127   7   4]
 [ 11  20   9  15   7 118  30]
 [  4   0   0   3   4  28 171]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.74      0.75      0.74       210
           2       0.75      0.80      0.77       210
           3       0.88      0.95      0.92       210
           4       0.70      0.69      0.70       210
           5       0.64      0.60      0.62       210
           6       0.63      0.56      0.60       210
           7       0.80      0.81      0.80       210

    accuracy                           0.74      1470
   macro avg       0.73      0.74      0.74      1470
weighted avg       0.73      0.74      0.74      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7299319727891157
Confusion Matrix of SVM is:
 [[165   4   3   4  22  12   0]
 [  2 170   3  17   8   8   2]
 [  0   4 200   2   1   3   0]
 [  4  16   6 149  22   9   4]
 [ 48  14   2  23 115   1   7]
 [ 13  20   8  24   6 109  30]
 [  3   1   0   2   3  36 165]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.70      0.79      0.74       210
           2       0.74      0.81      0.77       210
           3       0.90      0.95      0.93       210
           4       0.67      0.71      0.69       210
           5       0.65      0.55      0.59       210
           6       0.61      0.52      0.56       210
           7       0.79      0.79      0.79       210

    accuracy                           0.73      1470
   macro avg       0.72      0.73      0.73      1470
weighted avg       0.72      0.73      0.73      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4
Confusion Matrix of Decision Tree is:
 [[  0   1   3   0 179   0  27]
 [  0  46   5   0 140   0  19]
 [  0   4 165   0  27   0  14]
 [  0   2  22   0 144   0  42]
 [  0   3   3   0 181   0  23]
 [  0   3  16   0  58   0 133]
 [  0   0   0   0  14   0 196]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.78      0.22      0.34       210
           3       0.77      0.79      0.78       210
           4       0.00      0.00      0.00       210
           5       0.24      0.86      0.38       210
           6       0.00      0.00      0.00       210
           7       0.43      0.93      0.59       210

    accuracy                           0.40      1470
   macro avg       0.32      0.40      0.30      1470
weighted avg       0.32      0.40      0.30      1470

Decision Tree with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.49523809523809526
Confusion Matrix of Decision Tree is:
 [[145   0   4   0  34  11  16]
 [ 20  46   5   0 120  13   6]
 [  2   3 158   8  25   9   5]
 [ 19   1  14   9 125  15  27]
 [ 35   3   2   1 146   3  20]
 [ 32   1  14   4  26  60  73]
 [  9   0   0   0   5  32 164]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.69      0.61       210
           2       0.85      0.22      0.35       210
           3       0.80      0.75      0.78       210
           4       0.41      0.04      0.08       210
           5       0.30      0.70      0.42       210
           6       0.42      0.29      0.34       210
           7       0.53      0.78      0.63       210

    accuracy                           0.50      1470
   macro avg       0.55      0.50      0.46      1470
weighted avg       0.55      0.50      0.46      1470

Decision Tree with 4 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5741496598639456
Confusion Matrix of Decision Tree is:
 [[130   4   2  19  34  14   7]
 [  6 133   5  24  27  12   3]
 [  4   9 162  20  10   3   2]
 [  8  19   9  90  47  25  12]
 [ 24  19   1  29 111  16  10]
 [ 18  17   9  31  11  92  32]
 [  5   6   1   8   6  58 126]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.67      0.62      0.64       210
           2       0.64      0.63      0.64       210
           3       0.86      0.77      0.81       210
           4       0.41      0.43      0.42       210
           5       0.45      0.53      0.49       210
           6       0.42      0.44      0.43       210
           7       0.66      0.60      0.63       210

    accuracy                           0.57      1470
   macro avg       0.59      0.57      0.58      1470
weighted avg       0.59      0.57      0.58      1470

Decision Tree with 12 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5557823129251701
Confusion Matrix of Decision Tree is:
 [[132   5   4  12  35  13   9]
 [ 18 127   4  15  31   9   6]
 [  3  13 166  19   4   5   0]
 [ 17  24  14  87  37  21  10]
 [ 31  26   4  29  93  15  12]
 [ 19  19   8  33  18  84  29]
 [  5   5   1  10   8  53 128]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.59      0.63      0.61       210
           2       0.58      0.60      0.59       210
           3       0.83      0.79      0.81       210
           4       0.42      0.41      0.42       210
           5       0.41      0.44      0.43       210
           6       0.42      0.40      0.41       210
           7       0.66      0.61      0.63       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Decision Tree with 20 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6482993197278911
Confusion Matrix of Random Forest is:
 [[131   1   0   4  47  20   7]
 [  2 147   1   8  32  17   3]
 [  0   7 174  12   4  13   0]
 [  5  20   4  78  62  23  18]
 [ 19  18   1   8 141   5  18]
 [  8  14   2  15  10  97  64]
 [  0   0   0   1   3  21 185]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.62      0.70       210
           2       0.71      0.70      0.71       210
           3       0.96      0.83      0.89       210
           4       0.62      0.37      0.46       210
           5       0.47      0.67      0.55       210
           6       0.49      0.46      0.48       210
           7       0.63      0.88      0.73       210

    accuracy                           0.65      1470
   macro avg       0.67      0.65      0.65      1470
weighted avg       0.67      0.65      0.65      1470

Random Forest with 7 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7
Confusion Matrix of Random Forest is:
 [[140   1   0   7  40  18   4]
 [  3 150   4  18  22  11   2]
 [  0   6 186   7   3   7   1]
 [  3   7   4 121  37  25  13]
 [ 17  13   0  24 137  10   9]
 [  9  12   1  17   6 112  53]
 [  0   0   0   0   4  23 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.67      0.73       210
           2       0.79      0.71      0.75       210
           3       0.95      0.89      0.92       210
           4       0.62      0.58      0.60       210
           5       0.55      0.65      0.60       210
           6       0.54      0.53      0.54       210
           7       0.69      0.87      0.77       210

    accuracy                           0.70      1470
   macro avg       0.71      0.70      0.70      1470
weighted avg       0.71      0.70      0.70      1470

Random Forest with 15 max_depth
Accuracy of R

In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_gkb.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6156462585034014
Confusion Matrix of Logistic Regression is:
 [[141   7   4   9  33   8   8]
 [  3 108  19  30  26  19   5]
 [  0  13 181   8   2   1   5]
 [  6  19  11 102  41  11  20]
 [ 24  12   2  23 136   8   5]
 [  6  22  18  28  11  72  53]
 [  3   2   2   4   7  27 165]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.77      0.67      0.72       210
           2       0.59      0.51      0.55       210
           3       0.76      0.86      0.81       210
           4       0.50      0.49      0.49       210
           5       0.53      0.65      0.58       210
           6       0.49      0.34      0.40       210
           7       0.63      0.79      0.70       210

    accuracy                           0.62      1470
   macro avg       0.61      0.62      0.61      1470
weighted avg       0.61      0.62      0.61      1470

KNN with 3 N

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after Standard Scaling is: 0.6020408163265306
Confusion Matrix of SVM is:
 [[138   7   2   9  38  12   4]
 [  8 109  16  30  24  18   5]
 [  0  14 182   7   1   1   5]
 [  5  20  10 103  41  15  16]
 [ 25   9   2  30 134   5   5]
 [ 10  22  17  32  13  64  52]
 [  6   8   2   4   7  28 155]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.72      0.66      0.69       210
           2       0.58      0.52      0.55       210
           3       0.79      0.87      0.83       210
           4       0.48      0.49      0.48       210
           5       0.52      0.64      0.57       210
           6       0.45      0.30      0.36       210
           7       0.64      0.74      0.69       210

    accuracy                           0.60      1470
   macro avg       0.60      0.60      0.59      1470
weighted avg       0.60      0.60      0.59      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.24353741496598638
Confusion Matrix of Decision Tree is:
 [[  0   0   0 174   0   0  36]
 [  0   0   0 165   0   0  45]
 [  0   0   0 188   0   0  22]
 [  0   0   0 174   0   0  36]
 [  0   0   0 192   0   0  18]
 [  0   0   0 100   0   0 110]
 [  0   0   0  26   0   0 184]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.00      0.00      0.00       210
           4       0.17      0.83      0.28       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.41      0.88      0.56       210

    accuracy                           0.24      1470
   macro avg       0.08      0.24      0.12      1470
weighted avg       0.08      0.24      0.12      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.29931972789115646
Confusion Matrix of Decision Tree is:
 [[100   0   0  88   0   0  22]
 [ 24   0   0 142   0   0  44]
 [  8   0   0 183   0   0  19]
 [ 12   0   0 163   0   0  35]
 [ 19   0   0 175   0   0  16]
 [ 37   0   0  67   0   0 106]
 [ 18   0   0  15   0   0 177]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.48      0.47       210
           2       0.00      0.00      0.00       210
           3       0.00      0.00      0.00       210
           4       0.20      0.78      0.31       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.42      0.84      0.56       210

    accuracy                           0.30      1470
   macro avg       0.15      0.30      0.19      1470
weighted avg       0.15      0.30      0.19      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3149659863945578
Confusion Matrix of Decision Tree is:
 [[ 84  24   2  64   0  16  20]
 [ 13  80   1  62   0  11  43]
 [  7 153   6  30   0   1  13]
 [ 11  40   8 123   0   1  27]
 [ 15  41   1 134   0   4  15]
 [ 23  23  19  44   0  14  87]
 [ 10   4  21  11   0   8 156]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.52      0.40      0.45       210
           2       0.22      0.38      0.28       210
           3       0.10      0.03      0.04       210
           4       0.26      0.59      0.36       210
           5       0.00      0.00      0.00       210
           6       0.25      0.07      0.11       210
           7       0.43      0.74      0.55       210

    accuracy                           0.31      1470
   macro avg       0.26      0.31      0.26      1470
weighted avg       0.26      0.31      0.26      1470

Decision Tree with 5 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.31564625850340133
Confusion Matrix of Decision Tree is:
 [[ 90  13   0  58  22  12  15]
 [ 13  46   0  58  55  12  26]
 [  7  71   5  29  96   2   0]
 [ 11  21   5 112  35   4  22]
 [ 17  14   1 120  46   2  10]
 [ 26  20   9  35  19  21  80]
 [ 10  13  11  11   3  18 144]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.52      0.43      0.47       210
           2       0.23      0.22      0.23       210
           3       0.16      0.02      0.04       210
           4       0.26      0.53      0.35       210
           5       0.17      0.22      0.19       210
           6       0.30      0.10      0.15       210
           7       0.48      0.69      0.57       210

    accuracy                           0.32      1470
   macro avg       0.30      0.32      0.29      1470
weighted avg       0.30      0.32      0.29      1470

Decision Tree with 6 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.31564625850340133
Confusion Matrix of Decision Tree is:
 [[ 75  24  10  57   8  24  12]
 [ 12  56  25  72  12  18  15]
 [  7  17  60 105   3  17   1]
 [  9  30  15 108   8  25  15]
 [ 18  26  16 115  12  17   6]
 [ 26  25   9  45  11  58  36]
 [ 13  16   2  14  14  56  95]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.36      0.41       210
           2       0.29      0.27      0.28       210
           3       0.44      0.29      0.35       210
           4       0.21      0.51      0.30       210
           5       0.18      0.06      0.09       210
           6       0.27      0.28      0.27       210
           7       0.53      0.45      0.49       210

    accuracy                           0.32      1470
   macro avg       0.34      0.32      0.31      1470
weighted avg       0.34      0.32      0.31      1470

Decision Tree with 14 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.30952380952380953
Confusion Matrix of Decision Tree is:
 [[ 97  26   7  22  20  26  12]
 [ 24  68  25  41  14  18  20]
 [ 10  27 111  27  11  22   2]
 [ 52  23  29  30  20  42  14]
 [ 58  21  19  36  26  37  13]
 [ 29  25  25  25  15  56  35]
 [ 14  23  11   9  22  64  67]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.46      0.39       210
           2       0.32      0.32      0.32       210
           3       0.49      0.53      0.51       210
           4       0.16      0.14      0.15       210
           5       0.20      0.12      0.15       210
           6       0.21      0.27      0.24       210
           7       0.41      0.32      0.36       210

    accuracy                           0.31      1470
   macro avg       0.30      0.31      0.30      1470
weighted avg       0.30      0.31      0.30      1470

Random Forest with 1 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.38095238095238093
Confusion Matrix of Random Forest is:
 [[ 51   0   5  75  38   2  39]
 [  9   0  27  91  32   0  51]
 [  0   0 140  51   6   0  13]
 [  2   0  14 113  43   0  38]
 [  5   0   7 114  65   0  19]
 [ 12   0  12  49  20   4 113]
 [  4   0   1  11   6   1 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.24      0.35       210
           2       0.00      0.00      0.00       210
           3       0.68      0.67      0.67       210
           4       0.22      0.54      0.32       210
           5       0.31      0.31      0.31       210
           6       0.57      0.02      0.04       210
           7       0.41      0.89      0.56       210

    accuracy                           0.38      1470
   macro avg       0.40      0.38      0.32      1470
weighted avg       0.40      0.38      0.32      1470

Random Forest with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.43333333333333335
Confusion Matrix of Random Forest is:
 [[133   0   0  38  14   8  17]
 [ 42  10   7  64  37  15  35]
 [  8   6 123  57   3   2  11]
 [ 31   0   3  99  41   2  34]
 [ 45   0   2  65  80   3  15]
 [ 47   0   8  31  11  11 102]
 [ 17   0   0   5   1   6 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.63      0.50       210
           2       0.62      0.05      0.09       210
           3       0.86      0.59      0.70       210
           4       0.28      0.47      0.35       210
           5       0.43      0.38      0.40       210
           6       0.23      0.05      0.09       210
           7       0.46      0.86      0.60       210

    accuracy                           0.43      1470
   macro avg       0.47      0.43      0.39      1470
weighted avg       0.47      0.43      0.39      1470

Random Forest with 4 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5190476190476191
Confusion Matrix of Random Forest is:
 [[135   7   2  21  23  10  12]
 [ 19  97   1  44  19  13  17]
 [  2  26 141  30   1   8   2]
 [ 22  14   5 100  34  16  19]
 [ 30  12   2  65  83   8  10]
 [ 21  19  14  30  14  59  53]
 [  8   1  10   4   6  33 148]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.64      0.60       210
           2       0.55      0.46      0.50       210
           3       0.81      0.67      0.73       210
           4       0.34      0.48      0.40       210
           5       0.46      0.40      0.43       210
           6       0.40      0.28      0.33       210
           7       0.57      0.70      0.63       210

    accuracy                           0.52      1470
   macro avg       0.53      0.52      0.52      1470
weighted avg       0.53      0.52      0.52      1470

Random Forest with 12 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.5122448979591837
Confusion Matrix of Random Forest is:
 [[138   5   3  17  22  12  13]
 [ 15  92   7  41  22  16  17]
 [  1  25 144  30   3   6   1]
 [ 17  22   6  86  42  19  18]
 [ 35  12   3  56  84  11   9]
 [ 18  14  10  31  18  67  52]
 [  6   3   6   5   8  40 142]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.60      0.66      0.63       210
           2       0.53      0.44      0.48       210
           3       0.80      0.69      0.74       210
           4       0.32      0.41      0.36       210
           5       0.42      0.40      0.41       210
           6       0.39      0.32      0.35       210
           7       0.56      0.68      0.61       210

    accuracy                           0.51      1470
   macro avg       0.52      0.51      0.51      1470
weighted avg       0.52      0.51      0.51      1470

Random Forest with 20 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7163265306122449
Confusion Matrix of Logistic Regression is:
 [[157   3   1   1  36  11   1]
 [  3 161   4  16  15  11   0]
 [  1   6 188  10   2   3   0]
 [  3  13   6 144  25  17   2]
 [ 38  17   1  19 127   4   4]
 [ 13  20   6  24   7 113  27]
 [  3   3   0   2   6  33 163]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.72      0.75      0.73       210
           2       0.72      0.77      0.74       210
           3       0.91      0.90      0.90       210
           4       0.67      0.69      0.68       210
           5       0.58      0.60      0.59       210
           6       0.59      0.54      0.56       210
           7       0.83      0.78      0.80       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6863945578231293
Confusion Matrix of SVM is:
 [[152   4   0   3  35  14   2]
 [  5 160   4  18  12  11   0]
 [  0   7 190   6   2   5   0]
 [  4  19   6 134  25  20   2]
 [ 55  16   1  22 110   4   2]
 [ 15  15   8  23  13 105  31]
 [  3   2   0   1   2  44 158]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.65      0.72      0.68       210
           2       0.72      0.76      0.74       210
           3       0.91      0.90      0.91       210
           4       0.65      0.64      0.64       210
           5       0.55      0.52      0.54       210
           6       0.52      0.50      0.51       210
           7       0.81      0.75      0.78       210

    accuracy                           0.69      1470
   macro avg       0.69      0.69      0.69      1470
weighted avg       0.69      0.69      0.69      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.38095238095238093
Confusion Matrix of Decision Tree is:
 [[  0   0  12   0 173  15  10]
 [  0   0  11   0 167  25   7]
 [  0   0 131   0  63  15   1]
 [  0   0  21   0 172  13   4]
 [  0   0   4   0 197   5   4]
 [  0   0   3   0  70  69  68]
 [  0   0   0   0  20  27 163]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.72      0.62      0.67       210
           4       0.00      0.00      0.00       210
           5       0.23      0.94      0.37       210
           6       0.41      0.33      0.36       210
           7       0.63      0.78      0.70       210

    accuracy                           0.38      1470
   macro avg       0.28      0.38      0.30      1470
weighted avg       0.28      0.38      0.30      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.40068027210884355
Confusion Matrix of Decision Tree is:
 [[166   9   4   8   0  19   4]
 [100  72   3   8   0  22   5]
 [ 44  19 103  28   0  16   0]
 [159  13   4  17   0  14   3]
 [184  13   1   3   0   5   4]
 [ 64   8   1   2   0  82  53]
 [ 20   1   0   0   0  40 149]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.23      0.79      0.35       210
           2       0.53      0.34      0.42       210
           3       0.89      0.49      0.63       210
           4       0.26      0.08      0.12       210
           5       0.00      0.00      0.00       210
           6       0.41      0.39      0.40       210
           7       0.68      0.71      0.70       210

    accuracy                           0.40      1470
   macro avg       0.43      0.40      0.37      1470
weighted avg       0.43      0.40      0.37      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4714285714285714
Confusion Matrix of Decision Tree is:
 [[110   5   9   8  56  15   7]
 [ 13  70   8   8  87  20   4]
 [  2   6 126  23  42  10   1]
 [ 21  13   4  17 138  17   0]
 [ 51  12   3   3 133   5   3]
 [ 19   6   7   2  45  83  48]
 [ 15   1   0   0   5  35 154]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.48      0.52      0.50       210
           2       0.62      0.33      0.43       210
           3       0.80      0.60      0.69       210
           4       0.28      0.08      0.13       210
           5       0.26      0.63      0.37       210
           6       0.45      0.40      0.42       210
           7       0.71      0.73      0.72       210

    accuracy                           0.47      1470
   macro avg       0.51      0.47      0.47      1470
weighted avg       0.51      0.47      0.47      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5224489795918368
Confusion Matrix of Decision Tree is:
 [[105  19   9  23  34  15   5]
 [ 16 111  12  26  19  22   4]
 [  8  14 160  12   7   7   2]
 [ 20  31   8  94  35  20   2]
 [ 28  35   6  38  86  15   2]
 [ 21  25   4  31  10  79  40]
 [  6   6   1   8   4  52 133]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.51      0.50      0.51       210
           2       0.46      0.53      0.49       210
           3       0.80      0.76      0.78       210
           4       0.41      0.45      0.43       210
           5       0.44      0.41      0.42       210
           6       0.38      0.38      0.38       210
           7       0.71      0.63      0.67       210

    accuracy                           0.52      1470
   macro avg       0.53      0.52      0.52      1470
weighted avg       0.53      0.52      0.52      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5108843537414965
Confusion Matrix of Decision Tree is:
 [[104  22   7  24  38   7   8]
 [ 21 103  11  26  24  21   4]
 [  7  16 162  11   6   7   1]
 [ 19  30  11  90  34  24   2]
 [ 34  33   4  38  81  15   5]
 [ 24  20   8  27  13  80  38]
 [  4   7   2   6  10  50 131]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.50      0.49       210
           2       0.45      0.49      0.47       210
           3       0.79      0.77      0.78       210
           4       0.41      0.43      0.42       210
           5       0.39      0.39      0.39       210
           6       0.39      0.38      0.39       210
           7       0.69      0.62      0.66       210

    accuracy                           0.51      1470
   macro avg       0.52      0.51      0.51      1470
weighted avg       0.52      0.51      0.51      1470

Accuracy of Decision Tree afte

Accuracy of Random Forest after Standard Scaling is: 0.6346938775510204
Confusion Matrix of Random Forest is:
 [[122   9   0  13  37  25   4]
 [  3 123   2  26  30  22   4]
 [  5  12 163  11   9  10   0]
 [  7  11   4 128  36  21   3]
 [ 18  17   0  39 121  10   5]
 [  9  15   2  28   6  90  60]
 [  0   4   0   1   0  19 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.58      0.65       210
           2       0.64      0.59      0.61       210
           3       0.95      0.78      0.86       210
           4       0.52      0.61      0.56       210
           5       0.51      0.58      0.54       210
           6       0.46      0.43      0.44       210
           7       0.71      0.89      0.79       210

    accuracy                           0.63      1470
   macro avg       0.65      0.63      0.64      1470
weighted avg       0.65      0.63      0.64      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6789115646258503
Confusion Matrix of Random Forest is:
 [[135   6   2   9  35  20   3]
 [  2 134   5  22  25  18   4]
 [  4   8 173  12   5   8   0]
 [ 10   7   3 138  30  20   2]
 [ 22  17   0  35 124   9   3]
 [ 13  11   1  23   4 108  50]
 [  0   2   0   1   0  21 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.73      0.64      0.68       210
           2       0.72      0.64      0.68       210
           3       0.94      0.82      0.88       210
           4       0.57      0.66      0.61       210
           5       0.56      0.59      0.57       210
           6       0.53      0.51      0.52       210
           7       0.75      0.89      0.81       210

    accuracy                           0.68      1470
   macro avg       0.69      0.68      0.68      1470
weighted avg       0.69      0.68      0.68      1470

Random Forest with 16 max_dept

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_vbert.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7244897959183674
Confusion Matrix of Logistic Regression is:
 [[165   1   1   2  25  15   1]
 [  3 157   2  14  15  18   1]
 [  0   5 191  10   2   2   0]
 [  5  16   5 136  30  15   3]
 [ 34  16   1  30 118   5   6]
 [  8  15   6  23   7 119  32]
 [  0   0   0   1   3  27 179]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.77      0.79      0.78       210
           2       0.75      0.75      0.75       210
           3       0.93      0.91      0.92       210
           4       0.63      0.65      0.64       210
           5       0.59      0.56      0.58       210
           6       0.59      0.57      0.58       210
           7       0.81      0.85      0.83       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7217687074829932
Confusion Matrix of SVM is:
 [[167   2   1   2  27  11   0]
 [  3 162   6  10  13  16   0]
 [  0   8 194   7   0   1   0]
 [  4  18   9 141  21  14   3]
 [ 42  15   2  26 116   5   4]
 [ 13  21   6  28   5 104  33]
 [  4   0   0   1   3  25 177]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.72      0.80      0.75       210
           2       0.72      0.77      0.74       210
           3       0.89      0.92      0.91       210
           4       0.66      0.67      0.66       210
           5       0.63      0.55      0.59       210
           6       0.59      0.50      0.54       210
           7       0.82      0.84      0.83       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3619047619047619
Confusion Matrix of Decision Tree is:
 [[  0   0   1   5 183   0  21]
 [  0   0  12  34 121   0  43]
 [  0   0 132  42  29   0   7]
 [  0   0   9  36 138   0  27]
 [  0   0   1   3 189   0  17]
 [  0   0   4  23  76   0 107]
 [  0   0   1   2  32   0 175]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.82      0.63      0.71       210
           4       0.25      0.17      0.20       210
           5       0.25      0.90      0.39       210
           6       0.00      0.00      0.00       210
           7       0.44      0.83      0.58       210

    accuracy                           0.36      1470
   macro avg       0.25      0.36      0.27      1470
weighted avg       0.25      0.36      0.27      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.43537414965986393
Confusion Matrix of Decision Tree is:
 [[ 67   4   1   1 116  13   8]
 [  4  36   7   3 117  30  13]
 [  0  41 127   6  29   5   2]
 [  8  18   4  23 130   8  19]
 [ 11   1   1   2 178   7  10]
 [  2  10   4  13  74  51  56]
 [  0   1   1   1  32  17 158]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.73      0.32      0.44       210
           2       0.32      0.17      0.22       210
           3       0.88      0.60      0.72       210
           4       0.47      0.11      0.18       210
           5       0.26      0.85      0.40       210
           6       0.39      0.24      0.30       210
           7       0.59      0.75      0.66       210

    accuracy                           0.44      1470
   macro avg       0.52      0.44      0.42      1470
weighted avg       0.52      0.44      0.42      1470

Decision Tree with 4 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5095238095238095
Confusion Matrix of Decision Tree is:
 [[125   9   6  11  46   7   6]
 [ 16 114  10  12  30  22   6]
 [  3  18 155  16   7   9   2]
 [ 22  27  10  68  51  22  10]
 [ 49  13   3  25  98  11  11]
 [ 24  27  13  28  12  57  49]
 [ 10   5   2  13   8  40 132]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.60      0.54       210
           2       0.54      0.54      0.54       210
           3       0.78      0.74      0.76       210
           4       0.39      0.32      0.36       210
           5       0.39      0.47      0.42       210
           6       0.34      0.27      0.30       210
           7       0.61      0.63      0.62       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 12 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5054421768707483
Confusion Matrix of Decision Tree is:
 [[113   8   3  22  48  11   5]
 [ 11 116   7  19  28  23   6]
 [  1  21 155  12  12   8   1]
 [ 27  32  13  70  43  17   8]
 [ 36  20   3  30  96  11  14]
 [ 21  16  15  36  17  63  42]
 [  6  10   1  11  10  42 130]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.54      0.53       210
           2       0.52      0.55      0.54       210
           3       0.79      0.74      0.76       210
           4       0.35      0.33      0.34       210
           5       0.38      0.46      0.41       210
           6       0.36      0.30      0.33       210
           7       0.63      0.62      0.62       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 20 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.6659863945578232
Confusion Matrix of Random Forest is:
 [[121   8   4   5  47  13  12]
 [  1 141   9  15  23  11  10]
 [  0  24 173   3   4   5   1]
 [  2  13   8 119  45  11  12]
 [  3  18   0  14 159   4  12]
 [  3  20  10  12  15  72  78]
 [  0   3   0   1   4   8 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.93      0.58      0.71       210
           2       0.62      0.67      0.65       210
           3       0.85      0.82      0.84       210
           4       0.70      0.57      0.63       210
           5       0.54      0.76      0.63       210
           6       0.58      0.34      0.43       210
           7       0.61      0.92      0.73       210

    accuracy                           0.67      1470
   macro avg       0.69      0.67      0.66      1470
weighted avg       0.69      0.67      0.66      1470

Random Forest with 7 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6959183673469388
Confusion Matrix of Random Forest is:
 [[131   4   3   5  42  20   5]
 [  1 147   8  14  19  17   4]
 [  0  11 184   6   4   5   0]
 [  2   9   7 127  42  15   8]
 [ 10  17   1  18 147   9   8]
 [  5  14   7  23   6 100  55]
 [  1   1   0   2   2  17 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.87      0.62      0.73       210
           2       0.72      0.70      0.71       210
           3       0.88      0.88      0.88       210
           4       0.65      0.60      0.63       210
           5       0.56      0.70      0.62       210
           6       0.55      0.48      0.51       210
           7       0.70      0.89      0.78       210

    accuracy                           0.70      1470
   macro avg       0.70      0.70      0.69      1470
weighted avg       0.70      0.70      0.69      1470

Random Forest with 15 max_dept

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//gpt_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7272108843537415
Confusion Matrix of Logistic Regression is:
 [[158   4   1   3  24  17   3]
 [  3 156   5  17  12  17   0]
 [  2   3 197   1   2   5   0]
 [  3  13   7 146  18  20   3]
 [ 30  12   1  30 128   6   3]
 [  9  17  11  24  11 106  32]
 [  4   1   0   3   3  21 178]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.76      0.75      0.75       210
           2       0.76      0.74      0.75       210
           3       0.89      0.94      0.91       210
           4       0.65      0.70      0.67       210
           5       0.65      0.61      0.63       210
           6       0.55      0.50      0.53       210
           7       0.81      0.85      0.83       210

    accuracy                           0.73      1470
   macro avg       0.72      0.73      0.72      1470
weighted avg       0.72      0.73      0.72      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5714285714285714
Confusion Matrix of Bernoulli Naive Bayes is:
 [[108   3  14   6  27  35  17]
 [  0 121  18   8  27  27   9]
 [ 21  11 150  16   0  12   0]
 [  7  14   8  88  32  32  29]
 [ 19  12   1  14 125  14  25]
 [ 15  19   7  22   7  77  63]
 [  0   7   0   1   3  28 171]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.64      0.51      0.57       210
           2       0.65      0.58      0.61       210
           3       0.76      0.71      0.74       210
           4       0.57      0.42      0.48       210
           5       0.57      0.60      0.58       210
           6       0.34      0.37      0.35       210
           7       0.54      0.81      0.65       210

    accuracy                           0.57      1470
   macro avg       0.58      0.57      0.57      1470
weighted avg       0.58      0.57      0.57      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3326530612244898
Confusion Matrix of Decision Tree is:
 [[  0   0  24   0 140   0  46]
 [  0   0  11   0 154   0  45]
 [  0   0 152   0  42   0  16]
 [  0   0  17   0 123   0  70]
 [  0   0   3   0 170   0  37]
 [  0   0  19   0  91   0 100]
 [  0   0  11   0  32   0 167]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.64      0.72      0.68       210
           4       0.00      0.00      0.00       210
           5       0.23      0.81      0.35       210
           6       0.00      0.00      0.00       210
           7       0.35      0.80      0.48       210

    accuracy                           0.33      1470
   macro avg       0.17      0.33      0.22      1470
weighted avg       0.17      0.33      0.22      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.408843537414966
Confusion Matrix of Decision Tree is:
 [[106  50   8   0   0  23  23]
 [  3 152   9   1   0  33  12]
 [  3  40 143   8   0  14   2]
 [ 10 113  15   2   0  40  30]
 [ 26 145   1   1   0  16  21]
 [ 17  78  14   1   0  65  35]
 [ 14  27   2   0   0  34 133]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.59      0.50      0.54       210
           2       0.25      0.72      0.37       210
           3       0.74      0.68      0.71       210
           4       0.15      0.01      0.02       210
           5       0.00      0.00      0.00       210
           6       0.29      0.31      0.30       210
           7       0.52      0.63      0.57       210

    accuracy                           0.41      1470
   macro avg       0.36      0.41      0.36      1470
weighted avg       0.36      0.41      0.36      1470

Decision Tree with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.45918367346938777
Confusion Matrix of Decision Tree is:
 [[105  15   5   8  40  18  19]
 [  6 105   5   3  61  20  10]
 [ 16  30 125  13  24   2   0]
 [ 10  32  10  23  96  25  14]
 [ 26  20   0   5 127  15  17]
 [ 15  28  12   5  61  55  34]
 [  5   3   1   8  25  33 135]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.57      0.50      0.53       210
           2       0.45      0.50      0.47       210
           3       0.79      0.60      0.68       210
           4       0.35      0.11      0.17       210
           5       0.29      0.60      0.39       210
           6       0.33      0.26      0.29       210
           7       0.59      0.64      0.62       210

    accuracy                           0.46      1470
   macro avg       0.48      0.46      0.45      1470
weighted avg       0.48      0.46      0.45      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5054421768707483
Confusion Matrix of Decision Tree is:
 [[116  11   2  14  28  28  11]
 [ 20 106   6  18  27  25   8]
 [  6  14 153  17   4  15   1]
 [ 26  21  13  70  35  31  14]
 [ 34  16   1  34 106  13   6]
 [ 15  26  11  35  25  62  36]
 [ 19   2   2   8  14  35 130]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.55      0.52       210
           2       0.54      0.50      0.52       210
           3       0.81      0.73      0.77       210
           4       0.36      0.33      0.34       210
           5       0.44      0.50      0.47       210
           6       0.30      0.30      0.30       210
           7       0.63      0.62      0.62       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5
Confusion Matrix of Decision Tree is:
 [[121  10   4  13  28  28   6]
 [ 14 107  11  19  26  24   9]
 [  4  22 154  14   4  11   1]
 [ 17  30  11  74  34  26  18]
 [ 41  19   3  31  92  17   7]
 [ 20  26  12  33  24  60  35]
 [ 11   4   2  16  15  35 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.58      0.55       210
           2       0.49      0.51      0.50       210
           3       0.78      0.73      0.76       210
           4       0.37      0.35      0.36       210
           5       0.41      0.44      0.42       210
           6       0.30      0.29      0.29       210
           7       0.63      0.60      0.62       210

    accuracy                           0.50      1470
   macro avg       0.50      0.50      0.50      1470
weighted avg       0.50      0.50      0.50      1470

Accuracy of Decision Tree after Standard Scal

Accuracy of Random Forest after Standard Scaling is: 0.6666666666666666
Confusion Matrix of Random Forest is:
 [[129   3   1   1  29  29  18]
 [  2 145   5  11  21  19   7]
 [  0   3 186  10   4   6   1]
 [  1   7   6 115  31  34  16]
 [ 18  12   1  17 127  17  18]
 [  6  17   5  24   6  83  69]
 [  0   0   0   2   2  11 195]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.83      0.61      0.70       210
           2       0.78      0.69      0.73       210
           3       0.91      0.89      0.90       210
           4       0.64      0.55      0.59       210
           5       0.58      0.60      0.59       210
           6       0.42      0.40      0.41       210
           7       0.60      0.93      0.73       210

    accuracy                           0.67      1470
   macro avg       0.68      0.67      0.66      1470
weighted avg       0.68      0.67      0.66      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6965986394557823
Confusion Matrix of Random Forest is:
 [[132   2   2   4  31  29  10]
 [  5 155   4  10  17  15   4]
 [  0   2 192   8   4   4   0]
 [  2   8   3 137  23  27  10]
 [ 20  17   0  26 127  10  10]
 [ 12  12   6  23   6  91  60]
 [  0   1   0   0   3  16 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.77      0.63      0.69       210
           2       0.79      0.74      0.76       210
           3       0.93      0.91      0.92       210
           4       0.66      0.65      0.66       210
           5       0.60      0.60      0.60       210
           6       0.47      0.43      0.45       210
           7       0.67      0.90      0.77       210

    accuracy                           0.70      1470
   macro avg       0.70      0.70      0.69      1470
weighted avg       0.70      0.70      0.69      1470

Random Forest with 16 max_dept

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//xlm_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7476190476190476
Confusion Matrix of Logistic Regression is:
 [[161   1   2   4  31   8   3]
 [  4 175   2   6  11  11   1]
 [  0   4 200   1   0   5   0]
 [  5  16   2 143  20  21   3]
 [ 34  16   1  22 130   3   4]
 [ 13  10   8  29  12 118  20]
 [  0   1   1   4   2  30 172]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.74      0.77      0.75       210
           2       0.78      0.83      0.81       210
           3       0.93      0.95      0.94       210
           4       0.68      0.68      0.68       210
           5       0.63      0.62      0.62       210
           6       0.60      0.56      0.58       210
           7       0.85      0.82      0.83       210

    accuracy                           0.75      1470
   macro avg       0.75      0.75      0.75      1470
weighted avg       0.75      0.75      0.75      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.6108843537414966
Confusion Matrix of Bernoulli Naive Bayes is:
 [[132   1   1   6  37  26   7]
 [  2 126  18  10  24  20  10]
 [  6   7 163  14   3  17   0]
 [  3  19  13  75  41  42  17]
 [ 29  18   3  12 124   4  20]
 [ 11  11  10  15   5  97  61]
 [  0   0   0   1   3  25 181]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.72      0.63      0.67       210
           2       0.69      0.60      0.64       210
           3       0.78      0.78      0.78       210
           4       0.56      0.36      0.44       210
           5       0.52      0.59      0.55       210
           6       0.42      0.46      0.44       210
           7       0.61      0.86      0.72       210

    accuracy                           0.61      1470
   macro avg       0.62      0.61      0.61      1470
weighted avg       0.62      0.61      0.61      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3448979591836735
Confusion Matrix of Decision Tree is:
 [[  0   0   0   0 183   0  27]
 [  0   0   2   0 187   0  21]
 [  0   0 120   0  72   0  18]
 [  0   0   2   2 168   0  38]
 [  0   0   0   0 197   0  13]
 [  0   0   2   0  74   0 134]
 [  0   0   2   0  20   0 188]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.94      0.57      0.71       210
           4       1.00      0.01      0.02       210
           5       0.22      0.94      0.35       210
           6       0.00      0.00      0.00       210
           7       0.43      0.90      0.58       210

    accuracy                           0.34      1470
   macro avg       0.37      0.34      0.24      1470
weighted avg       0.37      0.34      0.24      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.46258503401360546
Confusion Matrix of Decision Tree is:
 [[160  23   0   0   0  10  17]
 [ 18 169   2   0   0  10  11]
 [  7  65 120   0   0  16   2]
 [ 56 112   1   2   0  15  24]
 [131  66   0   0   0   1  12]
 [ 42  32   2   0   0  49  85]
 [ 18   2   0   0   0  10 180]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.76      0.50       210
           2       0.36      0.80      0.50       210
           3       0.96      0.57      0.72       210
           4       1.00      0.01      0.02       210
           5       0.00      0.00      0.00       210
           6       0.44      0.23      0.31       210
           7       0.54      0.86      0.67       210

    accuracy                           0.46      1470
   macro avg       0.53      0.46      0.39      1470
weighted avg       0.53      0.46      0.39      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.5149659863945578
Confusion Matrix of Decision Tree is:
 [[ 96  10   0  15  64  19   6]
 [  1 140   1  35  17  15   1]
 [  0  18 120  55   7   9   1]
 [  1  43   1  72  55  33   5]
 [ 15  35   0  31 116   9   4]
 [  4  31   0  13  38  86  38]
 [  0   3   0   1  18  61 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.82      0.46      0.59       210
           2       0.50      0.67      0.57       210
           3       0.98      0.57      0.72       210
           4       0.32      0.34      0.33       210
           5       0.37      0.55      0.44       210
           6       0.37      0.41      0.39       210
           7       0.70      0.60      0.65       210

    accuracy                           0.51      1470
   macro avg       0.58      0.51      0.53      1470
weighted avg       0.58      0.51      0.53      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.5727891156462585
Confusion Matrix of Decision Tree is:
 [[122   6   1  12  48  14   7]
 [  8 118  12  29  20  19   4]
 [  4   5 180  15   3   3   0]
 [ 16  17   4  96  41  23  13]
 [ 42  33   0  22  98  11   4]
 [ 21  19   6  29   8  82  45]
 [  7   6   4   9   3  35 146]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.58      0.57       210
           2       0.58      0.56      0.57       210
           3       0.87      0.86      0.86       210
           4       0.45      0.46      0.45       210
           5       0.44      0.47      0.45       210
           6       0.44      0.39      0.41       210
           7       0.67      0.70      0.68       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.57      1470
weighted avg       0.57      0.57      0.57      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.5598639455782313
Confusion Matrix of Decision Tree is:
 [[118   6   2  16  45  14   9]
 [  7 116  12  29  23  20   3]
 [  2   7 177   9   6   7   2]
 [ 22  12  10  94  32  28  12]
 [ 47  25   1  24  92  17   4]
 [ 16  18   9  30   8  79  50]
 [  7   5   0   8   6  37 147]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.54      0.56      0.55       210
           2       0.61      0.55      0.58       210
           3       0.84      0.84      0.84       210
           4       0.45      0.45      0.45       210
           5       0.43      0.44      0.44       210
           6       0.39      0.38      0.38       210
           7       0.65      0.70      0.67       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Accuracy of Decision Tree afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after Standard Scaling is: 0.5414965986394558
Confusion Matrix of Random Forest is:
 [[139   2   1   0  31   6  31]
 [  2 139   6   0  34   4  25]
 [  1  14 175   0   8   6   6]
 [  6  38  27   2  73   8  56]
 [ 41  20   2   0 122   1  24]
 [ 13  10  14   1  18  11 143]
 [  0   0   0   0   2   0 208]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.66      0.67       210
           2       0.62      0.66      0.64       210
           3       0.78      0.83      0.80       210
           4       0.67      0.01      0.02       210
           5       0.42      0.58      0.49       210
           6       0.31      0.05      0.09       210
           7       0.42      0.99      0.59       210

    accuracy                           0.54      1470
   macro avg       0.56      0.54      0.47      1470
weighted avg       0.56      0.54      0.47      1470

Random Forest with 3 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.7163265306122449
Confusion Matrix of Random Forest is:
 [[139   2   1   6  38  19   5]
 [  1 165   2   9  12  18   3]
 [  0   7 184   9   4   6   0]
 [  1  11   3 120  31  35   9]
 [ 12  18   0  15 150   4  11]
 [  5  12   3  18   8 112  52]
 [  0   0   0   0   2  25 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.88      0.66      0.76       210
           2       0.77      0.79      0.78       210
           3       0.95      0.88      0.91       210
           4       0.68      0.57      0.62       210
           5       0.61      0.71      0.66       210
           6       0.51      0.53      0.52       210
           7       0.70      0.87      0.77       210

    accuracy                           0.72      1470
   macro avg       0.73      0.72      0.72      1470
weighted avg       0.73      0.72      0.72      1470

Random Forest with 11 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.726530612244898
Confusion Matrix of Random Forest is:
 [[146   1   1   8  30  19   5]
 [  4 165   1   9  11  16   4]
 [  0   4 183  11   4   8   0]
 [  1   9   2 127  31  31   9]
 [ 17  17   0  17 145   3  11]
 [  6   5   3  24   8 118  46]
 [  0   0   0   0   2  24 184]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.84      0.70      0.76       210
           2       0.82      0.79      0.80       210
           3       0.96      0.87      0.91       210
           4       0.65      0.60      0.63       210
           5       0.63      0.69      0.66       210
           6       0.54      0.56      0.55       210
           7       0.71      0.88      0.78       210

    accuracy                           0.73      1470
   macro avg       0.74      0.73      0.73      1470
weighted avg       0.74      0.73      0.73      1470

Random Forest with 19 max_depth

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//bert_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6333333333333333
Confusion Matrix of Logistic Regression is:
 [[143   5   1  10  35  11   5]
 [  9 134   9  16  20  17   5]
 [  2  13 174  13   1   6   1]
 [  4  20   6 112  25  34   9]
 [ 37  18   3  23 109  15   5]
 [ 16  18   5  31  11  96  33]
 [  4   5   1   3   2  32 163]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.67      0.68      0.67       210
           2       0.63      0.64      0.63       210
           3       0.87      0.83      0.85       210
           4       0.54      0.53      0.54       210
           5       0.54      0.52      0.53       210
           6       0.45      0.46      0.46       210
           7       0.74      0.78      0.76       210

    accuracy                           0.63      1470
   macro avg       0.63      0.63      0.63      1470
weighted avg       0.63      0.63      0.63      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.6224489795918368
Confusion Matrix of SVM is:
 [[143   6   3  12  31  10   5]
 [ 12 143  12  19  11  11   2]
 [  4  15 177   7   1   4   2]
 [ 12  26   8 112  17  27   8]
 [ 46  21   2  25  96  13   7]
 [ 16  19   9  36  12  85  33]
 [  3   5   0   4   7  32 159]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.61      0.68      0.64       210
           2       0.61      0.68      0.64       210
           3       0.84      0.84      0.84       210
           4       0.52      0.53      0.53       210
           5       0.55      0.46      0.50       210
           6       0.47      0.40      0.43       210
           7       0.74      0.76      0.75       210

    accuracy                           0.62      1470
   macro avg       0.62      0.62      0.62      1470
weighted avg       0.62      0.62      0.62      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.29863945578231293
Confusion Matrix of Decision Tree is:
 [[ 18 165   0   0   0   0  27]
 [  6 176   3   0   0   0  25]
 [  0 114  92   0   0   0   4]
 [  1 150   0   0   0   0  59]
 [  0 157   0   0   0   0  53]
 [  4 134   0   0   0   0  72]
 [  0  57   0   0   0   0 153]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.62      0.09      0.15       210
           2       0.18      0.84      0.30       210
           3       0.97      0.44      0.60       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.39      0.73      0.51       210

    accuracy                           0.30      1470
   macro avg       0.31      0.30      0.22      1470
weighted avg       0.31      0.30      0.22      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.32857142857142857
Confusion Matrix of Decision Tree is:
 [[ 14  62   0   0 103   4  27]
 [  2  86   3   0  90   4  25]
 [  0  64  91   0  51   0   4]
 [  0  44   0   0 106   1  59]
 [  0  21   0   0 136   0  53]
 [  1  50   0   0  84   3  72]
 [  0  16   0   0  41   0 153]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.82      0.07      0.12       210
           2       0.25      0.41      0.31       210
           3       0.97      0.43      0.60       210
           4       0.00      0.00      0.00       210
           5       0.22      0.65      0.33       210
           6       0.25      0.01      0.03       210
           7       0.39      0.73      0.51       210

    accuracy                           0.33      1470
   macro avg       0.41      0.33      0.27      1470
weighted avg       0.41      0.33      0.27      1470

Decision Tree with 4 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3585034013605442
Confusion Matrix of Decision Tree is:
 [[ 62  16   0  63  46   2  21]
 [ 16  75   3  68  43   1   4]
 [ 19  45  91  23  30   0   2]
 [ 13  32   0  73  65   0  27]
 [  7  14   0  66  97   0  26]
 [ 13  39   0  82  26   2  48]
 [  5  11   0  56  11   0 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.30      0.36       210
           2       0.32      0.36      0.34       210
           3       0.97      0.43      0.60       210
           4       0.17      0.35      0.23       210
           5       0.31      0.46      0.37       210
           6       0.40      0.01      0.02       210
           7       0.50      0.60      0.55       210

    accuracy                           0.36      1470
   macro avg       0.45      0.36      0.35      1470
weighted avg       0.45      0.36      0.35      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.39931972789115644
Confusion Matrix of Decision Tree is:
 [[ 85  17  13  23  30  29  13]
 [ 23  81  20  24  11  43   8]
 [  7  21 152  13   7   8   2]
 [ 22  29  14  51  43  32  19]
 [ 37  30   8  39  58  26  12]
 [ 28  29  10  33  15  60  35]
 [ 17  15   1  17  16  44 100]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.39      0.40      0.40       210
           2       0.36      0.39      0.38       210
           3       0.70      0.72      0.71       210
           4       0.26      0.24      0.25       210
           5       0.32      0.28      0.30       210
           6       0.25      0.29      0.27       210
           7       0.53      0.48      0.50       210

    accuracy                           0.40      1470
   macro avg       0.40      0.40      0.40      1470
weighted avg       0.40      0.40      0.40      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.39319727891156464
Confusion Matrix of Decision Tree is:
 [[ 85  18   9  15  30  29  24]
 [ 23  80  27  25  15  31   9]
 [ 12  11 152  13  11   9   2]
 [ 27  16  18  49  52  26  22]
 [ 38  24   9  33  66  26  14]
 [ 26  25  10  38  16  52  43]
 [ 25  11   1  15  24  40  94]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.36      0.40      0.38       210
           2       0.43      0.38      0.41       210
           3       0.67      0.72      0.70       210
           4       0.26      0.23      0.25       210
           5       0.31      0.31      0.31       210
           6       0.24      0.25      0.25       210
           7       0.45      0.45      0.45       210

    accuracy                           0.39      1470
   macro avg       0.39      0.39      0.39      1470
weighted avg       0.39      0.39      0.39      1470

Accuracy of Decision Tree aft

Accuracy of Random Forest after Standard Scaling is: 0.5326530612244897
Confusion Matrix of Random Forest is:
 [[ 95  12   8  12  42  10  31]
 [  4 129   8  13  30  15  11]
 [  3  42 152   5   3   4   1]
 [  4  28   5  64  50  14  45]
 [ 10  22   1  10 133   4  30]
 [  7  39   5  19  23  33  84]
 [  2   7   0   9   8   7 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.76      0.45      0.57       210
           2       0.46      0.61      0.53       210
           3       0.85      0.72      0.78       210
           4       0.48      0.30      0.37       210
           5       0.46      0.63      0.53       210
           6       0.38      0.16      0.22       210
           7       0.47      0.84      0.60       210

    accuracy                           0.53      1470
   macro avg       0.55      0.53      0.52      1470
weighted avg       0.55      0.53      0.52      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.5829931972789115
Confusion Matrix of Random Forest is:
 [[114  10   7   8  43  11  17]
 [  6 118  14  19  19  29   5]
 [  1  34 161   6   1   6   1]
 [  5  22   6  87  42  26  22]
 [ 14  16   1  13 138   7  21]
 [ 11  23   8  23  16  65  64]
 [  3   4   1   3   6  19 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.54      0.63       210
           2       0.52      0.56      0.54       210
           3       0.81      0.77      0.79       210
           4       0.55      0.41      0.47       210
           5       0.52      0.66      0.58       210
           6       0.40      0.31      0.35       210
           7       0.57      0.83      0.68       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.58      1470
weighted avg       0.59      0.58      0.58      1470

Random Forest with 16 max_dept

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.6333333333333333
Confusion Matrix of Logistic Regression is:
 [[133   8   7   8  32  19   3]
 [ 11 144   5   6  21  20   3]
 [  6  12 175   7   4   6   0]
 [ 16  15   5 123  33  15   3]
 [ 37  17   3  31 101  12   9]
 [ 17  16  14  24   9  98  32]
 [  5   3   0   2   4  39 157]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.59      0.63      0.61       210
           2       0.67      0.69      0.68       210
           3       0.84      0.83      0.84       210
           4       0.61      0.59      0.60       210
           5       0.50      0.48      0.49       210
           6       0.47      0.47      0.47       210
           7       0.76      0.75      0.75       210

    accuracy                           0.63      1470
   macro avg       0.63      0.63      0.63      1470
weighted avg       0.63      0.63      0.63      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.43673469387755104
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 54  11   2  10  68  51  14]
 [  3  71   3  12  47  62  12]
 [ 17   8  79  32  33  39   2]
 [ 10  10   1  41 105  31  12]
 [  7  12   0   8 147  25  11]
 [  9  13   3  18  17  70  80]
 [  1   5   0   0   2  22 180]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.53      0.26      0.35       210
           2       0.55      0.34      0.42       210
           3       0.90      0.38      0.53       210
           4       0.34      0.20      0.25       210
           5       0.35      0.70      0.47       210
           6       0.23      0.33      0.27       210
           7       0.58      0.86      0.69       210

    accuracy                           0.44      1470
   macro avg       0.50      0.44      0.43      1470
weighted avg       0.50      0.44      0.43      1470

Worki

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.29523809523809524
Confusion Matrix of Decision Tree is:
 [[  0   5  17   0 178   0  10]
 [  0  10   8   0 183   0   9]
 [  0  28  87   0  94   0   1]
 [  0   1   5   0 192   0  12]
 [  0   0   0   0 200   0  10]
 [  0   7   7   0 136   0  60]
 [  0   0   0   0  73   0 137]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.20      0.05      0.08       210
           3       0.70      0.41      0.52       210
           4       0.00      0.00      0.00       210
           5       0.19      0.95      0.32       210
           6       0.00      0.00      0.00       210
           7       0.57      0.65      0.61       210

    accuracy                           0.30      1470
   macro avg       0.24      0.30      0.22      1470
weighted avg       0.24      0.30      0.22      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3585034013605442
Confusion Matrix of Decision Tree is:
 [[  5  79  16   0  99   1  10]
 [  3 130   7   0  60   1   9]
 [ 27  48  87   0  47   0   1]
 [  1  40   5   0 152   0  12]
 [  0  32   0   0 168   0  10]
 [  6 108   7   0  29   0  60]
 [  0  60   0   0  13   0 137]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.12      0.02      0.04       210
           2       0.26      0.62      0.37       210
           3       0.71      0.41      0.52       210
           4       0.00      0.00      0.00       210
           5       0.30      0.80      0.43       210
           6       0.00      0.00      0.00       210
           7       0.57      0.65      0.61       210

    accuracy                           0.36      1470
   macro avg       0.28      0.36      0.28      1470
weighted avg       0.28      0.36      0.28      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3979591836734694
Confusion Matrix of Decision Tree is:
 [[  3  44  16   6  95  37   9]
 [  0  98   7   7  55  35   8]
 [  0  29  87   7  40  46   1]
 [  0  11   5  25 131  30   8]
 [  0  13   0   9 163  19   6]
 [  1  37   7  14  25  81  45]
 [  0  18   0   6  13  45 128]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.75      0.01      0.03       210
           2       0.39      0.47      0.43       210
           3       0.71      0.41      0.52       210
           4       0.34      0.12      0.18       210
           5       0.31      0.78      0.45       210
           6       0.28      0.39      0.32       210
           7       0.62      0.61      0.62       210

    accuracy                           0.40      1470
   macro avg       0.49      0.40      0.36      1470
weighted avg       0.49      0.40      0.36      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.4204081632653061
Confusion Matrix of Decision Tree is:
 [[ 76  20  18  29  37  19  11]
 [ 26  93  15  12  30  27   7]
 [ 15  22 117  10   6  40   0]
 [ 22  19   8  63  71  20   7]
 [ 35  27   5  22 100  10  11]
 [ 31  22  17  24  20  53  43]
 [ 16  12   0   7   8  51 116]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.36      0.35       210
           2       0.43      0.44      0.44       210
           3       0.65      0.56      0.60       210
           4       0.38      0.30      0.33       210
           5       0.37      0.48      0.41       210
           6       0.24      0.25      0.25       210
           7       0.59      0.55      0.57       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.42      1470
weighted avg       0.43      0.42      0.42      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.42653061224489797
Confusion Matrix of Decision Tree is:
 [[ 81  17  20  29  32  18  13]
 [ 24  83  16  16  36  27   8]
 [ 14  15 128   7  10  36   0]
 [ 23  18   9  69  57  23  11]
 [ 31  28   4  25  87  24  11]
 [ 30  21  14  23  22  58  42]
 [ 10  11   0   5  10  53 121]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.39      0.38       210
           2       0.43      0.40      0.41       210
           3       0.67      0.61      0.64       210
           4       0.40      0.33      0.36       210
           5       0.34      0.41      0.38       210
           6       0.24      0.28      0.26       210
           7       0.59      0.58      0.58       210

    accuracy                           0.43      1470
   macro avg       0.44      0.43      0.43      1470
weighted avg       0.44      0.43      0.43      1470

Accuracy of Decision Tree aft

Accuracy of Random Forest after Standard Scaling is: 0.5904761904761905
Confusion Matrix of Random Forest is:
 [[ 93  16   5  21  42  24   9]
 [  1 132   9   9  25  23  11]
 [  9  16 154  11   6  14   0]
 [ 10  14   1  98  52  25  10]
 [  6  18   0  16 141  18  11]
 [ 12  22   9  13  14  66  74]
 [  0   4   1   0   3  18 184]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.71      0.44      0.55       210
           2       0.59      0.63      0.61       210
           3       0.86      0.73      0.79       210
           4       0.58      0.47      0.52       210
           5       0.50      0.67      0.57       210
           6       0.35      0.31      0.33       210
           7       0.62      0.88      0.72       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.58      1470
weighted avg       0.60      0.59      0.58      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6095238095238096
Confusion Matrix of Random Forest is:
 [[104  14   4  15  42  23   8]
 [  5 132   6  13  24  24   6]
 [  9   9 167   9   5  11   0]
 [ 14   9   4 104  45  26   8]
 [ 18  12   1  20 134  16   9]
 [ 12  14  10  17  12  79  66]
 [  2   2   1   1   3  25 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.63      0.50      0.56       210
           2       0.69      0.63      0.66       210
           3       0.87      0.80      0.83       210
           4       0.58      0.50      0.53       210
           5       0.51      0.64      0.56       210
           6       0.39      0.38      0.38       210
           7       0.64      0.84      0.73       210

    accuracy                           0.61      1470
   macro avg       0.62      0.61      0.61      1470
weighted avg       0.62      0.61      0.61      1470

Random Forest with 16 max_dept

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7108843537414966
Confusion Matrix of Logistic Regression is:
 [[145   4   1   8  35  15   2]
 [  6 152   1  13  21  15   2]
 [  1   6 190   1   4   8   0]
 [  6  16   3 141  21  19   4]
 [ 24   8   1  30 137   4   6]
 [ 13  14   5  23   9 115  31]
 [  6   2   0   2   4  31 165]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.72      0.69      0.71       210
           2       0.75      0.72      0.74       210
           3       0.95      0.90      0.92       210
           4       0.65      0.67      0.66       210
           5       0.59      0.65      0.62       210
           6       0.56      0.55      0.55       210
           7       0.79      0.79      0.79       210

    accuracy                           0.71      1470
   macro avg       0.71      0.71      0.71      1470
weighted avg       0.71      0.71      0.71      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.5204081632653061
Confusion Matrix of Bernoulli Naive Bayes is:
 [[111   5  18   8  26  12  30]
 [  0 122  24  18  16  16  14]
 [  1  16 167  22   1   2   1]
 [  3  16  15  81  33  16  46]
 [ 18  21   1  14 108   8  40]
 [  6  43  11  22  17  32  79]
 [  1  28   0  17   7  13 144]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.79      0.53      0.63       210
           2       0.49      0.58      0.53       210
           3       0.71      0.80      0.75       210
           4       0.45      0.39      0.41       210
           5       0.52      0.51      0.52       210
           6       0.32      0.15      0.21       210
           7       0.41      0.69      0.51       210

    accuracy                           0.52      1470
   macro avg       0.53      0.52      0.51      1470
weighted avg       0.53      0.52      0.51      1470

Workin

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.34625850340136055
Confusion Matrix of Decision Tree is:
 [[  0  32  14   0 131   0  33]
 [  0  38  18   0  80   0  74]
 [  0  19 138   0  48   0   5]
 [  0   5   5   0 142   0  58]
 [  0   3   0   0 153   0  54]
 [  0  12  11   0  82   0 105]
 [  0   0   0   0  30   0 180]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.35      0.18      0.24       210
           3       0.74      0.66      0.70       210
           4       0.00      0.00      0.00       210
           5       0.23      0.73      0.35       210
           6       0.00      0.00      0.00       210
           7       0.35      0.86      0.50       210

    accuracy                           0.35      1470
   macro avg       0.24      0.35      0.26      1470
weighted avg       0.24      0.35      0.26      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3816326530612245
Confusion Matrix of Decision Tree is:
 [[102  17   5  53   0  16  17]
 [ 11  41  11  73   0  58  16]
 [ 15  26 126  38   0   3   2]
 [ 31   5   4 112   0  41  17]
 [ 71   2   0  83   0  30  24]
 [ 26  11   8  60   0  45  60]
 [ 13   0   0  17   0  45 135]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.49      0.43       210
           2       0.40      0.20      0.26       210
           3       0.82      0.60      0.69       210
           4       0.26      0.53      0.35       210
           5       0.00      0.00      0.00       210
           6       0.19      0.21      0.20       210
           7       0.50      0.64      0.56       210

    accuracy                           0.38      1470
   macro avg       0.36      0.38      0.36      1470
weighted avg       0.36      0.38      0.36      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.4163265306122449
Confusion Matrix of Decision Tree is:
 [[ 73  46   6  15  35  33   2]
 [  3  97  13  27   6  56   8]
 [  5  39 137  15   9   4   1]
 [ 13  41   4  78  18  55   1]
 [ 13  59   0  28  57  45   8]
 [  2  30  10  39  21  83  25]
 [  0  16   0   4  13  90  87]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.67      0.35      0.46       210
           2       0.30      0.46      0.36       210
           3       0.81      0.65      0.72       210
           4       0.38      0.37      0.37       210
           5       0.36      0.27      0.31       210
           6       0.23      0.40      0.29       210
           7       0.66      0.41      0.51       210

    accuracy                           0.42      1470
   macro avg       0.48      0.42      0.43      1470
weighted avg       0.48      0.42      0.43      1470

Decision Tree with 5 max_depth

Accuracy of Decision Tree after Standard Scaling is: 0.4340136054421769
Confusion Matrix of Decision Tree is:
 [[100   8  12  24  35  23   8]
 [ 15  90  21  26  19  31   8]
 [  4  10  99  19   7  70   1]
 [ 21  22  11  84  30  28  14]
 [ 33  22   8  24  84  22  17]
 [ 24  19  11  34  28  59  35]
 [ 17   9   0  12  13  37 122]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.48      0.47       210
           2       0.50      0.43      0.46       210
           3       0.61      0.47      0.53       210
           4       0.38      0.40      0.39       210
           5       0.39      0.40      0.39       210
           6       0.22      0.28      0.25       210
           7       0.60      0.58      0.59       210

    accuracy                           0.43      1470
   macro avg       0.45      0.43      0.44      1470
weighted avg       0.45      0.43      0.44      1470

Decision Tree with 13 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.41768707482993195
Confusion Matrix of Decision Tree is:
 [[ 97   7  14  22  32  28  10]
 [ 14  90  19  25  17  31  14]
 [ 11  14  96  13   6  67   3]
 [ 19  20   8  83  35  33  12]
 [ 41  18  10  29  73  24  15]
 [ 23  21  12  32  29  57  36]
 [ 12   8   1  19  15  37 118]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.46      0.45       210
           2       0.51      0.43      0.46       210
           3       0.60      0.46      0.52       210
           4       0.37      0.40      0.38       210
           5       0.35      0.35      0.35       210
           6       0.21      0.27      0.23       210
           7       0.57      0.56      0.56       210

    accuracy                           0.42      1470
   macro avg       0.44      0.42      0.42      1470
weighted avg       0.44      0.42      0.42      1470

Accuracy of Decision Tree aft

Accuracy of Random Forest after Standard Scaling is: 0.6401360544217687
Confusion Matrix of Random Forest is:
 [[117   8   3  10  44  11  17]
 [  1 156   3  10  18  15   7]
 [  0  29 172   6   2   1   0]
 [  2  14   7 106  36  26  19]
 [ 11  20   0  17 135   6  21]
 [  7  27   5  28  11  58  74]
 [  0   3   0   1   3   6 197]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.85      0.56      0.67       210
           2       0.61      0.74      0.67       210
           3       0.91      0.82      0.86       210
           4       0.60      0.50      0.55       210
           5       0.54      0.64      0.59       210
           6       0.47      0.28      0.35       210
           7       0.59      0.94      0.72       210

    accuracy                           0.64      1470
   macro avg       0.65      0.64      0.63      1470
weighted avg       0.65      0.64      0.63      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6850340136054421
Confusion Matrix of Random Forest is:
 [[129   5   6  12  35  15   8]
 [  2 156   4  12  15  14   7]
 [  0  16 185   5   0   3   1]
 [  3  13   6 118  41  19  10]
 [ 17  16   0  18 141   9   9]
 [  7  16   5  29  10  86  57]
 [  1   0   0   2   4  11 192]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.81      0.61      0.70       210
           2       0.70      0.74      0.72       210
           3       0.90      0.88      0.89       210
           4       0.60      0.56      0.58       210
           5       0.57      0.67      0.62       210
           6       0.55      0.41      0.47       210
           7       0.68      0.91      0.78       210

    accuracy                           0.69      1470
   macro avg       0.69      0.69      0.68      1470
weighted avg       0.69      0.69      0.68      1470

Random Forest with 16 max_dept

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.7251700680272108
Confusion Matrix of Logistic Regression is:
 [[156   2   3   4  34   9   2]
 [  3 160   4  12  18  13   0]
 [  1   9 192   3   3   2   0]
 [  6  17   3 138  22  18   6]
 [ 38  11   1  24 126   6   4]
 [ 11  15   5  25   7 118  29]
 [  0   0   0   3   3  28 176]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.73      0.74      0.73       210
           2       0.75      0.76      0.75       210
           3       0.92      0.91      0.92       210
           4       0.66      0.66      0.66       210
           5       0.59      0.60      0.60       210
           6       0.61      0.56      0.58       210
           7       0.81      0.84      0.82       210

    accuracy                           0.73      1470
   macro avg       0.72      0.73      0.72      1470
weighted avg       0.72      0.73      0.72      1470

KNN with 3 N

Accuracy of SVM after Standard Scaling is: 0.7224489795918367
Confusion Matrix of SVM is:
 [[156   3   1   3  37  10   0]
 [  2 170   5  14  10   7   2]
 [  0   6 196   3   2   3   0]
 [  9  19   6 136  20  19   1]
 [ 45  14   1  21 123   4   2]
 [  8  20  14  23   5 113  27]
 [  1   0   0   5   2  34 168]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.74      0.72       210
           2       0.73      0.81      0.77       210
           3       0.88      0.93      0.91       210
           4       0.66      0.65      0.66       210
           5       0.62      0.59      0.60       210
           6       0.59      0.54      0.56       210
           7       0.84      0.80      0.82       210

    accuracy                           0.72      1470
   macro avg       0.72      0.72      0.72      1470
weighted avg       0.72      0.72      0.72      1470

Working on SVM Kernal: poly
Accuracy of SVM after Standard S

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.34217687074829933
Confusion Matrix of Decision Tree is:
 [[ 22   0   0   0 121   0  67]
 [ 13   0   4   0  98   0  95]
 [  9   0 121   0  35   0  45]
 [ 13   0   6   0 121   0  70]
 [  1   0   0   0 177   0  32]
 [  3   0   6   0  61   0 140]
 [  1   0   0   0  26   0 183]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.10      0.16       210
           2       0.00      0.00      0.00       210
           3       0.88      0.58      0.70       210
           4       0.00      0.00      0.00       210
           5       0.28      0.84      0.42       210
           6       0.00      0.00      0.00       210
           7       0.29      0.87      0.43       210

    accuracy                           0.34      1470
   macro avg       0.26      0.34      0.24      1470
weighted avg       0.26      0.34      0.24      1470

Decision Tree with 3 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3925170068027211
Confusion Matrix of Decision Tree is:
 [[ 22  74   0   0  98   0  16]
 [  1 138   3   1  40   0  27]
 [  3  72 118   3  11   0   3]
 [  2 113   2   4  64   0  25]
 [  0  60   0   0 133   0  17]
 [  1 107   3   3  15   0  81]
 [  0  39   0   0   9   0 162]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.76      0.10      0.18       210
           2       0.23      0.66      0.34       210
           3       0.94      0.56      0.70       210
           4       0.36      0.02      0.04       210
           5       0.36      0.63      0.46       210
           6       0.00      0.00      0.00       210
           7       0.49      0.77      0.60       210

    accuracy                           0.39      1470
   macro avg       0.45      0.39      0.33      1470
weighted avg       0.45      0.39      0.33      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.43673469387755104
Confusion Matrix of Decision Tree is:
 [[ 54   3   0  71  66  13   3]
 [  2  86   3  53  39  17  10]
 [  5  28 118  47   9   2   1]
 [  2  16   1 102  64  15  10]
 [  2   5   0  55 131   5  12]
 [  1  20   2  91  15  28  53]
 [  0   3   0  36   9  39 123]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.82      0.26      0.39       210
           2       0.53      0.41      0.46       210
           3       0.95      0.56      0.71       210
           4       0.22      0.49      0.31       210
           5       0.39      0.62      0.48       210
           6       0.24      0.13      0.17       210
           7       0.58      0.59      0.58       210

    accuracy                           0.44      1470
   macro avg       0.53      0.44      0.44      1470
weighted avg       0.53      0.44      0.44      1470

Decision Tree with 5 max_dept

Accuracy of Decision Tree after Standard Scaling is: 0.46870748299319726
Confusion Matrix of Decision Tree is:
 [[116  14   4  19  39  14   4]
 [ 21  88  18  22  27  22  12]
 [ 12  17 154   5   6  14   2]
 [ 24  20  19  64  45  28  10]
 [ 44   9   2  44  89  17   5]
 [ 26  20   9  25  26  61  43]
 [ 13  11   3  12   9  45 117]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.55      0.50       210
           2       0.49      0.42      0.45       210
           3       0.74      0.73      0.74       210
           4       0.34      0.30      0.32       210
           5       0.37      0.42      0.39       210
           6       0.30      0.29      0.30       210
           7       0.61      0.56      0.58       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Decision Tree with 13 max_dep

Accuracy of Decision Tree after Standard Scaling is: 0.4605442176870748
Confusion Matrix of Decision Tree is:
 [[108  17  10  15  34  13  13]
 [ 13  93  22  20  25  24  13]
 [  8  16 155   7   5  16   3]
 [ 13  24  12  70  46  35  10]
 [ 43  10   4  42  84  15  12]
 [ 19  26  12  40  17  51  45]
 [ 13  12   3  10  11  45 116]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.51      0.51       210
           2       0.47      0.44      0.46       210
           3       0.71      0.74      0.72       210
           4       0.34      0.33      0.34       210
           5       0.38      0.40      0.39       210
           6       0.26      0.24      0.25       210
           7       0.55      0.55      0.55       210

    accuracy                           0.46      1470
   macro avg       0.46      0.46      0.46      1470
weighted avg       0.46      0.46      0.46      1470

Accuracy of Decision Tree afte

Accuracy of Random Forest after Standard Scaling is: 0.6374149659863946
Confusion Matrix of Random Forest is:
 [[121   8   3   4  45  17  12]
 [  1 148   3  16  17  19   6]
 [  0  33 162   8   2   5   0]
 [  1  22   3 102  33  38  11]
 [  7  10   0  22 140  17  14]
 [  8  23   5  19  11  66  78]
 [  0   0   0   0   5   7 198]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.88      0.58      0.70       210
           2       0.61      0.70      0.65       210
           3       0.92      0.77      0.84       210
           4       0.60      0.49      0.54       210
           5       0.55      0.67      0.60       210
           6       0.39      0.31      0.35       210
           7       0.62      0.94      0.75       210

    accuracy                           0.64      1470
   macro avg       0.65      0.64      0.63      1470
weighted avg       0.65      0.64      0.63      1470

Random Forest with 8 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.6625850340136055
Confusion Matrix of Random Forest is:
 [[122   4   8  10  43  16   7]
 [  2 147   1  19  17  21   3]
 [  0  25 174   5   4   1   1]
 [  2  19   4 110  31  33  11]
 [ 13  12   0  21 144  12   8]
 [  8  15   6  25  11  83  62]
 [  0   0   0   0   4  12 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.83      0.58      0.68       210
           2       0.66      0.70      0.68       210
           3       0.90      0.83      0.86       210
           4       0.58      0.52      0.55       210
           5       0.57      0.69      0.62       210
           6       0.47      0.40      0.43       210
           7       0.68      0.92      0.78       210

    accuracy                           0.66      1470
   macro avg       0.67      0.66      0.66      1470
weighted avg       0.67      0.66      0.66      1470

Random Forest with 16 max_dept

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//xlm_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = standard_scaling(x_df,labels_df['Labels'])

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=2000)
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after Standard Scaling is: 0.5027210884353741
Confusion Matrix of Logistic Regression is:
 [[110  14   6  16  32  17  15]
 [ 16 107  11  22  29  22   3]
 [  3  12 172  13   4   3   3]
 [ 19  15   7  79  37  42  11]
 [ 35  25   2  43  77  17  11]
 [ 22  21   7  41  18  65  36]
 [  7   8   2  17  12  35 129]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.52      0.52      0.52       210
           2       0.53      0.51      0.52       210
           3       0.83      0.82      0.82       210
           4       0.34      0.38      0.36       210
           5       0.37      0.37      0.37       210
           6       0.32      0.31      0.32       210
           7       0.62      0.61      0.62       210

    accuracy                           0.50      1470
   macro avg       0.50      0.50      0.50      1470
weighted avg       0.50      0.50      0.50      1470

KNN with 3 N

Accuracy of Bernoulli Naive Bayes after Standard Scaling is: 0.38979591836734695
Confusion Matrix of Bernoulli Naive Bayes is:
 [[ 83  59   8  10  23   4  23]
 [ 15 115  26  14  21   7  12]
 [ 18  52 135   2   2   0   1]
 [ 24  58   6  39  31  19  33]
 [ 27  52   3  15  74   6  33]
 [ 23  65   1  26  27  19  49]
 [ 14  27   0  23  21  17 108]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.41      0.40      0.40       210
           2       0.27      0.55      0.36       210
           3       0.75      0.64      0.69       210
           4       0.30      0.19      0.23       210
           5       0.37      0.35      0.36       210
           6       0.26      0.09      0.13       210
           7       0.42      0.51      0.46       210

    accuracy                           0.39      1470
   macro avg       0.40      0.39      0.38      1470
weighted avg       0.40      0.39      0.38      1470

Worki

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.2578231292517007
Confusion Matrix of Decision Tree is:
 [[  0  77  13   0   0   0 120]
 [  0  89  15   0   0   0 106]
 [  0  62 111   0   0   0  37]
 [  0  55   3   0   0   0 152]
 [  0  52   3   0   0   0 155]
 [  0  56   5   0   0   0 149]
 [  0  29   2   0   0   0 179]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.21      0.42      0.28       210
           3       0.73      0.53      0.61       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.20      0.85      0.32       210

    accuracy                           0.26      1470
   macro avg       0.16      0.26      0.17      1470
weighted avg       0.16      0.26      0.17      1470

Decision Tree with 3 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.3034013605442177
Confusion Matrix of Decision Tree is:
 [[ 47  38   5  85   0   0  35]
 [ 12  77  15  81   0   0  25]
 [  6  56 111  30   0   0   7]
 [ 11  44   3 108   0   0  44]
 [ 21  31   3 101   0   0  54]
 [ 21  36   4  93   0   0  56]
 [ 14  15   2  76   0   0 103]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.36      0.22      0.27       210
           2       0.26      0.37      0.30       210
           3       0.78      0.53      0.63       210
           4       0.19      0.51      0.28       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.32      0.49      0.39       210

    accuracy                           0.30      1470
   macro avg       0.27      0.30      0.27      1470
weighted avg       0.27      0.30      0.27      1470

Decision Tree with 4 max_depth

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.31700680272108844
Confusion Matrix of Decision Tree is:
 [[ 49  39   3  68  16   0  35]
 [  4  83   9  80   9   0  25]
 [  1  59 108  30   5   0   7]
 [  3  47   0 106  10   0  44]
 [  6  34   0  99  17   0  54]
 [  7  37   3  92  15   0  56]
 [  0  17   0  76  14   0 103]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.23      0.35       210
           2       0.26      0.40      0.32       210
           3       0.88      0.51      0.65       210
           4       0.19      0.50      0.28       210
           5       0.20      0.08      0.11       210
           6       0.00      0.00      0.00       210
           7       0.32      0.49      0.39       210

    accuracy                           0.32      1470
   macro avg       0.36      0.32      0.30      1470
weighted avg       0.36      0.32      0.30      1470

Decision Tree with 5 max_dept

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after Standard Scaling is: 0.31360544217687075
Confusion Matrix of Decision Tree is:
 [[51 39  3 49 40  0 28]
 [ 7 77  8 55 39  1 23]
 [ 0 33 95 25 49  1  7]
 [ 2 41  4 68 58  0 37]
 [ 4 34  0 48 77  0 47]
 [ 3 38  8 56 56  0 49]
 [ 0 17  0 49 51  0 93]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.76      0.24      0.37       210
           2       0.28      0.37      0.31       210
           3       0.81      0.45      0.58       210
           4       0.19      0.32      0.24       210
           5       0.21      0.37      0.27       210
           6       0.00      0.00      0.00       210
           7       0.33      0.44      0.38       210

    accuracy                           0.31      1470
   macro avg       0.37      0.31      0.31      1470
weighted avg       0.37      0.31      0.31      1470

Decision Tree with 6 max_depth
Accuracy of Decision Tree after Standard Scalin

Accuracy of Decision Tree after Standard Scaling is: 0.31360544217687075
Confusion Matrix of Decision Tree is:
 [[81 18  7 33 28 27 16]
 [21 69  9 31 31 35 14]
 [16 26 99 11 10  9 39]
 [35 32  6 42 32 44 19]
 [34 18  7 46 53 29 23]
 [22 21  9 40 30 50 38]
 [25 15  1 35 24 43 67]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.39      0.36       210
           2       0.35      0.33      0.34       210
           3       0.72      0.47      0.57       210
           4       0.18      0.20      0.19       210
           5       0.25      0.25      0.25       210
           6       0.21      0.24      0.22       210
           7       0.31      0.32      0.31       210

    accuracy                           0.31      1470
   macro avg       0.34      0.31      0.32      1470
weighted avg       0.34      0.31      0.32      1470

Decision Tree with 15 max_depth
Accuracy of Decision Tree after Standard Scali

Accuracy of Random Forest after Standard Scaling is: 0.2857142857142857
Confusion Matrix of Random Forest is:
 [[ 25   8  24  11   6   5 131]
 [ 12  21  53   7   7   0 110]
 [  3  10 162   7   2   0  26]
 [  6  10  15  11   6   3 159]
 [  2   7  11   7   2   1 180]
 [ 10  10  20   8   3   7 152]
 [  4   1   1   5   5   2 192]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.12      0.18       210
           2       0.31      0.10      0.15       210
           3       0.57      0.77      0.65       210
           4       0.20      0.05      0.08       210
           5       0.06      0.01      0.02       210
           6       0.39      0.03      0.06       210
           7       0.20      0.91      0.33       210

    accuracy                           0.29      1470
   macro avg       0.31      0.29      0.21      1470
weighted avg       0.31      0.29      0.21      1470

Random Forest with 2 max_depth

Accuracy of Random Forest after Standard Scaling is: 0.4557823129251701
Confusion Matrix of Random Forest is:
 [[ 80  32   5  17  41  13  22]
 [  9 117   6  20  30  18  10]
 [  6  34 151   8   3   7   1]
 [  5  28   3  78  41  22  33]
 [ 12  32   3  34  96   6  27]
 [ 10  39   3  38  35  28  57]
 [  9   6   0  33  20  22 120]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.38      0.47       210
           2       0.41      0.56      0.47       210
           3       0.88      0.72      0.79       210
           4       0.34      0.37      0.36       210
           5       0.36      0.46      0.40       210
           6       0.24      0.13      0.17       210
           7       0.44      0.57      0.50       210

    accuracy                           0.46      1470
   macro avg       0.47      0.46      0.45      1470
weighted avg       0.47      0.46      0.45      1470

Random Forest with 10 max_dept

Accuracy of Random Forest after Standard Scaling is: 0.46598639455782315
Confusion Matrix of Random Forest is:
 [[ 88  29   9  13  39  16  16]
 [ 12 109  10  23  31  17   8]
 [  4  27 160   9   3   6   1]
 [ 12  27   4  78  37  23  29]
 [ 20  30   3  29  84  19  25]
 [ 19  37   4  36  31  40  43]
 [  6   2   0  30  19  27 126]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.42      0.47       210
           2       0.42      0.52      0.46       210
           3       0.84      0.76      0.80       210
           4       0.36      0.37      0.36       210
           5       0.34      0.40      0.37       210
           6       0.27      0.19      0.22       210
           7       0.51      0.60      0.55       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.46      1470
weighted avg       0.47      0.47      0.46      1470

Random Forest with 18 max_dep