In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()

In [2]:
# Reading labels
labels_df = pd.read_csv(pwd+"//Datasets//Kabita//Input//kabita_dataset_labels.csv")
labels_df

Unnamed: 0,kabita_labels
0,7
1,7
2,4
3,2
4,7
...,...
4895,1
4896,1
4897,1
4898,4


In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Data split for TFIDF data

In [4]:
# reading dataset
tfidf_500_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tfidf_500_vectors.csv",encoding_errors='ignore')
# Splitting the data
x_train,x_test,y_train,y_test = train_test_split(tfidf_500_df,labels_df['kabita_labels'],test_size=0.30,
                                                 random_state=21,stratify=labels_df['kabita_labels'])

#Modelling
# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression is: 0.753061224489796
Confusion Matrix of Logistic Regression is:
 [[169   0   0   5  14  21   1]
 [  0 163  12   6   8  21   0]
 [  0   1 181  10   0  18   0]
 [  1   8  14 160   5  19   3]
 [ 23  13  16  21 129   2   6]
 [  1   4   4  24   2 139  36]
 [  0   2   0   4   1  37 166]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.87      0.80      0.84       210
           2       0.85      0.78      0.81       210
           3       0.80      0.86      0.83       210
           4       0.70      0.76      0.73       210
           5       0.81      0.61      0.70       210
           6       0.54      0.66      0.60       210
           7       0.78      0.79      0.79       210

    accuracy                           0.75      1470
   macro avg       0.76      0.75      0.76      1470
weighted avg       0.76      0.75      0.76      1470

KNN with 3 Neighbors
Accuracy of KNN

Confusion Matrix of Bernoulli Naive Bayes is:
 [[163   1  10   2  30   4   0]
 [  1 161  24   7  13   4   0]
 [  0   1 198  10   0   1   0]
 [  7  11  32 143   6   8   3]
 [ 28  15  17  12 134   0   4]
 [  6  12  44  18   5  97  28]
 [  3   5   7   2   3  37 153]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.78      0.78      0.78       210
           2       0.78      0.77      0.77       210
           3       0.60      0.94      0.73       210
           4       0.74      0.68      0.71       210
           5       0.70      0.64      0.67       210
           6       0.64      0.46      0.54       210
           7       0.81      0.73      0.77       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.71      1470
weighted avg       0.72      0.71      0.71      1470

Working on SVM Kernal: linear
Accuracy of SVM is: 0.7571428571428571
Confusion Matrix 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree is: 0.36054421768707484
Confusion Matrix of Decision Tree is:
 [[ 66   2   0   0  89   0  53]
 [  0  54   2   0   2   0 152]
 [  0   0  95   0   0   0 115]
 [  0   6   1   0   1   0 202]
 [ 13   4   2   0 107   0  84]
 [  0   3   0   0   0   0 207]
 [  0   0   0   0   2   0 208]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.84      0.31      0.46       210
           2       0.78      0.26      0.39       210
           3       0.95      0.45      0.61       210
           4       0.00      0.00      0.00       210
           5       0.53      0.51      0.52       210
           6       0.00      0.00      0.00       210
           7       0.20      0.99      0.34       210

    accuracy                           0.36      1470
   macro avg       0.47      0.36      0.33      1470
weighted avg       0.47      0.36      0.33      1470

Decision Tree with 5 max_depth
Accuracy of Decision 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree is: 0.45374149659863944
Confusion Matrix of Decision Tree is:
 [[155   2   0   0   0  53   0]
 [  0 122   2   0   3  83   0]
 [  0   0  95   0   0 115   0]
 [  1   6   1   1   0 201   0]
 [ 74  20   2   0  47  65   2]
 [  0   3   0   0   0 203   4]
 [  2   1   0   0   0 163  44]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.67      0.74      0.70       210
           2       0.79      0.58      0.67       210
           3       0.95      0.45      0.61       210
           4       1.00      0.00      0.01       210
           5       0.94      0.22      0.36       210
           6       0.23      0.97      0.37       210
           7       0.88      0.21      0.34       210

    accuracy                           0.45      1470
   macro avg       0.78      0.45      0.44      1470
weighted avg       0.78      0.45      0.44      1470

Decision Tree with 8 max_depth
Accuracy of Decision 

Accuracy of Decision Tree is: 0.5625850340136055
Confusion Matrix of Decision Tree is:
 [[140   1   0  42  27   0   0]
 [  0 136   6  62   6   0   0]
 [  0  13 144  49   4   0   0]
 [  1  10   5 182   8   0   4]
 [ 32  13   5  43 116   1   0]
 [  0   4   1 179   2   1  23]
 [  0   1   0  96   4   1 108]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.81      0.67      0.73       210
           2       0.76      0.65      0.70       210
           3       0.89      0.69      0.78       210
           4       0.28      0.87      0.42       210
           5       0.69      0.55      0.62       210
           6       0.33      0.00      0.01       210
           7       0.80      0.51      0.63       210

    accuracy                           0.56      1470
   macro avg       0.65      0.56      0.55      1470
weighted avg       0.65      0.56      0.55      1470

Decision Tree with 16 max_depth
Accuracy of Decision 

Accuracy of Random Forest is: 0.6428571428571429
Confusion Matrix of Random Forest is:
 [[161   4   0  18   5  21   1]
 [  0 130   9  10  18  43   0]
 [  0   0 156  17   0  36   1]
 [  1   7   8 140   4  43   7]
 [ 67  17   6  22  69  16  13]
 [  0   3   4  24   1 136  42]
 [  1   0   1  11   0  44 153]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.70      0.77      0.73       210
           2       0.81      0.62      0.70       210
           3       0.85      0.74      0.79       210
           4       0.58      0.67      0.62       210
           5       0.71      0.33      0.45       210
           6       0.40      0.65      0.50       210
           7       0.71      0.73      0.72       210

    accuracy                           0.64      1470
   macro avg       0.68      0.64      0.64      1470
weighted avg       0.68      0.64      0.64      1470

Random Forest with 4 max_depth
Accuracy of Random For

Accuracy of Random Forest is: 0.6727891156462585
Confusion Matrix of Random Forest is:
 [[161   2   0  10   8  29   0]
 [  0 142   8   9  14  37   0]
 [  0   0 160  11   0  38   1]
 [  0   6   8 136   7  49   4]
 [ 54  15   6  18  90  17  10]
 [  0   3   4  15   3 145  40]
 [  1   0   0   6   1  47 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.75      0.77      0.76       210
           2       0.85      0.68      0.75       210
           3       0.86      0.76      0.81       210
           4       0.66      0.65      0.66       210
           5       0.73      0.43      0.54       210
           6       0.40      0.69      0.51       210
           7       0.74      0.74      0.74       210

    accuracy                           0.67      1470
   macro avg       0.71      0.67      0.68      1470
weighted avg       0.71      0.67      0.68      1470

Random Forest with 12 max_depth
Accuracy of Random Fo

Accuracy of Random Forest is: 0.7006802721088435
Confusion Matrix of Random Forest is:
 [[154   2   0   9  14  29   2]
 [  0 156   8   7   8  31   0]
 [  0   4 168  13   0  24   1]
 [  0   9   9 140   7  41   4]
 [ 34  13   6  22 110  15  10]
 [  0   5   3  17   3 142  40]
 [  0   0   0   5   2  43 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.73      0.77       210
           2       0.83      0.74      0.78       210
           3       0.87      0.80      0.83       210
           4       0.66      0.67      0.66       210
           5       0.76      0.52      0.62       210
           6       0.44      0.68      0.53       210
           7       0.74      0.76      0.75       210

    accuracy                           0.70      1470
   macro avg       0.73      0.70      0.71      1470
weighted avg       0.73      0.70      0.71      1470

Random Forest with 20 max_depth
Accuracy of Random Fo

### Data split for Count vectorizer data

In [5]:
# reading dataset
cv_500_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//cv_500_vectors.csv",encoding_errors='ignore')
cv_500_df

# Train Test split
x_train,x_test,y_train,y_test = train_test_split(cv_500_df,labels_df['kabita_labels'],test_size=0.30,
                                                 random_state=21,stratify=labels_df['kabita_labels'])

# Modelling

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression is: 0.746938775510204
Confusion Matrix of Logistic Regression is:
 [[168   0   2   7  17  14   2]
 [  0 167  12   8   5  18   0]
 [  0   0 183   9   0  18   0]
 [  0   9  19 153   6  20   3]
 [ 19  14  15  14 141   2   5]
 [  2   8   4  19   3 140  34]
 [  1   4   0   6   1  52 146]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.88      0.80      0.84       210
           2       0.83      0.80      0.81       210
           3       0.78      0.87      0.82       210
           4       0.71      0.73      0.72       210
           5       0.82      0.67      0.74       210
           6       0.53      0.67      0.59       210
           7       0.77      0.70      0.73       210

    accuracy                           0.75      1470
   macro avg       0.76      0.75      0.75      1470
weighted avg       0.76      0.75      0.75      1470

KNN with 3 Neighbors
Accuracy of KNN

Accuracy of Bernoulli Naive Bayes is: 0.7142857142857143
Confusion Matrix of Bernoulli Naive Bayes is:
 [[162   1  11   2  30   4   0]
 [  1 161  24   7  13   4   0]
 [  0   1 198  10   0   1   0]
 [  7  10  33 144   6   7   3]
 [ 28  15  17  12 134   0   4]
 [  6  11  42  19   5  98  29]
 [  3   5   7   2   3  37 153]]
Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.78      0.77      0.78       210
           2       0.79      0.77      0.78       210
           3       0.60      0.94      0.73       210
           4       0.73      0.69      0.71       210
           5       0.70      0.64      0.67       210
           6       0.65      0.47      0.54       210
           7       0.81      0.73      0.77       210

    accuracy                           0.71      1470
   macro avg       0.72      0.71      0.71      1470
weighted avg       0.72      0.71      0.71      1470

Working on SVM Kernal: linear

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[155   0   1   0   0  54   0]
 [  0  30  22   0   2 156   0]
 [  0   0 132   0   0  78   0]
 [  1   1  16   0   0 192   0]
 [ 86   4  22   0  34  63   1]
 [  0   0   2   0   0 204   4]
 [  2   0   0   0   0 163  45]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.64      0.74      0.68       210
           2       0.86      0.14      0.24       210
           3       0.68      0.63      0.65       210
           4       0.00      0.00      0.00       210
           5       0.94      0.16      0.28       210
           6       0.22      0.97      0.36       210
           7       0.90      0.21      0.35       210

    accuracy                           0.41      1470
   macro avg       0.61      0.41      0.37      1470
weighted avg       0.61      0.41      0.37      1470

Decision Tree with 5 max_depth
Accuracy of Decision Tree is: 0.43673469387755104
Confusion Matrix of D

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree is: 0.4891156462585034
Confusion Matrix of Decision Tree is:
 [[154   0   1   0   1  54   0]
 [  0 106  17   3   4  80   0]
 [  0   0 132   0   0  78   0]
 [  1   4   8   6   1 188   2]
 [ 63  18  22   0  62  45   0]
 [  0   2   1   1   0 188  18]
 [  2   1   0   0   0 136  71]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.70      0.73      0.72       210
           2       0.81      0.50      0.62       210
           3       0.73      0.63      0.68       210
           4       0.60      0.03      0.05       210
           5       0.91      0.30      0.45       210
           6       0.24      0.90      0.38       210
           7       0.78      0.34      0.47       210

    accuracy                           0.49      1470
   macro avg       0.68      0.49      0.48      1470
weighted avg       0.68      0.49      0.48      1470

Decision Tree with 8 max_depth
Accuracy of Decision T

Accuracy of Decision Tree is: 0.5843537414965987
Confusion Matrix of Decision Tree is:
 [[160   0   2   0   5  43   0]
 [  0 116  22   4   7  61   0]
 [  0   0 162   0   0  48   0]
 [  0   5  28  36   3 135   3]
 [ 43  21  21   3  93  29   0]
 [  0   2   3   1   2 177  25]
 [  2   1   1   2   1  88 115]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.78      0.76      0.77       210
           2       0.80      0.55      0.65       210
           3       0.68      0.77      0.72       210
           4       0.78      0.17      0.28       210
           5       0.84      0.44      0.58       210
           6       0.30      0.84      0.45       210
           7       0.80      0.55      0.65       210

    accuracy                           0.58      1470
   macro avg       0.71      0.58      0.59      1470
weighted avg       0.71      0.58      0.59      1470

Decision Tree with 17 max_depth
Accuracy of Decision 

Accuracy of Random Forest is: 0.6061224489795919
Confusion Matrix of Random Forest is:
 [[154   4   2   8  11  28   3]
 [  0 105  51   7  12  35   0]
 [  0   0 161   6   0  43   0]
 [  0  13  27 106   7  53   4]
 [ 54  20  28  16  71   9  12]
 [  0   9   5  17   2 137  40]
 [  0   1   0   5   0  47 157]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.74      0.73      0.74       210
           2       0.69      0.50      0.58       210
           3       0.59      0.77      0.67       210
           4       0.64      0.50      0.57       210
           5       0.69      0.34      0.45       210
           6       0.39      0.65      0.49       210
           7       0.73      0.75      0.74       210

    accuracy                           0.61      1470
   macro avg       0.64      0.61      0.60      1470
weighted avg       0.64      0.61      0.60      1470

Random Forest with 5 max_depth
Accuracy of Random For

Accuracy of Random Forest is: 0.6571428571428571
Confusion Matrix of Random Forest is:
 [[159   4   1   8   8  29   1]
 [  0 141  21   9   6  33   0]
 [  0   0 163   6   0  41   0]
 [  0  11  18 124   4  50   3]
 [ 49  26  24  14  80   7  10]
 [  0   8   4  12   2 141  43]
 [  1   2   0   5   2  42 158]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.76      0.76      0.76       210
           2       0.73      0.67      0.70       210
           3       0.71      0.78      0.74       210
           4       0.70      0.59      0.64       210
           5       0.78      0.38      0.51       210
           6       0.41      0.67      0.51       210
           7       0.73      0.75      0.74       210

    accuracy                           0.66      1470
   macro avg       0.69      0.66      0.66      1470
weighted avg       0.69      0.66      0.66      1470

Random Forest with 13 max_depth
Accuracy of Random Fo

Accuracy of Random Forest is: 0.6727891156462585
Confusion Matrix of Random Forest is:
 [[158   4   2   9  11  26   0]
 [  0 149  17   9   5  30   0]
 [  0   0 171   8   0  31   0]
 [  0  11  22 130   3  40   4]
 [ 46  29  24  15  85   5   6]
 [  0   9   4  16   1 142  38]
 [  1   2   1   5   1  46 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.77      0.75      0.76       210
           2       0.73      0.71      0.72       210
           3       0.71      0.81      0.76       210
           4       0.68      0.62      0.65       210
           5       0.80      0.40      0.54       210
           6       0.44      0.68      0.54       210
           7       0.76      0.73      0.75       210

    accuracy                           0.67      1470
   macro avg       0.70      0.67      0.67      1470
weighted avg       0.70      0.67      0.67      1470

Accuracy of Multinomial Naive Bayes is: 0.71224489795

### Data split for Term frequency vectorizer data

In [6]:
# reading dataset
tf_500_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tf_500_vectors.csv",encoding_errors='ignore')
tf_500_df

# Train Test split
x_train,x_test,y_train,y_test = train_test_split(tf_500_df,labels_df['kabita_labels'],test_size=0.30,
                                                 random_state=21,stratify=labels_df['kabita_labels'])

# Modelling

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_train,x_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression is: 0.7496598639455783
Confusion Matrix of Logistic Regression is:
 [[165   3   1   9  15  15   2]
 [  0 163  15   6   6  20   0]
 [  0   1 181   9   0  19   0]
 [  1   9  17 162   2  16   3]
 [ 24  18  18  17 125   2   6]
 [  0   7   4  23   2 139  35]
 [  0   3   0   6   1  33 167]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.87      0.79      0.82       210
           2       0.80      0.78      0.79       210
           3       0.77      0.86      0.81       210
           4       0.70      0.77      0.73       210
           5       0.83      0.60      0.69       210
           6       0.57      0.66      0.61       210
           7       0.78      0.80      0.79       210

    accuracy                           0.75      1470
   macro avg       0.76      0.75      0.75      1470
weighted avg       0.76      0.75      0.75      1470

KNN with 3 Neighbors
Accuracy of KN

Accuracy of SVM is: 0.7619047619047619
Confusion Matrix of SVM is:
 [[174   1   0   4  13  16   2]
 [  0 173  11   7   2  17   0]
 [  0   1 183  10   0  16   0]
 [  0  12  18 150   2  24   4]
 [ 27  17  16  18 127   2   3]
 [  0   4   4  19   1 143  39]
 [  1   3   0   6   0  30 170]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.86      0.83      0.84       210
           2       0.82      0.82      0.82       210
           3       0.79      0.87      0.83       210
           4       0.70      0.71      0.71       210
           5       0.88      0.60      0.72       210
           6       0.58      0.68      0.62       210
           7       0.78      0.81      0.79       210

    accuracy                           0.76      1470
   macro avg       0.77      0.76      0.76      1470
weighted avg       0.77      0.76      0.76      1470

Working on SVM Kernal: poly
Accuracy of SVM is: 0.7034013605442176
Confusion Matrix

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 78   0   0  55  77   0   0]
 [  0   0   2 206   2   0   0]
 [  0   0  86 124   0   0   0]
 [  0   0   1 208   1   0   0]
 [ 16   0   1  86 104   0   3]
 [  0   0   0 206   0   0   4]
 [  0   0   0 163   2   0  45]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.83      0.37      0.51       210
           2       0.00      0.00      0.00       210
           3       0.96      0.41      0.57       210
           4       0.20      0.99      0.33       210
           5       0.56      0.50      0.53       210
           6       0.00      0.00      0.00       210
           7       0.87      0.21      0.34       210

    accuracy                           0.35      1470
   macro avg       0.49      0.35      0.33      1470
weighted avg       0.49      0.35      0.33      1470

Decision Tree with 5 max_depth
Accuracy of Decision Tree is: 0.4061224489795918
Confusion Matrix of De

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree is: 0.4557823129251701
Confusion Matrix of Decision Tree is:
 [[119   5   0   0  36  50   0]
 [  0 131   2   0   2  75   0]
 [  0   0  86   0   0 124   0]
 [  0   6   1   0   1 202   0]
 [ 34  22   1   0  87  64   2]
 [  0   3   0   0   0 203   4]
 [  0   1   0   0   2 163  44]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.78      0.57      0.66       210
           2       0.78      0.62      0.69       210
           3       0.96      0.41      0.57       210
           4       0.00      0.00      0.00       210
           5       0.68      0.41      0.51       210
           6       0.23      0.97      0.37       210
           7       0.88      0.21      0.34       210

    accuracy                           0.46      1470
   macro avg       0.61      0.46      0.45      1470
weighted avg       0.61      0.46      0.45      1470

Decision Tree with 8 max_depth
Accuracy of Decision T

Accuracy of Decision Tree is: 0.5489795918367347
Confusion Matrix of Decision Tree is:
 [[135   5   0  40  30   0   0]
 [  0 141   7  54   8   0   0]
 [  0  13 146  50   1   0   0]
 [  0  13   8 179   6   0   4]
 [ 36  23   5  40 104   1   1]
 [  0   4   2 177   1   1  25]
 [  0   1   0  96   5   7 101]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.79      0.64      0.71       210
           2       0.70      0.67      0.69       210
           3       0.87      0.70      0.77       210
           4       0.28      0.85      0.42       210
           5       0.67      0.50      0.57       210
           6       0.11      0.00      0.01       210
           7       0.77      0.48      0.59       210

    accuracy                           0.55      1470
   macro avg       0.60      0.55      0.54      1470
weighted avg       0.60      0.55      0.54      1470

Decision Tree with 16 max_depth
Accuracy of Decision 

Accuracy of Random Forest is: 0.6204081632653061
Confusion Matrix of Random Forest is:
 [[151   4   1  16  11  22   5]
 [  0 128  17  13  15  36   1]
 [  0   0 160  14   0  36   0]
 [  1  12  11 130   6  46   4]
 [ 65  19  11  31  59  10  15]
 [  0   7   5  22   3 130  43]
 [  0   1   0   9   0  46 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.70      0.72      0.71       210
           2       0.75      0.61      0.67       210
           3       0.78      0.76      0.77       210
           4       0.55      0.62      0.58       210
           5       0.63      0.28      0.39       210
           6       0.40      0.62      0.49       210
           7       0.69      0.73      0.71       210

    accuracy                           0.62      1470
   macro avg       0.64      0.62      0.62      1470
weighted avg       0.64      0.62      0.62      1470

Random Forest with 4 max_depth
Accuracy of Random For

Accuracy of Random Forest is: 0.673469387755102
Confusion Matrix of Random Forest is:
 [[144   5   0   9  23  29   0]
 [  0 140  17  10  14  29   0]
 [  0   0 173  14   0  23   0]
 [  0  11  11 133  10  42   3]
 [ 41  15   9  19 104  12  10]
 [  0   6   5  16   4 137  42]
 [  0   0   0   5   2  44 159]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.69      0.73       210
           2       0.79      0.67      0.72       210
           3       0.80      0.82      0.81       210
           4       0.65      0.63      0.64       210
           5       0.66      0.50      0.57       210
           6       0.43      0.65      0.52       210
           7       0.74      0.76      0.75       210

    accuracy                           0.67      1470
   macro avg       0.69      0.67      0.68      1470
weighted avg       0.69      0.67      0.68      1470

Random Forest with 12 max_depth
Accuracy of Random For

Accuracy of Random Forest is: 0.689795918367347
Confusion Matrix of Random Forest is:
 [[140   5   0   9  28  28   0]
 [  0 143  13  10  14  30   0]
 [  0   4 175   7   1  23   0]
 [  0   7  10 135  12  43   3]
 [ 29  20   7  16 118  11   9]
 [  0   5   2  13   6 144  40]
 [  1   1   0   5   3  41 159]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.67      0.74       210
           2       0.77      0.68      0.72       210
           3       0.85      0.83      0.84       210
           4       0.69      0.64      0.67       210
           5       0.65      0.56      0.60       210
           6       0.45      0.69      0.54       210
           7       0.75      0.76      0.76       210

    accuracy                           0.69      1470
   macro avg       0.71      0.69      0.70      1470
weighted avg       0.71      0.69      0.70      1470

Random Forest with 20 max_depth
Accuracy of Random For