#### Info:
#### All PCA models are built using the elbow method through the scree plots in Kabita_PCA_Scree_Plots
#### 

In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    from sklearn.decomposition import FastICA
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Kabita//Input//kabita_dataset_labels.csv")

In [23]:
# Function of Scaling, PCA, ICA
def scale_pca_ica(x_data, y_data, comp):
    scaler_model = StandardScaler()
    scaled_data = scaler_model.fit_transform(x_data)
    #print(scaled_data)
    # Doing PCA giving number of Components(dimensions)
    pca_comp = PCA(n_components=comp)
    pca_data = pca_comp.fit_transform(scaled_data)
    #print(pca_data)
    # Doing ICA on PCA transformed data to make features independent
    ica_comp = FastICA(n_components=comp)
    #ica_comp = FastICA(n_components=comp,max_iter=5000)
    ica_data = ica_comp.fit_transform(pca_data)
    #print(ica_data)
    x_train,x_test,y_train,y_test = train_test_split(ica_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    return x_train, x_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after PCA and ICA is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.37891156462585035
Confusion Matrix of Logistic Regression is:
 [[ 66   0  48   4  74   8  10]
 [ 56   0  70   7  71   3   3]
 [ 18   0 155   1  34   0   2]
 [ 50   0  27  17 100   5  11]
 [ 44   0  23   8 111   4  20]
 [ 22   0  39  13  31   9  96]
 [  0   0   6   1   2   2 199]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.26      0.31      0.28       210
           2       0.00      0.00      0.00       210
           3       0.42      0.74      0.54       210
           4       0.33      0.08      0.13       210
           5       0.26      0.53      0.35       210
           6       0.29      0.04      0.07       210
           7       0.58      0.95      0.72       210

    accuracy                           0.38      1470
   macro avg       0.31      0.38      0.30      1470
weighted avg       0.31      0.38      0.30      1470

KNN with 3 Neigh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.5469387755102041
Confusion Matrix of KNN Model is:
 [[103  21   3  29  27  22   5]
 [ 19 126   8  16  24  15   2]
 [ 16   4 161   8   3  16   2]
 [ 26  21  14 101  19  23   6]
 [ 46  26   6  38  70  14  10]
 [ 27  17   2  16  10  97  41]
 [  4   5   0   4   2  49 146]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.43      0.49      0.46       210
           2       0.57      0.60      0.59       210
           3       0.83      0.77      0.80       210
           4       0.48      0.48      0.48       210
           5       0.45      0.33      0.38       210
           6       0.41      0.46      0.43       210
           7       0.69      0.70      0.69       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.1891156462585034
Confusion Matrix of SVM is:
 [[  2   0 205   1   2   0   0]
 [  1   0 207   1   1   0   0]
 [  0   0 209   1   0   0   0]
 [  8   0 195   3   4   0   0]
 [  5   0 186   2  15   0   2]
 [  3   0 198   1   1   0   7]
 [  0   0 147   4   9   1  49]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.11      0.01      0.02       210
           2       0.00      0.00      0.00       210
           3       0.16      1.00      0.27       210
           4       0.23      0.01      0.03       210
           5       0.47      0.07      0.12       210
           6       0.00      0.00      0.00       210
           7       0.84      0.23      0.37       210

    accuracy                           0.19      1470
   macro avg       0.26      0.19      0.11      1470
weighted avg       0.26      0.19      0.11      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.30952380952380953
Confusion Matrix of SVM is:
 [[ 13  14 156   7  14   6   0]
 [  1  11 188   5   5   0   0]
 [  2   1 206   0   0   1   0]
 [ 12  14 149  24   6   3   2]
 [ 11  24 120  18  24   9   4]
 [  6   9 120   6   5  44  20]
 [  2   4  23   0   5  43 133]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.28      0.06      0.10       210
           2       0.14      0.05      0.08       210
           3       0.21      0.98      0.35       210
           4       0.40      0.11      0.18       210
           5       0.41      0.11      0.18       210
           6       0.42      0.21      0.28       210
           7       0.84      0.63      0.72       210

    accuracy                           0.31      1470
   macro avg       0.38      0.31      0.27      1470
weighted avg       0.38      0.31      0.27      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.5380952380952381
Confusion Matrix of Decision Tree is:
 [[ 94  23   3  23  39  25   3]
 [ 12 125   8  16  28  20   1]
 [  9   9 163   9   6  14   0]
 [ 22  16  10  95  31  31   5]
 [ 30  36  14  36  74  12   8]
 [ 21  18   3  16  12  99  41]
 [  3   5   0   4  10  47 141]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.45      0.47       210
           2       0.54      0.60      0.57       210
           3       0.81      0.78      0.79       210
           4       0.48      0.45      0.46       210
           5       0.37      0.35      0.36       210
           6       0.40      0.47      0.43       210
           7       0.71      0.67      0.69       210

    accuracy                           0.54      1470
   macro avg       0.54      0.54      0.54      1470
weighted avg       0.54      0.54      0.54      1470

Decision Tree with 14 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.32789115646258504
Confusion Matrix of Random Forest is:
 [[  0   0 127  65   0   0  18]
 [  0   0 146  56   0   1   7]
 [  0   0 181  27   0   0   2]
 [  0   0  69 119   0   2  20]
 [  0   0 111  74   0   0  25]
 [  0   0  21  85   0   1 103]
 [  0   0   3  25   0   1 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.28      0.86      0.42       210
           4       0.26      0.57      0.36       210
           5       0.00      0.00      0.00       210
           6       0.20      0.00      0.01       210
           7       0.51      0.86      0.64       210

    accuracy                           0.33      1470
   macro avg       0.18      0.33      0.20      1470
weighted avg       0.18      0.33      0.20      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.38639455782312926
Confusion Matrix of Random Forest is:
 [[ 41   0  72  61   7  21   8]
 [ 57   0  75  53   3  21   1]
 [ 10   0 167  14   0  18   1]
 [ 28   0  38  96   2  37   9]
 [ 64   0  31  69  20  12  14]
 [  2   0  22  37   2  79  68]
 [  0   0   3   4   0  38 165]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.20      0.20      0.20       210
           2       0.00      0.00      0.00       210
           3       0.41      0.80      0.54       210
           4       0.29      0.46      0.35       210
           5       0.59      0.10      0.16       210
           6       0.35      0.38      0.36       210
           7       0.62      0.79      0.69       210

    accuracy                           0.39      1470
   macro avg       0.35      0.39      0.33      1470
weighted avg       0.35      0.39      0.33      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4095238095238095
Confusion Matrix of Random Forest is:
 [[ 64   0  34  59  22  24   7]
 [ 79   0  43  48   9  31   0]
 [ 32   0 144  12   1  20   1]
 [ 28   0  29  89  11  46   7]
 [ 46   0  22  67  47  16  12]
 [  5   0  17  30   2  93  63]
 [  0   0   3   3   0  39 165]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.25      0.30      0.28       210
           2       0.00      0.00      0.00       210
           3       0.49      0.69      0.57       210
           4       0.29      0.42      0.34       210
           5       0.51      0.22      0.31       210
           6       0.35      0.44      0.39       210
           7       0.65      0.79      0.71       210

    accuracy                           0.41      1470
   macro avg       0.36      0.41      0.37      1470
weighted avg       0.36      0.41      0.37      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.43537414965986393
Confusion Matrix of Random Forest is:
 [[ 57   2  31  52  35  27   6]
 [ 56  16  43  47  22  24   2]
 [ 20  12 141  12   4  20   1]
 [ 19   9  23  88  23  44   4]
 [ 33   5  19  49  76  17  11]
 [  4   1  12  29  10  90  64]
 [  0   0   3   2   0  33 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.30      0.27      0.29       210
           2       0.36      0.08      0.13       210
           3       0.52      0.67      0.59       210
           4       0.32      0.42      0.36       210
           5       0.45      0.36      0.40       210
           6       0.35      0.43      0.39       210
           7       0.66      0.82      0.73       210

    accuracy                           0.44      1470
   macro avg       0.42      0.44      0.41      1470
weighted avg       0.42      0.44      0.41      1470

Random Forest with 5 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5993197278911565
Confusion Matrix of Random Forest is:
 [[ 89  11   0  31  45  30   4]
 [ 13 127   8  22  23  15   2]
 [  5   4 167  12   5  16   1]
 [ 17  10   8 120  23  28   4]
 [ 18   9   9  42 107  14  11]
 [  9   9   1  18  13 100  60]
 [  0   2   0   1   2  34 171]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.42      0.49       210
           2       0.74      0.60      0.66       210
           3       0.87      0.80      0.83       210
           4       0.49      0.57      0.53       210
           5       0.49      0.51      0.50       210
           6       0.42      0.48      0.45       210
           7       0.68      0.81      0.74       210

    accuracy                           0.60      1470
   macro avg       0.61      0.60      0.60      1470
weighted avg       0.61      0.60      0.60      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5918367346938775
Confusion Matrix of Random Forest is:
 [[ 92   9   2  34  40  28   5]
 [ 13 129   8  20  21  17   2]
 [  6   6 165  12   4  16   1]
 [ 20  11   8 112  27  28   4]
 [ 27  17   6  36 100  12  12]
 [ 13  14   1  11  15 102  54]
 [  0   1   0   2   3  34 170]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.44      0.48       210
           2       0.69      0.61      0.65       210
           3       0.87      0.79      0.82       210
           4       0.49      0.53      0.51       210
           5       0.48      0.48      0.48       210
           6       0.43      0.49      0.46       210
           7       0.69      0.81      0.74       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.59      1470
weighted avg       0.60      0.59      0.59      1470

Accuracy of Multinomial Naive Bayes

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.373469387755102
Confusion Matrix of Logistic Regression is:
 [[ 22   2  99   0  78   0   9]
 [  4   4 157   0  44   0   1]
 [  5   0 200   0   4   0   1]
 [ 21   3  90   1  85   0  10]
 [  8   1  53   0 133   0  15]
 [  6   1  94   0  26   1  82]
 [  1   1  17   0   2   1 188]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.33      0.10      0.16       210
           2       0.33      0.02      0.04       210
           3       0.28      0.95      0.43       210
           4       1.00      0.00      0.01       210
           5       0.36      0.63      0.46       210
           6       0.50      0.00      0.01       210
           7       0.61      0.90      0.73       210

    accuracy                           0.37      1470
   macro avg       0.49      0.37      0.26      1470
weighted avg       0.49      0.37      0.26      1470

KNN with 3 Neighbo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.15714285714285714
Confusion Matrix of SVM is:
 [[  0  33 170   7   0   0   0]
 [  0   9 201   0   0   0   0]
 [  0   0 210   0   0   0   0]
 [  3  25 175   7   0   0   0]
 [  7  52 137  13   1   0   0]
 [  1  12 196   1   0   0   0]
 [  1   7 187   5   0   6   4]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.07      0.04      0.05       210
           3       0.16      1.00      0.28       210
           4       0.21      0.03      0.06       210
           5       1.00      0.00      0.01       210
           6       0.00      0.00      0.00       210
           7       1.00      0.02      0.04       210

    accuracy                           0.16      1470
   macro avg       0.35      0.16      0.06      1470
weighted avg       0.35      0.16      0.06      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[109  13   1  26  37  19   5]
 [ 21 121  12  20  16  19   1]
 [  7   9 164   8   4  17   1]
 [ 31  23  13  71  34  34   4]
 [ 43  22   5  32  84  18   6]
 [ 13  11   1  23   9 103  50]
 [  3   2   0   4  11  48 142]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.48      0.52      0.50       210
           2       0.60      0.58      0.59       210
           3       0.84      0.78      0.81       210
           4       0.39      0.34      0.36       210
           5       0.43      0.40      0.41       210
           6       0.40      0.49      0.44       210
           7       0.68      0.68      0.68       210

    accuracy                           0.54      1470
   macro avg       0.54      0.54      0.54      1470
weighted avg       0.54      0.54      0.54      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.54421768707483
Confu

Accuracy of Random Forest after PCA and ICA is: 0.3836734693877551
Confusion Matrix of Random Forest is:
 [[168   7  10   0   6   0  19]
 [ 70  37  95   0   0   0   8]
 [ 38  11 160   0   0   0   1]
 [ 95  21  55   0  15   0  24]
 [117  14  29   0   7   0  43]
 [ 47   4  55   0   4   0 100]
 [  5   1  10   0   2   0 192]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.31      0.80      0.45       210
           2       0.39      0.18      0.24       210
           3       0.39      0.76      0.51       210
           4       0.00      0.00      0.00       210
           5       0.21      0.03      0.06       210
           6       0.00      0.00      0.00       210
           7       0.50      0.91      0.64       210

    accuracy                           0.38      1470
   macro avg       0.26      0.38      0.27      1470
weighted avg       0.26      0.38      0.27      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3979591836734694
Confusion Matrix of Random Forest is:
 [[152   8  17   7   7   0  19]
 [ 56  12 102  30   3   0   7]
 [ 11  11 180   7   0   0   1]
 [ 84   8  41  35  18   0  24]
 [115  10  15  19   9   0  42]
 [ 29  15  52  12   4   2  96]
 [  6   1   7   1   0   0 195]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.34      0.72      0.46       210
           2       0.18      0.06      0.09       210
           3       0.43      0.86      0.58       210
           4       0.32      0.17      0.22       210
           5       0.22      0.04      0.07       210
           6       1.00      0.01      0.02       210
           7       0.51      0.93      0.66       210

    accuracy                           0.40      1470
   macro avg       0.43      0.40      0.30      1470
weighted avg       0.43      0.40      0.30      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5884353741496599
Confusion Matrix of Random Forest is:
 [[ 88   3   0  30  69  17   3]
 [ 13 122   9  25  25  16   0]
 [  9   8 160  10   4  18   1]
 [ 15   9  14  93  45  26   8]
 [ 23   8   5  29 126   9  10]
 [  6   3   3  26  11 103  58]
 [  0   1   0   6   2  28 173]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.42      0.48       210
           2       0.79      0.58      0.67       210
           3       0.84      0.76      0.80       210
           4       0.42      0.44      0.43       210
           5       0.45      0.60      0.51       210
           6       0.47      0.49      0.48       210
           7       0.68      0.82      0.75       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.59      1470
weighted avg       0.60      0.59      0.59      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5884353741496599
Confusion Matrix of Random Forest is:
 [[ 92   7   0  29  62  18   2]
 [ 12 123  10  23  26  16   0]
 [  6   6 166  11   4  16   1]
 [ 11  11  14  91  45  32   6]
 [ 24   8   4  38 116  11   9]
 [  6   7   2  24  12 102  57]
 [  0   0   0   6   3  26 175]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.44      0.51       210
           2       0.76      0.59      0.66       210
           3       0.85      0.79      0.82       210
           4       0.41      0.43      0.42       210
           5       0.43      0.55      0.49       210
           6       0.46      0.49      0.47       210
           7       0.70      0.83      0.76       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.59      1470
weighted avg       0.60      0.59      0.59      1470

Random Forest with 19 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3653061224489796
Confusion Matrix of Logistic Regression is:
 [[ 81   0  38   4  68   8  11]
 [106   0  30   3  63   5   3]
 [ 44   0 138   1  25   1   1]
 [ 46   0  32   2 110  10  10]
 [ 57   0  20   2 102   8  21]
 [  8   0  46   6  38  14  98]
 [  0   0   6   0   1   3 200]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.24      0.39      0.29       210
           2       0.00      0.00      0.00       210
           3       0.45      0.66      0.53       210
           4       0.11      0.01      0.02       210
           5       0.25      0.49      0.33       210
           6       0.29      0.07      0.11       210
           7       0.58      0.95      0.72       210

    accuracy                           0.37      1470
   macro avg       0.27      0.37      0.29      1470
weighted avg       0.27      0.37      0.29      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[109  17   5  27  37  12   3]
 [ 31 131  11  23  10   4   0]
 [ 13  16 168   7   4   2   0]
 [ 38  28  12  82  32  14   4]
 [ 50  26   4  46  63  11  10]
 [ 26  41   2  21  13  69  38]
 [  4   7   0   0   9  43 147]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.40      0.52      0.45       210
           2       0.49      0.62      0.55       210
           3       0.83      0.80      0.82       210
           4       0.40      0.39      0.39       210
           5       0.38      0.30      0.33       210
           6       0.45      0.33      0.38       210
           7       0.73      0.70      0.71       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.52      1470
weighted avg       0.52      0.52      0.52      1470

KNN with 5 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.5340136054421769
Confusion Matrix of KNN Mo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.18979591836734694
Confusion Matrix of SVM is:
 [[  2   0 191   0  17   0   0]
 [  3   0 201   0   6   0   0]
 [  0   0 208   0   2   0   0]
 [  1   0 183   1  25   0   0]
 [  4   0 166   2  37   0   1]
 [  0   0 193   1  12   3   1]
 [  0   0 151   2  11  18  28]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.20      0.01      0.02       210
           2       0.00      0.00      0.00       210
           3       0.16      0.99      0.28       210
           4       0.17      0.00      0.01       210
           5       0.34      0.18      0.23       210
           6       0.14      0.01      0.03       210
           7       0.93      0.13      0.23       210

    accuracy                           0.19      1470
   macro avg       0.28      0.19      0.11      1470
weighted avg       0.28      0.19      0.11      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3081632653061224
Confusion Matrix of SVM is:
 [[ 17  13 152  15   8   5   0]
 [  6   8 187   6   1   2   0]
 [  1   0 206   1   1   1   0]
 [  8   9 144  36   8   4   1]
 [ 28  12 120  22  15   8   5]
 [  7   9 117   9   3  41  24]
 [  1   2  28   1   6  42 130]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.25      0.08      0.12       210
           2       0.15      0.04      0.06       210
           3       0.22      0.98      0.35       210
           4       0.40      0.17      0.24       210
           5       0.36      0.07      0.12       210
           6       0.40      0.20      0.26       210
           7       0.81      0.62      0.70       210

    accuracy                           0.31      1470
   macro avg       0.37      0.31      0.27      1470
weighted avg       0.37      0.31      0.27      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           1       0.43      0.46      0.45       210
           2       0.50      0.53      0.52       210
           3       0.86      0.73      0.79       210
           4       0.40      0.42      0.41       210
           5       0.35      0.30      0.32       210
           6       0.39      0.45      0.42       210
           7       0.69      0.65      0.67       210

    accuracy                           0.51      1470
   macro avg       0.52      0.51      0.51      1470
weighted avg       0.52      0.51      0.51      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5346938775510204
Confusion Matrix of Decision Tree is:
 [[ 94  19   1  30  42  22   2]
 [ 19 114   9  31  20  16   1]
 [  9   7 164   9   5  16   0]
 [ 25  22  11  97  24  29   2]
 [ 40  24   4  36  72  23  11]
 [ 13  13   1  16  16 106  45]
 [  1   5   0   2  13  50 139]]
Classification Report of Decision Tree is:
    

Confusion Matrix of Decision Tree is:
 [[ 92  18   7  34  35  21   3]
 [ 11 122   9  28  21  18   1]
 [  5   8 169   8   6  14   0]
 [ 25  19  18  86  35  24   3]
 [ 35  25  10  40  70  19  11]
 [ 14  12   2  20  16  99  47]
 [  2   4   0   1  13  55 135]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.44      0.47       210
           2       0.59      0.58      0.58       210
           3       0.79      0.80      0.80       210
           4       0.40      0.41      0.40       210
           5       0.36      0.33      0.34       210
           6       0.40      0.47      0.43       210
           7       0.68      0.64      0.66       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Random Forest with 1 max_depth
Accuracy of Random Forest after PCA and ICA is: 0.336734693877551
Confu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.41904761904761906
Confusion Matrix of Random Forest is:
 [[119   0  13  29   0  40   9]
 [ 98   0  42  34   2  32   2]
 [ 30   0 156  11   0  13   0]
 [ 80   1  23  66   1  30   9]
 [ 97   0  16  38   0  38  21]
 [ 23   0  13  13   0 101  60]
 [  0   0   2   0   0  34 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.27      0.57      0.36       210
           2       0.00      0.00      0.00       210
           3       0.59      0.74      0.66       210
           4       0.35      0.31      0.33       210
           5       0.00      0.00      0.00       210
           6       0.35      0.48      0.41       210
           7       0.63      0.83      0.72       210

    accuracy                           0.42      1470
   macro avg       0.31      0.42      0.35      1470
weighted avg       0.31      0.42      0.35      1470

Random Forest with 3 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5789115646258504
Confusion Matrix of Random Forest is:
 [[ 84   8   4  37  51  23   3]
 [  8 114   9  32  28  19   0]
 [  9   5 162  12   7  15   0]
 [ 20   5   7 114  36  25   3]
 [ 18  11   5  47 100  19  10]
 [  6   7   1  18  15 112  51]
 [  1   1   0   2   3  38 165]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.40      0.47       210
           2       0.75      0.54      0.63       210
           3       0.86      0.77      0.81       210
           4       0.44      0.54      0.48       210
           5       0.42      0.48      0.44       210
           6       0.45      0.53      0.49       210
           7       0.71      0.79      0.75       210

    accuracy                           0.58      1470
   macro avg       0.60      0.58      0.58      1470
weighted avg       0.60      0.58      0.58      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5836734693877551
Confusion Matrix of Random Forest is:
 [[ 94  14   2  32  43  22   3]
 [ 14 121   8  28  22  16   1]
 [  5   3 171   8   7  16   0]
 [ 16  15   9 106  33  28   3]
 [ 24  16   5  42  94  18  11]
 [  6   9   1  19  16 109  50]
 [  1   1   0   0   7  38 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.45      0.51       210
           2       0.68      0.58      0.62       210
           3       0.87      0.81      0.84       210
           4       0.45      0.50      0.48       210
           5       0.42      0.45      0.44       210
           6       0.44      0.52      0.48       210
           7       0.71      0.78      0.74       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.59      1470
weighted avg       0.59      0.58      0.59      1470

Random Forest with 19 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.454421768707483
Confusion Matrix of Logistic Regression is:
 [[ 95   0  30   0  54   8  23]
 [  8  45  90   0  47  10  10]
 [ 17   1 183   0   2   7   0]
 [ 35   6  39   0  69  29  32]
 [ 39   6  25   0 121   2  17]
 [ 30   0  26   0  10  50  94]
 [  7   0   0   0   2  27 174]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.41      0.45      0.43       210
           2       0.78      0.21      0.34       210
           3       0.47      0.87      0.61       210
           4       0.00      0.00      0.00       210
           5       0.40      0.58      0.47       210
           6       0.38      0.24      0.29       210
           7       0.50      0.83      0.62       210

    accuracy                           0.45      1470
   macro avg       0.42      0.45      0.39      1470
weighted avg       0.42      0.45      0.39      1470

KNN with 3 Neighbo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.49863945578231295
Confusion Matrix of KNN Model is:
 [[140   5   1  17  24  13  10]
 [ 11 136   9  24  11  16   3]
 [  8  20 159   8   6   9   0]
 [ 40  25   8  62  29  27  19]
 [ 55  33   7  46  58   7   4]
 [ 31  14   5  23  10  66  61]
 [  8   7   2  14   4  63 112]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.48      0.67      0.56       210
           2       0.57      0.65      0.60       210
           3       0.83      0.76      0.79       210
           4       0.32      0.30      0.31       210
           5       0.41      0.28      0.33       210
           6       0.33      0.31      0.32       210
           7       0.54      0.53      0.53       210

    accuracy                           0.50      1470
   macro avg       0.50      0.50      0.49      1470
weighted avg       0.50      0.50      0.49      1470

KNN with 5 Neighbors
Accuracy of KNN Model aft

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.4163265306122449
Confusion Matrix of SVM is:
 [[113   0  41   0  40  10   6]
 [ 15  54  90   0  41   8   2]
 [ 10   2 194   0   0   4   0]
 [ 71   9  45   0  58  13  14]
 [ 65  10  33   0  93   2   7]
 [ 76   5  31   0  10  40  48]
 [ 53   1   0   0   2  36 118]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.28      0.54      0.37       210
           2       0.67      0.26      0.37       210
           3       0.45      0.92      0.60       210
           4       0.00      0.00      0.00       210
           5       0.38      0.44      0.41       210
           6       0.35      0.19      0.25       210
           7       0.61      0.56      0.58       210

    accuracy                           0.42      1470
   macro avg       0.39      0.42      0.37      1470
weighted avg       0.39      0.42      0.37      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.48639455782312924
Confusion Matrix of SVM is:
 [[119   0   1  25  37  20   8]
 [  2  89  17  65  13  23   1]
 [  9  20 138  21   7  15   0]
 [ 13  16  22  65  41  36  17]
 [ 31  18   5  55  92   4   5]
 [  6   9   8  38   8  77  64]
 [  2   1   0  11   2  59 135]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.65      0.57      0.61       210
           2       0.58      0.42      0.49       210
           3       0.72      0.66      0.69       210
           4       0.23      0.31      0.27       210
           5       0.46      0.44      0.45       210
           6       0.33      0.37      0.35       210
           7       0.59      0.64      0.61       210

    accuracy                           0.49      1470
   macro avg       0.51      0.49      0.49      1470
weighted avg       0.51      0.49      0.49      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[120   4   3  13  40  18  12]
 [  7 135   3  25  26  11   3]
 [  9   9 159  10  16   7   0]
 [ 18  19   8  56  58  22  29]
 [ 44  29   1  41  78   9   8]
 [ 17  17   4  16  20  61  75]
 [  4   6   0  13   6  39 142]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.57      0.56       210
           2       0.62      0.64      0.63       210
           3       0.89      0.76      0.82       210
           4       0.32      0.27      0.29       210
           5       0.32      0.37      0.34       210
           6       0.37      0.29      0.32       210
           7       0.53      0.68      0.59       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.49863945578231295
Co

Accuracy of Random Forest after PCA and ICA is: 0.4061224489795918
Confusion Matrix of Random Forest is:
 [[106   0  15   0  56   0  33]
 [  2   0  30   0 157   0  21]
 [ 16   0 151   0  38   0   5]
 [ 24   0  17   0 107   0  62]
 [ 39   0   3   0 149   0  19]
 [ 17   0  22   0  31   0 140]
 [  8   0   0   0  11   0 191]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.50      0.50      0.50       210
           2       0.00      0.00      0.00       210
           3       0.63      0.72      0.67       210
           4       0.00      0.00      0.00       210
           5       0.27      0.71      0.39       210
           6       0.00      0.00      0.00       210
           7       0.41      0.91      0.56       210

    accuracy                           0.41      1470
   macro avg       0.26      0.41      0.30      1470
weighted avg       0.26      0.41      0.30      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4605442176870748
Confusion Matrix of Random Forest is:
 [[116   3   4   0  53   1  33]
 [  2  84  30   0  73   0  21]
 [ 16  30 151   0   8   0   5]
 [ 23  21  17   0  85   0  64]
 [ 40  19   3   0 129   0  19]
 [ 14   4  22   0  25   1 144]
 [  4   0   0   0  10   0 196]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.55      0.55       210
           2       0.52      0.40      0.45       210
           3       0.67      0.72      0.69       210
           4       0.00      0.00      0.00       210
           5       0.34      0.61      0.44       210
           6       0.50      0.00      0.01       210
           7       0.41      0.93      0.57       210

    accuracy                           0.46      1470
   macro avg       0.42      0.46      0.39      1470
weighted avg       0.42      0.46      0.39      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.49047619047619045
Confusion Matrix of Random Forest is:
 [[116   3   3   0  53  11  24]
 [  2 113  30   0  43  10  12]
 [ 18  32 149   0   6   5   0]
 [ 19  34  17   0  75  16  49]
 [ 33  32   3   0 123   3  16]
 [ 11  11  22   0  20  30 116]
 [  1   1   0   0   9   9 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.55      0.57       210
           2       0.50      0.54      0.52       210
           3       0.67      0.71      0.69       210
           4       0.00      0.00      0.00       210
           5       0.37      0.59      0.46       210
           6       0.36      0.14      0.20       210
           7       0.47      0.90      0.62       210

    accuracy                           0.49      1470
   macro avg       0.42      0.49      0.44      1470
weighted avg       0.42      0.49      0.44      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.507482993197279
Confusion Matrix of Random Forest is:
 [[117   4   3   0  52  13  21]
 [  2 126  16   0  42  14  10]
 [ 13  34 148   0   9   6   0]
 [ 18  33  17   2  76  26  38]
 [ 31  35   3   1 121   3  16]
 [ 10  10  19   1  20  48 102]
 [  1   2   0   0   7  16 184]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.56      0.58       210
           2       0.52      0.60      0.56       210
           3       0.72      0.70      0.71       210
           4       0.50      0.01      0.02       210
           5       0.37      0.58      0.45       210
           6       0.38      0.23      0.29       210
           7       0.50      0.88      0.63       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.46      1470
weighted avg       0.51      0.51      0.46      1470

Random Forest with 5 max_depth
Accur

Accuracy of Random Forest after PCA and ICA is: 0.5653061224489796
Confusion Matrix of Random Forest is:
 [[129   5   1  12  38  12  13]
 [  4 129   7  24  24  20   2]
 [  7   8 170  10   6   9   0]
 [ 14  14   9  53  58  29  33]
 [ 35  22   3  27 107   8   8]
 [ 13   7   6   7  15  83  79]
 [  6   3   0   4   4  33 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.61      0.62       210
           2       0.69      0.61      0.65       210
           3       0.87      0.81      0.84       210
           4       0.39      0.25      0.31       210
           5       0.42      0.51      0.46       210
           6       0.43      0.40      0.41       210
           7       0.54      0.76      0.63       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.56      1470
weighted avg       0.57      0.57      0.56      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5489795918367347
Confusion Matrix of Random Forest is:
 [[132   4   1  16  31  14  12]
 [  4 126  10  23  24  21   2]
 [  8   9 171   9   6   7   0]
 [ 16  11  10  72  47  27  27]
 [ 39  27   3  46  79   9   7]
 [ 16   6   4  12  15  79  78]
 [  4   4   0  12   4  38 148]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.60      0.63      0.62       210
           2       0.67      0.60      0.63       210
           3       0.86      0.81      0.84       210
           4       0.38      0.34      0.36       210
           5       0.38      0.38      0.38       210
           6       0.41      0.38      0.39       210
           7       0.54      0.70      0.61       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Accuracy of Multinomial Naive Bayes

In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_gkb.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.2891156462585034
Confusion Matrix of Logistic Regression is:
 [[ 78   0  79   0   1   0  52]
 [ 33   0 122   0   2   0  53]
 [ 19   0 172   0   1   0  18]
 [ 32   0 136   0   5   0  37]
 [ 29   0 146   0   3   0  32]
 [ 33   0  41   0   0   0 136]
 [ 29   0   9   0   0   0 172]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.31      0.37      0.34       210
           2       0.00      0.00      0.00       210
           3       0.24      0.82      0.38       210
           4       0.00      0.00      0.00       210
           5       0.25      0.01      0.03       210
           6       0.00      0.00      0.00       210
           7       0.34      0.82      0.48       210

    accuracy                           0.29      1470
   macro avg       0.16      0.29      0.17      1470
weighted avg       0.16      0.29      0.17      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.3761904761904762
Confusion Matrix of KNN Model is:
 [[100  18  13  29  26  10  14]
 [ 39  93  10  21  27   7  13]
 [ 15  20 155   7   5   7   1]
 [ 55  28  11  47  40   9  20]
 [ 52  43  10  29  50  11  15]
 [ 47  26   6  19  11  43  58]
 [ 30  23   2  16   8  66  65]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.30      0.48      0.36       210
           2       0.37      0.44      0.40       210
           3       0.75      0.74      0.74       210
           4       0.28      0.22      0.25       210
           5       0.30      0.24      0.27       210
           6       0.28      0.20      0.24       210
           7       0.35      0.31      0.33       210

    accuracy                           0.38      1470
   macro avg       0.37      0.38      0.37      1470
weighted avg       0.37      0.38      0.37      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.2782312925170068
Confusion Matrix of SVM is:
 [[ 54   0  77   0  13  13  53]
 [ 23   0 123   0  10   2  52]
 [ 14   0 172   0   6   2  16]
 [ 23   0 133   0  17   3  34]
 [ 22   0 145   0  11   0  32]
 [ 27   0  38   0   8   6 131]
 [ 26   0   9   0   2   7 166]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.29      0.26      0.27       210
           2       0.00      0.00      0.00       210
           3       0.25      0.82      0.38       210
           4       0.00      0.00      0.00       210
           5       0.16      0.05      0.08       210
           6       0.18      0.03      0.05       210
           7       0.34      0.79      0.48       210

    accuracy                           0.28      1470
   macro avg       0.17      0.28      0.18      1470
weighted avg       0.17      0.28      0.18      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3006802721088435
Confusion Matrix of SVM is:
 [[ 78   0  86   8   1  17  20]
 [ 22   0 131   5   1   7  44]
 [  9   0 176   2   0  13  10]
 [ 23   0 147   6   0   7  27]
 [ 23   0 153   2   0  10  22]
 [ 40   0  45   2   0  50  73]
 [ 28   0  11   5   0  34 132]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.35      0.37      0.36       210
           2       0.00      0.00      0.00       210
           3       0.23      0.84      0.37       210
           4       0.20      0.03      0.05       210
           5       0.00      0.00      0.00       210
           6       0.36      0.24      0.29       210
           7       0.40      0.63      0.49       210

    accuracy                           0.30      1470
   macro avg       0.22      0.30      0.22      1470
weighted avg       0.22      0.30      0.22      1470

Working on SVM Kernal: rbf


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3326530612244898
Confusion Matrix of SVM is:
 [[ 90   0  58  17   6  18  21]
 [ 34   0 117   6   2   8  43]
 [ 13   0 175   3   4  13   2]
 [ 32   0  94  30  20   7  27]
 [ 28   0 122  15  13  10  22]
 [ 42   0  27  11   4  53  73]
 [ 31   0  11   1   2  37 128]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.33      0.43      0.38       210
           2       0.00      0.00      0.00       210
           3       0.29      0.83      0.43       210
           4       0.36      0.14      0.20       210
           5       0.25      0.06      0.10       210
           6       0.36      0.25      0.30       210
           7       0.41      0.61      0.49       210

    accuracy                           0.33      1470
   macro avg       0.29      0.33      0.27      1470
weighted avg       0.29      0.33      0.27      1470

Working on SVM Kernal: sigmoid


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.23605442176870747
Confusion Matrix of SVM is:
 [[ 69  22  53  20   0   2  44]
 [ 50   5 101   4   0  11  39]
 [ 38   2 155   2   0   0  13]
 [ 81   7  87   4   0   9  22]
 [ 62   8 109   0   0   9  22]
 [ 51  21  22   9   0  26  81]
 [ 41  11   6   5   0  59  88]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.18      0.33      0.23       210
           2       0.07      0.02      0.03       210
           3       0.29      0.74      0.42       210
           4       0.09      0.02      0.03       210
           5       0.00      0.00      0.00       210
           6       0.22      0.12      0.16       210
           7       0.28      0.42      0.34       210

    accuracy                           0.24      1470
   macro avg       0.16      0.24      0.17      1470
weighted avg       0.16      0.24      0.17      1470

Decision Tree with 1 max_depth
Accuracy of Decision Tree after P

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.40680272108843535
Confusion Matrix of Decision Tree is:
 [[100  16   8  35  26  16   9]
 [ 27  82   5  25  36  22  13]
 [ 11  11 154   8  20   3   3]
 [ 44  11   8  60  47   9  31]
 [ 39  19   7  38  67  21  19]
 [ 41  19   6  21  17  42  64]
 [ 29  15   4  10  13  46  93]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.48      0.40       210
           2       0.47      0.39      0.43       210
           3       0.80      0.73      0.77       210
           4       0.30      0.29      0.29       210
           5       0.30      0.32      0.31       210
           6       0.26      0.20      0.23       210
           7       0.40      0.44      0.42       210

    accuracy                           0.41      1470
   macro avg       0.41      0.41      0.41      1470
weighted avg       0.41      0.41      0.41      1470

Decision Tree with 14 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.2571428571428571
Confusion Matrix of Random Forest is:
 [[ 37   0  79   0   2   0  92]
 [ 17   0 124   0   0   0  69]
 [ 18   0 176   0   0   0  16]
 [ 40   0 120   0  10   0  40]
 [ 16   0 147   0   1   0  46]
 [ 27   0  35   0   4   0 144]
 [ 35   0  11   0   0   0 164]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.19      0.18      0.18       210
           2       0.00      0.00      0.00       210
           3       0.25      0.84      0.39       210
           4       0.00      0.00      0.00       210
           5       0.06      0.00      0.01       210
           6       0.00      0.00      0.00       210
           7       0.29      0.78      0.42       210

    accuracy                           0.26      1470
   macro avg       0.11      0.26      0.14      1470
weighted avg       0.11      0.26      0.14      1470

Random Forest with 2 max_depth
Accu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Random Forest is:
 [[104   0  40  12  25   3  26]
 [ 36   0  86   5  32   1  50]
 [ 19   0 160   3  12   0  16]
 [ 56   0  56  21  47   0  30]
 [ 37   0  84   7  55   1  26]
 [ 51   0  20  12   7   2 118]
 [ 40   0   2   0   7   1 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.30      0.50      0.38       210
           2       0.00      0.00      0.00       210
           3       0.36      0.76      0.49       210
           4       0.35      0.10      0.16       210
           5       0.30      0.26      0.28       210
           6       0.25      0.01      0.02       210
           7       0.38      0.76      0.50       210

    accuracy                           0.34      1470
   macro avg       0.28      0.34      0.26      1470
weighted avg       0.28      0.34      0.26      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.354421768707483
Confusion Matrix of Random Forest is:
 [[104   1  15  24  35  13  18]
 [ 37  15  52  10  45  10  41]
 [ 24   4 124   3  41   1  13]
 [ 46   0  25  39  69   4  27]
 [ 35   4  37  27  78   6  23]
 [ 53   1  10  16  11  17 102]
 [ 46   0   2   3   4  11 144]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.30      0.50      0.37       210
           2       0.60      0.07      0.13       210
           3       0.47      0.59      0.52       210
           4       0.32      0.19      0.23       210
           5       0.28      0.37      0.32       210
           6       0.27      0.08      0.12       210
           7       0.39      0.69      0.50       210

    accuracy                           0.35      1470
   macro avg       0.38      0.35      0.31      1470
weighted avg       0.38      0.35      0.31      1470

Random Forest with 4 max_depth
Accur

Accuracy of Random Forest after PCA and ICA is: 0.4448979591836735
Confusion Matrix of Random Forest is:
 [[ 87  12   3  32  38  12  26]
 [ 24  87   8  13  49   7  22]
 [  5  13 155   9  21   2   5]
 [ 29   6   8  54  70  12  31]
 [ 29  23   8  31  89  11  19]
 [ 23   6   3  26  15  59  78]
 [ 22   8   2   4   9  42 123]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.41      0.41       210
           2       0.56      0.41      0.48       210
           3       0.83      0.74      0.78       210
           4       0.32      0.26      0.28       210
           5       0.31      0.42      0.36       210
           6       0.41      0.28      0.33       210
           7       0.40      0.59      0.48       210

    accuracy                           0.44      1470
   macro avg       0.46      0.44      0.44      1470
weighted avg       0.46      0.44      0.44      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4217687074829932
Confusion Matrix of Random Forest is:
 [[ 79  16  10  35  32  15  23]
 [ 25  90   9  23  36  13  14]
 [  6   8 164   9  16   4   3]
 [ 27  17   9  57  56  17  27]
 [ 34  28   9  38  60  22  19]
 [ 23  20   5  21  15  63  63]
 [ 20  10   2  17   8  46 107]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.37      0.38      0.37       210
           2       0.48      0.43      0.45       210
           3       0.79      0.78      0.78       210
           4       0.28      0.27      0.28       210
           5       0.27      0.29      0.28       210
           6       0.35      0.30      0.32       210
           7       0.42      0.51      0.46       210

    accuracy                           0.42      1470
   macro avg       0.42      0.42      0.42      1470
weighted avg       0.42      0.42      0.42      1470

Random Forest with 20 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],2)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3258503401360544
Confusion Matrix of Logistic Regression is:
 [[ 93  35  44   0   0   0  38]
 [ 17 110  31   3   0   0  49]
 [ 62  47  87   0   0   0  14]
 [ 84  38  48   0   0   0  40]
 [ 60  53  62   0   0   0  35]
 [ 18  42   6   0   0   0 144]
 [  0  21   0   0   0   0 189]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.28      0.44      0.34       210
           2       0.32      0.52      0.40       210
           3       0.31      0.41      0.36       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.37      0.90      0.53       210

    accuracy                           0.33      1470
   macro avg       0.18      0.33      0.23      1470
weighted avg       0.18      0.33      0.23      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.4204081632653061
Confusion Matrix of KNN Model is:
 [[ 95  21  12  33  22  22   5]
 [ 19 109   5  24  28  17   8]
 [ 26   7 140  20  14   3   0]
 [ 66  28  14  53  29  11   9]
 [ 59  39  16  45  37   6   8]
 [ 29  23   4  15  12  57  70]
 [  7  19   2   3   3  49 127]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.32      0.45      0.37       210
           2       0.44      0.52      0.48       210
           3       0.73      0.67      0.69       210
           4       0.27      0.25      0.26       210
           5       0.26      0.18      0.21       210
           6       0.35      0.27      0.30       210
           7       0.56      0.60      0.58       210

    accuracy                           0.42      1470
   macro avg       0.42      0.42      0.41      1470
weighted avg       0.42      0.42      0.41      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.35034013605442177
Confusion Matrix of SVM is:
 [[ 91  38  56   0   0   1  24]
 [ 25 120  36   0   0   0  29]
 [ 49  20 132   0   0   1   8]
 [ 85  42  54   0   0   1  28]
 [ 63  62  70   1   0   1  13]
 [ 20  57   7   0   0   2 124]
 [  0  40   0   0   0   0 170]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.27      0.43      0.34       210
           2       0.32      0.57      0.41       210
           3       0.37      0.63      0.47       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.33      0.01      0.02       210
           7       0.43      0.81      0.56       210

    accuracy                           0.35      1470
   macro avg       0.25      0.35      0.26      1470
weighted avg       0.25      0.35      0.26      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3843537414965986
Confusion Matrix of SVM is:
 [[ 62  29  35  60   7  11   6]
 [ 18  93  31  35   6  16  11]
 [ 21  16 112  33  23   3   2]
 [ 45  33  35  65  13  13   6]
 [ 31  40  68  47  13   7   4]
 [ 14  36   0  28   0  51  81]
 [  3  12   0   0   0  26 169]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.32      0.30      0.31       210
           2       0.36      0.44      0.40       210
           3       0.40      0.53      0.46       210
           4       0.24      0.31      0.27       210
           5       0.21      0.06      0.10       210
           6       0.40      0.24      0.30       210
           7       0.61      0.80      0.69       210

    accuracy                           0.38      1470
   macro avg       0.36      0.38      0.36      1470
weighted avg       0.36      0.38      0.36      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.427891156462585
Confusion Matrix of Decision Tree is:
 [[ 76  19   8  52  33  17   5]
 [ 10  90   9  30  36  22  13]
 [ 17   6 147  26   9   4   1]
 [ 52  19  12  73  34  16   4]
 [ 17  28  20  57  67  15   6]
 [ 14  29   7  26  12  56  66]
 [  2  14   6   2   4  62 120]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.36      0.38       210
           2       0.44      0.43      0.43       210
           3       0.70      0.70      0.70       210
           4       0.27      0.35      0.31       210
           5       0.34      0.32      0.33       210
           6       0.29      0.27      0.28       210
           7       0.56      0.57      0.56       210

    accuracy                           0.43      1470
   macro avg       0.43      0.43      0.43      1470
weighted avg       0.43      0.43      0.43      1470

Decision Tree with 14 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.31020408163265306
Confusion Matrix of Random Forest is:
 [[ 74   3 120   0   0   0  13]
 [  8   8 166   0   0   0  28]
 [ 30   1 177   0   0   0   2]
 [ 76   3 117   0   0   0  14]
 [ 50   4 144   0   0   0  12]
 [ 31   4  48   0   0   0 127]
 [  1   2  10   0   0   0 197]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.27      0.35      0.31       210
           2       0.32      0.04      0.07       210
           3       0.23      0.84      0.36       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.50      0.94      0.65       210

    accuracy                           0.31      1470
   macro avg       0.19      0.31      0.20      1470
weighted avg       0.19      0.31      0.20      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3768707482993197
Confusion Matrix of Random Forest is:
 [[ 73  42  61  13   6   1  14]
 [  9 117  35   5  16   1  27]
 [ 30  17 150   0  10   1   2]
 [ 76  47  61   6   5   1  14]
 [ 51  57  74   4  11   3  10]
 [ 31  44   7   4   1   3 120]
 [  1  12   0   2   0   1 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.27      0.35      0.30       210
           2       0.35      0.56      0.43       210
           3       0.39      0.71      0.50       210
           4       0.18      0.03      0.05       210
           5       0.22      0.05      0.08       210
           6       0.27      0.01      0.03       210
           7       0.51      0.92      0.66       210

    accuracy                           0.38      1470
   macro avg       0.31      0.38      0.29      1470
weighted avg       0.31      0.38      0.29      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.45918367346938777
Confusion Matrix of Random Forest is:
 [[ 92  19  10  30  36  17   6]
 [  9  91   5  26  39  24  16]
 [ 18   7 139  26  18   2   0]
 [ 49  17  10  67  43  14  10]
 [ 30  27  10  45  73  18   7]
 [ 14  18   3  18  16  53  88]
 [  0   7   1   2   1  39 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.43      0.44      0.44       210
           2       0.49      0.43      0.46       210
           3       0.78      0.66      0.72       210
           4       0.31      0.32      0.32       210
           5       0.32      0.35      0.33       210
           6       0.32      0.25      0.28       210
           7       0.56      0.76      0.64       210

    accuracy                           0.46      1470
   macro avg       0.46      0.46      0.46      1470
weighted avg       0.46      0.46      0.46      1470

Random Forest with 11 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.4448979591836735
Confusion Matrix of Random Forest is:
 [[ 77  22  10  43  34  19   5]
 [ 11  99   9  25  34  18  14]
 [ 17   2 153  16  15   6   1]
 [ 49  19  17  61  41  13  10]
 [ 31  32  14  45  63  16   9]
 [ 13  20   3  21  14  63  76]
 [  3   8   1   5   4  51 138]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.37      0.37       210
           2       0.49      0.47      0.48       210
           3       0.74      0.73      0.73       210
           4       0.28      0.29      0.29       210
           5       0.31      0.30      0.30       210
           6       0.34      0.30      0.32       210
           7       0.55      0.66      0.60       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.44      1470
weighted avg       0.44      0.44      0.44      1470

Random Forest with 19 max_depth
Acc

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_vbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4435374149659864
Confusion Matrix of Logistic Regression is:
 [[130   5   9   0  42   1  23]
 [ 17  65  88   0  18   3  19]
 [  7   0 193   0   8   1   1]
 [ 44  16  33   0  61   2  54]
 [ 53   9  22   0  81   0  45]
 [ 18  20  27   0  15   2 128]
 [  2  19   1   0   7   0 181]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.48      0.62      0.54       210
           2       0.49      0.31      0.38       210
           3       0.52      0.92      0.66       210
           4       0.00      0.00      0.00       210
           5       0.35      0.39      0.37       210
           6       0.22      0.01      0.02       210
           7       0.40      0.86      0.55       210

    accuracy                           0.44      1470
   macro avg       0.35      0.44      0.36      1470
weighted avg       0.35      0.44      0.36      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[137   8   3  26  24  10   2]
 [  2 148   8  23  10  12   7]
 [  3  13 175   8   6   5   0]
 [ 27  16  16  82  36  28   5]
 [ 55  14  11  38  74  12   6]
 [ 20  26  13  29   4  62  56]
 [  7   9   1   6   7  48 132]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.55      0.65      0.59       210
           2       0.63      0.70      0.67       210
           3       0.77      0.83      0.80       210
           4       0.39      0.39      0.39       210
           5       0.46      0.35      0.40       210
           6       0.35      0.30      0.32       210
           7       0.63      0.63      0.63       210

    accuracy                           0.55      1470
   macro avg       0.54      0.55      0.54      1470
weighted avg       0.54      0.55      0.54      1470

KNN with 5 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.5578231292517006
Confusion Matrix of KNN Mo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.5210884353741496
Confusion Matrix of SVM is:
 [[ 91   3   1  49  52  12   2]
 [  9 119   9  41  14  13   5]
 [  1   3 158  34   9   5   0]
 [ 17   7  10  98  49  24   5]
 [ 17   4   2  79  95   9   4]
 [ 10  12  12  48  12  62  54]
 [  0   5   0  10   8  44 143]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.63      0.43      0.51       210
           2       0.78      0.57      0.66       210
           3       0.82      0.75      0.79       210
           4       0.27      0.47      0.34       210
           5       0.40      0.45      0.42       210
           6       0.37      0.30      0.33       210
           7       0.67      0.68      0.68       210

    accuracy                           0.52      1470
   macro avg       0.56      0.52      0.53      1470
weighted avg       0.56      0.52      0.53      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[118   4   5  20  48  12   3]
 [  4 119   9  40  20  13   5]
 [  3  10 170  12  10   5   0]
 [ 26  11   8  57  61  41   6]
 [ 50   7   5  39  84  18   7]
 [ 12  13  10  27  21  59  68]
 [  6  12   0   8  10  47 127]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.54      0.56      0.55       210
           2       0.68      0.57      0.62       210
           3       0.82      0.81      0.82       210
           4       0.28      0.27      0.28       210
           5       0.33      0.40      0.36       210
           6       0.30      0.28      0.29       210
           7       0.59      0.60      0.60       210

    accuracy                           0.50      1470
   macro avg       0.51      0.50      0.50      1470
weighted avg       0.51      0.50      0.50      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5020408163265306
Con

Confusion Matrix of Decision Tree is:
 [[110   5   5  31  44  12   3]
 [  7 124  13  29  12  18   7]
 [  6   7 170  13   5   9   0]
 [ 31   9   9  61  50  44   6]
 [ 43  15   8  42  67  22  13]
 [  9  16   9  32  19  63  62]
 [  3   8   0   8  15  45 131]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.52      0.53       210
           2       0.67      0.59      0.63       210
           3       0.79      0.81      0.80       210
           4       0.28      0.29      0.29       210
           5       0.32      0.32      0.32       210
           6       0.30      0.30      0.30       210
           7       0.59      0.62      0.61       210

    accuracy                           0.49      1470
   macro avg       0.50      0.49      0.49      1470
weighted avg       0.50      0.49      0.49      1470

Decision Tree with 20 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.49115646258503404
Co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.45170068027210886
Confusion Matrix of Random Forest is:
 [[134   3   8   0  46   2  17]
 [  9  93  58   0  22   0  28]
 [  4   3 161   1  36   0   5]
 [ 31  25  26   1  75   4  48]
 [ 57  15   4   0  90   0  44]
 [ 10  30  22   0  18   6 124]
 [  0  24   1   0   6   0 179]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.64      0.59       210
           2       0.48      0.44      0.46       210
           3       0.57      0.77      0.66       210
           4       0.50      0.00      0.01       210
           5       0.31      0.43      0.36       210
           6       0.50      0.03      0.05       210
           7       0.40      0.85      0.55       210

    accuracy                           0.45      1470
   macro avg       0.47      0.45      0.38      1470
weighted avg       0.47      0.45      0.38      1470

Random Forest with 3 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5768707482993197
Confusion Matrix of Random Forest is:
 [[118   5   2  23  45  14   3]
 [  3 144   9  21  15  10   8]
 [  4   5 178   8  11   4   0]
 [ 20  14   8  79  55  22  12]
 [ 32   9   5  31 111  12  10]
 [ 12  21  11  20  19  55  72]
 [  5   6   0   5   6  25 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.56      0.58       210
           2       0.71      0.69      0.70       210
           3       0.84      0.85      0.84       210
           4       0.42      0.38      0.40       210
           5       0.42      0.53      0.47       210
           6       0.39      0.26      0.31       210
           7       0.61      0.78      0.68       210

    accuracy                           0.58      1470
   macro avg       0.57      0.58      0.57      1470
weighted avg       0.57      0.58      0.57      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5591836734693878
Confusion Matrix of Random Forest is:
 [[122   6   2  22  45  11   2]
 [  4 138   8  24  14  15   7]
 [  5   6 176   8  11   4   0]
 [ 24  14   8  72  52  28  12]
 [ 39  11   4  42  95  10   9]
 [ 12  19  13  24  14  62  66]
 [  4   6   0   6   4  33 157]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.58      0.58       210
           2       0.69      0.66      0.67       210
           3       0.83      0.84      0.84       210
           4       0.36      0.34      0.35       210
           5       0.40      0.45      0.43       210
           6       0.38      0.30      0.33       210
           7       0.62      0.75      0.68       210

    accuracy                           0.56      1470
   macro avg       0.55      0.56      0.55      1470
weighted avg       0.55      0.56      0.55      1470

Random Forest with 19 max_depth
Acc

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//gpt_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4095238095238095
Confusion Matrix of Logistic Regression is:
 [[ 79   7  23  12  38   7  44]
 [ 18  75  60   8  24   2  23]
 [ 17   0 178   3   1   3   8]
 [ 35  27  34  17  27   3  67]
 [ 52  24   8  15  64   1  46]
 [ 18  21  33   7   4   5 122]
 [  2  16   5   1   0   2 184]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.36      0.38      0.37       210
           2       0.44      0.36      0.39       210
           3       0.52      0.85      0.65       210
           4       0.27      0.08      0.12       210
           5       0.41      0.30      0.35       210
           6       0.22      0.02      0.04       210
           7       0.37      0.88      0.52       210

    accuracy                           0.41      1470
   macro avg       0.37      0.41      0.35      1470
weighted avg       0.37      0.41      0.35      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.40272108843537413
Confusion Matrix of SVM is:
 [[ 83   6  20  11   8  36  46]
 [ 14  75  56  16   2  27  20]
 [ 15   0 173   1   0  13   8]
 [ 30  18  30  25   1  30  76]
 [ 60  27   4  19  12  31  57]
 [ 15  15  23   4   0  30 123]
 [  0   7   1   0   0   8 194]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.38      0.40      0.39       210
           2       0.51      0.36      0.42       210
           3       0.56      0.82      0.67       210
           4       0.33      0.12      0.17       210
           5       0.52      0.06      0.10       210
           6       0.17      0.14      0.16       210
           7       0.37      0.92      0.53       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.35      1470
weighted avg       0.41      0.40      0.35      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 79   6   6  23  60  17  19]
 [ 12 108   3  27  24  23  13]
 [ 12   6 164   7   4  15   2]
 [ 28  19  13  51  37  27  35]
 [ 37  27   1  29  79  23  14]
 [ 17  23   8  26  13  64  59]
 [  6  13   0  18   6  44 123]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.41      0.38      0.39       210
           2       0.53      0.51      0.52       210
           3       0.84      0.78      0.81       210
           4       0.28      0.24      0.26       210
           5       0.35      0.38      0.36       210
           6       0.30      0.30      0.30       210
           7       0.46      0.59      0.52       210

    accuracy                           0.45      1470
   macro avg       0.46      0.45      0.45      1470
weighted avg       0.46      0.45      0.45      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.46190476190476193
Co

Confusion Matrix of Decision Tree is:
 [[ 89   9   3  31  47  17  14]
 [  9 109   4  37  23  17  11]
 [  8   7 167  10   5  11   2]
 [ 19  27  16  48  42  24  34]
 [ 33  29   4  37  75  17  15]
 [ 22  17  12  24  14  61  60]
 [ 15  16   2  15  13  43 106]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.42      0.44       210
           2       0.51      0.52      0.51       210
           3       0.80      0.80      0.80       210
           4       0.24      0.23      0.23       210
           5       0.34      0.36      0.35       210
           6       0.32      0.29      0.31       210
           7       0.44      0.50      0.47       210

    accuracy                           0.45      1470
   macro avg       0.44      0.45      0.44      1470
weighted avg       0.44      0.45      0.44      1470

Decision Tree with 20 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.44421768707482995
Co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3687074829931973
Confusion Matrix of Random Forest is:
 [[ 28   1  22   0  94   0  65]
 [  0  32  36   0 111   0  31]
 [  5   2 157   0  29   2  15]
 [  5   5  18   0  98   0  84]
 [  7   5   1   0 139   0  58]
 [  3   4  27   0  38   1 137]
 [  0   3   7   0  15   0 185]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.13      0.22       210
           2       0.62      0.15      0.24       210
           3       0.59      0.75      0.66       210
           4       0.00      0.00      0.00       210
           5       0.27      0.66      0.38       210
           6       0.33      0.00      0.01       210
           7       0.32      0.88      0.47       210

    accuracy                           0.37      1470
   macro avg       0.39      0.37      0.28      1470
weighted avg       0.39      0.37      0.28      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.43537414965986393
Confusion Matrix of Random Forest is:
 [[ 84  10   7   3  53  13  40]
 [  3 106  15   4  55   0  27]
 [  9  20 151  16   1   3  10]
 [ 19  23  16  15  58   4  75]
 [ 37  30   1   1  93   2  46]
 [ 10  33  18   2  14  12 121]
 [  1  20   2   2   3   3 179]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.40      0.45       210
           2       0.44      0.50      0.47       210
           3       0.72      0.72      0.72       210
           4       0.35      0.07      0.12       210
           5       0.34      0.44      0.38       210
           6       0.32      0.06      0.10       210
           7       0.36      0.85      0.51       210

    accuracy                           0.44      1470
   macro avg       0.43      0.44      0.39      1470
weighted avg       0.43      0.44      0.39      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5136054421768708
Confusion Matrix of Random Forest is:
 [[ 95   4   3  14  59  15  20]
 [  5 124   5  20  29  16  11]
 [  7   5 169  13   4  11   1]
 [ 22  15  14  58  35  24  42]
 [ 31  23   0  29  91  12  24]
 [ 11  15  12  17  14  62  79]
 [  3   7   1  12   3  28 156]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.45      0.49       210
           2       0.64      0.59      0.62       210
           3       0.83      0.80      0.82       210
           4       0.36      0.28      0.31       210
           5       0.39      0.43      0.41       210
           6       0.37      0.30      0.33       210
           7       0.47      0.74      0.57       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4959183673469388
Confusion Matrix of Random Forest is:
 [[ 96   3   2  22  54  20  13]
 [  3 121   5  22  30  15  14]
 [  9   4 168  11   5  12   1]
 [ 20  17  14  58  40  24  37]
 [ 32  23   0  36  87   7  25]
 [ 11  16  11  16  18  61  77]
 [  2   7   0   9   6  48 138]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.46      0.50       210
           2       0.63      0.58      0.60       210
           3       0.84      0.80      0.82       210
           4       0.33      0.28      0.30       210
           5       0.36      0.41      0.39       210
           6       0.33      0.29      0.31       210
           7       0.45      0.66      0.54       210

    accuracy                           0.50      1470
   macro avg       0.50      0.50      0.49      1470
weighted avg       0.50      0.50      0.49      1470

Random Forest with 20 max_depth
Acc

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//xlm_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.5204081632653061
Confusion Matrix of Logistic Regression is:
 [[139   0   4   5  32  13  17]
 [  9  84  51  20  17  19  10]
 [ 14   2 181   1   0   8   4]
 [ 26  12  34  33  32  22  51]
 [ 52  22   9   8  93   1  25]
 [ 13   5  20   8   5  50 109]
 [  2   1   0   5   1  16 185]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.55      0.66      0.60       210
           2       0.67      0.40      0.50       210
           3       0.61      0.86      0.71       210
           4       0.41      0.16      0.23       210
           5       0.52      0.44      0.48       210
           6       0.39      0.24      0.29       210
           7       0.46      0.88      0.61       210

    accuracy                           0.52      1470
   macro avg       0.51      0.52      0.49      1470
weighted avg       0.51      0.52      0.49      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.5108843537414965
Confusion Matrix of SVM is:
 [[134   0   3   7  31  15  20]
 [  6  62  51  41  22  20   8]
 [ 12   2 180   1   2  10   3]
 [ 14  11  38  34  32  27  54]
 [ 44  22   8   8  91   3  34]
 [  5   5  16   7   6  60 111]
 [  0   1   0   1   0  18 190]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.62      0.64      0.63       210
           2       0.60      0.30      0.40       210
           3       0.61      0.86      0.71       210
           4       0.34      0.16      0.22       210
           5       0.49      0.43      0.46       210
           6       0.39      0.29      0.33       210
           7       0.45      0.90      0.60       210

    accuracy                           0.51      1470
   macro avg       0.50      0.51      0.48      1470
weighted avg       0.50      0.51      0.48      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.5129251700680272
Confusion Matrix of Decision Tree is:
 [[113   9   4  15  40  16  13]
 [  7 127   5  32  21  12   6]
 [  2  12 179   9   1   3   4]
 [ 20  24  15  71  20  27  33]
 [ 45  35   4  23  80  10  13]
 [ 12  11   7  25  10  62  83]
 [  4   5   0  13   7  59 122]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.56      0.54      0.55       210
           2       0.57      0.60      0.59       210
           3       0.84      0.85      0.84       210
           4       0.38      0.34      0.36       210
           5       0.45      0.38      0.41       210
           6       0.33      0.30      0.31       210
           7       0.45      0.58      0.50       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 14 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4523809523809524
Confusion Matrix of Random Forest is:
 [[139  33   5   0   2   0  31]
 [  6 167   9   0   1   0  27]
 [ 12  27 159   0   0   0  12]
 [ 23 101  13   0   2   0  71]
 [ 58 127   0   0   9   0  16]
 [  7  33  14   0   1   0 155]
 [  3  16   0   0   0   0 191]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.56      0.66      0.61       210
           2       0.33      0.80      0.47       210
           3       0.80      0.76      0.78       210
           4       0.00      0.00      0.00       210
           5       0.60      0.04      0.08       210
           6       0.00      0.00      0.00       210
           7       0.38      0.91      0.54       210

    accuracy                           0.45      1470
   macro avg       0.38      0.45      0.35      1470
weighted avg       0.38      0.45      0.35      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4850340136054422
Confusion Matrix of Random Forest is:
 [[136  13   4   0  21   6  30]
 [  7 159   5   0  12   9  18]
 [ 12  27 159   0   0   6   6]
 [ 20  66  13   0  35  13  63]
 [ 60  85   0   0  47   0  18]
 [  6  30  15   0   3  22 134]
 [  0   8   0   0   8   4 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.56      0.65      0.60       210
           2       0.41      0.76      0.53       210
           3       0.81      0.76      0.78       210
           4       0.00      0.00      0.00       210
           5       0.37      0.22      0.28       210
           6       0.37      0.10      0.16       210
           7       0.41      0.90      0.57       210

    accuracy                           0.49      1470
   macro avg       0.42      0.49      0.42      1470
weighted avg       0.42      0.49      0.42      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.5034013605442177
Confusion Matrix of Random Forest is:
 [[138  12   3   0  22  14  21]
 [  6 161   4   2  10  17  10]
 [ 12  37 150   0   0   8   3]
 [ 20  65  13   0  36  26  50]
 [ 58  73   0   0  62   1  16]
 [  6  29   4   0   4  52 115]
 [  0   7   0   0   9  17 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.66      0.61       210
           2       0.42      0.77      0.54       210
           3       0.86      0.71      0.78       210
           4       0.00      0.00      0.00       210
           5       0.43      0.30      0.35       210
           6       0.39      0.25      0.30       210
           7       0.45      0.84      0.59       210

    accuracy                           0.50      1470
   macro avg       0.45      0.50      0.45      1470
weighted avg       0.45      0.50      0.45      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.573469387755102
Confusion Matrix of Random Forest is:
 [[130   2   2  11  35  17  13]
 [  2 129   4  33  17  21   4]
 [  4  14 175   7   2   8   0]
 [  9  18   9  86  18  43  27]
 [ 37  31   0  21 107   6   8]
 [  7   8   8  22   7  71  87]
 [  0   3   0   2   7  53 145]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.62      0.65       210
           2       0.63      0.61      0.62       210
           3       0.88      0.83      0.86       210
           4       0.47      0.41      0.44       210
           5       0.55      0.51      0.53       210
           6       0.32      0.34      0.33       210
           7       0.51      0.69      0.59       210

    accuracy                           0.57      1470
   macro avg       0.58      0.57      0.57      1470
weighted avg       0.58      0.57      0.57      1470

Random Forest with 12 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5789115646258504
Confusion Matrix of Random Forest is:
 [[132   3   2   8  38  18   9]
 [  2 134   5  33  13  21   2]
 [  3   6 184   9   2   6   0]
 [  6  17  13  87  25  36  26]
 [ 38  27   0  23 104   9   9]
 [  8  10   9  21   7  75  80]
 [  2   5   0   6   3  59 135]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.63      0.66       210
           2       0.66      0.64      0.65       210
           3       0.86      0.88      0.87       210
           4       0.47      0.41      0.44       210
           5       0.54      0.50      0.52       210
           6       0.33      0.36      0.35       210
           7       0.52      0.64      0.57       210

    accuracy                           0.58      1470
   macro avg       0.58      0.58      0.58      1470
weighted avg       0.58      0.58      0.58      1470

Random Forest with 20 max_depth
Acc

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//bert_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],7)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3578231292517007
Confusion Matrix of Logistic Regression is:
 [[ 31  42  11   1  71   7  47]
 [  4  87  44   0  61   3  11]
 [ 13  33 142   0  14   3   5]
 [  1  52  19   1  61   5  71]
 [  2  31  10   1 126   0  40]
 [  5  39  17   1  52  10  86]
 [  1  19   5   0  49   7 129]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.54      0.15      0.23       210
           2       0.29      0.41      0.34       210
           3       0.57      0.68      0.62       210
           4       0.25      0.00      0.01       210
           5       0.29      0.60      0.39       210
           6       0.29      0.05      0.08       210
           7       0.33      0.61      0.43       210

    accuracy                           0.36      1470
   macro avg       0.37      0.36      0.30      1470
weighted avg       0.37      0.36      0.30      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.3122448979591837
Confusion Matrix of SVM is:
 [[ 12  91   0   2  73   0  32]
 [  0 128  14   0  62   1   5]
 [  9 100  80   1  18   0   2]
 [  0  88   3   0  69   0  50]
 [  1  40   0   0 136   0  33]
 [  4  82   1   0  58   1  64]
 [  0  50   0   0  58   0 102]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.46      0.06      0.10       210
           2       0.22      0.61      0.32       210
           3       0.82      0.38      0.52       210
           4       0.00      0.00      0.00       210
           5       0.29      0.65      0.40       210
           6       0.50      0.00      0.01       210
           7       0.35      0.49      0.41       210

    accuracy                           0.31      1470
   macro avg       0.38      0.31      0.25      1470
weighted avg       0.38      0.31      0.25      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.20      0.26       210
           2       0.43      0.50      0.47       210
           3       0.75      0.73      0.74       210
           4       0.23      0.20      0.21       210
           5       0.39      0.41      0.40       210
           6       0.26      0.14      0.18       210
           7       0.34      0.60      0.43       210

    accuracy                           0.40      1470
   macro avg       0.39      0.40      0.39      1470
weighted avg       0.39      0.40      0.39      1470

Decision Tree with 11 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4
Confusion Matrix of Decision Tree is:
 [[ 57  23   2  26  36  20  46]
 [ 19  99  15  14  33  15  15]
 [ 16  17 148   9   4   5  11]
 [ 22  22   7  40  22  36  61]
 [ 28  25   2  30  81  10  34]
 [ 20  20  11  26  22  43  68]
 [ 12  10   0  23  18  27 120]]
Classification Rep

Accuracy of Random Forest after PCA and ICA is: 0.3326530612244898
Confusion Matrix of Random Forest is:
 [[ 11  32  22   0  64   0  81]
 [  0  81  46   0  59   0  24]
 [  0  26 157   0  19   0   8]
 [  0  33  27   0  66   2  82]
 [  0  32  11   0 111   0  56]
 [  1  29  20   0  46   0 114]
 [  0  16   3   0  62   0 129]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.92      0.05      0.10       210
           2       0.33      0.39      0.35       210
           3       0.55      0.75      0.63       210
           4       0.00      0.00      0.00       210
           5       0.26      0.53      0.35       210
           6       0.00      0.00      0.00       210
           7       0.26      0.61      0.37       210

    accuracy                           0.33      1470
   macro avg       0.33      0.33      0.26      1470
weighted avg       0.33      0.33      0.26      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3442176870748299
Confusion Matrix of Random Forest is:
 [[ 18  30  23   0  71   9  59]
 [  1  81  45   0  61   5  17]
 [  0  25 155   0  22   3   5]
 [  1  33  27   0  68  13  68]
 [  1  31  11   0 119   1  47]
 [  1  29  20   0  49  14  97]
 [  0  15   3   0  64   9 119]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.09      0.16       210
           2       0.33      0.39      0.36       210
           3       0.55      0.74      0.63       210
           4       0.00      0.00      0.00       210
           5       0.26      0.57      0.36       210
           6       0.26      0.07      0.11       210
           7       0.29      0.57      0.38       210

    accuracy                           0.34      1470
   macro avg       0.36      0.34      0.28      1470
weighted avg       0.36      0.34      0.28      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.35918367346938773
Confusion Matrix of Random Forest is:
 [[ 19  46   8   0  71  11  55]
 [  1 104  24   0  61   3  17]
 [  0  34 149   0  19   3   5]
 [  0  40  21   0  67  14  68]
 [  1  40   4   0 120   1  44]
 [  1  33  16   0  49  13  98]
 [  0  17   2   1  61   6 123]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.86      0.09      0.16       210
           2       0.33      0.50      0.40       210
           3       0.67      0.71      0.69       210
           4       0.00      0.00      0.00       210
           5       0.27      0.57      0.36       210
           6       0.25      0.06      0.10       210
           7       0.30      0.59      0.40       210

    accuracy                           0.36      1470
   macro avg       0.38      0.36      0.30      1470
weighted avg       0.38      0.36      0.30      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.45918367346938777
Confusion Matrix of Random Forest is:
 [[ 46  19   2  24  55  22  42]
 [  6 119   6  16  44   5  14]
 [  2  11 167   8   8  10   4]
 [  6  18   9  55  39  34  49]
 [  9  23   4  16 112   9  37]
 [ 13  15   6  24  23  58  71]
 [  1   6   0  15  27  43 118]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.22      0.31       210
           2       0.56      0.57      0.57       210
           3       0.86      0.80      0.83       210
           4       0.35      0.26      0.30       210
           5       0.36      0.53      0.43       210
           6       0.32      0.28      0.30       210
           7       0.35      0.56      0.43       210

    accuracy                           0.46      1470
   macro avg       0.48      0.46      0.45      1470
weighted avg       0.48      0.46      0.45      1470

Random Forest with 12 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.46190476190476193
Confusion Matrix of Random Forest is:
 [[ 60  20   2  23  49  31  25]
 [ 10 120  12  15  30  10  13]
 [  4   9 166  10   9   8   4]
 [  7  17   8  49  40  46  43]
 [ 17  22   5  15 106  11  34]
 [ 18  10   8  23  24  60  67]
 [  5   9   0  15  20  43 118]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.50      0.29      0.36       210
           2       0.58      0.57      0.58       210
           3       0.83      0.79      0.81       210
           4       0.33      0.23      0.27       210
           5       0.38      0.50      0.43       210
           6       0.29      0.29      0.29       210
           7       0.39      0.56      0.46       210

    accuracy                           0.46      1470
   macro avg       0.47      0.46      0.46      1470
weighted avg       0.47      0.46      0.46      1470

Random Forest with 20 max_depth
Ac

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4142857142857143
Confusion Matrix of Logistic Regression is:
 [[  1  67  33   0  85   0  24]
 [  0 103  20   0  54   1  32]
 [  1  15 172   1  21   0   0]
 [  0  32  21   1 127   2  27]
 [  0  26   4   0 141   1  38]
 [  0  45  28   0  19   1 117]
 [  0  16   1   0   2   1 190]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.50      0.00      0.01       210
           2       0.34      0.49      0.40       210
           3       0.62      0.82      0.70       210
           4       0.50      0.00      0.01       210
           5       0.31      0.67      0.43       210
           6       0.17      0.00      0.01       210
           7       0.44      0.90      0.60       210

    accuracy                           0.41      1470
   macro avg       0.41      0.41      0.31      1470
weighted avg       0.41      0.41      0.31      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.3802721088435374
Confusion Matrix of SVM is:
 [[  1 100   5   0  96   0   8]
 [  0 139   6   0  57   0   8]
 [  1  25 134   2  48   0   0]
 [  0  55   6   0 135   1  13]
 [  0  36   0   0 150   0  24]
 [  0 103  13   0  24   1  69]
 [  0  72   1   0   3   0 134]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.50      0.00      0.01       210
           2       0.26      0.66      0.38       210
           3       0.81      0.64      0.71       210
           4       0.00      0.00      0.00       210
           5       0.29      0.71      0.41       210
           6       0.50      0.00      0.01       210
           7       0.52      0.64      0.58       210

    accuracy                           0.38      1470
   macro avg       0.41      0.38      0.30      1470
weighted avg       0.41      0.38      0.30      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.4530612244897959
Confusion Matrix of Decision Tree is:
 [[ 74  27   5  44  34  19   7]
 [ 18 105   4  29  21  24   9]
 [ 11   3 166  11   7  11   1]
 [ 27  16   7  56  58  38   8]
 [ 18  20   4  62  74  17  15]
 [ 24  25   6  23  11  62  59]
 [  7  18   0   5   6  45 129]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.41      0.35      0.38       210
           2       0.49      0.50      0.50       210
           3       0.86      0.79      0.83       210
           4       0.24      0.27      0.25       210
           5       0.35      0.35      0.35       210
           6       0.29      0.30      0.29       210
           7       0.57      0.61      0.59       210

    accuracy                           0.45      1470
   macro avg       0.46      0.45      0.46      1470
weighted avg       0.46      0.45      0.46      1470

Decision Tree with 14 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.3727891156462585
Confusion Matrix of Random Forest is:
 [[ 97   0   9   0  63   1  40]
 [ 91   1   8   0  56   1  53]
 [ 58   0 138   0   9   0   5]
 [ 89   0  10   0  87   0  24]
 [ 65   0   1   0 126   0  18]
 [ 59   0  15   0  29   3 104]
 [ 13   1   3   0  10   0 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.21      0.46      0.28       210
           2       0.50      0.00      0.01       210
           3       0.75      0.66      0.70       210
           4       0.00      0.00      0.00       210
           5       0.33      0.60      0.43       210
           6       0.60      0.01      0.03       210
           7       0.43      0.87      0.57       210

    accuracy                           0.37      1470
   macro avg       0.40      0.37      0.29      1470
weighted avg       0.40      0.37      0.29      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.40272108843537413
Confusion Matrix of Random Forest is:
 [[  9  25   9  63  62   1  41]
 [  5  53   8  32  54   2  56]
 [  3  11 138  44   9   0   5]
 [  2  11  10  74  86   2  25]
 [  0   9   1  55 123   1  21]
 [  7  12  12  37  25   5 112]
 [  0   3   1   6   6   4 190]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.35      0.04      0.08       210
           2       0.43      0.25      0.32       210
           3       0.77      0.66      0.71       210
           4       0.24      0.35      0.28       210
           5       0.34      0.59      0.43       210
           6       0.33      0.02      0.04       210
           7       0.42      0.90      0.58       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.35      1470
weighted avg       0.41      0.40      0.35      1470

Random Forest with 3 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4965986394557823
Confusion Matrix of Random Forest is:
 [[ 63  22   9  40  40  26  10]
 [ 10 119   5  14  27  20  15]
 [ 10   4 166  17   4   9   0]
 [ 24  15   9  57  67  28  10]
 [ 17  21   3  44  97  11  17]
 [ 19  19   7  15   6  67  77]
 [  4   6   1   1   2  35 161]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.43      0.30      0.35       210
           2       0.58      0.57      0.57       210
           3       0.83      0.79      0.81       210
           4       0.30      0.27      0.29       210
           5       0.40      0.46      0.43       210
           6       0.34      0.32      0.33       210
           7       0.56      0.77      0.64       210

    accuracy                           0.50      1470
   macro avg       0.49      0.50      0.49      1470
weighted avg       0.49      0.50      0.49      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.49047619047619045
Confusion Matrix of Random Forest is:
 [[ 72  21  12  40  31  27   7]
 [ 12 118   5  16  28  21  10]
 [ 11   5 165  15   6   8   0]
 [ 28  13  10  61  60  31   7]
 [ 25  20   3  51  85  10  16]
 [ 18  23   5  17   6  65  76]
 [  8   6   1   4   3  33 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.34      0.38       210
           2       0.57      0.56      0.57       210
           3       0.82      0.79      0.80       210
           4       0.30      0.29      0.29       210
           5       0.39      0.40      0.40       210
           6       0.33      0.31      0.32       210
           7       0.57      0.74      0.64       210

    accuracy                           0.49      1470
   macro avg       0.49      0.49      0.49      1470
weighted avg       0.49      0.49      0.49      1470

Random Forest with 19 max_depth
Ac

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4272108843537415
Confusion Matrix of Logistic Regression is:
 [[ 95   3  22   0  47   8  35]
 [ 11  91  61   0  30   1  16]
 [ 16   5 176   0   5   2   6]
 [ 33  25  41   0  35   7  69]
 [ 62  13   5   0  85   2  43]
 [ 23  21  30   0  19  10 107]
 [  1  17   4   0   6  11 171]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.39      0.45      0.42       210
           2       0.52      0.43      0.47       210
           3       0.52      0.84      0.64       210
           4       0.00      0.00      0.00       210
           5       0.37      0.40      0.39       210
           6       0.24      0.05      0.08       210
           7       0.38      0.81      0.52       210

    accuracy                           0.43      1470
   macro avg       0.35      0.43      0.36      1470
weighted avg       0.35      0.43      0.36      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[124   7   1  16  41  17   4]
 [ 13 134  12  19  20   8   4]
 [  8  13 169  10   3   5   2]
 [ 26  27  14  62  26  35  20]
 [ 56  31   6  20  75  10  12]
 [ 25  26  11  35   7  62  44]
 [ 17  10   1  13   9  43 117]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.46      0.59      0.52       210
           2       0.54      0.64      0.59       210
           3       0.79      0.80      0.80       210
           4       0.35      0.30      0.32       210
           5       0.41      0.36      0.38       210
           6       0.34      0.30      0.32       210
           7       0.58      0.56      0.57       210

    accuracy                           0.51      1470
   macro avg       0.50      0.51      0.50      1470
weighted avg       0.50      0.51      0.50      1470

KNN with 5 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.5204081632653061
Confusion Matrix of KNN Mo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.38571428571428573
Confusion Matrix of SVM is:
 [[ 63   5  31  29  17  33  32]
 [  1  49 114  16   3  10  17]
 [  1   1 190   3   1   9   5]
 [  6  19  53  27   3  43  59]
 [ 33  18  22  37  30  18  52]
 [  5  13  33  17   0  39 103]
 [  0   7   5   1   0  28 169]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.58      0.30      0.39       210
           2       0.44      0.23      0.30       210
           3       0.42      0.90      0.58       210
           4       0.21      0.13      0.16       210
           5       0.56      0.14      0.23       210
           6       0.22      0.19      0.20       210
           7       0.39      0.80      0.52       210

    accuracy                           0.39      1470
   macro avg       0.40      0.39      0.34      1470
weighted avg       0.40      0.39      0.34      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 91  10   3  19  53  27   7]
 [ 13 117   9  35  19  13   4]
 [  5   9 165   7  10  10   4]
 [ 20  27  16  53  27  43  24]
 [ 57  17   3  31  80   8  14]
 [ 13  13   8  30  21  72  53]
 [  5   5   0  25  11  51 113]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.43      0.44       210
           2       0.59      0.56      0.57       210
           3       0.81      0.79      0.80       210
           4       0.27      0.25      0.26       210
           5       0.36      0.38      0.37       210
           6       0.32      0.34      0.33       210
           7       0.52      0.54      0.53       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.46190476190476193
Co

Accuracy of Random Forest after PCA and ICA is: 0.32448979591836735
Confusion Matrix of Random Forest is:
 [[102   0  30   0  55   0  23]
 [ 32   0  61   0 102   0  15]
 [ 22   0 166   0  10   0  12]
 [ 26   0  37   0  98   0  49]
 [ 84   0   2   0 101   0  23]
 [ 13   0  26   0  80   0  91]
 [  4   0   4   0  94   0 108]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.36      0.49      0.41       210
           2       0.00      0.00      0.00       210
           3       0.51      0.79      0.62       210
           4       0.00      0.00      0.00       210
           5       0.19      0.48      0.27       210
           6       0.00      0.00      0.00       210
           7       0.34      0.51      0.41       210

    accuracy                           0.32      1470
   macro avg       0.20      0.32      0.24      1470
weighted avg       0.20      0.32      0.24      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.373469387755102
Confusion Matrix of Random Forest is:
 [[110  40  15   0  21   0  24]
 [ 24 131  26   0  15   0  14]
 [ 24  13 160   0   2   0  11]
 [ 27  93  18   0  22   0  50]
 [ 85  61   0   0  38   0  26]
 [ 10  79  19   0   9   0  93]
 [  0  96   2   0   2   0 110]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.52      0.45       210
           2       0.26      0.62      0.36       210
           3       0.67      0.76      0.71       210
           4       0.00      0.00      0.00       210
           5       0.35      0.18      0.24       210
           6       0.00      0.00      0.00       210
           7       0.34      0.52      0.41       210

    accuracy                           0.37      1470
   macro avg       0.29      0.37      0.31      1470
weighted avg       0.29      0.37      0.31      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.46598639455782315
Confusion Matrix of Random Forest is:
 [[116  14   5  15  37   0  23]
 [ 13 123  15  19  26   0  14]
 [ 18   4 165   6   7   1   9]
 [ 26  37  21  34  37   1  54]
 [ 69  22   2  14  73   0  30]
 [ 12  18  17  36  12   3 112]
 [  0   9   2  15  13   0 171]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.46      0.55      0.50       210
           2       0.54      0.59      0.56       210
           3       0.73      0.79      0.76       210
           4       0.24      0.16      0.19       210
           5       0.36      0.35      0.35       210
           6       0.60      0.01      0.03       210
           7       0.41      0.81      0.55       210

    accuracy                           0.47      1470
   macro avg       0.48      0.47      0.42      1470
weighted avg       0.48      0.47      0.42      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5503401360544218
Confusion Matrix of Random Forest is:
 [[106   7   1  23  39  27   7]
 [  3 132   7  19  25  19   5]
 [  9   4 171   8   5  10   3]
 [ 18  31  12  60  24  36  29]
 [ 37  15   1  29 105   6  17]
 [  9  11   9  29   8  83  61]
 [  4   7   0   8   5  34 152]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.50      0.54       210
           2       0.64      0.63      0.63       210
           3       0.85      0.81      0.83       210
           4       0.34      0.29      0.31       210
           5       0.50      0.50      0.50       210
           6       0.39      0.40      0.39       210
           7       0.55      0.72      0.63       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.536734693877551
Confusion Matrix of Random Forest is:
 [[105   7   1  19  47  22   9]
 [  5 138   5  22  22  14   4]
 [  6   7 171  10   7   7   2]
 [ 22  27  14  54  24  40  29]
 [ 39  14   1  32  98   8  18]
 [  8  17  10  24  10  83  58]
 [  4   6   1   8   8  43 140]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.56      0.50      0.53       210
           2       0.64      0.66      0.65       210
           3       0.84      0.81      0.83       210
           4       0.32      0.26      0.28       210
           5       0.45      0.47      0.46       210
           6       0.38      0.40      0.39       210
           7       0.54      0.67      0.60       210

    accuracy                           0.54      1470
   macro avg       0.53      0.54      0.53      1470
weighted avg       0.53      0.54      0.53      1470

Random Forest with 20 max_depth
Accu

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3795918367346939
Confusion Matrix of Logistic Regression is:
 [[ 81   0  19   0  62   0  48]
 [ 20  12  86   0  46   1  45]
 [ 28   0 164   0   5   0  13]
 [ 28   6  24   0  66   0  86]
 [ 46   1   6   0 101   1  55]
 [  7   6  16   0  34   1 146]
 [  2   5   0   0   4   0 199]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.38      0.39      0.38       210
           2       0.40      0.06      0.10       210
           3       0.52      0.78      0.62       210
           4       0.00      0.00      0.00       210
           5       0.32      0.48      0.38       210
           6       0.33      0.00      0.01       210
           7       0.34      0.95      0.50       210

    accuracy                           0.38      1470
   macro avg       0.33      0.38      0.29      1470
weighted avg       0.33      0.38      0.29      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[107   8   4  23  38  21   9]
 [ 20 111  19  24   7  20   9]
 [ 12  17 162   8   4   7   0]
 [ 34  31  14  44  40  25  22]
 [ 68  13   7  30  63  16  13]
 [ 30  26  13  22  16  51  52]
 [ 14  19   1  12   6  52 106]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.38      0.51      0.43       210
           2       0.49      0.53      0.51       210
           3       0.74      0.77      0.75       210
           4       0.27      0.21      0.24       210
           5       0.36      0.30      0.33       210
           6       0.27      0.24      0.25       210
           7       0.50      0.50      0.50       210

    accuracy                           0.44      1470
   macro avg       0.43      0.44      0.43      1470
weighted avg       0.43      0.44      0.43      1470

KNN with 5 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.46122448979591835
Confusion Matrix of KNN M

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3625850340136054
Confusion Matrix of SVM is:
 [[ 49   2  13  20  43  10  73]
 [  7  29  30  38  17   3  86]
 [ 14   0 138  15   6   7  30]
 [  9   5  14  40  23   8 111]
 [ 20   1   2  35  66   5  81]
 [  3   5  11  19   8   1 163]
 [  0   0   0   0   0   0 210]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.48      0.23      0.31       210
           2       0.69      0.14      0.23       210
           3       0.66      0.66      0.66       210
           4       0.24      0.19      0.21       210
           5       0.40      0.31      0.35       210
           6       0.03      0.00      0.01       210
           7       0.28      1.00      0.44       210

    accuracy                           0.36      1470
   macro avg       0.40      0.36      0.32      1470
weighted avg       0.40      0.36      0.32      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.43673469387755104
Confusion Matrix of Decision Tree is:
 [[ 76  13   6  38  46  14  17]
 [ 14 101  12  32  13  26  12]
 [ 13   9 160   8   7  12   1]
 [ 19  18  13  57  43  27  33]
 [ 49   9   7  36  75  18  16]
 [ 13  16   9  36  27  45  64]
 [  5  12   1  13  16  35 128]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.36      0.38       210
           2       0.57      0.48      0.52       210
           3       0.77      0.76      0.77       210
           4       0.26      0.27      0.27       210
           5       0.33      0.36      0.34       210
           6       0.25      0.21      0.23       210
           7       0.47      0.61      0.53       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.43      1470
weighted avg       0.44      0.44      0.43      1470

Decision Tree with 14 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.354421768707483
Confusion Matrix of Random Forest is:
 [[110   1  13  51   4   0  31]
 [ 28  57  32  73   2   0  18]
 [ 34  14 142  10   2   0   8]
 [ 40  11  14  98   3   0  44]
 [ 86   1   2  90   4   0  27]
 [ 16  11  12  73   1   0  97]
 [  8   2   0  90   0   0 110]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.34      0.52      0.41       210
           2       0.59      0.27      0.37       210
           3       0.66      0.68      0.67       210
           4       0.20      0.47      0.28       210
           5       0.25      0.02      0.04       210
           6       0.00      0.00      0.00       210
           7       0.33      0.52      0.40       210

    accuracy                           0.35      1470
   macro avg       0.34      0.35      0.31      1470
weighted avg       0.34      0.35      0.31      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.37755102040816324
Confusion Matrix of Random Forest is:
 [[110  20  13  14  22   0  31]
 [ 28 127  15  18   5   0  17]
 [ 34  17 142   6   3   0   8]
 [ 41  63  13  27  23   0  43]
 [ 86  40   2  17  38   0  27]
 [ 16  60  12  15  10   1  96]
 [  8  86   0   6   0   0 110]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.34      0.52      0.41       210
           2       0.31      0.60      0.41       210
           3       0.72      0.68      0.70       210
           4       0.26      0.13      0.17       210
           5       0.38      0.18      0.24       210
           6       1.00      0.00      0.01       210
           7       0.33      0.52      0.41       210

    accuracy                           0.38      1470
   macro avg       0.48      0.38      0.34      1470
weighted avg       0.48      0.38      0.34      1470

Random Forest with 3 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.507482993197279
Confusion Matrix of Random Forest is:
 [[ 97   5   5  14  64  11  14]
 [  9 119  13  30  14  13  12]
 [ 10  16 169   3   5   7   0]
 [ 18  25   9  52  54  12  40]
 [ 37   6   3  24 109   6  25]
 [ 10  17  11  33  22  33  84]
 [  2   5   2   8   7  19 167]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.46      0.49       210
           2       0.62      0.57      0.59       210
           3       0.80      0.80      0.80       210
           4       0.32      0.25      0.28       210
           5       0.40      0.52      0.45       210
           6       0.33      0.16      0.21       210
           7       0.49      0.80      0.61       210

    accuracy                           0.51      1470
   macro avg       0.50      0.51      0.49      1470
weighted avg       0.50      0.51      0.49      1470

Random Forest with 11 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.48367346938775513
Confusion Matrix of Random Forest is:
 [[ 90   6   5  24  58  13  14]
 [ 11 110  14  27  15  21  12]
 [  8  14 170   5   5   8   0]
 [ 18  21  15  50  50  18  38]
 [ 37   6   4  25 101  16  21]
 [  9  18  11  31  21  47  73]
 [  5   9   2  12   6  33 143]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.43      0.46       210
           2       0.60      0.52      0.56       210
           3       0.77      0.81      0.79       210
           4       0.29      0.24      0.26       210
           5       0.39      0.48      0.43       210
           6       0.30      0.22      0.26       210
           7       0.48      0.68      0.56       210

    accuracy                           0.48      1470
   macro avg       0.48      0.48      0.47      1470
weighted avg       0.48      0.48      0.47      1470

Random Forest with 19 max_depth
Ac

In [24]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//xlm_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3523809523809524
Confusion Matrix of Logistic Regression is:
 [[ 55  26  19  18  57   5  30]
 [ 19  66  57  23  26   1  18]
 [ 10  20 165   6   6   0   3]
 [ 25  30  22  32  46   2  53]
 [ 16  26  17  23  83   1  44]
 [ 17  41  13  18  43   9  69]
 [ 14  22   2  13  45   6 108]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.35      0.26      0.30       210
           2       0.29      0.31      0.30       210
           3       0.56      0.79      0.65       210
           4       0.24      0.15      0.19       210
           5       0.27      0.40      0.32       210
           6       0.38      0.04      0.08       210
           7       0.33      0.51      0.40       210

    accuracy                           0.35      1470
   macro avg       0.35      0.35      0.32      1470
weighted avg       0.35      0.35      0.32      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.29183673469387755
Confusion Matrix of SVM is:
 [[ 58  31   0  25  73   0  23]
 [ 27  94   7  36  39   0   7]
 [ 17  97  66  13  16   0   1]
 [ 28  47   0  31  74   0  30]
 [ 15  31   0  25 108   0  31]
 [ 18  43   0  27  76   4  42]
 [ 17  23   0  19  81   2  68]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.32      0.28      0.30       210
           2       0.26      0.45      0.33       210
           3       0.90      0.31      0.47       210
           4       0.18      0.15      0.16       210
           5       0.23      0.51      0.32       210
           6       0.67      0.02      0.04       210
           7       0.34      0.32      0.33       210

    accuracy                           0.29      1470
   macro avg       0.41      0.29      0.28      1470
weighted avg       0.41      0.29      0.28      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.3707482993197279
Confusion Matrix of Decision Tree is:
 [[ 65  14   8  38  29  26  30]
 [ 22  95   8  30  26  10  19]
 [ 13   8 161  15   4   6   3]
 [ 41  21   8  42  35  32  31]
 [ 22  24   5  34  53  24  48]
 [ 26  24   3  39  19  38  61]
 [ 20  14   1  33  24  27  91]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.31      0.31      0.31       210
           2       0.47      0.45      0.46       210
           3       0.83      0.77      0.80       210
           4       0.18      0.20      0.19       210
           5       0.28      0.25      0.27       210
           6       0.23      0.18      0.20       210
           7       0.32      0.43      0.37       210

    accuracy                           0.37      1470
   macro avg       0.38      0.37      0.37      1470
weighted avg       0.38      0.37      0.37      1470

Decision Tree with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.30680272108843537
Confusion Matrix of Random Forest is:
 [[ 47  13  61   0  37   0  52]
 [ 22  40  92   0  14   0  42]
 [  3   3 194   2   3   0   5]
 [ 25  22  69   0  34   0  60]
 [ 14  13  54   1  57   0  71]
 [ 21  24  56   1  29   0  79]
 [ 22  16  24   0  35   0 113]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.31      0.22      0.26       210
           2       0.31      0.19      0.23       210
           3       0.35      0.92      0.51       210
           4       0.00      0.00      0.00       210
           5       0.27      0.27      0.27       210
           6       0.00      0.00      0.00       210
           7       0.27      0.54      0.36       210

    accuracy                           0.31      1470
   macro avg       0.21      0.31      0.23      1470
weighted avg       0.21      0.31      0.23      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.33197278911564626
Confusion Matrix of Random Forest is:
 [[ 50  10  49  28  33   0  40]
 [ 22  54  70  28  12   0  24]
 [  2   4 189   8   3   0   4]
 [ 21  15  53  29  31   0  61]
 [ 14  19  39  32  55   0  51]
 [ 17  19  44  29  24   0  77]
 [ 16   9  15  31  28   0 111]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.35      0.24      0.28       210
           2       0.42      0.26      0.32       210
           3       0.41      0.90      0.57       210
           4       0.16      0.14      0.15       210
           5       0.30      0.26      0.28       210
           6       0.00      0.00      0.00       210
           7       0.30      0.53      0.38       210

    accuracy                           0.33      1470
   macro avg       0.28      0.33      0.28      1470
weighted avg       0.28      0.33      0.28      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.35034013605442177
Confusion Matrix of Random Forest is:
 [[ 53  15  23  51  31   1  36]
 [ 14  57  55  41  15   0  28]
 [  2   4 177  22   3   0   2]
 [ 19  18  28  55  32   5  53]
 [ 13  21  18  50  53   0  55]
 [  7  20  26  44  27   5  81]
 [  6   9   4  42  29   5 115]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.46      0.25      0.33       210
           2       0.40      0.27      0.32       210
           3       0.53      0.84      0.65       210
           4       0.18      0.26      0.21       210
           5       0.28      0.25      0.27       210
           6       0.31      0.02      0.04       210
           7       0.31      0.55      0.40       210

    accuracy                           0.35      1470
   macro avg       0.35      0.35      0.32      1470
weighted avg       0.35      0.35      0.32      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.41156462585034015
Confusion Matrix of Random Forest is:
 [[ 63  14   7  32  50  17  27]
 [ 12 100  15  30  26  13  14]
 [  6  10 162  20   7   2   3]
 [ 23  18   7  50  45  27  40]
 [ 17  21   4  27  85  13  43]
 [ 16  21   4  26  38  43  62]
 [  7   5   1  29  39  27 102]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.44      0.30      0.36       210
           2       0.53      0.48      0.50       210
           3       0.81      0.77      0.79       210
           4       0.23      0.24      0.24       210
           5       0.29      0.40      0.34       210
           6       0.30      0.20      0.24       210
           7       0.35      0.49      0.41       210

    accuracy                           0.41      1470
   macro avg       0.42      0.41      0.41      1470
weighted avg       0.42      0.41      0.41      1470

Random Forest with 12 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.40272108843537413
Confusion Matrix of Random Forest is:
 [[ 69  14   4  26  44  23  30]
 [ 15 107   7  23  30  14  14]
 [  6  11 164  16   8   2   3]
 [ 27  17   6  42  44  34  40]
 [ 19  20   3  34  76  19  39]
 [ 18  24   3  21  32  47  65]
 [ 15   8   1  33  37  29  87]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.33      0.36       210
           2       0.53      0.51      0.52       210
           3       0.87      0.78      0.82       210
           4       0.22      0.20      0.21       210
           5       0.28      0.36      0.32       210
           6       0.28      0.22      0.25       210
           7       0.31      0.41      0.36       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.41      1470
weighted avg       0.41      0.40      0.41      1470

Random Forest with 20 max_depth
Ac