#### Info:
#### All PCA models are built using the elbow method through the scree plots in Nisha_PCA_Scree_Plots
#### 

In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    from sklearn.decomposition import FastICA
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Nisha//Input//Nisha_dataset_labels.csv")

In [2]:
# Function of Scaling, PCA, ICA
def scale_pca_ica(x_data, y_data, comp):
    scaler_model = StandardScaler()
    scaled_data = scaler_model.fit_transform(x_data)
    #print(scaled_data)
    # Doing PCA giving number of Components(dimensions)
    pca_comp = PCA(n_components=comp)
    pca_data = pca_comp.fit_transform(scaled_data)
    #print(pca_data)
    # Doing ICA on PCA transformed data to make features independent
    #ica_comp = FastICA(n_components=comp)
    ica_comp = FastICA(n_components=comp,max_iter=50000)
    ica_data = ica_comp.fit_transform(pca_data)
    #print(ica_data)
    x_train,x_test,y_train,y_test = train_test_split(ica_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    return x_train, x_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after PCA and ICA is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3727891156462585
Confusion Matrix of Logistic Regression is:
 [[  0  22 122  25  18  15   8]
 [  1  23 135  15  24   9   3]
 [  0   5 197   5   3   0   0]
 [  0  17  74  67  40   7   5]
 [  0  31  73  42  39  14  11]
 [  0  20  62  19  21  24  64]
 [  0   0   3   2   1   6 198]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.19      0.11      0.14       210
           3       0.30      0.94      0.45       210
           4       0.38      0.32      0.35       210
           5       0.27      0.19      0.22       210
           6       0.32      0.11      0.17       210
           7       0.69      0.94      0.79       210

    accuracy                           0.37      1470
   macro avg       0.31      0.37      0.30      1470
weighted avg       0.31      0.37      0.30      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.18639455782312925
Confusion Matrix of SVM is:
 [[  0  13 196   0   0   1   0]
 [  0   0 208   1   0   1   0]
 [  0   0 210   0   0   0   0]
 [  0   8 196   5   0   1   0]
 [  1  21 182   4   1   1   0]
 [  2  18 181   2   1   5   1]
 [  3  13 128   0   0  13  53]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.16      1.00      0.28       210
           4       0.42      0.02      0.05       210
           5       0.50      0.00      0.01       210
           6       0.23      0.02      0.04       210
           7       0.98      0.25      0.40       210

    accuracy                           0.19      1470
   macro avg       0.33      0.19      0.11      1470
weighted avg       0.33      0.19      0.11      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.5006802721088436
Confusion Matrix of Decision Tree is:
 [[ 76  33  12   9  52  25   3]
 [ 18  97  20  15  39  19   2]
 [  3  20 157  13   7  10   0]
 [ 15  25  10  80  59  20   1]
 [ 15  31  16  39  90  14   5]
 [  7  32   8  14  34  84  31]
 [  3   5   0   0  10  40 152]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.36      0.44       210
           2       0.40      0.46      0.43       210
           3       0.70      0.75      0.73       210
           4       0.47      0.38      0.42       210
           5       0.31      0.43      0.36       210
           6       0.40      0.40      0.40       210
           7       0.78      0.72      0.75       210

    accuracy                           0.50      1470
   macro avg       0.52      0.50      0.50      1470
weighted avg       0.52      0.50      0.50      1470

Decision Tree with 10 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.3598639455782313
Confusion Matrix of Random Forest is:
 [[  0   1 112  44   0  46   7]
 [  0   0 118  50   0  40   2]
 [  0   0 179  28   0   3   0]
 [  0   0  75 108   0  23   4]
 [  1   0  76  79   0  43  11]
 [  1   0  47  55   0  61  46]
 [  0   0   3   1   0  25 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.29      0.85      0.44       210
           4       0.30      0.51      0.38       210
           5       0.00      0.00      0.00       210
           6       0.25      0.29      0.27       210
           7       0.72      0.86      0.79       210

    accuracy                           0.36      1470
   macro avg       0.22      0.36      0.27      1470
weighted avg       0.22      0.36      0.27      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.41496598639455784
Confusion Matrix of Random Forest is:
 [[ 62  38  30  36   0  38   6]
 [ 16  53  41  61   0  37   2]
 [ 17  35 133  25   0   0   0]
 [ 15  14  30 129   0  18   4]
 [ 21  17  27  97   0  38  10]
 [ 15   2  43  53   0  52  45]
 [  0   1   3   1   0  24 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.30      0.35       210
           2       0.33      0.25      0.29       210
           3       0.43      0.63      0.51       210
           4       0.32      0.61      0.42       210
           5       0.00      0.00      0.00       210
           6       0.25      0.25      0.25       210
           7       0.73      0.86      0.79       210

    accuracy                           0.41      1470
   macro avg       0.36      0.41      0.37      1470
weighted avg       0.36      0.41      0.37      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.46870748299319726
Confusion Matrix of Random Forest is:
 [[102  28  13  21   2  38   6]
 [ 33  80  21  44   1  29   2]
 [ 23  26 134  19   2   6   0]
 [ 35  20  15 118   1  18   3]
 [ 36  35  14  80   5  30  10]
 [ 28  15  14  36   2  71  44]
 [  5   1   4   0   0  21 179]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.49      0.43       210
           2       0.39      0.38      0.39       210
           3       0.62      0.64      0.63       210
           4       0.37      0.56      0.45       210
           5       0.38      0.02      0.04       210
           6       0.33      0.34      0.34       210
           7       0.73      0.85      0.79       210

    accuracy                           0.47      1470
   macro avg       0.46      0.47      0.44      1470
weighted avg       0.46      0.47      0.44      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5564625850340136
Confusion Matrix of Random Forest is:
 [[ 99  18   4  18  33  33   5]
 [ 24  98  14  27  26  21   0]
 [  7  12 156  21   7   7   0]
 [ 18  13   8 109  39  20   3]
 [ 20  25   7  52  76  22   8]
 [ 17  23   4  14  16  99  37]
 [  0   4   0   1   0  24 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.47      0.50       210
           2       0.51      0.47      0.49       210
           3       0.81      0.74      0.77       210
           4       0.45      0.52      0.48       210
           5       0.39      0.36      0.37       210
           6       0.44      0.47      0.45       210
           7       0.77      0.86      0.82       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5530612244897959
Confusion Matrix of Random Forest is:
 [[102  12   7  19  33  32   5]
 [ 17  97  14  24  34  24   0]
 [  6  12 169  10   5   8   0]
 [ 21  10  12  93  46  25   3]
 [ 30  23   7  49  75  19   7]
 [ 20  23   3  19  12  96  37]
 [  1   3   0   1   1  23 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.49      0.50       210
           2       0.54      0.46      0.50       210
           3       0.80      0.80      0.80       210
           4       0.43      0.44      0.44       210
           5       0.36      0.36      0.36       210
           6       0.42      0.46      0.44       210
           7       0.78      0.86      0.82       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Random Forest with 20 max_depth
Acc

In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],7)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.38503401360544215
Confusion Matrix of Logistic Regression is:
 [[  0  11 119  16  54   3   7]
 [  0  14 149   9  35   1   2]
 [  0   2 203   5   0   0   0]
 [  1   6  95  30  72   1   5]
 [  0  19  41  20 116   0  14]
 [  1   6  93  16  30   6  58]
 [  0   1   9   1   1   1 197]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.24      0.07      0.10       210
           3       0.29      0.97      0.44       210
           4       0.31      0.14      0.20       210
           5       0.38      0.55      0.45       210
           6       0.50      0.03      0.05       210
           7       0.70      0.94      0.80       210

    accuracy                           0.39      1470
   macro avg       0.34      0.39      0.29      1470
weighted avg       0.34      0.39      0.29      1470

KNN with 3 Neigh

Accuracy of SVM after PCA and ICA is: 0.1870748299319728
Confusion Matrix of SVM is:
 [[  1  38 168   1   1   1   0]
 [  0  24 186   0   0   0   0]
 [  0   0 210   0   0   0   0]
 [  0  45 165   0   0   0   0]
 [  5  88 113   0   2   1   1]
 [  0  34 169   2   0   1   4]
 [  0  26 146   1   0   0  37]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.17      0.00      0.01       210
           2       0.09      0.11      0.10       210
           3       0.18      1.00      0.31       210
           4       0.00      0.00      0.00       210
           5       0.67      0.01      0.02       210
           6       0.33      0.00      0.01       210
           7       0.88      0.18      0.29       210

    accuracy                           0.19      1470
   macro avg       0.33      0.19      0.11      1470
weighted avg       0.33      0.19      0.11      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 88  35   2  25  37  22   1]
 [ 11 118  14  25  25  17   0]
 [  4  17 156  19   2   9   3]
 [ 23  24  12  98  29  21   3]
 [ 30  32   3  34  91  15   5]
 [ 25  28   7  26  17  75  32]
 [  5   7   0   5   1  20 172]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.42      0.44       210
           2       0.45      0.56      0.50       210
           3       0.80      0.74      0.77       210
           4       0.42      0.47      0.44       210
           5       0.45      0.43      0.44       210
           6       0.42      0.36      0.39       210
           7       0.80      0.82      0.81       210

    accuracy                           0.54      1470
   macro avg       0.55      0.54      0.54      1470
weighted avg       0.55      0.54      0.54      1470

Decision Tree with 10 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5380952380952381
Con

Confusion Matrix of Decision Tree is:
 [[100  14   9  30  25  31   1]
 [ 14 111  18  27  21  18   1]
 [  3  13 167  13   2   9   3]
 [ 21  23  20  79  35  28   4]
 [ 40  32   4  34  66  28   6]
 [ 29  15   9  16  19  95  27]
 [  4   8   0   1   4  29 164]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.48      0.48       210
           2       0.51      0.53      0.52       210
           3       0.74      0.80      0.76       210
           4       0.40      0.38      0.39       210
           5       0.38      0.31      0.35       210
           6       0.40      0.45      0.42       210
           7       0.80      0.78      0.79       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Random Forest with 1 max_depth
Accuracy of Random Forest after PCA and ICA is: 0.3836734693877551
Conf

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4605442176870748
Confusion Matrix of Random Forest is:
 [[ 98  19  47   5  31   0  10]
 [ 30  87  51   6  33   0   3]
 [  8  18 173  11   0   0   0]
 [ 35  23  59  35  47   0  11]
 [ 39  20  22  25  89   0  15]
 [ 29  26  59  12  17   1  66]
 [  6   2   6   1   1   0 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.47      0.43       210
           2       0.45      0.41      0.43       210
           3       0.41      0.82      0.55       210
           4       0.37      0.17      0.23       210
           5       0.41      0.42      0.42       210
           6       1.00      0.00      0.01       210
           7       0.65      0.92      0.76       210

    accuracy                           0.46      1470
   macro avg       0.53      0.46      0.40      1470
weighted avg       0.53      0.46      0.40      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.6054421768707483
Confusion Matrix of Random Forest is:
 [[107  17   3  20  32  28   3]
 [ 11 126  13  18  24  18   0]
 [  4  11 166  22   1   6   0]
 [  6  19  12  99  47  24   3]
 [ 18  24   3  39 106  12   8]
 [ 12  23   6  25  12  99  33]
 [  1   1   0   3   1  17 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.51      0.58       210
           2       0.57      0.60      0.58       210
           3       0.82      0.79      0.80       210
           4       0.44      0.47      0.45       210
           5       0.48      0.50      0.49       210
           6       0.49      0.47      0.48       210
           7       0.80      0.89      0.84       210

    accuracy                           0.61      1470
   macro avg       0.61      0.61      0.60      1470
weighted avg       0.61      0.61      0.60      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5952380952380952
Confusion Matrix of Random Forest is:
 [[100  18   3  20  36  31   2]
 [ 10 118  12  21  31  18   0]
 [  1  13 175  13   2   6   0]
 [ 11  18  16  95  45  22   3]
 [ 23  23   2  41 102  10   9]
 [ 15  24   7  19  11 103  31]
 [  1   2   0   2   0  23 182]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.48      0.54       210
           2       0.55      0.56      0.55       210
           3       0.81      0.83      0.82       210
           4       0.45      0.45      0.45       210
           5       0.45      0.49      0.47       210
           6       0.48      0.49      0.49       210
           7       0.80      0.87      0.83       210

    accuracy                           0.60      1470
   macro avg       0.60      0.60      0.59      1470
weighted avg       0.60      0.60      0.59      1470

Random Forest with 19 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3761904761904762
Confusion Matrix of Logistic Regression is:
 [[  0  13 120  24  31  14   8]
 [  0  26 136  20  17   8   3]
 [  0   2 197   8   3   0   0]
 [  0  16  74  57  52   6   5]
 [  0  28  68  38  56   8  12]
 [  0  11  64  25  32  18  60]
 [  0   0   3   1   2   5 199]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.27      0.12      0.17       210
           3       0.30      0.94      0.45       210
           4       0.33      0.27      0.30       210
           5       0.29      0.27      0.28       210
           6       0.31      0.09      0.13       210
           7       0.69      0.95      0.80       210

    accuracy                           0.38      1470
   macro avg       0.31      0.38      0.30      1470
weighted avg       0.31      0.38      0.30      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.5027210884353741
Confusion Matrix of KNN Model is:
 [[121  16   3  16  18  33   3]
 [ 29 104  18  26  15  17   1]
 [ 15  20 157   5   7   6   0]
 [ 38  21  19  71  40  18   3]
 [ 57  32   8  50  43  14   6]
 [ 29  24   6  17  17  82  35]
 [  7   2   1   2   5  32 161]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.41      0.58      0.48       210
           2       0.47      0.50      0.48       210
           3       0.74      0.75      0.74       210
           4       0.38      0.34      0.36       210
           5       0.30      0.20      0.24       210
           6       0.41      0.39      0.40       210
           7       0.77      0.77      0.77       210

    accuracy                           0.50      1470
   macro avg       0.50      0.50      0.50      1470
weighted avg       0.50      0.50      0.50      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.17551020408163265
Confusion Matrix of SVM is:
 [[  0  11 196   0   2   1   0]
 [  0   1 207   1   0   1   0]
 [  0   0 209   1   0   0   0]
 [  2   3 196   7   1   1   0]
 [  4  16 180   5   4   1   0]
 [  5  15 174   2   5   7   2]
 [  6   5 144   0   3  22  30]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.02      0.00      0.01       210
           3       0.16      1.00      0.28       210
           4       0.44      0.03      0.06       210
           5       0.27      0.02      0.04       210
           6       0.21      0.03      0.06       210
           7       0.94      0.14      0.25       210

    accuracy                           0.18      1470
   macro avg       0.29      0.18      0.10      1470
weighted avg       0.29      0.18      0.10      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.5095238095238095
Confusion Matrix of Decision Tree is:
 [[108  11   6  19  30  32   4]
 [ 13  99  22  19  36  17   4]
 [ 10  14 162   7  12   5   0]
 [ 21  24  14  78  51  21   1]
 [ 25  32  11  50  61  24   7]
 [ 24  18   4  26  21  83  34]
 [  5   1   1   3  11  31 158]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.52      0.51      0.52       210
           2       0.50      0.47      0.48       210
           3       0.74      0.77      0.75       210
           4       0.39      0.37      0.38       210
           5       0.27      0.29      0.28       210
           6       0.39      0.40      0.39       210
           7       0.76      0.75      0.76       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 13 max_depth
Acc

Accuracy of Decision Tree after PCA and ICA is: 0.5054421768707483
Confusion Matrix of Decision Tree is:
 [[108  14   4  13  32  35   4]
 [ 23  97  18  22  24  25   1]
 [  7  13 167   7   8   8   0]
 [ 20  31  17  67  44  28   3]
 [ 24  31  10  49  57  28  11]
 [ 25  16   4  21  19  91  34]
 [  6   4   1   2   7  34 156]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.51      0.51      0.51       210
           2       0.47      0.46      0.47       210
           3       0.76      0.80      0.77       210
           4       0.37      0.32      0.34       210
           5       0.30      0.27      0.28       210
           6       0.37      0.43      0.40       210
           7       0.75      0.74      0.74       210

    accuracy                           0.51      1470
   macro avg       0.50      0.51      0.50      1470
weighted avg       0.50      0.51      0.50      1470

Random Forest with 1 max_depth
Accu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3952380952380952
Confusion Matrix of Random Forest is:
 [[121  26  12   2   0  40   9]
 [ 60  80  31   0   0  34   5]
 [ 29  46 127   2   0   5   1]
 [113  46  18   4   0  18  11]
 [ 80  54  19   3   0  31  23]
 [ 41  22  15   1   0  75  56]
 [  2   1   4   0   0  29 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.27      0.58      0.37       210
           2       0.29      0.38      0.33       210
           3       0.56      0.60      0.58       210
           4       0.33      0.02      0.04       210
           5       0.00      0.00      0.00       210
           6       0.32      0.36      0.34       210
           7       0.62      0.83      0.71       210

    accuracy                           0.40      1470
   macro avg       0.34      0.40      0.34      1470
weighted avg       0.34      0.40      0.34      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.44829931972789117
Confusion Matrix of Random Forest is:
 [[103  22   9  19   0  50   7]
 [ 33  77  31  25   2  38   4]
 [ 22  45 126   8   1   7   1]
 [ 33  45  15  74   9  28   6]
 [ 30  54  15  40  16  36  19]
 [ 18  16   9  23   1  92  51]
 [  1   0   1   1   0  36 171]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.43      0.49      0.46       210
           2       0.30      0.37      0.33       210
           3       0.61      0.60      0.61       210
           4       0.39      0.35      0.37       210
           5       0.55      0.08      0.13       210
           6       0.32      0.44      0.37       210
           7       0.66      0.81      0.73       210

    accuracy                           0.45      1470
   macro avg       0.47      0.45      0.43      1470
weighted avg       0.47      0.45      0.43      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5659863945578232
Confusion Matrix of Random Forest is:
 [[105  10   4  16  27  48   0]
 [ 10 108  14  27  31  19   1]
 [  7  15 157  18   7   6   0]
 [ 12  13  12 102  46  25   0]
 [ 21  29   6  42  90  16   6]
 [ 13  14   6  27  16  92  42]
 [  0   2   0   1   1  28 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.50      0.56       210
           2       0.57      0.51      0.54       210
           3       0.79      0.75      0.77       210
           4       0.44      0.49      0.46       210
           5       0.41      0.43      0.42       210
           6       0.39      0.44      0.41       210
           7       0.78      0.85      0.81       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.57      1470
weighted avg       0.57      0.57      0.57      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5714285714285714
Confusion Matrix of Random Forest is:
 [[110   9   4  16  25  45   1]
 [ 12 112  14  21  30  20   1]
 [  5  13 171   7   5   8   1]
 [ 11  17  17  92  47  25   1]
 [ 26  29   6  44  80  20   5]
 [ 14  17   4  20  18  98  39]
 [  2   1   1   0   1  28 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.52      0.56       210
           2       0.57      0.53      0.55       210
           3       0.79      0.81      0.80       210
           4       0.46      0.44      0.45       210
           5       0.39      0.38      0.38       210
           6       0.40      0.47      0.43       210
           7       0.79      0.84      0.81       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.57      1470
weighted avg       0.57      0.57      0.57      1470

Random Forest with 20 max_depth
Acc

### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.45374149659863944
Confusion Matrix of Logistic Regression is:
 [[100   0  50   0  33   6  21]
 [ 11  58  86   0  31   5  19]
 [ 16   0 185   0   2   5   2]
 [ 42  14  43   0  64  12  35]
 [ 42   8  17   0 113   2  28]
 [ 26   6  31   0  10  31 106]
 [  3   0   0   0   4  23 180]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.42      0.48      0.44       210
           2       0.67      0.28      0.39       210
           3       0.45      0.88      0.59       210
           4       0.00      0.00      0.00       210
           5       0.44      0.54      0.48       210
           6       0.37      0.15      0.21       210
           7       0.46      0.86      0.60       210

    accuracy                           0.45      1470
   macro avg       0.40      0.45      0.39      1470
weighted avg       0.40      0.45      0.39      1470

KNN with 3 Neigh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.4884353741496599
Confusion Matrix of KNN Model is:
 [[127   8   5  20  31  13   6]
 [ 16 128   9  19  21  14   3]
 [  6  21 167   5   4   5   2]
 [ 38  46  12  50  35  12  17]
 [ 52  24   4  43  73   9   5]
 [ 30  29  17  22   9  61  42]
 [ 12   6   1  13   6  60 112]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.45      0.60      0.52       210
           2       0.49      0.61      0.54       210
           3       0.78      0.80      0.79       210
           4       0.29      0.24      0.26       210
           5       0.41      0.35      0.38       210
           6       0.35      0.29      0.32       210
           7       0.60      0.53      0.56       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.48      1470
weighted avg       0.48      0.49      0.48      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.43741496598639457
Confusion Matrix of SVM is:
 [[ 90   0  64   0  31  14  11]
 [  9  53  98   0  27  13  10]
 [ 10   0 191   0   2   6   1]
 [ 44  16  54   0  54  21  21]
 [ 44   8  24   0 106   6  22]
 [ 32   6  38   0   7  63  64]
 [ 11   0   0   0   4  55 140]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.38      0.43      0.40       210
           2       0.64      0.25      0.36       210
           3       0.41      0.91      0.56       210
           4       0.00      0.00      0.00       210
           5       0.46      0.50      0.48       210
           6       0.35      0.30      0.32       210
           7       0.52      0.67      0.58       210

    accuracy                           0.44      1470
   macro avg       0.39      0.44      0.39      1470
weighted avg       0.39      0.44      0.39      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.5149659863945578
Confusion Matrix of SVM is:
 [[107   1  13  35  29  16   9]
 [  2 105  15  47  20  17   4]
 [ 14  24 143   8   6  15   0]
 [ 20  23   7  73  52  19  16]
 [ 20  16   4  47 107   5  11]
 [ 15   9  12  32  10  70  62]
 [  0   2   0   9   5  42 152]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.60      0.51      0.55       210
           2       0.58      0.50      0.54       210
           3       0.74      0.68      0.71       210
           4       0.29      0.35      0.32       210
           5       0.47      0.51      0.49       210
           6       0.38      0.33      0.36       210
           7       0.60      0.72      0.66       210

    accuracy                           0.51      1470
   macro avg       0.52      0.51      0.52      1470
weighted avg       0.52      0.51      0.52      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[108   8   1  36  34  13  10]
 [  8 120   8  29  23  18   4]
 [  6  18 159  14   5   6   2]
 [ 22  35   8  64  46  22  13]
 [ 44  16   3  46  80  12   9]
 [ 30  20  11  26  10  59  54]
 [ 11   4   2   8   5  48 132]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.51      0.49       210
           2       0.54      0.57      0.56       210
           3       0.83      0.76      0.79       210
           4       0.29      0.30      0.30       210
           5       0.39      0.38      0.39       210
           6       0.33      0.28      0.30       210
           7       0.59      0.63      0.61       210

    accuracy                           0.49      1470
   macro avg       0.49      0.49      0.49      1470
weighted avg       0.49      0.49      0.49      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.48639455782312924
Co

Confusion Matrix of Decision Tree is:
 [[112   9   3  30  35  14   7]
 [ 12 111  12  26  22  19   8]
 [  4  18 161  14   4   5   4]
 [ 28  32  12  52  46  25  15]
 [ 40  19   7  36  85  11  12]
 [ 32  18  14  18  13  61  54]
 [  6   5   3  15  12  51 118]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.48      0.53      0.50       210
           2       0.52      0.53      0.53       210
           3       0.76      0.77      0.76       210
           4       0.27      0.25      0.26       210
           5       0.39      0.40      0.40       210
           6       0.33      0.29      0.31       210
           7       0.54      0.56      0.55       210

    accuracy                           0.48      1470
   macro avg       0.47      0.48      0.47      1470
weighted avg       0.47      0.48      0.47      1470

Random Forest with 1 max_depth
Accuracy of Random Forest after PCA and ICA is: 0.373469387755102
Confu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.48027210884353744
Confusion Matrix of Random Forest is:
 [[ 79   1  22   0  88   4  16]
 [  4  89  22   0  76   5  14]
 [  2  23 157   0  21   1   6]
 [ 11  16  15   0 136   7  25]
 [  3  11   5   0 177   0  14]
 [ 10   6  22   0  54  21  97]
 [  0   2   0   0  18   7 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.72      0.38      0.50       210
           2       0.60      0.42      0.50       210
           3       0.65      0.75      0.69       210
           4       0.00      0.00      0.00       210
           5       0.31      0.84      0.45       210
           6       0.47      0.10      0.16       210
           7       0.52      0.87      0.65       210

    accuracy                           0.48      1470
   macro avg       0.47      0.48      0.42      1470
weighted avg       0.47      0.48      0.42      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.5020408163265306
Confusion Matrix of Random Forest is:
 [[100   2   6   0  82   7  13]
 [  3 119  15   2  49  10  12]
 [  7  27 149   0  17   7   3]
 [ 17  40   9   1 110   9  24]
 [  6  22   2   2 165   2  11]
 [ 16  17  15   3  39  27  93]
 [  1   6   0   1  12  13 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.48      0.56       210
           2       0.51      0.57      0.54       210
           3       0.76      0.71      0.73       210
           4       0.11      0.00      0.01       210
           5       0.35      0.79      0.48       210
           6       0.36      0.13      0.19       210
           7       0.53      0.84      0.65       210

    accuracy                           0.50      1470
   macro avg       0.47      0.50      0.45      1470
weighted avg       0.47      0.50      0.45      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5523809523809524
Confusion Matrix of Random Forest is:
 [[117   4   1  27  40  13   8]
 [  4 130   7  25  24  13   7]
 [  2  15 172  10   3   7   1]
 [ 20  41   6  55  51  18  19]
 [ 21  16   1  39 114   8  11]
 [ 22  19   9  17   8  75  60]
 [  5   3   0   4   4  45 149]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.61      0.56      0.58       210
           2       0.57      0.62      0.59       210
           3       0.88      0.82      0.85       210
           4       0.31      0.26      0.28       210
           5       0.47      0.54      0.50       210
           6       0.42      0.36      0.39       210
           7       0.58      0.71      0.64       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5319727891156463
Confusion Matrix of Random Forest is:
 [[118   6   2  24  37  16   7]
 [  7 125   4  24  28  18   4]
 [  1  13 173  10   3  10   0]
 [ 18  41  11  57  47  20  16]
 [ 28  17   3  46  97  11   8]
 [ 24  16  10  17  10  72  61]
 [  5   3   0   7   5  50 140]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.56      0.57       210
           2       0.57      0.60      0.58       210
           3       0.85      0.82      0.84       210
           4       0.31      0.27      0.29       210
           5       0.43      0.46      0.44       210
           6       0.37      0.34      0.35       210
           7       0.59      0.67      0.63       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Random Forest with 20 max_depth
Acc

In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_gkb.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3183673469387755
Confusion Matrix of Logistic Regression is:
 [[ 92   0  51  16   0   0  51]
 [ 32   0 115  10   0   0  53]
 [ 16   0 178   3   0   0  13]
 [ 22   0 139  11   0   0  38]
 [ 33   0 143  14   0   0  20]
 [ 35   0  51   6   0   0 118]
 [ 16   0   6   1   0   0 187]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.37      0.44      0.40       210
           2       0.00      0.00      0.00       210
           3       0.26      0.85      0.40       210
           4       0.18      0.05      0.08       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.39      0.89      0.54       210

    accuracy                           0.32      1470
   macro avg       0.17      0.32      0.20      1470
weighted avg       0.17      0.32      0.20      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.3891156462585034
Confusion Matrix of KNN Model is:
 [[111  22  10  25  19   8  15]
 [ 39  72  23  20  23  15  18]
 [  7  22 151  13   7   6   4]
 [ 49  41  16  44  32  14  14]
 [ 41  30  22  33  58  13  13]
 [ 31  26  13  25  19  51  45]
 [ 23  13   5   5   5  74  85]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.37      0.53      0.43       210
           2       0.32      0.34      0.33       210
           3       0.63      0.72      0.67       210
           4       0.27      0.21      0.23       210
           5       0.36      0.28      0.31       210
           6       0.28      0.24      0.26       210
           7       0.44      0.40      0.42       210

    accuracy                           0.39      1470
   macro avg       0.38      0.39      0.38      1470
weighted avg       0.38      0.39      0.38      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3122448979591837
Confusion Matrix of SVM is:
 [[ 73   0  47  33   0   1  56]
 [ 23   0 112  21   0   1  53]
 [  6   0 176   6   0   0  22]
 [ 16   0 135  21   0   0  38]
 [ 24   0 138  27   0   0  21]
 [ 23   0  49  17   0   1 120]
 [  9   1   6   4   1   1 188]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.42      0.35      0.38       210
           2       0.00      0.00      0.00       210
           3       0.27      0.84      0.40       210
           4       0.16      0.10      0.12       210
           5       0.00      0.00      0.00       210
           6       0.25      0.00      0.01       210
           7       0.38      0.90      0.53       210

    accuracy                           0.31      1470
   macro avg       0.21      0.31      0.21      1470
weighted avg       0.21      0.31      0.21      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3360544217687075
Confusion Matrix of SVM is:
 [[117   0  33  34   2   5  19]
 [ 29   0 108  24   1   3  45]
 [ 10   0 174  16   0   3   7]
 [ 23   0 113  32   7   2  33]
 [ 34   0 126  34   2   3  11]
 [ 44   0  36  23   2   6  99]
 [ 28   0   5   4   0  10 163]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.41      0.56      0.47       210
           2       0.00      0.00      0.00       210
           3       0.29      0.83      0.43       210
           4       0.19      0.15      0.17       210
           5       0.14      0.01      0.02       210
           6       0.19      0.03      0.05       210
           7       0.43      0.78      0.56       210

    accuracy                           0.34      1470
   macro avg       0.24      0.34      0.24      1470
weighted avg       0.24      0.34      0.24      1470

Working on SVM Kernal: sigmoid


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.2931972789115646
Confusion Matrix of SVM is:
 [[125   3  29   4  37   5   7]
 [ 52   9  95  10  12  19  13]
 [ 34   0 163   2   3   1   7]
 [ 59   7  97  11   9   9  18]
 [ 57   5 114   6  17   7   4]
 [ 66  12  30   4  13  43  42]
 [ 44  12   9   1   5  76  63]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.29      0.60      0.39       210
           2       0.19      0.04      0.07       210
           3       0.30      0.78      0.44       210
           4       0.29      0.05      0.09       210
           5       0.18      0.08      0.11       210
           6       0.27      0.20      0.23       210
           7       0.41      0.30      0.35       210

    accuracy                           0.29      1470
   macro avg       0.27      0.29      0.24      1470
weighted avg       0.27      0.29      0.24      1470

Decision Tree with 1 max_depth
Accuracy of Decision Tree after PC

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.39183673469387753
Confusion Matrix of Decision Tree is:
 [[112  10  12  25  24  19   8]
 [ 33  57  17  21  41  25  16]
 [  8  13 151  14  17   4   3]
 [ 29  21  25  38  62  10  25]
 [ 36  15  11  43  75  14  16]
 [ 31  12   6  33  21  55  52]
 [ 19   8   5  12   8  70  88]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.42      0.53      0.47       210
           2       0.42      0.27      0.33       210
           3       0.67      0.72      0.69       210
           4       0.20      0.18      0.19       210
           5       0.30      0.36      0.33       210
           6       0.28      0.26      0.27       210
           7       0.42      0.42      0.42       210

    accuracy                           0.39      1470
   macro avg       0.39      0.39      0.39      1470
weighted avg       0.39      0.39      0.39      1470

Decision Tree with 14 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.2707482993197279
Confusion Matrix of Random Forest is:
 [[  0   0  74  17   0   0 119]
 [  0   0 120  14   0   0  76]
 [  0   0 181   2   0   0  27]
 [  0   0 140  17   0   0  53]
 [  0   0 152  14   0   0  44]
 [  0   0  53  13   0   0 144]
 [  0   0   6   4   0   0 200]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.25      0.86      0.39       210
           4       0.21      0.08      0.12       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.30      0.95      0.46       210

    accuracy                           0.27      1470
   macro avg       0.11      0.27      0.14      1470
weighted avg       0.11      0.27      0.14      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.34965986394557824
Confusion Matrix of Random Forest is:
 [[121   0  11  58   0   0  20]
 [ 35   0  51  75   0   0  49]
 [ 16   0 117  65   0   0  12]
 [ 30   0  43 101   0   1  35]
 [ 38   0  45 111   0   1  15]
 [ 51   0  15  41   0   2 101]
 [ 29   0   1   6   0   1 173]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.58      0.46       210
           2       0.00      0.00      0.00       210
           3       0.41      0.56      0.47       210
           4       0.22      0.48      0.30       210
           5       0.00      0.00      0.00       210
           6       0.40      0.01      0.02       210
           7       0.43      0.82      0.56       210

    accuracy                           0.35      1470
   macro avg       0.26      0.35      0.26      1470
weighted avg       0.26      0.35      0.26      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.36666666666666664
Confusion Matrix of Random Forest is:
 [[133   0  10  40   7   5  15]
 [ 37   0  45  57  23  10  38]
 [ 19   0 119  59   7   2   4]
 [ 31   0  34  85  24   6  30]
 [ 42   0  29  89  34   3  13]
 [ 53   0  11  35   8   8  95]
 [ 32   0   0   5   2  11 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.63      0.48       210
           2       0.00      0.00      0.00       210
           3       0.48      0.57      0.52       210
           4       0.23      0.40      0.29       210
           5       0.32      0.16      0.22       210
           6       0.18      0.04      0.06       210
           7       0.45      0.76      0.57       210

    accuracy                           0.37      1470
   macro avg       0.29      0.37      0.31      1470
weighted avg       0.29      0.37      0.31      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3782312925170068
Confusion Matrix of Random Forest is:
 [[129   0  10  33  14  10  14]
 [ 39   4  44  38  40  13  32]
 [ 18   0 119  34  32   2   5]
 [ 32   0  33  57  51   9  28]
 [ 42   0  29  55  68   4  12]
 [ 44   0  11  27  16  37  75]
 [ 21   0   1   4   3  39 142]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.61      0.48       210
           2       1.00      0.02      0.04       210
           3       0.48      0.57      0.52       210
           4       0.23      0.27      0.25       210
           5       0.30      0.32      0.31       210
           6       0.32      0.18      0.23       210
           7       0.46      0.68      0.55       210

    accuracy                           0.38      1470
   macro avg       0.46      0.38      0.34      1470
weighted avg       0.46      0.38      0.34      1470

Random Forest with 5 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.4197278911564626
Confusion Matrix of Random Forest is:
 [[103  18   3  32  25  18  11]
 [ 17  60  12  28  43  22  28]
 [  5   6 152  22  21   1   3]
 [ 20  19  14  56  62  13  26]
 [ 29  13   7  62  74  10  15]
 [ 25   9   2  34  22  58  60]
 [ 11   5   3   8   7  62 114]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.49      0.49      0.49       210
           2       0.46      0.29      0.35       210
           3       0.79      0.72      0.75       210
           4       0.23      0.27      0.25       210
           5       0.29      0.35      0.32       210
           6       0.32      0.28      0.29       210
           7       0.44      0.54      0.49       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.42      1470
weighted avg       0.43      0.42      0.42      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.40476190476190477
Confusion Matrix of Random Forest is:
 [[ 96  20   8  28  24  18  16]
 [ 18  68  12  23  38  25  26]
 [  8   4 155  19  21   1   2]
 [ 28  22  16  53  48  17  26]
 [ 28  21  11  49  64  20  17]
 [ 20  16   2  34  24  57  57]
 [ 10   7   4  11  10  66 102]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.46      0.46      0.46       210
           2       0.43      0.32      0.37       210
           3       0.75      0.74      0.74       210
           4       0.24      0.25      0.25       210
           5       0.28      0.30      0.29       210
           6       0.28      0.27      0.28       210
           7       0.41      0.49      0.45       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.40      1470
weighted avg       0.41      0.40      0.40      1470

Accuracy of Random Forest after PC

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],2)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3272108843537415
Confusion Matrix of Logistic Regression is:
 [[  0  16  41  65  54   0  34]
 [  0  73  42  20  23   0  52]
 [  0  22 106  11  54   0  17]
 [  0  18  53  50  54   0  35]
 [  0  25  71  34  56   0  24]
 [  0  25  12  18  14   1 140]
 [  0  14   0   1   0   0 195]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.38      0.35      0.36       210
           3       0.33      0.50      0.40       210
           4       0.25      0.24      0.24       210
           5       0.22      0.27      0.24       210
           6       1.00      0.00      0.01       210
           7       0.39      0.93      0.55       210

    accuracy                           0.33      1470
   macro avg       0.37      0.33      0.26      1470
weighted avg       0.37      0.33      0.26      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.39387755102040817
Confusion Matrix of KNN Model is:
 [[113  22  12  31  19   6   7]
 [ 46  78  16  17  18  23  12]
 [ 24  17 129  11  23   5   1]
 [ 57  31  18  47  41   9   7]
 [ 47  43  14  41  54   8   3]
 [ 27  41  11  18   9  53  51]
 [ 11  19   4   4   0  67 105]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.35      0.54      0.42       210
           2       0.31      0.37      0.34       210
           3       0.63      0.61      0.62       210
           4       0.28      0.22      0.25       210
           5       0.33      0.26      0.29       210
           6       0.31      0.25      0.28       210
           7       0.56      0.50      0.53       210

    accuracy                           0.39      1470
   macro avg       0.40      0.39      0.39      1470
weighted avg       0.40      0.39      0.39      1470

KNN with 5 Neighbors
Accuracy of KNN Model aft

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.36122448979591837
Confusion Matrix of SVM is:
 [[ 14  17  57  46  50   3  23]
 [  8  83  55  15  19   2  28]
 [  3  14 151   6  22   0  14]
 [  2  13  69  51  47   2  26]
 [  5  20  93  34  44   2  12]
 [  3  45  16  20  12   1 113]
 [  0  22   0   1   0   0 187]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.40      0.07      0.11       210
           2       0.39      0.40      0.39       210
           3       0.34      0.72      0.46       210
           4       0.29      0.24      0.27       210
           5       0.23      0.21      0.22       210
           6       0.10      0.00      0.01       210
           7       0.46      0.89      0.61       210

    accuracy                           0.36      1470
   macro avg       0.32      0.36      0.30      1470
weighted avg       0.32      0.36      0.30      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.4142857142857143
Confusion Matrix of Decision Tree is:
 [[103  26   7  28  26  10  10]
 [ 29  74  13  26  29  30   9]
 [ 19  11 128  13  30   8   1]
 [ 45  21  12  64  44  21   3]
 [ 36  28   9  58  66  11   2]
 [ 33  29   2  22  12  57  55]
 [ 10  16   6   9   3  49 117]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.49      0.42       210
           2       0.36      0.35      0.36       210
           3       0.72      0.61      0.66       210
           4       0.29      0.30      0.30       210
           5       0.31      0.31      0.31       210
           6       0.31      0.27      0.29       210
           7       0.59      0.56      0.57       210

    accuracy                           0.41      1470
   macro avg       0.42      0.41      0.42      1470
weighted avg       0.42      0.41      0.42      1470

Decision Tree with 14 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.319047619047619
Confusion Matrix of Random Forest is:
 [[ 80   2 110   0   0   0  18]
 [ 28   7 119   0   0   0  56]
 [ 11   1 183   0   0   0  15]
 [ 73   4 114   0   0   0  19]
 [ 49   2 150   0   0   0   9]
 [ 34   1  47   0   0   0 128]
 [  7   0   4   0   0   0 199]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.28      0.38      0.33       210
           2       0.41      0.03      0.06       210
           3       0.25      0.87      0.39       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.45      0.95      0.61       210

    accuracy                           0.32      1470
   macro avg       0.20      0.32      0.20      1470
weighted avg       0.20      0.32      0.20      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.37551020408163266
Confusion Matrix of Random Forest is:
 [[ 65  23  82  22   0   3  15]
 [ 15  75  63  13   0  22  22]
 [ 15  20 158   2   0   4  11]
 [ 35  22  94  40   0   7  12]
 [ 21  33 118  29   0   2   7]
 [ 12  24  24  23   1  26 100]
 [  0   5   0   7   0  10 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.31      0.35       210
           2       0.37      0.36      0.36       210
           3       0.29      0.75      0.42       210
           4       0.29      0.19      0.23       210
           5       0.00      0.00      0.00       210
           6       0.35      0.12      0.18       210
           7       0.53      0.90      0.67       210

    accuracy                           0.38      1470
   macro avg       0.32      0.38      0.32      1470
weighted avg       0.32      0.38      0.32      1470

Random Forest with 3 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.44625850340136053
Confusion Matrix of Random Forest is:
 [[102  20   7  30  32  11   8]
 [ 26  87   8  20  30  28  11]
 [ 13   7 126  14  38   9   3]
 [ 44  25  11  57  53  14   6]
 [ 31  32   9  44  82   8   4]
 [ 22  28   4  19   9  51  77]
 [  1   9   1   1   1  46 151]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.43      0.49      0.45       210
           2       0.42      0.41      0.42       210
           3       0.76      0.60      0.67       210
           4       0.31      0.27      0.29       210
           5       0.33      0.39      0.36       210
           6       0.31      0.24      0.27       210
           7       0.58      0.72      0.64       210

    accuracy                           0.45      1470
   macro avg       0.45      0.45      0.44      1470
weighted avg       0.45      0.45      0.44      1470

Random Forest with 11 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.43605442176870746
Confusion Matrix of Random Forest is:
 [[ 89  23  10  40  26  13   9]
 [ 31  80   8  26  28  30   7]
 [  7  15 138  12  29   7   2]
 [ 32  28  11  75  48  10   6]
 [ 28  43   8  57  60   9   5]
 [ 16  33   6  20   7  64  64]
 [  6  11   3   3   0  52 135]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.43      0.42      0.42       210
           2       0.34      0.38      0.36       210
           3       0.75      0.66      0.70       210
           4       0.32      0.36      0.34       210
           5       0.30      0.29      0.29       210
           6       0.35      0.30      0.32       210
           7       0.59      0.64      0.62       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.44      1470
weighted avg       0.44      0.44      0.44      1470

Random Forest with 19 max_depth
Ac

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_vbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.454421768707483
Confusion Matrix of Logistic Regression is:
 [[135   2  13   0  32   1  27]
 [ 35  59  77   0  16   3  20]
 [  7   1 196   0   3   1   2]
 [ 52  27  27   0  58   0  46]
 [ 53   5   9   0  99   0  44]
 [ 23  34  32   0  13   5 103]
 [  1  30   0   0   4   1 174]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.44      0.64      0.52       210
           2       0.37      0.28      0.32       210
           3       0.55      0.93      0.70       210
           4       0.00      0.00      0.00       210
           5       0.44      0.47      0.46       210
           6       0.45      0.02      0.05       210
           7       0.42      0.83      0.56       210

    accuracy                           0.45      1470
   macro avg       0.38      0.45      0.37      1470
weighted avg       0.38      0.45      0.37      1470

KNN with 3 Neighbo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.5312925170068027
Confusion Matrix of KNN Model is:
 [[137   4   2  29  29   5   4]
 [ 15 128  11  22  12  16   6]
 [  8  19 165   9   6   2   1]
 [ 41  32   9  72  37  11   8]
 [ 55  14   0  45  83   9   4]
 [ 22  28  12  31  13  62  42]
 [  9  12   0   8   5  42 134]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.48      0.65      0.55       210
           2       0.54      0.61      0.57       210
           3       0.83      0.79      0.81       210
           4       0.33      0.34      0.34       210
           5       0.45      0.40      0.42       210
           6       0.42      0.30      0.35       210
           7       0.67      0.64      0.66       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.4
Confusion Matrix of SVM is:
 [[104  26  17   1   3  20  39]
 [  5 100  55   0   0  28  22]
 [  0  19 178   0   1   9   3]
 [ 14  89  21   4   4  16  62]
 [ 28  77   9  10  13  15  58]
 [  3  65  14   1   0  27 100]
 [  0  45   0   0   0   3 162]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.68      0.50      0.57       210
           2       0.24      0.48      0.32       210
           3       0.61      0.85      0.71       210
           4       0.25      0.02      0.04       210
           5       0.62      0.06      0.11       210
           6       0.23      0.13      0.16       210
           7       0.36      0.77      0.49       210

    accuracy                           0.40      1470
   macro avg       0.43      0.40      0.34      1470
weighted avg       0.43      0.40      0.34      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is: 0.531292517006

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[126   3   3  23  34  17   4]
 [  8 111  16  29  17  24   5]
 [  6  17 165  10   7   4   1]
 [ 25  27  15  69  49  16   9]
 [ 40  19   4  46  85  10   6]
 [ 18  27  12  28  15  66  44]
 [  5   3   0  15   4  59 124]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.60      0.58       210
           2       0.54      0.53      0.53       210
           3       0.77      0.79      0.78       210
           4       0.31      0.33      0.32       210
           5       0.40      0.40      0.40       210
           6       0.34      0.31      0.33       210
           7       0.64      0.59      0.62       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 11 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5142857142857142
Con

Confusion Matrix of Decision Tree is:
 [[119   3   6  28  30  17   7]
 [ 10 108  16  21  19  30   6]
 [  2  13 166  12   9   8   0]
 [ 26  25  16  56  48  29  10]
 [ 38  18   3  49  82  13   7]
 [ 19  25   9  34  26  60  37]
 [  9   6   0  17   8  42 128]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.57      0.55       210
           2       0.55      0.51      0.53       210
           3       0.77      0.79      0.78       210
           4       0.26      0.27      0.26       210
           5       0.37      0.39      0.38       210
           6       0.30      0.29      0.29       210
           7       0.66      0.61      0.63       210

    accuracy                           0.49      1470
   macro avg       0.49      0.49      0.49      1470
weighted avg       0.49      0.49      0.49      1470

Decision Tree with 19 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4931972789115646
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.42993197278911566
Confusion Matrix of Random Forest is:
 [[134   2  11   0  21   5  37]
 [ 28  50  38   0   5  31  58]
 [ 10   0 173   0   1  19   7]
 [ 39   9  43   0  28  10  81]
 [ 45  12  48   0  48   2  55]
 [ 19   5  19   0   7  29 131]
 [  0   0   1   0   3   8 198]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.49      0.64      0.55       210
           2       0.64      0.24      0.35       210
           3       0.52      0.82      0.64       210
           4       0.00      0.00      0.00       210
           5       0.42      0.23      0.30       210
           6       0.28      0.14      0.18       210
           7       0.35      0.94      0.51       210

    accuracy                           0.43      1470
   macro avg       0.39      0.43      0.36      1470
weighted avg       0.39      0.43      0.36      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4884353741496599
Confusion Matrix of Random Forest is:
 [[126   5   1   0  44   9  25]
 [ 16  77  28   3  18  22  46]
 [  9   2 171   0   6  18   4]
 [ 28  12  13   3  77  19  58]
 [ 36  13   4   2 121   4  30]
 [ 18  10  15   1  28  34 104]
 [  0   4   1   0  14   5 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.60      0.57       210
           2       0.63      0.37      0.46       210
           3       0.73      0.81      0.77       210
           4       0.33      0.01      0.03       210
           5       0.39      0.58      0.47       210
           6       0.31      0.16      0.21       210
           7       0.41      0.89      0.56       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.44      1470
weighted avg       0.48      0.49      0.44      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5707482993197279
Confusion Matrix of Random Forest is:
 [[124   4   2  23  41  11   5]
 [  6 122  18  24  14  16  10]
 [  6  14 170   6   6   7   1]
 [ 20  25  10  74  57  12  12]
 [ 21   9   1  39 125   8   7]
 [  8  27   9  27  20  63  56]
 [  2   9   0   5   3  30 161]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.66      0.59      0.62       210
           2       0.58      0.58      0.58       210
           3       0.81      0.81      0.81       210
           4       0.37      0.35      0.36       210
           5       0.47      0.60      0.53       210
           6       0.43      0.30      0.35       210
           7       0.64      0.77      0.70       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.56      1470
weighted avg       0.57      0.57      0.56      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5537414965986395
Confusion Matrix of Random Forest is:
 [[118   5   4  27  39  13   4]
 [  8 118  17  24  19  15   9]
 [  2  11 168  10   6  12   1]
 [ 19  26  10  74  53  20   8]
 [ 26  12   1  40 110  12   9]
 [ 10  29  10  21  14  73  53]
 [  2   9   0   6   6  34 153]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.64      0.56      0.60       210
           2       0.56      0.56      0.56       210
           3       0.80      0.80      0.80       210
           4       0.37      0.35      0.36       210
           5       0.45      0.52      0.48       210
           6       0.41      0.35      0.38       210
           7       0.65      0.73      0.68       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Random Forest with 20 max_depth
Acc

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//gpt_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.44285714285714284
Confusion Matrix of Logistic Regression is:
 [[101   9  28  12  15  10  35]
 [  6  87  58   8  19   5  27]
 [  7   2 190   4   2   4   1]
 [ 40  20  24  16  52   8  50]
 [ 46  32   6  13  71   3  39]
 [ 18  29  30  11   7   9 106]
 [  2  27   2   0   1   1 177]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.46      0.48      0.47       210
           2       0.42      0.41      0.42       210
           3       0.56      0.90      0.69       210
           4       0.25      0.08      0.12       210
           5       0.43      0.34      0.38       210
           6       0.23      0.04      0.07       210
           7       0.41      0.84      0.55       210

    accuracy                           0.44      1470
   macro avg       0.39      0.44      0.39      1470
weighted avg       0.39      0.44      0.39      1470

KNN with 3 Neigh

Accuracy of SVM after PCA and ICA is: 0.40680272108843535
Confusion Matrix of SVM is:
 [[ 83   8  26   5   3  20  65]
 [  1  62  51  10   7  20  59]
 [  0   0 194   4   0   6   6]
 [ 20  16  22  20  18  31  83]
 [ 42  37   6  14  28  18  65]
 [ 12   7  26  11   0  11 143]
 [  0   8   2   0   0   0 200]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.53      0.40      0.45       210
           2       0.45      0.30      0.36       210
           3       0.59      0.92      0.72       210
           4       0.31      0.10      0.15       210
           5       0.50      0.13      0.21       210
           6       0.10      0.05      0.07       210
           7       0.32      0.95      0.48       210

    accuracy                           0.41      1470
   macro avg       0.40      0.41      0.35      1470
weighted avg       0.40      0.41      0.35      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[101  10   5  25  33  16  20]
 [  6 103   8  31  29  13  20]
 [  3  13 162  17   3  11   1]
 [ 18  23   8  68  53  13  27]
 [ 21  21   2  42  88  16  20]
 [ 16  28  12  20  17  49  68]
 [  5  16   1  11   9  35 133]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.59      0.48      0.53       210
           2       0.48      0.49      0.49       210
           3       0.82      0.77      0.79       210
           4       0.32      0.32      0.32       210
           5       0.38      0.42      0.40       210
           6       0.32      0.23      0.27       210
           7       0.46      0.63      0.53       210

    accuracy                           0.48      1470
   macro avg       0.48      0.48      0.48      1470
weighted avg       0.48      0.48      0.48      1470

Decision Tree with 11 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.47006802721088436
Co

Accuracy of Random Forest after PCA and ICA is: 0.37482993197278913
Confusion Matrix of Random Forest is:
 [[ 88  31  38   0   8   0  45]
 [ 10 136  33   0   1   0  30]
 [  6  55 141   0   2   0   6]
 [ 38  98   8   0   7   0  59]
 [ 68  83   1   0  11   0  47]
 [ 18  54  17   0   5   0 116]
 [  3  25   7   0   0   0 175]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.42      0.40       210
           2       0.28      0.65      0.39       210
           3       0.58      0.67      0.62       210
           4       0.00      0.00      0.00       210
           5       0.32      0.05      0.09       210
           6       0.00      0.00      0.00       210
           7       0.37      0.83      0.51       210

    accuracy                           0.37      1470
   macro avg       0.28      0.37      0.29      1470
weighted avg       0.28      0.37      0.29      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4142857142857143
Confusion Matrix of Random Forest is:
 [[123  29   5   1  10   0  42]
 [  6 135  32   0   8   0  29]
 [  6  49 147   0   2   0   6]
 [ 30  95   9   0  20   0  56]
 [ 54  82   1   0  31   0  42]
 [ 19  54  16   0   6   0 115]
 [  4  25   7   0   1   0 173]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.59      0.54       210
           2       0.29      0.64      0.40       210
           3       0.68      0.70      0.69       210
           4       0.00      0.00      0.00       210
           5       0.40      0.15      0.22       210
           6       0.00      0.00      0.00       210
           7       0.37      0.82      0.51       210

    accuracy                           0.41      1470
   macro avg       0.32      0.41      0.34      1470
weighted avg       0.32      0.41      0.34      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.44625850340136053
Confusion Matrix of Random Forest is:
 [[113  26   4   0  25   1  41]
 [  4 129  31   0  15   1  30]
 [  7  33 161   0   3   0   6]
 [ 19  87  10   0  38   2  54]
 [ 31  67   1   0  69   1  41]
 [ 14  54  16   0   8   4 114]
 [  4  22   2   0   2   0 180]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.54      0.56       210
           2       0.31      0.61      0.41       210
           3       0.72      0.77      0.74       210
           4       0.00      0.00      0.00       210
           5       0.43      0.33      0.37       210
           6       0.44      0.02      0.04       210
           7       0.39      0.86      0.53       210

    accuracy                           0.45      1470
   macro avg       0.41      0.45      0.38      1470
weighted avg       0.41      0.45      0.38      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4775510204081633
Confusion Matrix of Random Forest is:
 [[111  13   3   4  36   9  34]
 [  4 127  21   0  27   3  28]
 [  6  32 161   2   3   1   5]
 [ 18  54  10   5  72   3  48]
 [ 28  28   2   1 109   3  39]
 [ 11  46  14   3  14  15 107]
 [  1  21   1   0   7   6 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.53      0.57       210
           2       0.40      0.60      0.48       210
           3       0.76      0.77      0.76       210
           4       0.33      0.02      0.04       210
           5       0.41      0.52      0.46       210
           6       0.38      0.07      0.12       210
           7       0.40      0.83      0.54       210

    accuracy                           0.48      1470
   macro avg       0.47      0.48      0.42      1470
weighted avg       0.47      0.48      0.42      1470

Random Forest with 5 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5374149659863946
Confusion Matrix of Random Forest is:
 [[108   3   7  27  32  15  18]
 [  6 124   7  19  22  20  12]
 [  2   7 180  13   1   6   1]
 [ 11  19  13  70  53  15  29]
 [ 21  24   2  30 105  11  17]
 [ 13  31  12  22  17  52  63]
 [  5  13   0   1   9  31 151]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.65      0.51      0.57       210
           2       0.56      0.59      0.58       210
           3       0.81      0.86      0.84       210
           4       0.38      0.33      0.36       210
           5       0.44      0.50      0.47       210
           6       0.35      0.25      0.29       210
           7       0.52      0.72      0.60       210

    accuracy                           0.54      1470
   macro avg       0.53      0.54      0.53      1470
weighted avg       0.53      0.54      0.53      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.527891156462585
Confusion Matrix of Random Forest is:
 [[110   4   5  28  30  16  17]
 [  8 113   9  17  29  17  17]
 [  2   4 176  19   2   7   0]
 [ 12  23  13  71  47  15  29]
 [ 26  20   3  34 101  10  16]
 [ 12  28  11  24  14  57  64]
 [  8  11   0   1   8  34 148]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.52      0.57       210
           2       0.56      0.54      0.55       210
           3       0.81      0.84      0.82       210
           4       0.37      0.34      0.35       210
           5       0.44      0.48      0.46       210
           6       0.37      0.27      0.31       210
           7       0.51      0.70      0.59       210

    accuracy                           0.53      1470
   macro avg       0.52      0.53      0.52      1470
weighted avg       0.52      0.53      0.52      1470

Accuracy of Random Forest after PCA 

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//xlm_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")



Accuracy of Logistic Regression after PCA and ICA is: 0.5047619047619047
Confusion Matrix of Logistic Regression is:
 [[121   2  21   3  26  16  21]
 [  6  98  46   7  21  12  20]
 [ 29   1 164   7   1   6   2]
 [ 37  17  31  40  31  17  37]
 [ 57  17   7   7  91   2  29]
 [ 13   9  26   6   6  49 101]
 [  0   1   0   0   4  26 179]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.46      0.58      0.51       210
           2       0.68      0.47      0.55       210
           3       0.56      0.78      0.65       210
           4       0.57      0.19      0.29       210
           5       0.51      0.43      0.47       210
           6       0.38      0.23      0.29       210
           7       0.46      0.85      0.60       210

    accuracy                           0.50      1470
   macro avg       0.52      0.50      0.48      1470
weighted avg       0.52      0.50      0.48      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.4959183673469388
Confusion Matrix of SVM is:
 [[128   2  15   0  22  14  29]
 [  7  94  51   3  18   8  29]
 [ 33   2 165   3   0   4   3]
 [ 43  20  29  24  33  18  43]
 [ 58  18   5   4  87   1  37]
 [ 15   9  26   3   6  40 111]
 [  0   0   0   0   4  15 191]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.45      0.61      0.52       210
           2       0.65      0.45      0.53       210
           3       0.57      0.79      0.66       210
           4       0.65      0.11      0.19       210
           5       0.51      0.41      0.46       210
           6       0.40      0.19      0.26       210
           7       0.43      0.91      0.58       210

    accuracy                           0.50      1470
   macro avg       0.52      0.50      0.46      1470
weighted avg       0.52      0.50      0.46      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[116   9   2  13  46  15   9]
 [  3 124   7  21  31   9  15]
 [  4  15 171   7   4   6   3]
 [  8  37   9  72  38  24  22]
 [ 34  17   6  20 115   9   9]
 [ 12  24   6  27  18  67  56]
 [  8   7   3  15   5  53 119]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.63      0.55      0.59       210
           2       0.53      0.59      0.56       210
           3       0.84      0.81      0.83       210
           4       0.41      0.34      0.37       210
           5       0.45      0.55      0.49       210
           6       0.37      0.32      0.34       210
           7       0.51      0.57      0.54       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5238095238095238
Con

Accuracy of Decision Tree after PCA and ICA is: 0.48435374149659866
Confusion Matrix of Decision Tree is:
 [[120   5   5  22  34  15   9]
 [ 12 114  10  28  23  17   6]
 [  6  11 174   9   2   5   3]
 [ 23  38  12  57  29  39  12]
 [ 49  19   7  33  81  13   8]
 [ 19  19  10  33  19  68  42]
 [ 14  10   1  23  12  52  98]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.57      0.53       210
           2       0.53      0.54      0.54       210
           3       0.79      0.83      0.81       210
           4       0.28      0.27      0.27       210
           5       0.41      0.39      0.40       210
           6       0.33      0.32      0.32       210
           7       0.55      0.47      0.51       210

    accuracy                           0.48      1470
   macro avg       0.48      0.48      0.48      1470
weighted avg       0.48      0.48      0.48      1470

Decision Tree with 20 max_depth
Ac

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.48707482993197276
Confusion Matrix of Random Forest is:
 [[ 94   8   8   0  73  18   9]
 [  1  95  38   0  49  13  14]
 [  4  14 152   0  24  15   1]
 [  2  24  51   0  84  21  28]
 [ 19  27   4   0 143   1  16]
 [  1  11  25   0  32  54  87]
 [  0   2   0   0   8  22 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.78      0.45      0.57       210
           2       0.52      0.45      0.49       210
           3       0.55      0.72      0.62       210
           4       0.00      0.00      0.00       210
           5       0.35      0.68      0.46       210
           6       0.38      0.26      0.31       210
           7       0.53      0.85      0.66       210

    accuracy                           0.49      1470
   macro avg       0.44      0.49      0.44      1470
weighted avg       0.44      0.49      0.44      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.5210884353741496
Confusion Matrix of Random Forest is:
 [[ 96  10   0   7  70  18   9]
 [  1 120  17   6  40  13  13]
 [  4  16 139  13  22  15   1]
 [  1  27   6  45  82  21  28]
 [ 18  33   0   4 138   1  16]
 [  1  13   8  17  30  54  87]
 [  0   2   0   0   8  26 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.46      0.58       210
           2       0.54      0.57      0.56       210
           3       0.82      0.66      0.73       210
           4       0.49      0.21      0.30       210
           5       0.35      0.66      0.46       210
           6       0.36      0.26      0.30       210
           7       0.53      0.83      0.65       210

    accuracy                           0.52      1470
   macro avg       0.56      0.52      0.51      1470
weighted avg       0.56      0.52      0.51      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5836734693877551
Confusion Matrix of Random Forest is:
 [[127   5   1  12  43  12  10]
 [  5 137   5  23  19  16   5]
 [  7  12 165  14   2  10   0]
 [  9  27   6  85  35  29  19]
 [ 35  21   0  21 116   6  11]
 [  9  16   7  21  14  80  63]
 [  2   3   0   5   3  49 148]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.65      0.60      0.63       210
           2       0.62      0.65      0.64       210
           3       0.90      0.79      0.84       210
           4       0.47      0.40      0.43       210
           5       0.50      0.55      0.52       210
           6       0.40      0.38      0.39       210
           7       0.58      0.70      0.64       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.58      1470
weighted avg       0.59      0.58      0.58      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5748299319727891
Confusion Matrix of Random Forest is:
 [[130   4   1  20  37  12   6]
 [  5 136   6  25  21  10   7]
 [  7  14 168  12   0   9   0]
 [ 13  32   6  87  29  27  16]
 [ 37  23   0  23 108  10   9]
 [  9  14   7  26  14  80  60]
 [  2   5   0   8   4  55 136]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.64      0.62      0.63       210
           2       0.60      0.65      0.62       210
           3       0.89      0.80      0.84       210
           4       0.43      0.41      0.42       210
           5       0.51      0.51      0.51       210
           6       0.39      0.38      0.39       210
           7       0.58      0.65      0.61       210

    accuracy                           0.57      1470
   macro avg       0.58      0.57      0.58      1470
weighted avg       0.58      0.57      0.58      1470

Random Forest with 20 max_depth
Acc

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//bert_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],7)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3619047619047619
Confusion Matrix of Logistic Regression is:
 [[ 15  66  40   2  46   0  41]
 [  8  84  34   2  48   4  30]
 [  7  41 133   2  18   2   7]
 [  4  44  11   6  77   3  65]
 [  0  24   7   2 136   1  40]
 [  9  40  23   6  34   7  91]
 [  3  11   8   7  26   4 151]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.33      0.07      0.12       210
           2       0.27      0.40      0.32       210
           3       0.52      0.63      0.57       210
           4       0.22      0.03      0.05       210
           5       0.35      0.65      0.46       210
           6       0.33      0.03      0.06       210
           7       0.36      0.72      0.48       210

    accuracy                           0.36      1470
   macro avg       0.34      0.36      0.29      1470
weighted avg       0.34      0.36      0.29      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.3326530612244898
Confusion Matrix of SVM is:
 [[ 20 104   0   3  50   0  33]
 [  2 130   1   8  53   1  15]
 [ 12 105  62   1  27   2   1]
 [  0  74   0  10  74   3  49]
 [  0  37   0   5 135   1  32]
 [  3  93   0   9  34   0  71]
 [  0  43   0  11  22   2 132]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.54      0.10      0.16       210
           2       0.22      0.62      0.33       210
           3       0.98      0.30      0.45       210
           4       0.21      0.05      0.08       210
           5       0.34      0.64      0.45       210
           6       0.00      0.00      0.00       210
           7       0.40      0.63      0.49       210

    accuracy                           0.33      1470
   macro avg       0.39      0.33      0.28      1470
weighted avg       0.39      0.33      0.28      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 55  16  15  14  53  42  15]
 [ 15  41  21  22  56  43  12]
 [  6  25 139   4  10  21   5]
 [ 13  15  10  38  59  53  22]
 [ 19  10   6  32 104  26  13]
 [ 19   8  10  10  37  88  38]
 [ 17   2   3   7  24  94  63]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.26      0.31       210
           2       0.35      0.20      0.25       210
           3       0.68      0.66      0.67       210
           4       0.30      0.18      0.23       210
           5       0.30      0.50      0.38       210
           6       0.24      0.42      0.31       210
           7       0.38      0.30      0.33       210

    accuracy                           0.36      1470
   macro avg       0.38      0.36      0.35      1470
weighted avg       0.38      0.36      0.35      1470

Decision Tree with 10 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.38639455782312926
Co

Accuracy of Decision Tree after PCA and ICA is: 0.37482993197278913
Confusion Matrix of Decision Tree is:
 [[ 86  23  12  24  28  25  12]
 [ 25  63  28  23  27  29  15]
 [ 12  12 154   8  11   9   4]
 [ 23  27  10  61  30  22  37]
 [ 41  38   6  34  58  18  15]
 [ 28  19  13  28  24  60  38]
 [ 26  17   2  34  22  40  69]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.36      0.41      0.38       210
           2       0.32      0.30      0.31       210
           3       0.68      0.73      0.71       210
           4       0.29      0.29      0.29       210
           5       0.29      0.28      0.28       210
           6       0.30      0.29      0.29       210
           7       0.36      0.33      0.35       210

    accuracy                           0.37      1470
   macro avg       0.37      0.37      0.37      1470
weighted avg       0.37      0.37      0.37      1470

Accuracy of Decision Tree after PC

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3578231292517007
Confusion Matrix of Random Forest is:
 [[ 50  66  25   1  28   0  40]
 [ 26 111  28   0  24   0  21]
 [ 11  45 136   0  14   0   4]
 [ 31  55  10   1  56   0  57]
 [ 25  39   7   0 101   0  38]
 [ 35  55  23   0  15   1  81]
 [ 45  17  10   1  11   0 126]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.22      0.24      0.23       210
           2       0.29      0.53      0.37       210
           3       0.57      0.65      0.61       210
           4       0.33      0.00      0.01       210
           5       0.41      0.48      0.44       210
           6       1.00      0.00      0.01       210
           7       0.34      0.60      0.44       210

    accuracy                           0.36      1470
   macro avg       0.45      0.36      0.30      1470
weighted avg       0.45      0.36      0.30      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.46870748299319726
Confusion Matrix of Random Forest is:
 [[ 68  38   6  15  36  14  33]
 [  9 116   6  17  29  17  16]
 [  6  35 149   6   4   8   2]
 [  6  32   3  54  47  19  49]
 [  8  29   2  20 106  11  34]
 [  9  28   8  19  21  45  80]
 [  3   5   1  15  12  23 151]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.32      0.43       210
           2       0.41      0.55      0.47       210
           3       0.85      0.71      0.77       210
           4       0.37      0.26      0.30       210
           5       0.42      0.50      0.46       210
           6       0.33      0.21      0.26       210
           7       0.41      0.72      0.53       210

    accuracy                           0.47      1470
   macro avg       0.49      0.47      0.46      1470
weighted avg       0.49      0.47      0.46      1470

Random Forest with 11 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.45102040816326533
Confusion Matrix of Random Forest is:
 [[ 77  23   8  21  34  23  24]
 [  7  98  15  21  27  24  18]
 [  6  23 162   4   3   9   3]
 [ 15  26   6  58  38  23  44]
 [ 17  24   4  30  95  14  26]
 [ 15  18  16  27  16  50  68]
 [  7   5   4  17  17  37 123]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.37      0.44       210
           2       0.45      0.47      0.46       210
           3       0.75      0.77      0.76       210
           4       0.33      0.28      0.30       210
           5       0.41      0.45      0.43       210
           6       0.28      0.24      0.26       210
           7       0.40      0.59      0.48       210

    accuracy                           0.45      1470
   macro avg       0.45      0.45      0.45      1470
weighted avg       0.45      0.45      0.45      1470

Random Forest with 19 max_depth
Ac

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4231292517006803
Confusion Matrix of Logistic Regression is:
 [[ 47  32  45   0  62   0  24]
 [ 21  84  27   0  50   1  27]
 [ 18  11 163   0  15   0   3]
 [ 19  17  19   1 122   0  32]
 [  8  21   3   0 146   0  32]
 [ 13  40  37   0  21   0  99]
 [  1  21   4   0   3   0 181]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.37      0.22      0.28       210
           2       0.37      0.40      0.39       210
           3       0.55      0.78      0.64       210
           4       1.00      0.00      0.01       210
           5       0.35      0.70      0.46       210
           6       0.00      0.00      0.00       210
           7       0.45      0.86      0.60       210

    accuracy                           0.42      1470
   macro avg       0.44      0.42      0.34      1470
weighted avg       0.44      0.42      0.34      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.37142857142857144
Confusion Matrix of SVM is:
 [[ 60  43   6   0  88   1  12]
 [ 33  84   8   0  75   2   8]
 [ 24  20 107   2  57   0   0]
 [ 14  25   4   0 156   1  10]
 [  9  24   0   0 165   0  12]
 [ 31  71   7   0  43   2  56]
 [  4  69   0   0   5   4 128]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.34      0.29      0.31       210
           2       0.25      0.40      0.31       210
           3       0.81      0.51      0.63       210
           4       0.00      0.00      0.00       210
           5       0.28      0.79      0.41       210
           6       0.20      0.01      0.02       210
           7       0.57      0.61      0.59       210

    accuracy                           0.37      1470
   macro avg       0.35      0.37      0.32      1470
weighted avg       0.35      0.37      0.32      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 79  22  10  35  28  25  11]
 [ 25  72  12  23  31  36  11]
 [ 17   6 153  13   8  13   0]
 [ 28  28  11  62  61  13   7]
 [ 15  25   3  44  97  17   9]
 [ 25  18  15  31  15  60  46]
 [  8  11   4   7   2  53 125]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.38      0.39       210
           2       0.40      0.34      0.37       210
           3       0.74      0.73      0.73       210
           4       0.29      0.30      0.29       210
           5       0.40      0.46      0.43       210
           6       0.28      0.29      0.28       210
           7       0.60      0.60      0.60       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.44      1470
weighted avg       0.44      0.44      0.44      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.43333333333333335
Co

Accuracy of Random Forest after PCA and ICA is: 0.32653061224489793
Confusion Matrix of Random Forest is:
 [[  0   0   8   0 112   0  90]
 [  0   0   7   0  97   0 106]
 [  0   1 108   0  68   0  33]
 [  0   0   4   0 162   0  44]
 [  0   1   0   0 173   0  36]
 [  0   0   8   0  53   0 149]
 [  0   1   3   0   7   0 199]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.78      0.51      0.62       210
           4       0.00      0.00      0.00       210
           5       0.26      0.82      0.39       210
           6       0.00      0.00      0.00       210
           7       0.30      0.95      0.46       210

    accuracy                           0.33      1470
   macro avg       0.19      0.33      0.21      1470
weighted avg       0.19      0.33      0.21      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3585034013605442
Confusion Matrix of Random Forest is:
 [[  3  71  11   0 102   0  23]
 [  1  89   7   0  90   0  23]
 [  0  32 112   0  64   0   2]
 [  0  28   5   0 158   0  19]
 [  1  22   0   0 168   0  19]
 [  1  69   9   0  47   1  83]
 [  0  48   3   0   5   0 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.50      0.01      0.03       210
           2       0.25      0.42      0.31       210
           3       0.76      0.53      0.63       210
           4       0.00      0.00      0.00       210
           5       0.26      0.80      0.40       210
           6       1.00      0.00      0.01       210
           7       0.48      0.73      0.58       210

    accuracy                           0.36      1470
   macro avg       0.46      0.36      0.28      1470
weighted avg       0.46      0.36      0.28      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3836734693877551
Confusion Matrix of Random Forest is:
 [[ 13  71   9   6  88   2  21]
 [  2 104   7   5  76   0  16]
 [ 13  26 111  16  43   0   1]
 [  4  27   5  21 136   2  15]
 [  2  25   0   5 163   1  14]
 [  4  75   8   3  41   3  76]
 [  0  53   1   2   4   1 149]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.34      0.06      0.10       210
           2       0.27      0.50      0.35       210
           3       0.79      0.53      0.63       210
           4       0.36      0.10      0.16       210
           5       0.30      0.78      0.43       210
           6       0.33      0.01      0.03       210
           7       0.51      0.71      0.59       210

    accuracy                           0.38      1470
   macro avg       0.41      0.38      0.33      1470
weighted avg       0.41      0.38      0.33      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5251700680272109
Confusion Matrix of Random Forest is:
 [[ 85  23  14  24  35  18  11]
 [ 15  99  15  12  37  18  14]
 [ 12   9 166  11   4   8   0]
 [ 19  28   6  69  65  12  11]
 [ 16  26   1  17 130  10  10]
 [ 21  24  18  15  14  61  57]
 [  5  10   0   3   4  26 162]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.49      0.40      0.44       210
           2       0.45      0.47      0.46       210
           3       0.75      0.79      0.77       210
           4       0.46      0.33      0.38       210
           5       0.45      0.62      0.52       210
           6       0.40      0.29      0.34       210
           7       0.61      0.77      0.68       210

    accuracy                           0.53      1470
   macro avg       0.52      0.53      0.51      1470
weighted avg       0.52      0.53      0.51      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5034013605442177
Confusion Matrix of Random Forest is:
 [[ 83  18  15  25  34  22  13]
 [ 22  94  10  23  27  21  13]
 [ 10  10 167  11   2   9   1]
 [ 26  24   7  75  52  13  13]
 [ 23  29   2  32 107   9   8]
 [ 25  22  15  19  12  60  57]
 [  7   9   1   2   4  33 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.40      0.41       210
           2       0.46      0.45      0.45       210
           3       0.77      0.80      0.78       210
           4       0.40      0.36      0.38       210
           5       0.45      0.51      0.48       210
           6       0.36      0.29      0.32       210
           7       0.59      0.73      0.66       210

    accuracy                           0.50      1470
   macro avg       0.49      0.50      0.50      1470
weighted avg       0.49      0.50      0.50      1470

Random Forest with 20 max_depth
Acc

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4095238095238095
Confusion Matrix of Logistic Regression is:
 [[112   5  35   2  26   0  30]
 [ 14  80  63   1  25   0  27]
 [ 27  12 167   1   2   0   1]
 [ 41  30  40   3  39   2  55]
 [ 66  13   5   1  86   1  38]
 [ 20  37  40   1  14   2  96]
 [  2  48   0   1   6   1 152]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.40      0.53      0.46       210
           2       0.36      0.38      0.37       210
           3       0.48      0.80      0.60       210
           4       0.30      0.01      0.03       210
           5       0.43      0.41      0.42       210
           6       0.33      0.01      0.02       210
           7       0.38      0.72      0.50       210

    accuracy                           0.41      1470
   macro avg       0.38      0.41      0.34      1470
weighted avg       0.38      0.41      0.34      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3945578231292517
Confusion Matrix of SVM is:
 [[ 82  13  39  25  14   2  35]
 [  2  79  76  23   5   2  23]
 [  7  11 180  11   0   0   1]
 [ 13  38  44  37  18   5  55]
 [ 30  37   8  37  50   2  46]
 [  3  47  50  16   1   3  90]
 [  0  55   2   2   0   2 149]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.60      0.39      0.47       210
           2       0.28      0.38      0.32       210
           3       0.45      0.86      0.59       210
           4       0.25      0.18      0.20       210
           5       0.57      0.24      0.34       210
           6       0.19      0.01      0.03       210
           7       0.37      0.71      0.49       210

    accuracy                           0.39      1470
   macro avg       0.39      0.39      0.35      1470
weighted avg       0.39      0.39      0.35      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 85  11  10  19  57  16  12]
 [ 11  96  11  32  15  31  14]
 [ 10  15 158  14   4   8   1]
 [ 19  31   9  49  44  39  19]
 [ 34  18   2  28  88  22  18]
 [ 17  33   6  33  15  57  49]
 [  9  13   0  10  16  46 116]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.40      0.43       210
           2       0.44      0.46      0.45       210
           3       0.81      0.75      0.78       210
           4       0.26      0.23      0.25       210
           5       0.37      0.42      0.39       210
           6       0.26      0.27      0.27       210
           7       0.51      0.55      0.53       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.44      1470
weighted avg       0.44      0.44      0.44      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.45510204081632655
Co

Confusion Matrix of Decision Tree is:
 [[ 90  18   6  20  43  20  13]
 [ 18  96  13  22  18  28  15]
 [ 14  14 158  11   6   6   1]
 [ 30  36   9  50  39  29  17]
 [ 37  21   0  40  72  26  14]
 [ 24  30   9  30  20  54  43]
 [ 11  15   0  17  20  51  96]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.43      0.41       210
           2       0.42      0.46      0.44       210
           3       0.81      0.75      0.78       210
           4       0.26      0.24      0.25       210
           5       0.33      0.34      0.34       210
           6       0.25      0.26      0.25       210
           7       0.48      0.46      0.47       210

    accuracy                           0.42      1470
   macro avg       0.42      0.42      0.42      1470
weighted avg       0.42      0.42      0.42      1470

Decision Tree with 20 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.41496598639455784
Co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3816326530612245
Confusion Matrix of Random Forest is:
 [[140  20  25   0  10   0  15]
 [ 28 145  13   0   5   0  19]
 [ 38  21 147   0   0   0   4]
 [ 81  75  11   0   9   0  34]
 [106  59   0   0  18   0  27]
 [ 45  78  20   0   3   0  64]
 [ 25  68   0   0   6   0 111]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.30      0.67      0.42       210
           2       0.31      0.69      0.43       210
           3       0.68      0.70      0.69       210
           4       0.00      0.00      0.00       210
           5       0.35      0.09      0.14       210
           6       0.00      0.00      0.00       210
           7       0.41      0.53      0.46       210

    accuracy                           0.38      1470
   macro avg       0.29      0.38      0.30      1470
weighted avg       0.29      0.38      0.30      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4197278911564626
Confusion Matrix of Random Forest is:
 [[110  14  25   1  44   1  15]
 [ 19 142  13   2  17   1  16]
 [ 36  22 149   2   0   0   1]
 [ 52  70  11   5  35   0  37]
 [ 55  36   0   0  91   0  28]
 [ 30  75  18   3  10   2  72]
 [  7  68   0   2  15   0 118]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.36      0.52      0.42       210
           2       0.33      0.68      0.45       210
           3       0.69      0.71      0.70       210
           4       0.33      0.02      0.04       210
           5       0.43      0.43      0.43       210
           6       0.50      0.01      0.02       210
           7       0.41      0.56      0.47       210

    accuracy                           0.42      1470
   macro avg       0.44      0.42      0.36      1470
weighted avg       0.44      0.42      0.36      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5129251700680272
Confusion Matrix of Random Forest is:
 [[105   7  15  16  49   6  12]
 [  6 121  13  10  29  15  16]
 [  7  16 167  13   3   3   1]
 [ 10  35   9  61  47  21  27]
 [ 30  14   2  26 105   9  24]
 [ 16  26   9  26  17  45  71]
 [  6  13   0   2  15  24 150]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.50      0.54       210
           2       0.52      0.58      0.55       210
           3       0.78      0.80      0.79       210
           4       0.40      0.29      0.34       210
           5       0.40      0.50      0.44       210
           6       0.37      0.21      0.27       210
           7       0.50      0.71      0.59       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.50      1470
weighted avg       0.51      0.51      0.50      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4965986394557823
Confusion Matrix of Random Forest is:
 [[107   7  13  19  43  11  10]
 [ 13 113  18  18  22  14  12]
 [ 10  12 174   9   3   1   1]
 [ 22  28   9  57  45  28  21]
 [ 37  17   3  32  92  12  17]
 [ 20  26   8  26  18  50  62]
 [  8  11   0   7  15  32 137]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.49      0.51      0.50       210
           2       0.53      0.54      0.53       210
           3       0.77      0.83      0.80       210
           4       0.34      0.27      0.30       210
           5       0.39      0.44      0.41       210
           6       0.34      0.24      0.28       210
           7       0.53      0.65      0.58       210

    accuracy                           0.50      1470
   macro avg       0.48      0.50      0.49      1470
weighted avg       0.48      0.50      0.49      1470

Random Forest with 20 max_depth
Acc

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.40680272108843535
Confusion Matrix of Logistic Regression is:
 [[ 87   3  30   4  47   1  38]
 [ 11  37  65  11  31   2  53]
 [ 35  10 145   0   9   0  11]
 [ 26  27  13  20  52   3  69]
 [ 41   6   2   4 107   1  49]
 [ 14  21  16   5  12   2 140]
 [  0   4   0   2   2   2 200]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.41      0.41      0.41       210
           2       0.34      0.18      0.23       210
           3       0.54      0.69      0.60       210
           4       0.43      0.10      0.16       210
           5       0.41      0.51      0.46       210
           6       0.18      0.01      0.02       210
           7       0.36      0.95      0.52       210

    accuracy                           0.41      1470
   macro avg       0.38      0.41      0.34      1470
weighted avg       0.38      0.41      0.34      1470

KNN with 3 Neigh

Accuracy of SVM after PCA and ICA is: 0.37551020408163266
Confusion Matrix of SVM is:
 [[ 59  18  15  29  25   4  60]
 [  1  83  13  25   6   1  81]
 [ 27  34 106   6  12   2  23]
 [  7  44   4  54  10   4  87]
 [ 13  14   0  63  47   1  72]
 [  4  35   4  11   1   1 154]
 [  0   5   0   2   0   1 202]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.53      0.28      0.37       210
           2       0.36      0.40      0.37       210
           3       0.75      0.50      0.60       210
           4       0.28      0.26      0.27       210
           5       0.47      0.22      0.30       210
           6       0.07      0.00      0.01       210
           7       0.30      0.96      0.45       210

    accuracy                           0.38      1470
   macro avg       0.39      0.38      0.34      1470
weighted avg       0.39      0.38      0.34      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 97  18  22  12  44   6  11]
 [  7  97  27  21  17  29  12]
 [  8  28 158   6   5   4   1]
 [ 18  51  10  43  44  19  25]
 [ 32  17  12  23  93  11  22]
 [ 16  36  11  16  19  57  55]
 [  5   5   0   4  10  42 144]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.46      0.49       210
           2       0.38      0.46      0.42       210
           3       0.66      0.75      0.70       210
           4       0.34      0.20      0.26       210
           5       0.40      0.44      0.42       210
           6       0.34      0.27      0.30       210
           7       0.53      0.69      0.60       210

    accuracy                           0.47      1470
   macro avg       0.46      0.47      0.46      1470
weighted avg       0.46      0.47      0.46      1470

Decision Tree with 11 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.46802721088435373
Co

Accuracy of Random Forest after PCA and ICA is: 0.3653061224489796
Confusion Matrix of Random Forest is:
 [[ 88   0  38  32  25   0  27]
 [ 13   6  79  42  49   0  21]
 [ 25   2 150  21   8   0   4]
 [ 16   2  17  93  41   0  41]
 [ 64   0   2  49  61   0  34]
 [  3   3  30  47  39   0  88]
 [  0   3   1  22  45   0 139]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.42      0.42       210
           2       0.38      0.03      0.05       210
           3       0.47      0.71      0.57       210
           4       0.30      0.44      0.36       210
           5       0.23      0.29      0.26       210
           6       0.00      0.00      0.00       210
           7       0.39      0.66      0.49       210

    accuracy                           0.37      1470
   macro avg       0.31      0.37      0.31      1470
weighted avg       0.31      0.37      0.31      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.45170068027210886
Confusion Matrix of Random Forest is:
 [[ 57   1  37  26  64   3  22]
 [  5  44  55  58  27   4  17]
 [ 17   2 150  24  13   3   1]
 [  6   7  14  88  51   6  38]
 [ 18   3   2  25 127   1  34]
 [  4  18  23  44  19  20  82]
 [  0   9   1  11   3   8 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.27      0.36       210
           2       0.52      0.21      0.30       210
           3       0.53      0.71      0.61       210
           4       0.32      0.42      0.36       210
           5       0.42      0.60      0.49       210
           6       0.44      0.10      0.16       210
           7       0.48      0.85      0.61       210

    accuracy                           0.45      1470
   macro avg       0.46      0.45      0.41      1470
weighted avg       0.46      0.45      0.41      1470

Random Forest with 3 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5387755102040817
Confusion Matrix of Random Forest is:
 [[101   9  15  17  53   6   9]
 [  6 111  22  33  14  12  12]
 [ 10  21 162   7   6   3   1]
 [ 12  30   8  84  33  22  21]
 [ 18   9   4  36 114   9  20]
 [ 11  29  11  28  13  57  61]
 [  3   4   0   9   4  27 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.63      0.48      0.54       210
           2       0.52      0.53      0.52       210
           3       0.73      0.77      0.75       210
           4       0.39      0.40      0.40       210
           5       0.48      0.54      0.51       210
           6       0.42      0.27      0.33       210
           7       0.57      0.78      0.66       210

    accuracy                           0.54      1470
   macro avg       0.53      0.54      0.53      1470
weighted avg       0.53      0.54      0.53      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5299319727891156
Confusion Matrix of Random Forest is:
 [[104   8  16  18  50   8   6]
 [  9 110  22  28  15  14  12]
 [ 12  21 160   6   6   4   1]
 [ 20  28  10  75  33  24  20]
 [ 24  16   4  24 111  12  19]
 [ 13  26  10  24   8  71  58]
 [  3   5   0   6   6  42 148]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.56      0.50      0.53       210
           2       0.51      0.52      0.52       210
           3       0.72      0.76      0.74       210
           4       0.41      0.36      0.38       210
           5       0.48      0.53      0.51       210
           6       0.41      0.34      0.37       210
           7       0.56      0.70      0.62       210

    accuracy                           0.53      1470
   macro avg       0.52      0.53      0.52      1470
weighted avg       0.52      0.53      0.52      1470

Random Forest with 19 max_depth
Acc

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//xlm_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.32653061224489793
Confusion Matrix of Logistic Regression is:
 [[ 67  43  51  13   5   0  31]
 [ 25  80  54  19   3   1  28]
 [ 17  21 166   3   1   0   2]
 [ 41  60  16  35   6   0  52]
 [ 62  45  11  21  16   0  55]
 [ 41  54  19  30   3   0  63]
 [ 39  36   1  14   4   0 116]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.23      0.32      0.27       210
           2       0.24      0.38      0.29       210
           3       0.52      0.79      0.63       210
           4       0.26      0.17      0.20       210
           5       0.42      0.08      0.13       210
           6       0.00      0.00      0.00       210
           7       0.33      0.55      0.42       210

    accuracy                           0.33      1470
   macro avg       0.29      0.33      0.28      1470
weighted avg       0.29      0.33      0.28      1470

KNN with 3 Neigh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.2727891156462585
Confusion Matrix of SVM is:
 [[ 90  76   0  14   7   1  22]
 [ 33 123   7  21   4   3  19]
 [ 34 119  51   4   2   0   0]
 [ 56  80   1  35   6   1  31]
 [ 85  57   0  21   8   0  39]
 [ 52  70   0  32   7   2  47]
 [ 51  42   0  13  11   1  92]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.22      0.43      0.29       210
           2       0.22      0.59      0.32       210
           3       0.86      0.24      0.38       210
           4       0.25      0.17      0.20       210
           5       0.18      0.04      0.06       210
           6       0.25      0.01      0.02       210
           7       0.37      0.44      0.40       210

    accuracy                           0.27      1470
   macro avg       0.34      0.27      0.24      1470
weighted avg       0.34      0.27      0.24      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 72  25   9  24  19  25  36]
 [ 23  91   9  27  13  21  26]
 [ 18  34 133   7   4   9   5]
 [ 22  32   3  53  29  29  42]
 [ 28  28   2  36  37  34  45]
 [ 24  49   5  31  25  39  37]
 [ 17  18   1  23  34  49  68]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.34      0.35       210
           2       0.33      0.43      0.37       210
           3       0.82      0.63      0.72       210
           4       0.26      0.25      0.26       210
           5       0.23      0.18      0.20       210
           6       0.19      0.19      0.19       210
           7       0.26      0.32      0.29       210

    accuracy                           0.34      1470
   macro avg       0.35      0.34      0.34      1470
weighted avg       0.35      0.34      0.34      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.34829931972789113
Co

Accuracy of Decision Tree after PCA and ICA is: 0.32108843537414966
Confusion Matrix of Decision Tree is:
 [[ 86  26  12  24  18  15  29]
 [ 23  59  12  36  25  28  27]
 [ 19  24 146  12   3   5   1]
 [ 34  27  10  45  32  25  37]
 [ 36  26   6  28  44  37  33]
 [ 29  34   8  31  36  38  34]
 [ 24  21   0  33  39  39  54]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.34      0.41      0.37       210
           2       0.27      0.28      0.28       210
           3       0.75      0.70      0.72       210
           4       0.22      0.21      0.21       210
           5       0.22      0.21      0.22       210
           6       0.20      0.18      0.19       210
           7       0.25      0.26      0.25       210

    accuracy                           0.32      1470
   macro avg       0.32      0.32      0.32      1470
weighted avg       0.32      0.32      0.32      1470

Decision Tree with 20 max_depth
Ac

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3129251700680272
Confusion Matrix of Random Forest is:
 [[ 46  67  23   0   7   0  67]
 [  2 121  39   0   2   0  46]
 [  9  41 148   0   1   0  11]
 [  2 116  10   0   7   0  75]
 [  3  83   6   0  11   0 107]
 [  3 101  17   0   7   0  82]
 [  2  67   5   0   2   0 134]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.69      0.22      0.33       210
           2       0.20      0.58      0.30       210
           3       0.60      0.70      0.65       210
           4       0.00      0.00      0.00       210
           5       0.30      0.05      0.09       210
           6       0.00      0.00      0.00       210
           7       0.26      0.64      0.37       210

    accuracy                           0.31      1470
   macro avg       0.29      0.31      0.25      1470
weighted avg       0.29      0.31      0.25      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3442176870748299
Confusion Matrix of Random Forest is:
 [[ 46  60  21   7  42   0  34]
 [  2 116  36   7  23   0  26]
 [  6  38 154   0   7   0   5]
 [  2  91  10  25  41   0  41]
 [  3  69   6  14  61   0  57]
 [  3  86  14  16  33   0  58]
 [  2  55   1  12  36   0 104]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.72      0.22      0.34       210
           2       0.23      0.55      0.32       210
           3       0.64      0.73      0.68       210
           4       0.31      0.12      0.17       210
           5       0.25      0.29      0.27       210
           6       0.00      0.00      0.00       210
           7       0.32      0.50      0.39       210

    accuracy                           0.34      1470
   macro avg       0.35      0.34      0.31      1470
weighted avg       0.35      0.34      0.31      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3517006802721088
Confusion Matrix of Random Forest is:
 [[ 49  56  15  13  47   1  29]
 [  2 113  29  17  21   1  27]
 [  6  39 149   5   7   0   4]
 [  1  76   9  37  45   4  38]
 [  2  62   5  19  62   3  57]
 [  3  74  11  27  34   3  58]
 [  2  44   0  19  36   5 104]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.75      0.23      0.36       210
           2       0.24      0.54      0.34       210
           3       0.68      0.71      0.70       210
           4       0.27      0.18      0.21       210
           5       0.25      0.30      0.27       210
           6       0.18      0.01      0.03       210
           7       0.33      0.50      0.39       210

    accuracy                           0.35      1470
   macro avg       0.39      0.35      0.33      1470
weighted avg       0.39      0.35      0.33      1470

Random Forest with 5 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.37482993197278913
Confusion Matrix of Random Forest is:
 [[ 81  29   8  24  19  20  29]
 [ 15  89  18  32  12  16  28]
 [ 12  22 150   5   9   8   4]
 [ 15  31   4  55  39  30  36]
 [ 22  33   2  28  44  18  63]
 [ 21  31   4  31  29  34  60]
 [ 13  15   0  22  31  31  98]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.45      0.39      0.42       210
           2       0.36      0.42      0.39       210
           3       0.81      0.71      0.76       210
           4       0.28      0.26      0.27       210
           5       0.24      0.21      0.22       210
           6       0.22      0.16      0.19       210
           7       0.31      0.47      0.37       210

    accuracy                           0.37      1470
   macro avg       0.38      0.37      0.37      1470
weighted avg       0.38      0.37      0.37      1470

Random Forest with 13 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.3564625850340136
Confusion Matrix of Random Forest is:
 [[ 86  20  10  24  25  19  26]
 [ 22  82  17  25  18  28  18]
 [ 14  22 149   7   5  11   2]
 [ 22  30   6  49  32  28  43]
 [ 28  32   4  25  44  29  48]
 [ 22  29   8  28  36  37  50]
 [ 21  18   0  28  31  35  77]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.41      0.40       210
           2       0.35      0.39      0.37       210
           3       0.77      0.71      0.74       210
           4       0.26      0.23      0.25       210
           5       0.23      0.21      0.22       210
           6       0.20      0.18      0.19       210
           7       0.29      0.37      0.32       210

    accuracy                           0.36      1470
   macro avg       0.36      0.36      0.36      1470
weighted avg       0.36      0.36      0.36      1470

Accuracy of Random Forest after PCA