#### Info:
#### All PCA models are built using the elbow method through the scree plots in Nisha_PCA_Scree_Plots
#### 

In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    from sklearn.decomposition import FastICA
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Nisha//Input//Nisha_dataset_labels.csv")

In [31]:
# Function of Scaling, PCA, ICA
def scale_pca_ica(x_data, y_data, comp):
    scaler_model = StandardScaler()
    scaled_data = scaler_model.fit_transform(x_data)
    #print(scaled_data)
    # Doing PCA giving number of Components(dimensions)
    pca_comp = PCA(n_components=comp)
    pca_data = pca_comp.fit_transform(scaled_data)
    #print(pca_data)
    # Doing ICA on PCA transformed data to make features independent
    #ica_comp = FastICA(n_components=comp)
    ica_comp = FastICA(n_components=comp,max_iter=50000)
    ica_data = ica_comp.fit_transform(pca_data)
    #print(ica_data)
    x_train,x_test,y_train,y_test = train_test_split(ica_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    return x_train, x_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after PCA and ICA is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.37755102040816324
Confusion Matrix of Logistic Regression is:
 [[  0  22 104  37  20  18   9]
 [  0  40 125  20  13   9   3]
 [  0  16 183   7   4   0   0]
 [  0  11  70  75  44   5   5]
 [  0  35  66  45  40  13  11]
 [  0  16  48  37  24  22  63]
 [  0   1   2   2   1   9 195]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.28      0.19      0.23       210
           3       0.31      0.87      0.45       210
           4       0.34      0.36      0.35       210
           5       0.27      0.19      0.22       210
           6       0.29      0.10      0.15       210
           7       0.68      0.93      0.79       210

    accuracy                           0.38      1470
   macro avg       0.31      0.38      0.31      1470
weighted avg       0.31      0.38      0.31      1470

KNN with 3 Neigh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.5346938775510204
Confusion Matrix of KNN Model is:
 [[128  26   6  16   9  21   4]
 [ 36 107  11  18  23  15   0]
 [  7  18 157  18   3   7   0]
 [ 43  23  17  87  25  13   2]
 [ 45  40   6  37  56  17   9]
 [ 42  12  11  18  16  83  28]
 [  8   2   1   1   1  29 168]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.41      0.61      0.49       210
           2       0.47      0.51      0.49       210
           3       0.75      0.75      0.75       210
           4       0.45      0.41      0.43       210
           5       0.42      0.27      0.33       210
           6       0.45      0.40      0.42       210
           7       0.80      0.80      0.80       210

    accuracy                           0.53      1470
   macro avg       0.54      0.53      0.53      1470
weighted avg       0.54      0.53      0.53      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

Accuracy of SVM after PCA and ICA is: 0.3129251700680272
Confusion Matrix of SVM is:
 [[ 18  20 158   2   8   3   1]
 [  5  20 174   9   1   1   0]
 [  1   3 205   1   0   0   0]
 [ 13  26 120  46   0   4   1]
 [ 25  24 118  26   8   5   4]
 [ 19  14 130   5   2  22  18]
 [ 10   2  28   0   1  28 141]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.20      0.09      0.12       210
           2       0.18      0.10      0.13       210
           3       0.22      0.98      0.36       210
           4       0.52      0.22      0.31       210
           5       0.40      0.04      0.07       210
           6       0.35      0.10      0.16       210
           7       0.85      0.67      0.75       210

    accuracy                           0.31      1470
   macro avg       0.39      0.31      0.27      1470
weighted avg       0.39      0.31      0.27      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[109  16   4  42  15  20   4]
 [ 28  98  13  34  21  14   2]
 [  7  19 150  22   6   6   0]
 [ 31  13   8 113  25  15   5]
 [ 47  33   3  51  56  10  10]
 [ 45  23   3  37  12  57  33]
 [  8   6   0   3   5  30 158]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.52      0.45       210
           2       0.47      0.47      0.47       210
           3       0.83      0.71      0.77       210
           4       0.37      0.54      0.44       210
           5       0.40      0.27      0.32       210
           6       0.38      0.27      0.31       210
           7       0.75      0.75      0.75       210

    accuracy                           0.50      1470
   macro avg       0.51      0.50      0.50      1470
weighted avg       0.51      0.50      0.50      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.507482993197279
Conf

Accuracy of Decision Tree after PCA and ICA is: 0.5204081632653061
Confusion Matrix of Decision Tree is:
 [[101  22   8  23  25  26   5]
 [ 25  98  13  19  26  27   2]
 [  2  11 167  14   7   9   0]
 [ 16  17  20  86  43  25   3]
 [ 34  31   3  43  74  17   8]
 [ 29  16   9  28  16  82  30]
 [  6   1   1   3   8  34 157]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.48      0.48       210
           2       0.50      0.47      0.48       210
           3       0.76      0.80      0.77       210
           4       0.40      0.41      0.40       210
           5       0.37      0.35      0.36       210
           6       0.37      0.39      0.38       210
           7       0.77      0.75      0.76       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.52      1470
weighted avg       0.52      0.52      0.52      1470

Decision Tree with 20 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4129251700680272
Confusion Matrix of Random Forest is:
 [[  1  37  24  78   1  62   7]
 [  0  65  57  32   3  48   5]
 [  0  28 157  15   0   8   2]
 [  0  29  16 113   0  46   6]
 [  0  59  23  49   3  58  18]
 [  0   6  22  40   1  93  48]
 [  0   1   3   0   0  31 175]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       1.00      0.00      0.01       210
           2       0.29      0.31      0.30       210
           3       0.52      0.75      0.61       210
           4       0.35      0.54      0.42       210
           5       0.38      0.01      0.03       210
           6       0.27      0.44      0.33       210
           7       0.67      0.83      0.74       210

    accuracy                           0.41      1470
   macro avg       0.50      0.41      0.35      1470
weighted avg       0.50      0.41      0.35      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.572108843537415
Confusion Matrix of Random Forest is:
 [[104  18   4  24  18  35   7]
 [ 14 114  14  22  24  20   2]
 [  9  20 154  16   4   7   0]
 [ 15   5  10 115  33  27   5]
 [ 22  27   6  44  84  16  11]
 [ 16  13   3  33  10  95  40]
 [  4   2   0   0   0  29 175]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.50      0.53       210
           2       0.57      0.54      0.56       210
           3       0.81      0.73      0.77       210
           4       0.45      0.55      0.50       210
           5       0.49      0.40      0.44       210
           6       0.41      0.45      0.43       210
           7       0.73      0.83      0.78       210

    accuracy                           0.57      1470
   macro avg       0.58      0.57      0.57      1470
weighted avg       0.58      0.57      0.57      1470

Random Forest with 11 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5755102040816327
Confusion Matrix of Random Forest is:
 [[109  15   4  19  22  35   6]
 [ 17 110  13  15  32  21   2]
 [  6  17 168  10   2   7   0]
 [ 16  10  17  99  42  24   2]
 [ 30  27   7  40  80  16  10]
 [ 19  11   7  17  14 104  38]
 [  6   2   0   0   0  26 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.52      0.53       210
           2       0.57      0.52      0.55       210
           3       0.78      0.80      0.79       210
           4       0.49      0.47      0.48       210
           5       0.42      0.38      0.40       210
           6       0.45      0.50      0.47       210
           7       0.75      0.84      0.79       210

    accuracy                           0.58      1470
   macro avg       0.57      0.58      0.57      1470
weighted avg       0.57      0.58      0.57      1470

Random Forest with 19 max_depth
Acc

In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],7)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3925170068027211
Confusion Matrix of Logistic Regression is:
 [[  1  11 122  17  52   1   6]
 [  1  20 146  13  26   3   1]
 [  0   3 200   7   0   0   0]
 [  2   6  91  40  65   1   5]
 [  0  14  41  28 114   1  12]
 [  4  11  91  16  25   5  58]
 [  0   2   8   0   1   2 197]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.12      0.00      0.01       210
           2       0.30      0.10      0.14       210
           3       0.29      0.95      0.44       210
           4       0.33      0.19      0.24       210
           5       0.40      0.54      0.46       210
           6       0.38      0.02      0.04       210
           7       0.71      0.94      0.81       210

    accuracy                           0.39      1470
   macro avg       0.36      0.39      0.31      1470
weighted avg       0.36      0.39      0.31      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.1761904761904762
Confusion Matrix of SVM is:
 [[  1  38 169   0   1   1   0]
 [  0  16 194   0   0   0   0]
 [  0   0 210   0   0   0   0]
 [  0  35 175   0   0   0   0]
 [  1  78 127   0   1   2   1]
 [  1  27 175   1   0   3   3]
 [  0  25 155   0   0   2  28]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.33      0.00      0.01       210
           2       0.07      0.08      0.07       210
           3       0.17      1.00      0.30       210
           4       0.00      0.00      0.00       210
           5       0.50      0.00      0.01       210
           6       0.38      0.01      0.03       210
           7       0.88      0.13      0.23       210

    accuracy                           0.18      1470
   macro avg       0.33      0.18      0.09      1470
weighted avg       0.33      0.18      0.09      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[107  18  10  17  16  40   2]
 [ 38  58  29  19  34  32   0]
 [  9   5 162  15   6  13   0]
 [ 14   8  18  96  39  34   1]
 [ 53  10   7  46  70  17   7]
 [ 23  10  21  24   8  90  34]
 [  2   1   2   0   1  24 180]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.43      0.51      0.47       210
           2       0.53      0.28      0.36       210
           3       0.65      0.77      0.71       210
           4       0.44      0.46      0.45       210
           5       0.40      0.33      0.36       210
           6       0.36      0.43      0.39       210
           7       0.80      0.86      0.83       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.51      1470
weighted avg       0.52      0.52      0.51      1470

Decision Tree with 7 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5258503401360545
Conf

Accuracy of Decision Tree after PCA and ICA is: 0.5360544217687074
Confusion Matrix of Decision Tree is:
 [[108  25   5  26  22  22   2]
 [ 15 104  16  19  28  26   2]
 [  3  10 167  11   7  12   0]
 [ 15  24  26  78  36  29   2]
 [ 32  34   2  37  80  20   5]
 [ 21  22  16  21   9  93  28]
 [  4   4   1   3   8  32 158]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.51      0.53       210
           2       0.47      0.50      0.48       210
           3       0.72      0.80      0.75       210
           4       0.40      0.37      0.39       210
           5       0.42      0.38      0.40       210
           6       0.40      0.44      0.42       210
           7       0.80      0.75      0.78       210

    accuracy                           0.54      1470
   macro avg       0.54      0.54      0.53      1470
weighted avg       0.54      0.54      0.53      1470

Decision Tree with 15 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4673469387755102
Confusion Matrix of Random Forest is:
 [[ 89  39  40  16  11   2  13]
 [ 25 101  37  21  15   2   9]
 [  7  20 174   7   2   0   0]
 [ 27  34  44  82  16   1   6]
 [ 51  39  13  47  37   5  18]
 [ 13  35  49  34   6   5  68]
 [  3   2   3   1   0   2 199]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.42      0.42       210
           2       0.37      0.48      0.42       210
           3       0.48      0.83      0.61       210
           4       0.39      0.39      0.39       210
           5       0.43      0.18      0.25       210
           6       0.29      0.02      0.04       210
           7       0.64      0.95      0.76       210

    accuracy                           0.47      1470
   macro avg       0.43      0.47      0.41      1470
weighted avg       0.43      0.47      0.41      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.6108843537414966
Confusion Matrix of Random Forest is:
 [[109  15   3  11  42  28   2]
 [ 12 113  12  25  24  24   0]
 [  6  11 169  17   1   6   0]
 [  6  12  12 117  37  24   2]
 [ 20  24   3  42 101  10  10]
 [  8  21   8  25   9 101  38]
 [  2   2   0   1   1  16 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.52      0.58       210
           2       0.57      0.54      0.55       210
           3       0.82      0.80      0.81       210
           4       0.49      0.56      0.52       210
           5       0.47      0.48      0.48       210
           6       0.48      0.48      0.48       210
           7       0.78      0.90      0.84       210

    accuracy                           0.61      1470
   macro avg       0.61      0.61      0.61      1470
weighted avg       0.61      0.61      0.61      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.6040816326530613
Confusion Matrix of Random Forest is:
 [[110  18   3   9  36  32   2]
 [  9 117  11  22  28  23   0]
 [  6  11 173  13   1   6   0]
 [ 13  13  14 105  39  24   2]
 [ 20  25   4  39  96  17   9]
 [ 15  17   7  24   9 100  38]
 [  1   1   0   1   1  19 187]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.63      0.52      0.57       210
           2       0.58      0.56      0.57       210
           3       0.82      0.82      0.82       210
           4       0.49      0.50      0.50       210
           5       0.46      0.46      0.46       210
           6       0.45      0.48      0.46       210
           7       0.79      0.89      0.83       210

    accuracy                           0.60      1470
   macro avg       0.60      0.60      0.60      1470
weighted avg       0.60      0.60      0.60      1470

Random Forest with 19 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3768707482993197
Confusion Matrix of Logistic Regression is:
 [[  0   7 103  33  42  17   8]
 [  0  14 126  19  39   9   3]
 [  0   7 200   2   1   0   0]
 [  0  13  94  43  48   7   5]
 [  0   5  71  37  75  12  10]
 [  1   9  66  23  25  24  62]
 [  0   0   4   1   1   6 198]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.25      0.07      0.11       210
           3       0.30      0.95      0.46       210
           4       0.27      0.20      0.23       210
           5       0.32      0.36      0.34       210
           6       0.32      0.11      0.17       210
           7       0.69      0.94      0.80       210

    accuracy                           0.38      1470
   macro avg       0.31      0.38      0.30      1470
weighted avg       0.31      0.38      0.30      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.18571428571428572
Confusion Matrix of SVM is:
 [[  1  10 194   2   0   3   0]
 [  0   1 206   3   0   0   0]
 [  0   0 210   0   0   0   0]
 [  0   5 201   2   1   1   0]
 [  2  17 182   5   1   3   0]
 [  2  15 178   3   1  10   1]
 [  2  10 127   0   1  22  48]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.14      0.00      0.01       210
           2       0.02      0.00      0.01       210
           3       0.16      1.00      0.28       210
           4       0.13      0.01      0.02       210
           5       0.25      0.00      0.01       210
           6       0.26      0.05      0.08       210
           7       0.98      0.23      0.37       210

    accuracy                           0.19      1470
   macro avg       0.28      0.19      0.11      1470
weighted avg       0.28      0.19      0.11      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[108  15   3  18  33  31   2]
 [ 36  94  13  26  19  22   0]
 [  7  20 163   8   6   6   0]
 [ 25  32  16  74  46  15   2]
 [ 41  37   7  44  51  24   6]
 [ 27  15   3  23  30  76  36]
 [  6   1   3   0   6  30 164]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.43      0.51      0.47       210
           2       0.44      0.45      0.44       210
           3       0.78      0.78      0.78       210
           4       0.38      0.35      0.37       210
           5       0.27      0.24      0.25       210
           6       0.37      0.36      0.37       210
           7       0.78      0.78      0.78       210

    accuracy                           0.50      1470
   macro avg       0.49      0.50      0.49      1470
weighted avg       0.49      0.50      0.49      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.49455782312925173
Co

Confusion Matrix of Decision Tree is:
 [[ 97  18   4  24  37  27   3]
 [ 22  88  16  28  33  23   0]
 [  6  10 169  10   7   8   0]
 [ 25  22  22  63  49  25   4]
 [ 30  33  11  39  70  21   6]
 [ 22  20   7  19  29  77  36]
 [  4   5   5   0   5  29 162]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.46      0.47       210
           2       0.45      0.42      0.43       210
           3       0.72      0.80      0.76       210
           4       0.34      0.30      0.32       210
           5       0.30      0.33      0.32       210
           6       0.37      0.37      0.37       210
           7       0.77      0.77      0.77       210

    accuracy                           0.49      1470
   macro avg       0.49      0.49      0.49      1470
weighted avg       0.49      0.49      0.49      1470

Random Forest with 1 max_depth
Accuracy of Random Forest after PCA and ICA is: 0.3006802721088435
Conf

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.38299319727891157
Confusion Matrix of Random Forest is:
 [[ 64   0  35   0  94   9   8]
 [ 25   0  60   0 114   7   4]
 [  3   0 166   0  36   1   4]
 [ 29   0  37   0 133   2   9]
 [ 46   0  17   0 129   5  13]
 [ 35   0  50   0  51  16  58]
 [ 10   0   6   0   0   6 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.30      0.30      0.30       210
           2       0.00      0.00      0.00       210
           3       0.45      0.79      0.57       210
           4       0.00      0.00      0.00       210
           5       0.23      0.61      0.34       210
           6       0.35      0.08      0.13       210
           7       0.66      0.90      0.76       210

    accuracy                           0.38      1470
   macro avg       0.28      0.38      0.30      1470
weighted avg       0.28      0.38      0.30      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.42857142857142855
Confusion Matrix of Random Forest is:
 [[115   7  22   0  37  26   3]
 [ 56  25  48   0  58  20   3]
 [  5   2 163   0  30   8   2]
 [ 61  13  31   1  84  16   4]
 [ 80   6  17   0  90   6  11]
 [ 50   4  22   0  31  55  48]
 [ 10   0   4   0   0  15 181]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.31      0.55      0.39       210
           2       0.44      0.12      0.19       210
           3       0.53      0.78      0.63       210
           4       1.00      0.00      0.01       210
           5       0.27      0.43      0.33       210
           6       0.38      0.26      0.31       210
           7       0.72      0.86      0.78       210

    accuracy                           0.43      1470
   macro avg       0.52      0.43      0.38      1470
weighted avg       0.52      0.43      0.38      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5380952380952381
Confusion Matrix of Random Forest is:
 [[108  14   2  14  36  32   4]
 [ 25  88  15  24  38  19   1]
 [  5  12 160  18   7   7   1]
 [ 18  14  12  78  65  20   3]
 [ 33  18   7  35  95  15   7]
 [ 22  13   6  18  28  83  40]
 [  2   2   1   0   0  26 179]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.51      0.51       210
           2       0.55      0.42      0.47       210
           3       0.79      0.76      0.77       210
           4       0.42      0.37      0.39       210
           5       0.35      0.45      0.40       210
           6       0.41      0.40      0.40       210
           7       0.76      0.85      0.80       210

    accuracy                           0.54      1470
   macro avg       0.54      0.54      0.54      1470
weighted avg       0.54      0.54      0.54      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5530612244897959
Confusion Matrix of Random Forest is:
 [[112  11   3  14  33  33   4]
 [ 13 101  16  26  30  23   1]
 [  4  11 168  12   6   8   1]
 [ 16  16  16  71  66  22   3]
 [ 34  19   5  37  90  17   8]
 [ 22  12   7  18  17  92  42]
 [  2   2   1   0   1  25 179]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.53      0.54       210
           2       0.59      0.48      0.53       210
           3       0.78      0.80      0.79       210
           4       0.40      0.34      0.37       210
           5       0.37      0.43      0.40       210
           6       0.42      0.44      0.43       210
           7       0.75      0.85      0.80       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Random Forest with 20 max_depth
Acc

### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.45374149659863944
Confusion Matrix of Logistic Regression is:
 [[100   0  50   0  33   6  21]
 [ 11  58  86   0  31   5  19]
 [ 16   0 185   0   2   5   2]
 [ 42  14  43   0  64  12  35]
 [ 42   8  17   0 113   2  28]
 [ 26   6  31   0  10  31 106]
 [  3   0   0   0   4  23 180]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.42      0.48      0.44       210
           2       0.67      0.28      0.39       210
           3       0.45      0.88      0.59       210
           4       0.00      0.00      0.00       210
           5       0.44      0.54      0.48       210
           6       0.37      0.15      0.21       210
           7       0.46      0.86      0.60       210

    accuracy                           0.45      1470
   macro avg       0.40      0.45      0.39      1470
weighted avg       0.40      0.45      0.39      1470

KNN with 3 Neigh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.4884353741496599
Confusion Matrix of KNN Model is:
 [[127   8   5  20  31  13   6]
 [ 16 128   9  19  21  14   3]
 [  6  21 167   5   4   5   2]
 [ 38  46  12  50  35  12  17]
 [ 52  24   4  43  73   9   5]
 [ 30  29  17  22   9  61  42]
 [ 12   6   1  13   6  60 112]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.45      0.60      0.52       210
           2       0.49      0.61      0.54       210
           3       0.78      0.80      0.79       210
           4       0.29      0.24      0.26       210
           5       0.41      0.35      0.38       210
           6       0.35      0.29      0.32       210
           7       0.60      0.53      0.56       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.48      1470
weighted avg       0.48      0.49      0.48      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.43741496598639457
Confusion Matrix of SVM is:
 [[ 90   0  64   0  31  14  11]
 [  9  53  98   0  27  13  10]
 [ 10   0 191   0   2   6   1]
 [ 44  16  54   0  54  21  21]
 [ 44   8  24   0 106   6  22]
 [ 32   6  38   0   7  63  64]
 [ 11   0   0   0   4  55 140]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.38      0.43      0.40       210
           2       0.64      0.25      0.36       210
           3       0.41      0.91      0.56       210
           4       0.00      0.00      0.00       210
           5       0.46      0.50      0.48       210
           6       0.35      0.30      0.32       210
           7       0.52      0.67      0.58       210

    accuracy                           0.44      1470
   macro avg       0.39      0.44      0.39      1470
weighted avg       0.39      0.44      0.39      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.5142857142857142
Confusion Matrix of SVM is:
 [[107   1  13  35  29  16   9]
 [  2 105  15  47  20  17   4]
 [ 14  24 143   8   6  15   0]
 [ 20  23   7  73  52  19  16]
 [ 20  16   4  47 107   5  11]
 [ 15  10  12  32  10  69  62]
 [  0   2   0   9   5  42 152]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.60      0.51      0.55       210
           2       0.58      0.50      0.54       210
           3       0.74      0.68      0.71       210
           4       0.29      0.35      0.32       210
           5       0.47      0.51      0.49       210
           6       0.38      0.33      0.35       210
           7       0.60      0.72      0.66       210

    accuracy                           0.51      1470
   macro avg       0.52      0.51      0.52      1470
weighted avg       0.52      0.51      0.52      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[103   7   1  49  29  16   5]
 [  4 133   5  34  17  14   3]
 [  3  19 160  14   9   3   2]
 [ 14  49  10  70  35  15  17]
 [ 20  30   2  62  75  11  10]
 [ 23  26  16  30   9  57  49]
 [  7   5   2  10   8  61 117]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.59      0.49      0.54       210
           2       0.49      0.63      0.56       210
           3       0.82      0.76      0.79       210
           4       0.26      0.33      0.29       210
           5       0.41      0.36      0.38       210
           6       0.32      0.27      0.29       210
           7       0.58      0.56      0.57       210

    accuracy                           0.49      1470
   macro avg       0.50      0.49      0.49      1470
weighted avg       0.50      0.49      0.49      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4897959183673469
Con

Confusion Matrix of Decision Tree is:
 [[117   9   3  27  34  13   7]
 [ 13 110   8  26  29  20   4]
 [  4  17 166  11   2   8   2]
 [ 30  39   6  58  39  24  14]
 [ 37  15   8  42  80  18  10]
 [ 27  19  19  23  16  55  51]
 [  9   7   1  13  13  59 108]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.56      0.52       210
           2       0.51      0.52      0.52       210
           3       0.79      0.79      0.79       210
           4       0.29      0.28      0.28       210
           5       0.38      0.38      0.38       210
           6       0.28      0.26      0.27       210
           7       0.55      0.51      0.53       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Random Forest with 1 max_depth
Accuracy of Random Forest after PCA and ICA is: 0.41360544217687073
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.49387755102040815
Confusion Matrix of Random Forest is:
 [[ 89   2  20   0  79   5  15]
 [  5 106  21   0  60   6  12]
 [  3  24 155   0  18   7   3]
 [ 18  21  12   0 127   9  23]
 [  7  14   4   0 172   0  13]
 [ 15   8  20   0  48  22  97]
 [  1   2   0   0  17   8 182]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.64      0.42      0.51       210
           2       0.60      0.50      0.55       210
           3       0.67      0.74      0.70       210
           4       0.00      0.00      0.00       210
           5       0.33      0.82      0.47       210
           6       0.39      0.10      0.16       210
           7       0.53      0.87      0.66       210

    accuracy                           0.49      1470
   macro avg       0.45      0.49      0.44      1470
weighted avg       0.45      0.49      0.44      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.5047619047619047
Confusion Matrix of Random Forest is:
 [[ 96   4  14   0  77   5  14]
 [  9 119  19   0  47   6  10]
 [  4  27 155   0  14   8   2]
 [ 20  44  11   1 104   7  23]
 [  8  25   3   1 160   2  11]
 [ 18  20  19   0  36  34  83]
 [  1   6   0   0  13  13 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.46      0.52       210
           2       0.49      0.57      0.52       210
           3       0.70      0.74      0.72       210
           4       0.50      0.00      0.01       210
           5       0.35      0.76      0.48       210
           6       0.45      0.16      0.24       210
           7       0.55      0.84      0.67       210

    accuracy                           0.50      1470
   macro avg       0.52      0.50      0.45      1470
weighted avg       0.52      0.50      0.45      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5496598639455782
Confusion Matrix of Random Forest is:
 [[120   7   2  25  35  13   8]
 [  4 124   7  31  25  11   8]
 [  2  17 169  11   3   7   1]
 [ 18  41   4  60  50  19  18]
 [ 21  18   3  37 112   8  11]
 [ 20  15  11  25   9  67  63]
 [  4   3   0   3   7  37 156]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.63      0.57      0.60       210
           2       0.55      0.59      0.57       210
           3       0.86      0.80      0.83       210
           4       0.31      0.29      0.30       210
           5       0.46      0.53      0.50       210
           6       0.41      0.32      0.36       210
           7       0.59      0.74      0.66       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5238095238095238
Confusion Matrix of Random Forest is:
 [[118   8   2  25  36  14   7]
 [ 10 125   6  25  24  14   6]
 [  1  12 174   9   5   9   0]
 [ 20  44   7  50  46  21  22]
 [ 28  21   4  43  96   8  10]
 [ 25  18   9  15  12  70  61]
 [  4   4   0   7   3  55 137]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.56      0.57       210
           2       0.54      0.60      0.57       210
           3       0.86      0.83      0.84       210
           4       0.29      0.24      0.26       210
           5       0.43      0.46      0.44       210
           6       0.37      0.33      0.35       210
           7       0.56      0.65      0.60       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.52      1470
weighted avg       0.52      0.52      0.52      1470

Random Forest with 20 max_depth
Acc

In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_gkb.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3183673469387755
Confusion Matrix of Logistic Regression is:
 [[ 92   0  51  16   0   0  51]
 [ 32   0 115  10   0   0  53]
 [ 16   0 178   3   0   0  13]
 [ 22   0 139  11   0   0  38]
 [ 33   0 143  14   0   0  20]
 [ 35   0  51   6   0   0 118]
 [ 16   0   6   1   0   0 187]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.37      0.44      0.40       210
           2       0.00      0.00      0.00       210
           3       0.26      0.85      0.40       210
           4       0.18      0.05      0.08       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.39      0.89      0.54       210

    accuracy                           0.32      1470
   macro avg       0.17      0.32      0.20      1470
weighted avg       0.17      0.32      0.20      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[112  22  10  25  18   8  15]
 [ 39  72  23  20  23  15  18]
 [  7  21 152  13   7   6   4]
 [ 50  41  16  44  31  14  14]
 [ 41  30  22  33  58  13  13]
 [ 31  26  13  25  19  51  45]
 [ 23  13   5   5   5  74  85]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.37      0.53      0.44       210
           2       0.32      0.34      0.33       210
           3       0.63      0.72      0.67       210
           4       0.27      0.21      0.23       210
           5       0.36      0.28      0.31       210
           6       0.28      0.24      0.26       210
           7       0.44      0.40      0.42       210

    accuracy                           0.39      1470
   macro avg       0.38      0.39      0.38      1470
weighted avg       0.38      0.39      0.38      1470

KNN with 5 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.38299319727891157
Confusion Matrix of KNN M

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3122448979591837
Confusion Matrix of SVM is:
 [[ 73   0  47  33   0   1  56]
 [ 23   0 112  21   0   1  53]
 [  6   0 176   6   0   0  22]
 [ 16   0 135  21   0   0  38]
 [ 24   0 138  27   0   0  21]
 [ 23   0  49  17   0   1 120]
 [  9   1   6   4   1   1 188]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.42      0.35      0.38       210
           2       0.00      0.00      0.00       210
           3       0.27      0.84      0.40       210
           4       0.16      0.10      0.12       210
           5       0.00      0.00      0.00       210
           6       0.25      0.00      0.01       210
           7       0.38      0.90      0.53       210

    accuracy                           0.31      1470
   macro avg       0.21      0.31      0.21      1470
weighted avg       0.21      0.31      0.21      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3360544217687075
Confusion Matrix of SVM is:
 [[117   0  33  34   2   5  19]
 [ 29   0 108  24   1   3  45]
 [ 10   0 174  16   0   3   7]
 [ 23   0 113  32   7   2  33]
 [ 34   0 126  34   2   3  11]
 [ 45   0  36  22   2   6  99]
 [ 28   0   5   4   0  10 163]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.41      0.56      0.47       210
           2       0.00      0.00      0.00       210
           3       0.29      0.83      0.43       210
           4       0.19      0.15      0.17       210
           5       0.14      0.01      0.02       210
           6       0.19      0.03      0.05       210
           7       0.43      0.78      0.56       210

    accuracy                           0.34      1470
   macro avg       0.24      0.34      0.24      1470
weighted avg       0.24      0.34      0.24      1470

Working on SVM Kernal: sigmoid


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.2931972789115646
Confusion Matrix of SVM is:
 [[125   3  29   4  37   5   7]
 [ 52   9  95  10  12  19  13]
 [ 34   0 163   2   3   1   7]
 [ 59   7  97  11   9   9  18]
 [ 57   5 114   6  17   7   4]
 [ 66  12  30   4  13  43  42]
 [ 44  12   9   1   5  76  63]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.29      0.60      0.39       210
           2       0.19      0.04      0.07       210
           3       0.30      0.78      0.44       210
           4       0.29      0.05      0.09       210
           5       0.18      0.08      0.11       210
           6       0.27      0.20      0.23       210
           7       0.41      0.30      0.35       210

    accuracy                           0.29      1470
   macro avg       0.27      0.29      0.24      1470
weighted avg       0.27      0.29      0.24      1470

Decision Tree with 1 max_depth
Accuracy of Decision Tree after PC

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 96  18  13  31  17  23  12]
 [ 31  53  16  38  30  28  14]
 [  8   8 150  23  13   5   3]
 [ 28  21  23  67  35  14  22]
 [ 36  17   9  80  38  20  10]
 [ 26  16   5  34  20  65  44]
 [ 15  11   3  17  13  75  76]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.40      0.46      0.43       210
           2       0.37      0.25      0.30       210
           3       0.68      0.71      0.70       210
           4       0.23      0.32      0.27       210
           5       0.23      0.18      0.20       210
           6       0.28      0.31      0.30       210
           7       0.42      0.36      0.39       210

    accuracy                           0.37      1470
   macro avg       0.37      0.37      0.37      1470
weighted avg       0.37      0.37      0.37      1470

Decision Tree with 14 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.3768707482993197
Con

Accuracy of Random Forest after PCA and ICA is: 0.2707482993197279
Confusion Matrix of Random Forest is:
 [[ 47   0  73   5   0   0  85]
 [ 21   0 120   6   0   0  63]
 [ 18   0 181   2   0   0   9]
 [ 21   0 140   5   0   0  44]
 [ 21   0 151   5   0   0  33]
 [ 33   0  53   5   0   0 119]
 [ 38   0   6   1   0   0 165]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.24      0.22      0.23       210
           2       0.00      0.00      0.00       210
           3       0.25      0.86      0.39       210
           4       0.17      0.02      0.04       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.32      0.79      0.45       210

    accuracy                           0.27      1470
   macro avg       0.14      0.27      0.16      1470
weighted avg       0.14      0.27      0.16      1470

Random Forest with 2 max_depth
Accu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Random Forest is:
 [[127   0  18  45   0   0  20]
 [ 39   0  82  39   0   0  50]
 [ 19   0 164  17   0   0  10]
 [ 32   0  91  50   0   1  36]
 [ 40   0  91  63   0   1  15]
 [ 52   0  24  31   0   1 102]
 [ 32   0   1   6   0   2 169]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.37      0.60      0.46       210
           2       0.00      0.00      0.00       210
           3       0.35      0.78      0.48       210
           4       0.20      0.24      0.22       210
           5       0.00      0.00      0.00       210
           6       0.20      0.00      0.01       210
           7       0.42      0.80      0.55       210

    accuracy                           0.35      1470
   macro avg       0.22      0.35      0.25      1470
weighted avg       0.22      0.35      0.25      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.37482993197278913
Confusion Matrix of Random Forest is:
 [[133   0  11  34   9  10  13]
 [ 40   0  47  46  29  12  36]
 [ 19   0 119  46  20   2   4]
 [ 32   0  35  76  31   7  29]
 [ 44   0  30  67  54   4  11]
 [ 51   0  13  31  10  15  90]
 [ 30   0   0   5   2  19 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.63      0.48       210
           2       0.00      0.00      0.00       210
           3       0.47      0.57      0.51       210
           4       0.25      0.36      0.30       210
           5       0.35      0.26      0.30       210
           6       0.22      0.07      0.11       210
           7       0.46      0.73      0.56       210

    accuracy                           0.37      1470
   macro avg       0.30      0.37      0.32      1470
weighted avg       0.30      0.37      0.32      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3870748299319728
Confusion Matrix of Random Forest is:
 [[131   5   5  33  13  11  12]
 [ 38  18  30  42  37  12  33]
 [ 20   4 120  38  22   1   5]
 [ 31  10  23  70  40   8  28]
 [ 44  15  14  63  59   3  12]
 [ 51   2  10  29  13  35  70]
 [ 28   0   1   4   3  38 136]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.62      0.47       210
           2       0.33      0.09      0.14       210
           3       0.59      0.57      0.58       210
           4       0.25      0.33      0.29       210
           5       0.32      0.28      0.30       210
           6       0.32      0.17      0.22       210
           7       0.46      0.65      0.54       210

    accuracy                           0.39      1470
   macro avg       0.38      0.39      0.36      1470
weighted avg       0.38      0.39      0.36      1470

Random Forest with 5 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.4238095238095238
Confusion Matrix of Random Forest is:
 [[103  14   3  32  27  18  13]
 [ 17  59  10  28  47  20  29]
 [  5   6 154  19  22   1   3]
 [ 23  24  15  55  58  10  25]
 [ 24  17   8  58  81  12  10]
 [ 26   9   2  31  25  58  59]
 [ 13   6   3   8   7  60 113]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.49      0.49      0.49       210
           2       0.44      0.28      0.34       210
           3       0.79      0.73      0.76       210
           4       0.24      0.26      0.25       210
           5       0.30      0.39      0.34       210
           6       0.32      0.28      0.30       210
           7       0.45      0.54      0.49       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.42      1470
weighted avg       0.43      0.42      0.42      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.41020408163265304
Confusion Matrix of Random Forest is:
 [[ 92  23   7  27  25  22  14]
 [ 15  68  16  26  35  27  23]
 [  7   5 154  18  22   2   2]
 [ 29  20  19  55  45  15  27]
 [ 26  17  11  49  69  22  16]
 [ 18  15   3  34  24  61  55]
 [ 12   7   5  11   9  62 104]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.46      0.44      0.45       210
           2       0.44      0.32      0.37       210
           3       0.72      0.73      0.72       210
           4       0.25      0.26      0.26       210
           5       0.30      0.33      0.31       210
           6       0.29      0.29      0.29       210
           7       0.43      0.50      0.46       210

    accuracy                           0.41      1470
   macro avg       0.41      0.41      0.41      1470
weighted avg       0.41      0.41      0.41      1470

Accuracy of Multinomial Naive Baye

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],2)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3272108843537415
Confusion Matrix of Logistic Regression is:
 [[  0  16  41  65  54   0  34]
 [  0  73  42  20  23   0  52]
 [  0  22 106  11  54   0  17]
 [  0  18  53  50  54   0  35]
 [  0  25  71  34  56   0  24]
 [  0  25  12  18  14   1 140]
 [  0  14   0   1   0   0 195]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.38      0.35      0.36       210
           3       0.33      0.50      0.40       210
           4       0.25      0.24      0.24       210
           5       0.22      0.27      0.24       210
           6       1.00      0.00      0.01       210
           7       0.39      0.93      0.55       210

    accuracy                           0.33      1470
   macro avg       0.37      0.33      0.26      1470
weighted avg       0.37      0.33      0.26      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.39387755102040817
Confusion Matrix of KNN Model is:
 [[113  21  13  31  19   6   7]
 [ 46  78  16  17  18  23  12]
 [ 24  17 129  11  23   5   1]
 [ 57  31  18  47  41   9   7]
 [ 47  43  14  41  54   8   3]
 [ 27  42  11  18   8  53  51]
 [ 11  19   4   4   0  67 105]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.35      0.54      0.42       210
           2       0.31      0.37      0.34       210
           3       0.63      0.61      0.62       210
           4       0.28      0.22      0.25       210
           5       0.33      0.26      0.29       210
           6       0.31      0.25      0.28       210
           7       0.56      0.50      0.53       210

    accuracy                           0.39      1470
   macro avg       0.40      0.39      0.39      1470
weighted avg       0.40      0.39      0.39      1470

KNN with 5 Neighbors
Accuracy of KNN Model aft

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.36122448979591837
Confusion Matrix of SVM is:
 [[ 14  17  57  46  50   3  23]
 [  8  83  55  15  19   2  28]
 [  3  14 151   6  22   0  14]
 [  2  13  69  51  47   2  26]
 [  5  20  93  34  44   2  12]
 [  3  45  16  20  12   1 113]
 [  0  22   0   1   0   0 187]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.40      0.07      0.11       210
           2       0.39      0.40      0.39       210
           3       0.34      0.72      0.46       210
           4       0.29      0.24      0.27       210
           5       0.23      0.21      0.22       210
           6       0.10      0.00      0.01       210
           7       0.46      0.89      0.61       210

    accuracy                           0.36      1470
   macro avg       0.32      0.36      0.30      1470
weighted avg       0.32      0.36      0.30      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 92  22   6  34  33  16   7]
 [ 29  78  16  28  28  23   8]
 [ 19  15 130   9  27  10   0]
 [ 39  19   9  65  56  19   3]
 [ 36  30  11  55  63  13   2]
 [ 24  24   7  22  12  61  60]
 [ 12  15   5  13   1  57 107]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.44      0.40       210
           2       0.38      0.37      0.38       210
           3       0.71      0.62      0.66       210
           4       0.29      0.31      0.30       210
           5       0.29      0.30      0.29       210
           6       0.31      0.29      0.30       210
           7       0.57      0.51      0.54       210

    accuracy                           0.41      1470
   macro avg       0.42      0.41      0.41      1470
weighted avg       0.42      0.41      0.41      1470

Decision Tree with 15 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.3945578231292517
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3707482993197279
Confusion Matrix of Random Forest is:
 [[ 60  19  93  20   0   7  11]
 [ 16  70  64  12   0  24  24]
 [ 10  18 166   1   0   6   9]
 [ 36  18  99  38   0  10   9]
 [ 21  28 124  28   0   2   7]
 [ 11  22  25  24   1  25 102]
 [  0   4   0   7   0  13 186]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.29      0.33       210
           2       0.39      0.33      0.36       210
           3       0.29      0.79      0.43       210
           4       0.29      0.18      0.22       210
           5       0.00      0.00      0.00       210
           6       0.29      0.12      0.17       210
           7       0.53      0.89      0.67       210

    accuracy                           0.37      1470
   macro avg       0.31      0.37      0.31      1470
weighted avg       0.31      0.37      0.31      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.4380952380952381
Confusion Matrix of Random Forest is:
 [[101  21   8  27  33  12   8]
 [ 25  84  10  17  30  33  11]
 [ 11   6 122  14  42  12   3]
 [ 43  25  15  53  53  15   6]
 [ 28  34  10  45  82   7   4]
 [ 21  34   4  21   4  48  78]
 [  2  10   1   1   0  42 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.44      0.48      0.46       210
           2       0.39      0.40      0.40       210
           3       0.72      0.58      0.64       210
           4       0.30      0.25      0.27       210
           5       0.34      0.39      0.36       210
           6       0.28      0.23      0.25       210
           7       0.58      0.73      0.65       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.43      1470
weighted avg       0.44      0.44      0.43      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.42925170068027213
Confusion Matrix of Random Forest is:
 [[ 90  21  14  38  27  10  10]
 [ 32  84   9  24  24  29   8]
 [ 10  15 138  15  19  11   2]
 [ 31  24  14  71  51  13   6]
 [ 28  40  14  53  61  11   3]
 [ 18  36   4  24   7  56  65]
 [  7   9   4   4   2  53 131]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.43      0.42       210
           2       0.37      0.40      0.38       210
           3       0.70      0.66      0.68       210
           4       0.31      0.34      0.32       210
           5       0.32      0.29      0.30       210
           6       0.31      0.27      0.28       210
           7       0.58      0.62      0.60       210

    accuracy                           0.43      1470
   macro avg       0.43      0.43      0.43      1470
weighted avg       0.43      0.43      0.43      1470

Random Forest with 19 max_depth
Ac

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_vbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.454421768707483
Confusion Matrix of Logistic Regression is:
 [[135   2  13   0  32   1  27]
 [ 35  59  77   0  16   3  20]
 [  7   1 196   0   3   1   2]
 [ 52  27  27   0  58   0  46]
 [ 53   5   9   0  99   0  44]
 [ 23  34  32   0  13   5 103]
 [  1  30   0   0   4   1 174]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.44      0.64      0.52       210
           2       0.37      0.28      0.32       210
           3       0.55      0.93      0.70       210
           4       0.00      0.00      0.00       210
           5       0.44      0.47      0.46       210
           6       0.45      0.02      0.05       210
           7       0.42      0.83      0.56       210

    accuracy                           0.45      1470
   macro avg       0.38      0.45      0.37      1470
weighted avg       0.38      0.45      0.37      1470

KNN with 3 Neighbo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.5306122448979592
Confusion Matrix of KNN Model is:
 [[137   4   2  29  29   5   4]
 [ 15 128  11  22  12  16   6]
 [  8  19 165   9   6   2   1]
 [ 41  32   9  72  37  11   8]
 [ 56  14   0  45  82   9   4]
 [ 22  28  12  31  13  62  42]
 [  9  12   0   8   5  42 134]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.48      0.65      0.55       210
           2       0.54      0.61      0.57       210
           3       0.83      0.79      0.81       210
           4       0.33      0.34      0.34       210
           5       0.45      0.39      0.42       210
           6       0.42      0.30      0.35       210
           7       0.67      0.64      0.66       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

KNN with 5 Neighbors
Accuracy of KNN Model afte

Accuracy of SVM after PCA and ICA is: 0.5312925170068027
Confusion Matrix of SVM is:
 [[101   9   5  48  33  11   3]
 [  4 128   7  42   8  16   5]
 [  4  22 144  27   5   8   0]
 [ 10  33  10  95  40  21   1]
 [  6   9   0  78 102  10   5]
 [  9  40   6  38   8  59  50]
 [  0   9   0   5   1  43 152]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.75      0.48      0.59       210
           2       0.51      0.61      0.56       210
           3       0.84      0.69      0.75       210
           4       0.29      0.45      0.35       210
           5       0.52      0.49      0.50       210
           6       0.35      0.28      0.31       210
           7       0.70      0.72      0.71       210

    accuracy                           0.53      1470
   macro avg       0.57      0.53      0.54      1470
weighted avg       0.57      0.53      0.54      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.5183673469387755
Confusion Matrix of Decision Tree is:
 [[124   6   4  24  39   9   4]
 [ 12 120   8  24  16  10  20]
 [  4  14 158  15  14   4   1]
 [ 22  28  12  52  71  20   5]
 [ 39  11   5  35 103   8   9]
 [ 20  27  11  33  19  45  55]
 [  3   9   0   6   5  27 160]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.59      0.57       210
           2       0.56      0.57      0.56       210
           3       0.80      0.75      0.77       210
           4       0.28      0.25      0.26       210
           5       0.39      0.49      0.43       210
           6       0.37      0.21      0.27       210
           7       0.63      0.76      0.69       210

    accuracy                           0.52      1470
   macro avg       0.51      0.52      0.51      1470
weighted avg       0.51      0.52      0.51      1470

Decision Tree with 9 max_depth
Accu

Accuracy of Decision Tree after PCA and ICA is: 0.48367346938775513
Confusion Matrix of Decision Tree is:
 [[120   7   3  28  34  17   1]
 [ 12 108  15  30  12  21  12]
 [  4  13 157  18  10   7   1]
 [ 27  30   8  60  56  21   8]
 [ 37  14   3  48  84  16   8]
 [ 17  19   8  35  19  60  52]
 [  5   8   3  16  13  43 122]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.54      0.57      0.56       210
           2       0.54      0.51      0.53       210
           3       0.80      0.75      0.77       210
           4       0.26      0.29      0.27       210
           5       0.37      0.40      0.38       210
           6       0.32      0.29      0.30       210
           7       0.60      0.58      0.59       210

    accuracy                           0.48      1470
   macro avg       0.49      0.48      0.49      1470
weighted avg       0.49      0.48      0.49      1470

Random Forest with 1 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4238095238095238
Confusion Matrix of Random Forest is:
 [[154   2   3   0  16   4  31]
 [ 35  48  42   0   3  32  50]
 [ 14   5 161   0   8  12  10]
 [ 69  13  28   0  17  17  66]
 [ 72  22  16   0  44   2  54]
 [ 38   3  19   0   5  21 124]
 [  5   0   4   0   0   6 195]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.73      0.52       210
           2       0.52      0.23      0.32       210
           3       0.59      0.77      0.67       210
           4       0.00      0.00      0.00       210
           5       0.47      0.21      0.29       210
           6       0.22      0.10      0.14       210
           7       0.37      0.93      0.53       210

    accuracy                           0.42      1470
   macro avg       0.37      0.42      0.35      1470
weighted avg       0.37      0.42      0.35      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.46258503401360546
Confusion Matrix of Random Forest is:
 [[155   8   1   0  18   5  23]
 [ 32 117  21   0   7   3  30]
 [ 14  23 157   0   8   6   2]
 [ 68  47  10   0  36   0  49]
 [ 73  17   1   0  69   1  49]
 [ 39  48   9   0   6  12  96]
 [  5  29   0   0   3   3 170]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.74      0.52       210
           2       0.40      0.56      0.47       210
           3       0.79      0.75      0.77       210
           4       0.00      0.00      0.00       210
           5       0.47      0.33      0.39       210
           6       0.40      0.06      0.10       210
           7       0.41      0.81      0.54       210

    accuracy                           0.46      1470
   macro avg       0.41      0.46      0.40      1470
weighted avg       0.41      0.46      0.40      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.508843537414966
Confusion Matrix of Random Forest is:
 [[141   8   1   6  27   7  20]
 [ 22 121  18   3  11   9  26]
 [ 13  19 165   0   7   4   2]
 [ 44  43   9  16  57  10  31]
 [ 48  14   1  14  99   2  32]
 [ 25  39   8  11   9  24  94]
 [  3  16   0   4   1   4 182]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.48      0.67      0.56       210
           2       0.47      0.58      0.51       210
           3       0.82      0.79      0.80       210
           4       0.30      0.08      0.12       210
           5       0.47      0.47      0.47       210
           6       0.40      0.11      0.18       210
           7       0.47      0.87      0.61       210

    accuracy                           0.51      1470
   macro avg       0.48      0.51      0.46      1470
weighted avg       0.48      0.51      0.46      1470

Random Forest with 5 max_depth
Accur

Accuracy of Random Forest after PCA and ICA is: 0.572108843537415
Confusion Matrix of Random Forest is:
 [[126   3   3  26  37   8   7]
 [  9 121  14  26  14  11  15]
 [  4  13 173   5   6   8   1]
 [ 20  24   9  73  54  21   9]
 [ 30   9   3  43 109   8   8]
 [  9  18   7  28  19  76  53]
 [  2   7   0   7   3  28 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.63      0.60      0.61       210
           2       0.62      0.58      0.60       210
           3       0.83      0.82      0.83       210
           4       0.35      0.35      0.35       210
           5       0.45      0.52      0.48       210
           6       0.47      0.36      0.41       210
           7       0.64      0.78      0.70       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.57      1470
weighted avg       0.57      0.57      0.57      1470

Random Forest with 13 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5619047619047619
Confusion Matrix of Random Forest is:
 [[127   4   3  24  34  13   5]
 [ 10 118  14  26  15  16  11]
 [  2  15 172   6   7   8   0]
 [ 20  24   9  76  52  19  10]
 [ 28  11   3  49  99  10  10]
 [  9  23   6  30  14  75  53]
 [  1   7   0   9   4  30 159]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.64      0.60      0.62       210
           2       0.58      0.56      0.57       210
           3       0.83      0.82      0.82       210
           4       0.35      0.36      0.35       210
           5       0.44      0.47      0.46       210
           6       0.44      0.36      0.39       210
           7       0.64      0.76      0.69       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Accuracy of Multinomial Naive Bayes

In [19]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//gpt_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4448979591836735
Confusion Matrix of Logistic Regression is:
 [[101   9  28  12  15  10  35]
 [  6  87  58   8  19   5  27]
 [  7   2 190   4   2   4   1]
 [ 40  20  24  17  52   7  50]
 [ 46  31   6  13  72   3  39]
 [ 18  29  30  11   7  10 105]
 [  2  27   2   0   1   1 177]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.46      0.48      0.47       210
           2       0.42      0.41      0.42       210
           3       0.56      0.90      0.69       210
           4       0.26      0.08      0.12       210
           5       0.43      0.34      0.38       210
           6       0.25      0.05      0.08       210
           7       0.41      0.84      0.55       210

    accuracy                           0.44      1470
   macro avg       0.40      0.44      0.39      1470
weighted avg       0.40      0.44      0.39      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.40680272108843535
Confusion Matrix of SVM is:
 [[ 83   8  26   5   3  20  65]
 [  1  62  52  10   7  20  58]
 [  0   0 194   4   0   6   6]
 [ 20  16  22  20  18  31  83]
 [ 42  37   6  14  28  18  65]
 [ 12   7  26  11   0  11 143]
 [  0   8   2   0   0   0 200]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.53      0.40      0.45       210
           2       0.45      0.30      0.36       210
           3       0.59      0.92      0.72       210
           4       0.31      0.10      0.15       210
           5       0.50      0.13      0.21       210
           6       0.10      0.05      0.07       210
           7       0.32      0.95      0.48       210

    accuracy                           0.41      1470
   macro avg       0.40      0.41      0.35      1470
weighted avg       0.40      0.41      0.35      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.58      0.50      0.54       210
           2       0.48      0.50      0.49       210
           3       0.78      0.78      0.78       210
           4       0.31      0.31      0.31       210
           5       0.37      0.36      0.36       210
           6       0.29      0.22      0.25       210
           7       0.46      0.62      0.53       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Decision Tree with 11 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.44829931972789117
Confusion Matrix of Decision Tree is:
 [[103   8   5  28  30  17  19]
 [  9  96  13  25  34  12  21]
 [  2  14 167  13   1  12   1]
 [ 27  24  12  59  44  20  24]
 [ 39  17   2  36  75  22  19]
 [ 15  28  10  28  18  40  71]
 [  9  16   0   7  16  43 119]]
Cl

Accuracy of Random Forest after PCA and ICA is: 0.3816326530612245
Confusion Matrix of Random Forest is:
 [[ 87  27  39   3   3   0  51]
 [  8 132  34   0   3   3  30]
 [  8  48 147   0   0   1   6]
 [ 33  92   9   3   9   2  62]
 [ 69  80   1   1  11   1  47]
 [ 16  49  17   2   3   3 120]
 [  0  25   7   0   0   0 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.41      0.40       210
           2       0.29      0.63      0.40       210
           3       0.58      0.70      0.63       210
           4       0.33      0.01      0.03       210
           5       0.38      0.05      0.09       210
           6       0.30      0.01      0.03       210
           7       0.36      0.85      0.51       210

    accuracy                           0.38      1470
   macro avg       0.38      0.38      0.30      1470
weighted avg       0.38      0.38      0.30      1470

Random Forest with 2 max_depth
Accu

KeyboardInterrupt: 

In [32]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//xlm_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")



Accuracy of Logistic Regression after PCA and ICA is: 0.5047619047619047
Confusion Matrix of Logistic Regression is:
 [[121   2  21   3  26  16  21]
 [  6  98  46   7  21  12  20]
 [ 29   1 164   7   1   6   2]
 [ 37  17  31  40  31  17  37]
 [ 57  17   7   7  91   2  29]
 [ 13   9  26   6   6  49 101]
 [  0   1   0   0   4  26 179]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.46      0.58      0.51       210
           2       0.68      0.47      0.55       210
           3       0.56      0.78      0.65       210
           4       0.57      0.19      0.29       210
           5       0.51      0.43      0.47       210
           6       0.38      0.23      0.29       210
           7       0.46      0.85      0.60       210

    accuracy                           0.50      1470
   macro avg       0.52      0.50      0.48      1470
weighted avg       0.52      0.50      0.48      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.4959183673469388
Confusion Matrix of SVM is:
 [[128   2  15   0  22  14  29]
 [  7  94  51   3  18   8  29]
 [ 33   2 165   3   0   4   3]
 [ 43  20  29  24  33  18  43]
 [ 58  18   5   4  87   1  37]
 [ 15   9  26   3   6  40 111]
 [  0   0   0   0   4  15 191]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.45      0.61      0.52       210
           2       0.65      0.45      0.53       210
           3       0.57      0.79      0.66       210
           4       0.65      0.11      0.19       210
           5       0.51      0.41      0.46       210
           6       0.40      0.19      0.26       210
           7       0.43      0.91      0.58       210

    accuracy                           0.50      1470
   macro avg       0.52      0.50      0.46      1470
weighted avg       0.52      0.50      0.46      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.57      0.55       210
           2       0.56      0.61      0.59       210
           3       0.80      0.79      0.79       210
           4       0.34      0.32      0.33       210
           5       0.44      0.44      0.44       210
           6       0.36      0.30      0.33       210
           7       0.51      0.55      0.53       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.51      1470
weighted avg       0.51      0.51      0.51      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5129251700680272
Confusion Matrix of Decision Tree is:
 [[122   6   8  14  38  16   6]
 [  9 124   6  18  29  14  10]
 [  9  13 158  19   2   6   3]
 [ 17  32  11  81  25  28  16]
 [ 32  26   2  31 100  11   8]
 [ 14  24   7  30  14  65  56]
 [  9   7   1  15  11  63 104]]
Cla

Accuracy of Decision Tree after PCA and ICA is: 0.47210884353741495
Confusion Matrix of Decision Tree is:
 [[113   8   9  15  42  18   5]
 [ 10 114   9  21  32  15   9]
 [  9   8 162  12   8   8   3]
 [ 20  32  11  67  25  38  17]
 [ 45  24   2  40  79  14   6]
 [ 20  25   8  31  16  61  49]
 [  9  15   2  21  12  53  98]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.54      0.52       210
           2       0.50      0.54      0.52       210
           3       0.80      0.77      0.78       210
           4       0.32      0.32      0.32       210
           5       0.37      0.38      0.37       210
           6       0.29      0.29      0.29       210
           7       0.52      0.47      0.49       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.47      1470
weighted avg       0.47      0.47      0.47      1470

Random Forest with 1 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.5204081632653061
Confusion Matrix of Random Forest is:
 [[120   3   3   2  56  17   9]
 [  9 124   1   0  51  13  12]
 [ 40  17 144   1   2   5   1]
 [ 47  27   3   2  85  24  22]
 [ 23  12   0   0 156   4  15]
 [ 26  15   8   1  24  57  79]
 [  1   2   0   0   8  37 162]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.45      0.57      0.50       210
           2       0.62      0.59      0.60       210
           3       0.91      0.69      0.78       210
           4       0.33      0.01      0.02       210
           5       0.41      0.74      0.53       210
           6       0.36      0.27      0.31       210
           7       0.54      0.77      0.64       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.48      1470
weighted avg       0.52      0.52      0.48      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5816326530612245
Confusion Matrix of Random Forest is:
 [[126   7   4  11  38  15   9]
 [  5 143   1  22  19  11   9]
 [  9  14 164  13   2   7   1]
 [  7  35   6  80  34  31  17]
 [ 27  21   2  22 116  12  10]
 [ 14  16   8  19  11  76  66]
 [  0   1   0   5   5  49 150]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.60      0.63       210
           2       0.60      0.68      0.64       210
           3       0.89      0.78      0.83       210
           4       0.47      0.38      0.42       210
           5       0.52      0.55      0.53       210
           6       0.38      0.36      0.37       210
           7       0.57      0.71      0.64       210

    accuracy                           0.58      1470
   macro avg       0.58      0.58      0.58      1470
weighted avg       0.58      0.58      0.58      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5714285714285714
Confusion Matrix of Random Forest is:
 [[126   4   4  14  41  14   7]
 [  5 147   3  20  15  14   6]
 [  9  12 169  10   2   8   0]
 [  6  40   8  80  32  25  19]
 [ 29  19   2  28 111  14   7]
 [ 12  23  11  22  11  67  64]
 [  1   3   0   9   5  52 140]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.67      0.60      0.63       210
           2       0.59      0.70      0.64       210
           3       0.86      0.80      0.83       210
           4       0.44      0.38      0.41       210
           5       0.51      0.53      0.52       210
           6       0.35      0.32      0.33       210
           7       0.58      0.67      0.62       210

    accuracy                           0.57      1470
   macro avg       0.57      0.57      0.57      1470
weighted avg       0.57      0.57      0.57      1470

Random Forest with 19 max_depth
Acc

### Fine Tuned Transformers Models

In [26]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//bert_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],7)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3619047619047619
Confusion Matrix of Logistic Regression is:
 [[ 15  66  40   2  46   0  41]
 [  8  84  34   2  48   4  30]
 [  7  41 133   2  18   2   7]
 [  4  44  11   6  77   3  65]
 [  0  24   7   2 136   1  40]
 [  9  40  23   6  34   7  91]
 [  3  11   8   7  26   4 151]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.33      0.07      0.12       210
           2       0.27      0.40      0.32       210
           3       0.52      0.63      0.57       210
           4       0.22      0.03      0.05       210
           5       0.35      0.65      0.46       210
           6       0.33      0.03      0.06       210
           7       0.36      0.72      0.48       210

    accuracy                           0.36      1470
   macro avg       0.34      0.36      0.29      1470
weighted avg       0.34      0.36      0.29      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.3326530612244898
Confusion Matrix of SVM is:
 [[ 20 104   0   3  50   0  33]
 [  2 130   1   8  53   1  15]
 [ 12 105  62   1  27   2   1]
 [  0  74   0  10  74   3  49]
 [  0  37   0   5 135   1  32]
 [  3  93   0   9  34   0  71]
 [  0  43   0  11  22   2 132]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.54      0.10      0.16       210
           2       0.22      0.62      0.33       210
           3       0.98      0.30      0.45       210
           4       0.21      0.05      0.08       210
           5       0.34      0.64      0.45       210
           6       0.00      0.00      0.00       210
           7       0.40      0.63      0.49       210

    accuracy                           0.33      1470
   macro avg       0.39      0.33      0.28      1470
weighted avg       0.39      0.33      0.28      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.3727891156462585
Confusion Matrix of Decision Tree is:
 [[ 52  34  13  20  36  40  15]
 [  9  78  24  16  31  40  12]
 [  4  31 142   3   4  22   4]
 [ 12  30  11  52  37  40  28]
 [ 13  19   3  59  75  28  13]
 [ 17  18  14  18  29  75  39]
 [ 20   5   2  15  10  84  74]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.41      0.25      0.31       210
           2       0.36      0.37      0.37       210
           3       0.68      0.68      0.68       210
           4       0.28      0.25      0.26       210
           5       0.34      0.36      0.35       210
           6       0.23      0.36      0.28       210
           7       0.40      0.35      0.37       210

    accuracy                           0.37      1470
   macro avg       0.39      0.37      0.37      1470
weighted avg       0.39      0.37      0.37      1470

Decision Tree with 11 max_depth
Acc

Confusion Matrix of Decision Tree is:
 [[ 80  20   8  27  26  34  15]
 [ 17  74  18  31  29  27  14]
 [ 14  16 159   3   3  11   4]
 [ 24  26  11  61  35  34  19]
 [ 40  34   4  41  47  22  22]
 [ 23  17  13  31  30  56  40]
 [ 28   9   4  28  20  49  72]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.38      0.37       210
           2       0.38      0.35      0.36       210
           3       0.73      0.76      0.74       210
           4       0.27      0.29      0.28       210
           5       0.25      0.22      0.24       210
           6       0.24      0.27      0.25       210
           7       0.39      0.34      0.36       210

    accuracy                           0.37      1470
   macro avg       0.37      0.37      0.37      1470
weighted avg       0.37      0.37      0.37      1470

Random Forest with 1 max_depth
Accuracy of Random Forest after PCA and ICA is: 0.33197278911564626
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3598639455782313
Confusion Matrix of Random Forest is:
 [[ 36  58  31   0  45   1  39]
 [  7 107  30   1  42   0  23]
 [  9  45 133   0  19   0   4]
 [  5  49  15   2  84   0  55]
 [  1  37   8   1 127   1  35]
 [  8  53  23   1  45   0  80]
 [  5  16  11   0  54   0 124]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.17      0.26       210
           2       0.29      0.51      0.37       210
           3       0.53      0.63      0.58       210
           4       0.40      0.01      0.02       210
           5       0.31      0.60      0.41       210
           6       0.00      0.00      0.00       210
           7       0.34      0.59      0.44       210

    accuracy                           0.36      1470
   macro avg       0.34      0.36      0.29      1470
weighted avg       0.34      0.36      0.29      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.47210884353741495
Confusion Matrix of Random Forest is:
 [[ 61  37   7  14  43  19  29]
 [  8 116  10  16  28  14  18]
 [  4  32 152   4   4  12   2]
 [  8  31   3  57  52  12  47]
 [  4  25   2  20 115  11  33]
 [ 10  31  11  23  16  42  77]
 [  3   4   0  12  16  24 151]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.62      0.29      0.40       210
           2       0.42      0.55      0.48       210
           3       0.82      0.72      0.77       210
           4       0.39      0.27      0.32       210
           5       0.42      0.55      0.48       210
           6       0.31      0.20      0.24       210
           7       0.42      0.72      0.53       210

    accuracy                           0.47      1470
   macro avg       0.49      0.47      0.46      1470
weighted avg       0.49      0.47      0.46      1470

Random Forest with 11 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.46870748299319726
Confusion Matrix of Random Forest is:
 [[ 85  24   7  24  24  16  30]
 [ 14  96  16  16  31  22  15]
 [  7  22 161   5   3   9   3]
 [ 16  25   5  60  38  18  48]
 [ 21  24   4  30  95  10  26]
 [ 17  18  11  25  19  58  62]
 [  4   3   0  19  14  36 134]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.40      0.45       210
           2       0.45      0.46      0.45       210
           3       0.79      0.77      0.78       210
           4       0.34      0.29      0.31       210
           5       0.42      0.45      0.44       210
           6       0.34      0.28      0.31       210
           7       0.42      0.64      0.51       210

    accuracy                           0.47      1470
   macro avg       0.47      0.47      0.46      1470
weighted avg       0.47      0.47      0.46      1470

Random Forest with 19 max_depth
Ac

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4231292517006803
Confusion Matrix of Logistic Regression is:
 [[ 47  32  45   0  62   0  24]
 [ 21  84  27   0  50   1  27]
 [ 18  11 163   0  15   0   3]
 [ 19  17  19   1 122   0  32]
 [  8  21   3   0 146   0  32]
 [ 13  40  37   0  21   0  99]
 [  1  21   4   0   3   0 181]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.37      0.22      0.28       210
           2       0.37      0.40      0.39       210
           3       0.55      0.78      0.64       210
           4       1.00      0.00      0.01       210
           5       0.35      0.70      0.46       210
           6       0.00      0.00      0.00       210
           7       0.45      0.86      0.60       210

    accuracy                           0.42      1470
   macro avg       0.44      0.42      0.34      1470
weighted avg       0.44      0.42      0.34      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.37142857142857144
Confusion Matrix of SVM is:
 [[ 60  43   6   0  88   1  12]
 [ 33  84   8   0  75   2   8]
 [ 24  20 107   2  57   0   0]
 [ 14  25   4   0 156   1  10]
 [  9  24   0   0 165   0  12]
 [ 31  71   7   0  43   2  56]
 [  4  69   0   0   5   4 128]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.34      0.29      0.31       210
           2       0.25      0.40      0.31       210
           3       0.81      0.51      0.63       210
           4       0.00      0.00      0.00       210
           5       0.28      0.79      0.41       210
           6       0.20      0.01      0.02       210
           7       0.57      0.61      0.59       210

    accuracy                           0.37      1470
   macro avg       0.35      0.37      0.32      1470
weighted avg       0.35      0.37      0.32      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.39      0.32      0.35       210
           2       0.43      0.37      0.40       210
           3       0.65      0.75      0.70       210
           4       0.29      0.29      0.29       210
           5       0.35      0.42      0.38       210
           6       0.27      0.26      0.26       210
           7       0.58      0.61      0.60       210

    accuracy                           0.43      1470
   macro avg       0.42      0.43      0.43      1470
weighted avg       0.42      0.43      0.43      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4306122448979592
Confusion Matrix of Decision Tree is:
 [[ 68  24  18  36  30  23  11]
 [ 20  78  14  27  33  25  13]
 [ 16  10 159  14   3   8   0]
 [ 20  22  13  61  68  16  10]
 [ 21  20   3  43  97  17   9]
 [ 20  30  26  24  13  42  55]
 [  8  12   3   8   5  46 128]]
Cla

Accuracy of Random Forest after PCA and ICA is: 0.3272108843537415
Confusion Matrix of Random Forest is:
 [[  0   0   8   0 112   0  90]
 [  0   0   7   0  98   0 105]
 [  0   0 108   0  69   0  33]
 [  0   0   4   0 162   0  44]
 [  0   0   0   0 174   0  36]
 [  0   0   9   0  54   0 147]
 [  0   0   3   0   8   0 199]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.78      0.51      0.62       210
           4       0.00      0.00      0.00       210
           5       0.26      0.83      0.39       210
           6       0.00      0.00      0.00       210
           7       0.30      0.95      0.46       210

    accuracy                           0.33      1470
   macro avg       0.19      0.33      0.21      1470
weighted avg       0.19      0.33      0.21      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3489795918367347
Confusion Matrix of Random Forest is:
 [[  3  44  11   7  96   0  49]
 [  1  31   7   3  88   0  80]
 [  0  17 114   0  63   0  16]
 [  1  11   6  16 141   0  35]
 [  1   8   1  16 155   0  29]
 [  1  22  11   6  41   0 129]
 [  0   9   2   1   4   0 194]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.43      0.01      0.03       210
           2       0.22      0.15      0.18       210
           3       0.75      0.54      0.63       210
           4       0.33      0.08      0.12       210
           5       0.26      0.74      0.39       210
           6       0.00      0.00      0.00       210
           7       0.36      0.92      0.52       210

    accuracy                           0.35      1470
   macro avg       0.34      0.35      0.27      1470
weighted avg       0.34      0.35      0.27      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.40476190476190477
Confusion Matrix of Random Forest is:
 [[ 48  38  10  20  72   0  22]
 [ 18  87   7   6  76   1  15]
 [ 24  17 113  27  28   1   0]
 [  8  23   5  30 130   1  13]
 [  4  22   0  10 158   0  16]
 [ 15  58   9   6  40   4  78]
 [  1  47   0   2   4   1 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.23      0.29       210
           2       0.30      0.41      0.35       210
           3       0.78      0.54      0.64       210
           4       0.30      0.14      0.19       210
           5       0.31      0.75      0.44       210
           6       0.50      0.02      0.04       210
           7       0.52      0.74      0.61       210

    accuracy                           0.40      1470
   macro avg       0.45      0.40      0.37      1470
weighted avg       0.45      0.40      0.37      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5163265306122449
Confusion Matrix of Random Forest is:
 [[ 75  24  16  23  38  21  13]
 [ 14  99  17  16  34  16  14]
 [ 12   7 167  13   3   8   0]
 [ 15  25   8  69  71  10  12]
 [ 19  22   1  20 129  12   7]
 [ 24  20  18  12  14  60  62]
 [  4   9   1   3   5  28 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.46      0.36      0.40       210
           2       0.48      0.47      0.48       210
           3       0.73      0.80      0.76       210
           4       0.44      0.33      0.38       210
           5       0.44      0.61      0.51       210
           6       0.39      0.29      0.33       210
           7       0.60      0.76      0.67       210

    accuracy                           0.52      1470
   macro avg       0.51      0.52      0.50      1470
weighted avg       0.51      0.52      0.50      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4993197278911565
Confusion Matrix of Random Forest is:
 [[ 84  19  16  23  33  21  14]
 [ 20  99  11  19  31  20  10]
 [ 15   8 166   9   4   7   1]
 [ 23  23   8  76  59  11  10]
 [ 23  28   1  33 103  14   8]
 [ 30  21  17  19  10  54  59]
 [  5  10   0   5   4  34 152]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.40      0.41       210
           2       0.48      0.47      0.47       210
           3       0.76      0.79      0.77       210
           4       0.41      0.36      0.39       210
           5       0.42      0.49      0.45       210
           6       0.34      0.26      0.29       210
           7       0.60      0.72      0.66       210

    accuracy                           0.50      1470
   macro avg       0.49      0.50      0.49      1470
weighted avg       0.49      0.50      0.49      1470

Random Forest with 20 max_depth
Acc

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4095238095238095
Confusion Matrix of Logistic Regression is:
 [[112   5  35   2  26   0  30]
 [ 14  80  63   1  25   0  27]
 [ 27  12 167   1   2   0   1]
 [ 41  30  40   3  39   2  55]
 [ 66  13   5   1  86   1  38]
 [ 20  37  40   1  14   2  96]
 [  2  48   0   1   6   1 152]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.40      0.53      0.46       210
           2       0.36      0.38      0.37       210
           3       0.48      0.80      0.60       210
           4       0.30      0.01      0.03       210
           5       0.43      0.41      0.42       210
           6       0.33      0.01      0.02       210
           7       0.38      0.72      0.50       210

    accuracy                           0.41      1470
   macro avg       0.38      0.41      0.34      1470
weighted avg       0.38      0.41      0.34      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3945578231292517
Confusion Matrix of SVM is:
 [[ 82  13  39  25  14   2  35]
 [  2  79  76  23   5   2  23]
 [  7  11 180  11   0   0   1]
 [ 13  38  44  37  18   5  55]
 [ 30  37   8  37  50   2  46]
 [  3  47  50  16   1   3  90]
 [  0  55   2   2   0   2 149]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.60      0.39      0.47       210
           2       0.28      0.38      0.32       210
           3       0.45      0.86      0.59       210
           4       0.25      0.18      0.20       210
           5       0.57      0.24      0.34       210
           6       0.19      0.01      0.03       210
           7       0.37      0.71      0.49       210

    accuracy                           0.39      1470
   macro avg       0.39      0.39      0.35      1470
weighted avg       0.39      0.39      0.35      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 89  22  17  18  42  11  11]
 [ 12 106  13  28  19   9  23]
 [ 18  15 164   9   2   2   0]
 [ 20  43  17  45  34  24  27]
 [ 35  26   7  32  75  10  25]
 [ 16  31  10  28  17  35  73]
 [  8  11   1  12  18  20 140]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.42      0.44       210
           2       0.42      0.50      0.46       210
           3       0.72      0.78      0.75       210
           4       0.26      0.21      0.24       210
           5       0.36      0.36      0.36       210
           6       0.32      0.17      0.22       210
           7       0.47      0.67      0.55       210

    accuracy                           0.44      1470
   macro avg       0.43      0.44      0.43      1470
weighted avg       0.43      0.44      0.43      1470

Decision Tree with 13 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4421768707482993
Con

Accuracy of Random Forest after PCA and ICA is: 0.36394557823129253
Confusion Matrix of Random Forest is:
 [[ 67   2  24   0  99   0  18]
 [ 18  25  40   0  55   0  72]
 [ 21   0 164   0   5   0  20]
 [ 55   8  16   0  62   0  69]
 [ 33   3   2   0 137   0  35]
 [ 42  12  18   0  45   0  93]
 [ 36   4   0   0  28   0 142]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.25      0.32      0.28       210
           2       0.46      0.12      0.19       210
           3       0.62      0.78      0.69       210
           4       0.00      0.00      0.00       210
           5       0.32      0.65      0.43       210
           6       0.00      0.00      0.00       210
           7       0.32      0.68      0.43       210

    accuracy                           0.36      1470
   macro avg       0.28      0.36      0.29      1470
weighted avg       0.28      0.36      0.29      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.43673469387755104
Confusion Matrix of Random Forest is:
 [[132   7  23   1  24   0  23]
 [ 21 120  13   3  20   0  33]
 [ 23   4 165   5   1   0  12]
 [ 57  34  12  16  32   0  59]
 [ 79  19   2   3  73   0  34]
 [ 29  46  18   1  20   0  96]
 [ 14  50   0   0  10   0 136]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.37      0.63      0.47       210
           2       0.43      0.57      0.49       210
           3       0.71      0.79      0.74       210
           4       0.55      0.08      0.13       210
           5       0.41      0.35      0.37       210
           6       0.00      0.00      0.00       210
           7       0.35      0.65      0.45       210

    accuracy                           0.44      1470
   macro avg       0.40      0.44      0.38      1470
weighted avg       0.40      0.44      0.38      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.45034013605442175
Confusion Matrix of Random Forest is:
 [[118  10  23   0  36   1  22]
 [ 17 139  11   0  17   8  18]
 [ 22  22 162   1   1   1   1]
 [ 47  61  11   1  41   2  47]
 [ 62  24   2   1  85   0  36]
 [ 25  54  14   1  16  17  83]
 [  8  52   0   0   5   5 140]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.56      0.46       210
           2       0.38      0.66      0.49       210
           3       0.73      0.77      0.75       210
           4       0.25      0.00      0.01       210
           5       0.42      0.40      0.41       210
           6       0.50      0.08      0.14       210
           7       0.40      0.67      0.50       210

    accuracy                           0.45      1470
   macro avg       0.44      0.45      0.39      1470
weighted avg       0.44      0.45      0.39      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5081632653061224
Confusion Matrix of Random Forest is:
 [[102  10  14  21  45   5  13]
 [ 10 125  10  12  22  15  16]
 [ 10  13 172  11   2   1   1]
 [ 17  41   8  56  45  22  21]
 [ 33  19   2  25  96   5  30]
 [ 14  27  10  33  11  44  71]
 [  2  11   0   2  11  32 152]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.49      0.51       210
           2       0.51      0.60      0.55       210
           3       0.80      0.82      0.81       210
           4       0.35      0.27      0.30       210
           5       0.41      0.46      0.43       210
           6       0.35      0.21      0.26       210
           7       0.50      0.72      0.59       210

    accuracy                           0.51      1470
   macro avg       0.50      0.51      0.49      1470
weighted avg       0.50      0.51      0.49      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4816326530612245
Confusion Matrix of Random Forest is:
 [[103   8  12  19  47   9  12]
 [ 16 115  11  11  20  25  12]
 [ 14  14 167  11   2   1   1]
 [ 22  37  10  54  41  28  18]
 [ 36  20   4  27  88  12  23]
 [ 13  28  10  30  17  51  61]
 [  8   9   0   6  12  45 130]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.49      0.49      0.49       210
           2       0.50      0.55      0.52       210
           3       0.78      0.80      0.79       210
           4       0.34      0.26      0.29       210
           5       0.39      0.42      0.40       210
           6       0.30      0.24      0.27       210
           7       0.51      0.62      0.56       210

    accuracy                           0.48      1470
   macro avg       0.47      0.48      0.47      1470
weighted avg       0.47      0.48      0.47      1470

Random Forest with 20 max_depth
Acc

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.40680272108843535
Confusion Matrix of Logistic Regression is:
 [[ 87   3  30   4  47   1  38]
 [ 11  37  65  11  31   2  53]
 [ 35  10 145   0   9   0  11]
 [ 26  27  13  20  52   3  69]
 [ 41   6   2   4 107   1  49]
 [ 14  21  16   5  12   2 140]
 [  0   4   0   2   2   2 200]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.41      0.41      0.41       210
           2       0.34      0.18      0.23       210
           3       0.54      0.69      0.60       210
           4       0.43      0.10      0.16       210
           5       0.41      0.51      0.46       210
           6       0.18      0.01      0.02       210
           7       0.36      0.95      0.52       210

    accuracy                           0.41      1470
   macro avg       0.38      0.41      0.34      1470
weighted avg       0.38      0.41      0.34      1470

KNN with 3 Neigh

Accuracy of SVM after PCA and ICA is: 0.37551020408163266
Confusion Matrix of SVM is:
 [[ 59  18  15  29  25   4  60]
 [  1  83  13  25   6   1  81]
 [ 27  34 106   6  12   2  23]
 [  7  44   4  54  10   4  87]
 [ 13  14   0  63  47   1  72]
 [  4  35   4  11   1   1 154]
 [  0   5   0   2   0   1 202]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.53      0.28      0.37       210
           2       0.36      0.40      0.37       210
           3       0.75      0.50      0.60       210
           4       0.28      0.26      0.27       210
           5       0.47      0.22      0.30       210
           6       0.07      0.00      0.01       210
           7       0.30      0.96      0.45       210

    accuracy                           0.38      1470
   macro avg       0.39      0.38      0.34      1470
weighted avg       0.39      0.38      0.34      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[114  11  10  11  45   9  10]
 [ 30  99  19  18  12  21  11]
 [ 24  18 153   4   3   8   0]
 [ 49  34   5  52  32  21  17]
 [ 48  15   4  21  92  12  18]
 [ 22  36  12  19  17  58  46]
 [ 10   7   0  12  10  36 135]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.38      0.54      0.45       210
           2       0.45      0.47      0.46       210
           3       0.75      0.73      0.74       210
           4       0.38      0.25      0.30       210
           5       0.44      0.44      0.44       210
           6       0.35      0.28      0.31       210
           7       0.57      0.64      0.60       210

    accuracy                           0.48      1470
   macro avg       0.47      0.48      0.47      1470
weighted avg       0.47      0.48      0.47      1470

Decision Tree with 12 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.47210884353741495
Co

Accuracy of Random Forest after PCA and ICA is: 0.3598639455782313
Confusion Matrix of Random Forest is:
 [[ 87   0  38  44  16   0  25]
 [ 12   0  76  96   5   0  21]
 [ 26   0 149  26   5   0   4]
 [ 16   0  17 126  10   0  41]
 [ 62   0   2  84  28   0  34]
 [  3   0  29  85   5   0  88]
 [  0   0   1  70   0   0 139]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.41      0.42       210
           2       0.00      0.00      0.00       210
           3       0.48      0.71      0.57       210
           4       0.24      0.60      0.34       210
           5       0.41      0.13      0.20       210
           6       0.00      0.00      0.00       210
           7       0.39      0.66      0.49       210

    accuracy                           0.36      1470
   macro avg       0.28      0.36      0.29      1470
weighted avg       0.28      0.36      0.29      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.445578231292517
Confusion Matrix of Random Forest is:
 [[ 54   1  37  23  75   0  20]
 [  6  25  56  67  26   2  28]
 [ 20   1 149  23  13   2   2]
 [  5   4  15  89  53   0  44]
 [ 13   0   2  28 135   1  31]
 [  4   9  23  44  23  12  95]
 [  0   1   1  11   5   1 191]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.26      0.35       210
           2       0.61      0.12      0.20       210
           3       0.53      0.71      0.60       210
           4       0.31      0.42      0.36       210
           5       0.41      0.64      0.50       210
           6       0.67      0.06      0.11       210
           7       0.46      0.91      0.62       210

    accuracy                           0.45      1470
   macro avg       0.50      0.45      0.39      1470
weighted avg       0.50      0.45      0.39      1470

Random Forest with 3 max_depth
Accur

Accuracy of Random Forest after PCA and ICA is: 0.5319727891156463
Confusion Matrix of Random Forest is:
 [[ 94  12  15  14  55  10  10]
 [  6 110  19  31  16  17  11]
 [ 10  19 163   7   6   4   1]
 [ 11  33  10  76  37  19  24]
 [ 17  10   5  34 113  11  20]
 [  8  28  12  27  13  60  62]
 [  1   4   0   7   5  27 166]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.64      0.45      0.53       210
           2       0.51      0.52      0.52       210
           3       0.73      0.78      0.75       210
           4       0.39      0.36      0.37       210
           5       0.46      0.54      0.50       210
           6       0.41      0.29      0.34       210
           7       0.56      0.79      0.66       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.52      1470
weighted avg       0.53      0.53      0.52      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5326530612244897
Confusion Matrix of Random Forest is:
 [[103   9  13  20  46  11   8]
 [  7 109  22  22  17  20  13]
 [  7  19 163   9   8   4   0]
 [ 19  31  10  65  35  27  23]
 [ 21  15   2  28 113  11  20]
 [ 12  23  11  22   8  77  57]
 [  3   6   0   5   9  34 153]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.60      0.49      0.54       210
           2       0.51      0.52      0.52       210
           3       0.74      0.78      0.76       210
           4       0.38      0.31      0.34       210
           5       0.48      0.54      0.51       210
           6       0.42      0.37      0.39       210
           7       0.56      0.73      0.63       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Random Forest with 19 max_depth
Acc

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//xlm_base_finetuned_vectorized_Nisha_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['Labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3224489795918367
Confusion Matrix of Logistic Regression is:
 [[ 67  43  51  13   5   0  31]
 [ 25  81  53  19   3   1  28]
 [ 17  27 160   3   1   0   2]
 [ 41  60  16  35   6   0  52]
 [ 63  45  11  21  15   0  55]
 [ 41  54  19  30   3   0  63]
 [ 39  36   1  14   4   0 116]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.23      0.32      0.27       210
           2       0.23      0.39      0.29       210
           3       0.51      0.76      0.61       210
           4       0.26      0.17      0.20       210
           5       0.41      0.07      0.12       210
           6       0.00      0.00      0.00       210
           7       0.33      0.55      0.42       210

    accuracy                           0.32      1470
   macro avg       0.28      0.32      0.27      1470
weighted avg       0.28      0.32      0.27      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.27414965986394557
Confusion Matrix of SVM is:
 [[ 90  76   0  14   7   1  22]
 [ 31 125   7  21   4   3  19]
 [ 34 119  51   4   2   0   0]
 [ 56  80   1  35   6   1  31]
 [ 85  57   0  21   8   0  39]
 [ 53  70   0  32   6   2  47]
 [ 51  42   0  13  11   1  92]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.23      0.43      0.30       210
           2       0.22      0.60      0.32       210
           3       0.86      0.24      0.38       210
           4       0.25      0.17      0.20       210
           5       0.18      0.04      0.06       210
           6       0.25      0.01      0.02       210
           7       0.37      0.44      0.40       210

    accuracy                           0.27      1470
   macro avg       0.34      0.27      0.24      1470
weighted avg       0.34      0.27      0.24      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.354421768707483
Confusion Matrix of Decision Tree is:
 [[ 83  29   8  32  19  21  18]
 [ 11  87  18  36  15  23  20]
 [ 17  19 143  10   9  10   2]
 [ 21  33   5  62  32  23  34]
 [ 30  31   2  51  34  22  40]
 [ 25  37  11  33  28  38  38]
 [ 26  18   0  31  35  26  74]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.39      0.40      0.39       210
           2       0.34      0.41      0.38       210
           3       0.76      0.68      0.72       210
           4       0.24      0.30      0.27       210
           5       0.20      0.16      0.18       210
           6       0.23      0.18      0.20       210
           7       0.33      0.35      0.34       210

    accuracy                           0.35      1470
   macro avg       0.36      0.35      0.35      1470
weighted avg       0.36      0.35      0.35      1470

Decision Tree with 14 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.30680272108843537
Confusion Matrix of Random Forest is:
 [[ 67  70  22   0   0   0  51]
 [ 11 122  40   0   0   0  37]
 [ 17  44 144   0   0   0   5]
 [ 22 115  13   0   0   0  60]
 [ 30  82  15   0   0   0  83]
 [ 13 104  19   0   0   0  74]
 [ 13  68  11   0   0   0 118]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.32      0.35       210
           2       0.20      0.58      0.30       210
           3       0.55      0.69      0.61       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.28      0.56      0.37       210

    accuracy                           0.31      1470
   macro avg       0.20      0.31      0.23      1470
weighted avg       0.20      0.31      0.23      1470

Random Forest with 2 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3149659863945578
Confusion Matrix of Random Forest is:
 [[ 63  70  15   0   1   0  61]
 [  7 125  36   0   1   0  41]
 [ 11  44 146   0   3   0   6]
 [ 11 115  11   0   4   0  69]
 [ 14  81  10   1   1   0 103]
 [  8 103  15   1   0   0  83]
 [  9  67   6   0   0   0 128]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.30      0.38       210
           2       0.21      0.60      0.31       210
           3       0.61      0.70      0.65       210
           4       0.00      0.00      0.00       210
           5       0.10      0.00      0.01       210
           6       0.00      0.00      0.00       210
           7       0.26      0.61      0.37       210

    accuracy                           0.31      1470
   macro avg       0.24      0.31      0.24      1470
weighted avg       0.24      0.31      0.24      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3360544217687075
Confusion Matrix of Random Forest is:
 [[ 56  68  20   2  29   1  34]
 [  5 123  36   1  13   1  31]
 [  8  44 150   0   5   0   3]
 [  7 109  11   6  34   0  43]
 [  9  77  10   5  50   0  59]
 [  6  99  13   5  24   1  62]
 [  6  62   4   5  24   1 108]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.27      0.36       210
           2       0.21      0.59      0.31       210
           3       0.61      0.71      0.66       210
           4       0.25      0.03      0.05       210
           5       0.28      0.24      0.26       210
           6       0.25      0.00      0.01       210
           7       0.32      0.51      0.39       210

    accuracy                           0.34      1470
   macro avg       0.36      0.34      0.29      1470
weighted avg       0.36      0.34      0.29      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.3619047619047619
Confusion Matrix of Random Forest is:
 [[ 82  36   7  24  22  14  25]
 [ 10  97  17  31  10  18  27]
 [ 11  30 146   8   3   6   6]
 [ 21  38   3  50  34  19  45]
 [ 24  39   4  26  34  22  61]
 [ 18  45   5  33  27  25  57]
 [ 15  19   0  19  35  24  98]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.45      0.39      0.42       210
           2       0.32      0.46      0.38       210
           3       0.80      0.70      0.74       210
           4       0.26      0.24      0.25       210
           5       0.21      0.16      0.18       210
           6       0.20      0.12      0.15       210
           7       0.31      0.47      0.37       210

    accuracy                           0.36      1470
   macro avg       0.36      0.36      0.36      1470
weighted avg       0.36      0.36      0.36      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.3435374149659864
Confusion Matrix of Random Forest is:
 [[ 86  27   9  27  21  18  22]
 [ 15  75  15  36  16  28  25]
 [ 12  19 150   8   4  13   4]
 [ 23  29   4  48  40  25  41]
 [ 33  31   2  30  40  30  44]
 [ 22  30  10  33  34  35  46]
 [ 25  26   0  24  36  28  71]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.41      0.40       210
           2       0.32      0.36      0.34       210
           3       0.79      0.71      0.75       210
           4       0.23      0.23      0.23       210
           5       0.21      0.19      0.20       210
           6       0.20      0.17      0.18       210
           7       0.28      0.34      0.31       210

    accuracy                           0.34      1470
   macro avg       0.35      0.34      0.34      1470
weighted avg       0.35      0.34      0.34      1470

Random Forest with 20 max_depth
Acc