#### Info:
#### All PCA models are built using the elbow method through the scree plots in Kabita_PCA_Scree_Plots
#### 

In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    from sklearn.decomposition import FastICA
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Kabita//Input//kabita_dataset_labels.csv")

In [2]:
# Function of Scaling, PCA, ICA
def scale_pca_ica(x_data, y_data, comp):
    scaler_model = StandardScaler()
    scaled_data = scaler_model.fit_transform(x_data)
    #print(scaled_data)
    # Doing PCA giving number of Components(dimensions)
    pca_comp = PCA(n_components=comp)
    pca_data = pca_comp.fit_transform(scaled_data)
    #print(pca_data)
    # Doing ICA on PCA transformed data to make features independent
    ica_comp = FastICA(n_components=comp)
    #ica_comp = FastICA(n_components=comp,max_iter=5000)
    ica_data = ica_comp.fit_transform(pca_data)
    #print(ica_data)
    x_train,x_test,y_train,y_test = train_test_split(ica_data,y_data,test_size=0.30,random_state=21,stratify=y_data)
    return x_train, x_test, y_train, y_test

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_train, x_test, y_train, y_test, model_name):
    ml_model.fit(x_train, y_train)
    ml_pred_val = ml_model.predict(x_test)
    print("Accuracy of "+model_name+" after PCA and ICA is:", ml_model.score(x_test,y_test))
    print("Confusion Matrix of "+model_name+" is:\n", confusion_matrix(y_test,ml_pred_val))
    print("Classification Report of "+model_name+" is:\n", classification_report(y_test,ml_pred_val))
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tfidf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.38231292517006804
Confusion Matrix of Logistic Regression is:
 [[ 50   0  65  12  70   4   9]
 [ 69   1  68   2  65   1   4]
 [ 36   0 158   6   8   1   1]
 [ 24   0  51  33  86   5  11]
 [ 24   0  50   6 105   3  22]
 [  7   0  53   9  24  19  98]
 [  0   0   6   0   3   5 196]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.24      0.24      0.24       210
           2       1.00      0.00      0.01       210
           3       0.35      0.75      0.48       210
           4       0.49      0.16      0.24       210
           5       0.29      0.50      0.37       210
           6       0.50      0.09      0.15       210
           7       0.57      0.93      0.71       210

    accuracy                           0.38      1470
   macro avg       0.49      0.38      0.31      1470
weighted avg       0.49      0.38      0.31      1470

KNN with 3 Neigh

Accuracy of SVM after PCA and ICA is: 0.19863945578231293
Confusion Matrix of SVM is:
 [[  0   1 196   5   8   0   0]
 [  0   4 203   2   1   0   0]
 [  0   0 208   1   1   0   0]
 [  0   0 193   8   9   0   0]
 [  0   2 171  12  23   0   2]
 [  0   2 194   4   5   2   3]
 [  0   5 143   4   6   5  47]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.29      0.02      0.04       210
           3       0.16      0.99      0.27       210
           4       0.22      0.04      0.07       210
           5       0.43      0.11      0.17       210
           6       0.29      0.01      0.02       210
           7       0.90      0.22      0.36       210

    accuracy                           0.20      1470
   macro avg       0.33      0.20      0.13      1470
weighted avg       0.33      0.20      0.13      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3142857142857143
Confusion Matrix of SVM is:
 [[ 20  13 149  15   8   4   1]
 [  9   7 187   6   0   1   0]
 [  1   0 207   0   1   1   0]
 [  7   9 143  38   4   8   1]
 [ 19  14 120  24  21   7   5]
 [  5  10 116   9   2  39  29]
 [  0   2  26   0   9  43 130]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.33      0.10      0.15       210
           2       0.13      0.03      0.05       210
           3       0.22      0.99      0.36       210
           4       0.41      0.18      0.25       210
           5       0.47      0.10      0.16       210
           6       0.38      0.19      0.25       210
           7       0.78      0.62      0.69       210

    accuracy                           0.31      1470
   macro avg       0.39      0.31      0.27      1470
weighted avg       0.39      0.31      0.27      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.46598639455782315
Confusion Matrix of Decision Tree is:
 [[ 53  25   3  46  67  10   6]
 [ 11 109   8  41  28  13   0]
 [ 12  23 113  39   9  13   1]
 [  7  27   6  97  58  10   5]
 [ 16  37   2  40  92   8  15]
 [  1  14   6  53  17  55  64]
 [  0   3   1  12   3  25 166]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.25      0.34       210
           2       0.46      0.52      0.49       210
           3       0.81      0.54      0.65       210
           4       0.30      0.46      0.36       210
           5       0.34      0.44      0.38       210
           6       0.41      0.26      0.32       210
           7       0.65      0.79      0.71       210

    accuracy                           0.47      1470
   macro avg       0.50      0.47      0.46      1470
weighted avg       0.50      0.47      0.46      1470

Decision Tree with 6 max_depth
Acc

Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.44      0.44      0.44       210
           2       0.57      0.56      0.56       210
           3       0.81      0.79      0.80       210
           4       0.39      0.46      0.42       210
           5       0.37      0.35      0.36       210
           6       0.38      0.35      0.37       210
           7       0.67      0.67      0.67       210

    accuracy                           0.52      1470
   macro avg       0.52      0.52      0.52      1470
weighted avg       0.52      0.52      0.52      1470

Decision Tree with 16 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5251700680272109
Confusion Matrix of Decision Tree is:
 [[ 93  19   5  28  40  18   7]
 [ 12 120  12  24  27  15   0]
 [  6   8 165   9   4  17   1]
 [ 36  19  10  94  25  22   4]
 [ 39  29   7  34  80  10  11]
 [ 18  14   3  33  15  81  46]
 [  5   4   5   5  16  36 139]]
Cla

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3945578231292517
Confusion Matrix of Random Forest is:
 [[112   0  18  45   1  24  10]
 [ 89   0  66  26   0  23   6]
 [ 39   0 143  14   0  13   1]
 [ 85   0  13  81   0  26   5]
 [108   0  18  35   2  30  17]
 [ 16   0  12  40   0  82  60]
 [  0   0   1   1   0  48 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.25      0.53      0.34       210
           2       0.00      0.00      0.00       210
           3       0.53      0.68      0.59       210
           4       0.33      0.39      0.36       210
           5       0.67      0.01      0.02       210
           6       0.33      0.39      0.36       210
           7       0.62      0.76      0.68       210

    accuracy                           0.39      1470
   macro avg       0.39      0.39      0.34      1470
weighted avg       0.39      0.39      0.34      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.41496598639455784
Confusion Matrix of Random Forest is:
 [[ 97   0  17  66   3  16  11]
 [ 71   0  63  47   0  25   4]
 [ 13   0 143  39   1  13   1]
 [ 60   0  12 108   2  22   6]
 [ 85   0  16  63   7  19  20]
 [  9   0  11  48   0  81  61]
 [  0   0   1   1   0  34 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.29      0.46      0.36       210
           2       0.00      0.00      0.00       210
           3       0.54      0.68      0.60       210
           4       0.29      0.51      0.37       210
           5       0.54      0.03      0.06       210
           6       0.39      0.39      0.39       210
           7       0.63      0.83      0.71       210

    accuracy                           0.41      1470
   macro avg       0.38      0.41      0.36      1470
weighted avg       0.38      0.41      0.36      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.45102040816326533
Confusion Matrix of Random Forest is:
 [[ 74   6  13  63  26  18  10]
 [ 32  49  18  43  38  28   2]
 [ 13   3 140  39   1  13   1]
 [ 43   1  11 103  22  23   7]
 [ 53   5  10  62  43  18  19]
 [  9   5   7  43   1  77  68]
 [  0   0   1   0   0  32 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.33      0.35      0.34       210
           2       0.71      0.23      0.35       210
           3       0.70      0.67      0.68       210
           4       0.29      0.49      0.37       210
           5       0.33      0.20      0.25       210
           6       0.37      0.37      0.37       210
           7       0.62      0.84      0.72       210

    accuracy                           0.45      1470
   macro avg       0.48      0.45      0.44      1470
weighted avg       0.48      0.45      0.44      1470

Random Forest with 5 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5789115646258504
Confusion Matrix of Random Forest is:
 [[ 89   7   1  40  49  20   4]
 [ 11 118   8  22  30  21   0]
 [  7   4 164  15   4  15   1]
 [ 23   8  10 113  30  23   3]
 [ 19  13   4  46  96  18  14]
 [  8   6   1  29  11 100  55]
 [  0   1   0   0   2  36 171]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.42      0.49       210
           2       0.75      0.56      0.64       210
           3       0.87      0.78      0.82       210
           4       0.43      0.54      0.48       210
           5       0.43      0.46      0.44       210
           6       0.43      0.48      0.45       210
           7       0.69      0.81      0.75       210

    accuracy                           0.58      1470
   macro avg       0.60      0.58      0.58      1470
weighted avg       0.60      0.58      0.58      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.580952380952381
Confusion Matrix of Random Forest is:
 [[ 91   9   2  33  48  21   6]
 [  9 118   9  24  29  21   0]
 [  8   6 166  12   4  13   1]
 [ 25  13  10 112  22  25   3]
 [ 23  13   4  40  97  19  14]
 [ 10   8   1  26  13  96  56]
 [  1   1   0   0   2  32 174]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.43      0.48       210
           2       0.70      0.56      0.62       210
           3       0.86      0.79      0.83       210
           4       0.45      0.53      0.49       210
           5       0.45      0.46      0.46       210
           6       0.42      0.46      0.44       210
           7       0.69      0.83      0.75       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.58      1470
weighted avg       0.59      0.58      0.58      1470

Accuracy of Random Forest after PCA 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//cv_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3659863945578231
Confusion Matrix of Logistic Regression is:
 [[ 18   1  99   0  81   0  11]
 [  4   4 157   0  44   0   1]
 [  5   0 200   0   4   0   1]
 [ 22   3  91   0  84   0  10]
 [  8   4  54   0 130   0  14]
 [  7   1  95   0  25   1  81]
 [  1   1  19   0   3   1 185]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.28      0.09      0.13       210
           2       0.29      0.02      0.04       210
           3       0.28      0.95      0.43       210
           4       0.00      0.00      0.00       210
           5       0.35      0.62      0.45       210
           6       0.50      0.00      0.01       210
           7       0.61      0.88      0.72       210

    accuracy                           0.37      1470
   macro avg       0.33      0.37      0.25      1470
weighted avg       0.33      0.37      0.25      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.5095238095238095
Confusion Matrix of KNN Model is:
 [[124  20   4  21  28  13   0]
 [ 26 123  11  14  18  18   0]
 [ 12  10 164   5   5  13   1]
 [ 53  34  13  52  32  22   4]
 [ 60  25   6  28  71  13   7]
 [ 28  18   5  27  13  72  47]
 [  6   8   1   3  18  31 143]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.40      0.59      0.48       210
           2       0.52      0.59      0.55       210
           3       0.80      0.78      0.79       210
           4       0.35      0.25      0.29       210
           5       0.38      0.34      0.36       210
           6       0.40      0.34      0.37       210
           7       0.71      0.68      0.69       210

    accuracy                           0.51      1470
   macro avg       0.51      0.51      0.50      1470
weighted avg       0.51      0.51      0.50      1470

KNN with 4 Neighbors
Accuracy of KNN Model afte

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.15850340136054422
Confusion Matrix of SVM is:
 [[  0  39 167   4   0   0   0]
 [  0  13 197   0   0   0   0]
 [  0   0 210   0   0   0   0]
 [  4  34 169   3   0   0   0]
 [  5  62 134   8   1   0   0]
 [  1  20 189   0   0   0   0]
 [  1   8 187   3   0   5   6]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.07      0.06      0.07       210
           3       0.17      1.00      0.29       210
           4       0.17      0.01      0.03       210
           5       1.00      0.00      0.01       210
           6       0.00      0.00      0.00       210
           7       1.00      0.03      0.06       210

    accuracy                           0.16      1470
   macro avg       0.34      0.16      0.06      1470
weighted avg       0.34      0.16      0.06      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 37   5  15  37  98  15   3]
 [  0  33  60  48  49  19   1]
 [  0   6 167   8  11  17   1]
 [  0   9  26  61  84  24   6]
 [  0   8  13  28 144  10   7]
 [  0  10  16  23  20  91  50]
 [  0   0   4   9   7  49 141]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       1.00      0.18      0.30       210
           2       0.46      0.16      0.23       210
           3       0.55      0.80      0.65       210
           4       0.29      0.29      0.29       210
           5       0.35      0.69      0.46       210
           6       0.40      0.43      0.42       210
           7       0.67      0.67      0.67       210

    accuracy                           0.46      1470
   macro avg       0.53      0.46      0.43      1470
weighted avg       0.53      0.46      0.43      1470

Decision Tree with 6 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4891156462585034
Conf

Accuracy of Decision Tree after PCA and ICA is: 0.5306122448979592
Confusion Matrix of Decision Tree is:
 [[ 94  16   4  30  39  23   4]
 [ 16 116  10  25  19  23   1]
 [  8   7 164  12   3  15   1]
 [ 34   9  11  81  37  30   8]
 [ 38  22   6  25  96  15   8]
 [ 21  17   3  25  10  86  48]
 [  1   7   0   4  16  39 143]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.44      0.45      0.45       210
           2       0.60      0.55      0.57       210
           3       0.83      0.78      0.80       210
           4       0.40      0.39      0.39       210
           5       0.44      0.46      0.45       210
           6       0.37      0.41      0.39       210
           7       0.67      0.68      0.68       210

    accuracy                           0.53      1470
   macro avg       0.54      0.53      0.53      1470
weighted avg       0.54      0.53      0.53      1470

Decision Tree with 16 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3843537414965986
Confusion Matrix of Random Forest is:
 [[138   3  26  14  15   0  14]
 [ 56   8 111  19  10   0   6]
 [ 12   9 181   7   0   0   1]
 [ 87   5  52  24  22   0  20]
 [117   1  22  15  21   0  34]
 [ 28   1  73   9   4   0  95]
 [  3   1  12   0   1   0 193]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.31      0.66      0.42       210
           2       0.29      0.04      0.07       210
           3       0.38      0.86      0.53       210
           4       0.27      0.11      0.16       210
           5       0.29      0.10      0.15       210
           6       0.00      0.00      0.00       210
           7       0.53      0.92      0.67       210

    accuracy                           0.38      1470
   macro avg       0.30      0.38      0.29      1470
weighted avg       0.30      0.38      0.29      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.46870748299319726
Confusion Matrix of Random Forest is:
 [[ 99  16   2  17  55  11  10]
 [ 37  73  43  20  16  15   6]
 [ 10  39 139   6   2  13   1]
 [ 47  26  15  33  56  18  15]
 [ 47  15   9  10  96   5  28]
 [ 19   7  14  11  12  57  90]
 [  3   1   1   0   1  12 192]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.38      0.47      0.42       210
           2       0.41      0.35      0.38       210
           3       0.62      0.66      0.64       210
           4       0.34      0.16      0.21       210
           5       0.40      0.46      0.43       210
           6       0.44      0.27      0.33       210
           7       0.56      0.91      0.70       210

    accuracy                           0.47      1470
   macro avg       0.45      0.47      0.44      1470
weighted avg       0.45      0.47      0.44      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5877551020408164
Confusion Matrix of Random Forest is:
 [[ 90   8   1  28  61  20   2]
 [ 11 124   6  24  27  18   0]
 [  4   6 160  15   8  16   1]
 [ 17   9  13  92  45  25   9]
 [ 22   7   5  30 123  13  10]
 [  9  10   1  27   6  98  59]
 [  0   0   0   6   0  27 177]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.43      0.50       210
           2       0.76      0.59      0.66       210
           3       0.86      0.76      0.81       210
           4       0.41      0.44      0.43       210
           5       0.46      0.59      0.51       210
           6       0.45      0.47      0.46       210
           7       0.69      0.84      0.76       210

    accuracy                           0.59      1470
   macro avg       0.60      0.59      0.59      1470
weighted avg       0.60      0.59      0.59      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.572108843537415
Confusion Matrix of Random Forest is:
 [[ 89  10   0  28  59  18   6]
 [ 14 120   8  27  20  20   1]
 [  6   2 166  12   6  17   1]
 [ 25   9  13  81  44  28  10]
 [ 22  14   5  32 116  13   8]
 [ 14  10   1  24   7  93  61]
 [  0   1   0   4   1  28 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.42      0.47       210
           2       0.72      0.57      0.64       210
           3       0.86      0.79      0.82       210
           4       0.39      0.39      0.39       210
           5       0.46      0.55      0.50       210
           6       0.43      0.44      0.44       210
           7       0.67      0.84      0.74       210

    accuracy                           0.57      1470
   macro avg       0.58      0.57      0.57      1470
weighted avg       0.58      0.57      0.57      1470

Random Forest with 20 max_depth
Accu

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tf_500_vectors.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.29183673469387755
Confusion Matrix of Logistic Regression is:
 [[ 63   0  38   3  89   3  14]
 [113   0  20   0  67   7   3]
 [148   1  39   2  18   1   1]
 [ 29   0  46   3 119   2  11]
 [ 45   1  20   1 117   6  20]
 [ 13   2  57   1  29   9  99]
 [  0   0   6   0   2   4 198]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.15      0.30      0.20       210
           2       0.00      0.00      0.00       210
           3       0.17      0.19      0.18       210
           4       0.30      0.01      0.03       210
           5       0.27      0.56      0.36       210
           6       0.28      0.04      0.07       210
           7       0.57      0.94      0.71       210

    accuracy                           0.29      1470
   macro avg       0.25      0.29      0.22      1470
weighted avg       0.25      0.29      0.22      1470

KNN with 3 Neigh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.21156462585034014
Confusion Matrix of SVM is:
 [[  0   0 165   0  45   0   0]
 [  0   0 187   0  23   0   0]
 [  0   0 208   0   2   0   0]
 [  1   0 163   0  46   0   0]
 [  0   1 132   0  76   1   0]
 [  0   1 178   1  26   4   0]
 [  0   2 157   2  13  13  23]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.00      0.00      0.00       210
           2       0.00      0.00      0.00       210
           3       0.17      0.99      0.30       210
           4       0.00      0.00      0.00       210
           5       0.33      0.36      0.34       210
           6       0.22      0.02      0.04       210
           7       1.00      0.11      0.20       210

    accuracy                           0.21      1470
   macro avg       0.25      0.21      0.12      1470
weighted avg       0.25      0.21      0.12      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.5414965986394558
Confusion Matrix of Decision Tree is:
 [[ 78  11   3  22  47  45   4]
 [ 20  99  17  11  36  27   0]
 [  3   0 164  10   4  29   0]
 [ 28   9  18  92  29  30   4]
 [ 22  18   7  32  97  26   8]
 [  5   8   3  16  20 109  49]
 [  5   1   0   1   4  42 157]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.48      0.37      0.42       210
           2       0.68      0.47      0.56       210
           3       0.77      0.78      0.78       210
           4       0.50      0.44      0.47       210
           5       0.41      0.46      0.43       210
           6       0.35      0.52      0.42       210
           7       0.71      0.75      0.73       210

    accuracy                           0.54      1470
   macro avg       0.56      0.54      0.54      1470
weighted avg       0.56      0.54      0.54      1470

Decision Tree with 8 max_depth
Accu

Confusion Matrix of Decision Tree is:
 [[ 89  21   3  28  39  21   9]
 [ 14 125   9  15  27  19   1]
 [  5   8 167  12   3  15   0]
 [ 36  22  13  82  28  21   8]
 [ 36  25   8  28  87  19   7]
 [ 20  16   6  22  17  90  39]
 [ 10   4   0   2  15  36 143]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.42      0.42      0.42       210
           2       0.57      0.60      0.58       210
           3       0.81      0.80      0.80       210
           4       0.43      0.39      0.41       210
           5       0.40      0.41      0.41       210
           6       0.41      0.43      0.42       210
           7       0.69      0.68      0.69       210

    accuracy                           0.53      1470
   macro avg       0.53      0.53      0.53      1470
weighted avg       0.53      0.53      0.53      1470

Decision Tree with 18 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5374149659863946
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3891156462585034
Confusion Matrix of Random Forest is:
 [[ 61   6  52   8  51  16  16]
 [ 84   2  62   7  42   8   5]
 [ 14   0 186   0   7   2   1]
 [ 15   1  57  41  66  20  10]
 [ 55   3  27  14  70  16  25]
 [  8   0  67   7  17  39  72]
 [  0   0   9   0   1  27 173]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.26      0.29      0.27       210
           2       0.17      0.01      0.02       210
           3       0.40      0.89      0.56       210
           4       0.53      0.20      0.29       210
           5       0.28      0.33      0.30       210
           6       0.30      0.19      0.23       210
           7       0.57      0.82      0.68       210

    accuracy                           0.39      1470
   macro avg       0.36      0.39      0.33      1470
weighted avg       0.36      0.39      0.33      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.572108843537415
Confusion Matrix of Random Forest is:
 [[ 82  14   1  27  61  21   4]
 [ 12 114   8  15  44  16   1]
 [  5   6 165  12   4  17   1]
 [ 21  11  11 103  32  28   4]
 [ 27  12   4  35 103  20   9]
 [ 12   6   1  18  17 102  54]
 [  0   0   0   0   1  37 172]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.39      0.44       210
           2       0.70      0.54      0.61       210
           3       0.87      0.79      0.82       210
           4       0.49      0.49      0.49       210
           5       0.39      0.49      0.44       210
           6       0.42      0.49      0.45       210
           7       0.70      0.82      0.76       210

    accuracy                           0.57      1470
   macro avg       0.58      0.57      0.57      1470
weighted avg       0.58      0.57      0.57      1470

Random Forest with 11 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5816326530612245
Confusion Matrix of Random Forest is:
 [[ 85  13   2  28  56  20   6]
 [  8 125   7  19  34  16   1]
 [  5   6 169   8   5  16   1]
 [ 18  14  11 104  28  30   5]
 [ 31  12   4  32 100  20  11]
 [  6  13   1  17  14 102  57]
 [  1   0   0   2   1  36 170]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.40      0.47       210
           2       0.68      0.60      0.64       210
           3       0.87      0.80      0.84       210
           4       0.50      0.50      0.50       210
           5       0.42      0.48      0.45       210
           6       0.42      0.49      0.45       210
           7       0.68      0.81      0.74       210

    accuracy                           0.58      1470
   macro avg       0.59      0.58      0.58      1470
weighted avg       0.59      0.58      0.58      1470

Random Forest with 19 max_depth
Acc

### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.454421768707483
Confusion Matrix of Logistic Regression is:
 [[ 95   0  30   0  54   8  23]
 [  8  45  90   0  47  10  10]
 [ 17   1 183   0   2   7   0]
 [ 35   6  39   0  69  29  32]
 [ 39   6  25   0 121   2  17]
 [ 30   0  26   0  10  50  94]
 [  7   0   0   0   2  27 174]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.41      0.45      0.43       210
           2       0.78      0.21      0.34       210
           3       0.47      0.87      0.61       210
           4       0.00      0.00      0.00       210
           5       0.40      0.58      0.47       210
           6       0.38      0.24      0.29       210
           7       0.50      0.83      0.62       210

    accuracy                           0.45      1470
   macro avg       0.42      0.45      0.39      1470
weighted avg       0.42      0.45      0.39      1470

KNN with 3 Neighbo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.49455782312925173
Confusion Matrix of KNN Model is:
 [[141   7   4  13  26   9  10]
 [ 13 141  10  20  12  13   1]
 [ 15  13 160  12   5   5   0]
 [ 40  32  14  61  28  19  16]
 [ 58  41  10  36  57   5   3]
 [ 35  19   4  20  13  57  62]
 [ 14  13   2  19   4  48 110]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.45      0.67      0.54       210
           2       0.53      0.67      0.59       210
           3       0.78      0.76      0.77       210
           4       0.34      0.29      0.31       210
           5       0.39      0.27      0.32       210
           6       0.37      0.27      0.31       210
           7       0.54      0.52      0.53       210

    accuracy                           0.49      1470
   macro avg       0.49      0.49      0.48      1470
weighted avg       0.49      0.49      0.48      1470

KNN with 4 Neighbors
Accuracy of KNN Model aft

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Bernoulli Naive Bayes is:
               precision    recall  f1-score   support

           1       0.48      0.41      0.44       210
           2       0.59      0.38      0.46       210
           3       0.51      0.67      0.58       210
           4       0.00      0.00      0.00       210
           5       0.45      0.52      0.48       210
           6       0.33      0.55      0.41       210
           7       0.47      0.65      0.55       210

    accuracy                           0.45      1470
   macro avg       0.41      0.45      0.42      1470
weighted avg       0.41      0.45      0.42      1470

Working on SVM Kernal: linear
Accuracy of SVM after PCA and ICA is: 0.4163265306122449
Confusion Matrix of SVM is:
 [[113   0  41   0  40  10   6]
 [ 15  54  90   0  41   8   2]
 [ 10   2 194   0   0   4   0]
 [ 71   9  45   0  58  13  14]
 [ 65  10  33   0  93   2   7]
 [ 76   5  31   0  10  40  48]
 [ 53   1   0   0   2  36 118]]
Classification Re

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.48639455782312924
Confusion Matrix of SVM is:
 [[119   0   1  25  37  20   8]
 [  2  89  17  65  13  23   1]
 [  9  20 138  21   7  15   0]
 [ 13  16  22  65  41  36  17]
 [ 31  18   5  55  92   4   5]
 [  6   9   8  38   8  77  64]
 [  2   1   0  11   2  59 135]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.65      0.57      0.61       210
           2       0.58      0.42      0.49       210
           3       0.72      0.66      0.69       210
           4       0.23      0.31      0.27       210
           5       0.46      0.44      0.45       210
           6       0.33      0.37      0.35       210
           7       0.59      0.64      0.61       210

    accuracy                           0.49      1470
   macro avg       0.51      0.49      0.49      1470
weighted avg       0.51      0.49      0.49      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[110   5   1   9  58  17  10]
 [  1 132   8  29  20  16   4]
 [  6   9 151  27   1  16   0]
 [ 17  15  11  66  48  27  26]
 [ 17  31   5  36 109   8   4]
 [ 13  15   5  16  15  73  73]
 [  2   4   0   3   3  58 140]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.66      0.52      0.59       210
           2       0.63      0.63      0.63       210
           3       0.83      0.72      0.77       210
           4       0.35      0.31      0.33       210
           5       0.43      0.52      0.47       210
           6       0.34      0.35      0.34       210
           7       0.54      0.67      0.60       210

    accuracy                           0.53      1470
   macro avg       0.54      0.53      0.53      1470
weighted avg       0.54      0.53      0.53      1470

Decision Tree with 8 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5258503401360545
Conf

Confusion Matrix of Decision Tree is:
 [[105   9   2  25  42  19   8]
 [ 15 122   6  22  26  15   4]
 [  5   6 163  14  11  11   0]
 [ 20  21  13  71  35  35  15]
 [ 41  24   4  44  73  20   4]
 [ 15  20  10  34  17  65  49]
 [ 10   7   1  20   9  55 108]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.50      0.50       210
           2       0.58      0.58      0.58       210
           3       0.82      0.78      0.80       210
           4       0.31      0.34      0.32       210
           5       0.34      0.35      0.35       210
           6       0.30      0.31      0.30       210
           7       0.57      0.51      0.54       210

    accuracy                           0.48      1470
   macro avg       0.49      0.48      0.48      1470
weighted avg       0.49      0.48      0.48      1470

Decision Tree with 19 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.49115646258503404
Co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.49523809523809526
Confusion Matrix of Random Forest is:
 [[100   2   3   0  78   9  18]
 [ 13  91  28   0  56   9  13]
 [ 20   8 160   0  14   7   1]
 [ 22  25  26   0  83  27  27]
 [ 20  17   3   0 160   1   9]
 [ 14   7  17   0  31  44  97]
 [  1   2   3   0   6  25 173]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.48      0.50       210
           2       0.60      0.43      0.50       210
           3       0.67      0.76      0.71       210
           4       0.00      0.00      0.00       210
           5       0.37      0.76      0.50       210
           6       0.36      0.21      0.27       210
           7       0.51      0.82      0.63       210

    accuracy                           0.50      1470
   macro avg       0.43      0.50      0.44      1470
weighted avg       0.43      0.50      0.44      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.5047619047619047
Confusion Matrix of Random Forest is:
 [[117   4   2   0  59  11  17]
 [  6 115  26   0  41  12  10]
 [ 11  22 150   0  19   7   1]
 [ 24  32  22   0  78  27  27]
 [ 33  25   3   0 139   1   9]
 [ 17   8  17   0  27  51  90]
 [  1   2   1   0   6  30 170]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.56      0.56      0.56       210
           2       0.55      0.55      0.55       210
           3       0.68      0.71      0.70       210
           4       0.00      0.00      0.00       210
           5       0.38      0.66      0.48       210
           6       0.37      0.24      0.29       210
           7       0.52      0.81      0.64       210

    accuracy                           0.50      1470
   macro avg       0.44      0.50      0.46      1470
weighted avg       0.44      0.50      0.46      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.527891156462585
Confusion Matrix of Random Forest is:
 [[119   4   1   0  57  15  14]
 [  4 134  12   0  36  18   6]
 [  7  12 160   0  19  12   0]
 [ 21  33  23   1  77  31  24]
 [ 40  23   5   0 132   1   9]
 [ 18  11  12   0  24  61  84]
 [  2   2   1   0   5  31 169]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.56      0.57      0.57       210
           2       0.61      0.64      0.62       210
           3       0.75      0.76      0.75       210
           4       1.00      0.00      0.01       210
           5       0.38      0.63      0.47       210
           6       0.36      0.29      0.32       210
           7       0.55      0.80      0.66       210

    accuracy                           0.53      1470
   macro avg       0.60      0.53      0.49      1470
weighted avg       0.60      0.53      0.49      1470

Random Forest with 5 max_depth
Accur

Accuracy of Random Forest after PCA and ICA is: 0.5761904761904761
Confusion Matrix of Random Forest is:
 [[125   4   2  14  41  15   9]
 [  7 140   8  19  17  13   6]
 [  5  11 172  11   4   7   0]
 [ 19  14   9  62  45  38  23]
 [ 38  24   4  24 109   4   7]
 [ 14   9   2  14  13  87  71]
 [  4   2   0   8   3  41 152]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.60      0.59       210
           2       0.69      0.67      0.68       210
           3       0.87      0.82      0.85       210
           4       0.41      0.30      0.34       210
           5       0.47      0.52      0.49       210
           6       0.42      0.41      0.42       210
           7       0.57      0.72      0.64       210

    accuracy                           0.58      1470
   macro avg       0.57      0.58      0.57      1470
weighted avg       0.57      0.58      0.57      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5544217687074829
Confusion Matrix of Random Forest is:
 [[124   4   1  15  37  18  11]
 [  8 136   8  24  17  14   3]
 [  5  11 170   8   5  11   0]
 [ 18  12   8  71  41  38  22]
 [ 39  26   3  44  84   7   7]
 [ 15  11   5  15  15  83  66]
 [  5   3   0   7   6  42 147]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.59      0.58       210
           2       0.67      0.65      0.66       210
           3       0.87      0.81      0.84       210
           4       0.39      0.34      0.36       210
           5       0.41      0.40      0.40       210
           6       0.39      0.40      0.39       210
           7       0.57      0.70      0.63       210

    accuracy                           0.55      1470
   macro avg       0.55      0.55      0.55      1470
weighted avg       0.55      0.55      0.55      1470

Accuracy of Random Forest after PCA

In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_gkb.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.2891156462585034
Confusion Matrix of Logistic Regression is:
 [[ 78   0  79   0   1   0  52]
 [ 33   0 122   0   2   0  53]
 [ 19   0 172   0   1   0  18]
 [ 32   0 136   0   5   0  37]
 [ 29   0 146   0   3   0  32]
 [ 33   0  41   0   0   0 136]
 [ 29   0   9   0   0   0 172]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.31      0.37      0.34       210
           2       0.00      0.00      0.00       210
           3       0.24      0.82      0.38       210
           4       0.00      0.00      0.00       210
           5       0.25      0.01      0.03       210
           6       0.00      0.00      0.00       210
           7       0.34      0.82      0.48       210

    accuracy                           0.29      1470
   macro avg       0.16      0.29      0.17      1470
weighted avg       0.16      0.29      0.17      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[ 98  22  13  31  27   9  10]
 [ 44  98  12  15  22   8  11]
 [ 11  17 159   8   9   5   1]
 [ 56  44  10  47  36   5  12]
 [ 57  51  12  24  47   6  13]
 [ 48  35   7  23  14  32  51]
 [ 30  32   5  26  12  38  67]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.28      0.47      0.35       210
           2       0.33      0.47      0.39       210
           3       0.73      0.76      0.74       210
           4       0.27      0.22      0.24       210
           5       0.28      0.22      0.25       210
           6       0.31      0.15      0.20       210
           7       0.41      0.32      0.36       210

    accuracy                           0.37      1470
   macro avg       0.37      0.37      0.36      1470
weighted avg       0.37      0.37      0.36      1470

KNN with 4 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.3761904761904762
Confusion Matrix of KNN Mo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.2782312925170068
Confusion Matrix of SVM is:
 [[ 54   0  77   0  13  13  53]
 [ 23   0 123   0  10   2  52]
 [ 14   0 172   0   6   2  16]
 [ 23   0 133   0  17   3  34]
 [ 22   0 145   0  11   0  32]
 [ 27   0  38   0   8   6 131]
 [ 26   0   9   0   2   7 166]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.29      0.26      0.27       210
           2       0.00      0.00      0.00       210
           3       0.25      0.82      0.38       210
           4       0.00      0.00      0.00       210
           5       0.16      0.05      0.08       210
           6       0.18      0.03      0.05       210
           7       0.34      0.79      0.48       210

    accuracy                           0.28      1470
   macro avg       0.17      0.28      0.18      1470
weighted avg       0.17      0.28      0.18      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3006802721088435
Confusion Matrix of SVM is:
 [[ 78   0  86   8   1  17  20]
 [ 22   0 131   5   1   7  44]
 [  9   0 176   2   0  13  10]
 [ 23   0 147   6   0   7  27]
 [ 23   0 153   2   0  10  22]
 [ 40   0  45   2   0  50  73]
 [ 28   0  11   5   0  34 132]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.35      0.37      0.36       210
           2       0.00      0.00      0.00       210
           3       0.23      0.84      0.37       210
           4       0.20      0.03      0.05       210
           5       0.00      0.00      0.00       210
           6       0.36      0.24      0.29       210
           7       0.40      0.63      0.49       210

    accuracy                           0.30      1470
   macro avg       0.22      0.30      0.22      1470
weighted avg       0.22      0.30      0.22      1470

Working on SVM Kernal: rbf


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3326530612244898
Confusion Matrix of SVM is:
 [[ 90   0  58  17   6  18  21]
 [ 34   0 117   6   2   8  43]
 [ 13   0 175   3   4  13   2]
 [ 32   0  94  30  20   7  27]
 [ 28   0 122  15  13  10  22]
 [ 42   0  27  11   4  53  73]
 [ 31   0  11   1   2  37 128]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.33      0.43      0.38       210
           2       0.00      0.00      0.00       210
           3       0.29      0.83      0.43       210
           4       0.36      0.14      0.20       210
           5       0.25      0.06      0.10       210
           6       0.36      0.25      0.30       210
           7       0.41      0.61      0.49       210

    accuracy                           0.33      1470
   macro avg       0.29      0.33      0.27      1470
weighted avg       0.29      0.33      0.27      1470

Working on SVM Kernal: sigmoid


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.23605442176870747
Confusion Matrix of SVM is:
 [[ 69  22  53  20   0   2  44]
 [ 50   5 101   4   0  11  39]
 [ 38   2 155   2   0   0  13]
 [ 81   7  87   4   0   9  22]
 [ 62   8 109   0   0   9  22]
 [ 51  21  22   9   0  26  81]
 [ 41  11   6   5   0  59  88]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.18      0.33      0.23       210
           2       0.07      0.02      0.03       210
           3       0.29      0.74      0.42       210
           4       0.09      0.02      0.03       210
           5       0.00      0.00      0.00       210
           6       0.22      0.12      0.16       210
           7       0.28      0.42      0.34       210

    accuracy                           0.24      1470
   macro avg       0.16      0.24      0.17      1470
weighted avg       0.16      0.24      0.17      1470

Decision Tree with 1 max_depth
Accuracy of Decision Tree after P

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 87  19   5  18  38  31  12]
 [ 32  65   8   9  53  22  21]
 [  7  21 138   5  32   5   2]
 [ 27  21   4  51  64  15  28]
 [ 30  32   7  26  83  12  20]
 [ 34  10   3  14  19  57  73]
 [ 20   9   2   9   4  40 126]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.41      0.39       210
           2       0.37      0.31      0.34       210
           3       0.83      0.66      0.73       210
           4       0.39      0.24      0.30       210
           5       0.28      0.40      0.33       210
           6       0.31      0.27      0.29       210
           7       0.45      0.60      0.51       210

    accuracy                           0.41      1470
   macro avg       0.43      0.41      0.41      1470
weighted avg       0.43      0.41      0.41      1470

Decision Tree with 9 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4061224489795918
Conf

Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.39      0.39      0.39       210
           2       0.40      0.44      0.42       210
           3       0.76      0.75      0.75       210
           4       0.24      0.21      0.23       210
           5       0.24      0.26      0.25       210
           6       0.25      0.21      0.23       210
           7       0.39      0.42      0.41       210

    accuracy                           0.38      1470
   macro avg       0.38      0.38      0.38      1470
weighted avg       0.38      0.38      0.38      1470

Decision Tree with 20 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.38571428571428573
Confusion Matrix of Decision Tree is:
 [[ 81  19   9  33  26  21  21]
 [ 20  92  11  32  26  20   9]
 [  7  11 160  10  13   2   7]
 [ 36  22  13  44  51  23  21]
 [ 38  33  10  36  51  20  22]
 [ 24  18   7  23  22  49  67]
 [ 19  17   3  20  15  46  90]]
Cl

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3380952380952381
Confusion Matrix of Random Forest is:
 [[105   0  21  16  40   0  28]
 [ 36   0  56   6  62   0  50]
 [ 22   0 125   3  46   0  14]
 [ 51   0  34  30  65   0  30]
 [ 36   0  51  13  83   0  27]
 [ 54   0  12  13  15   0 116]
 [ 47   0   2   2   5   0 154]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.30      0.50      0.37       210
           2       0.00      0.00      0.00       210
           3       0.42      0.60      0.49       210
           4       0.36      0.14      0.20       210
           5       0.26      0.40      0.32       210
           6       0.00      0.00      0.00       210
           7       0.37      0.73      0.49       210

    accuracy                           0.34      1470
   macro avg       0.24      0.34      0.27      1470
weighted avg       0.24      0.34      0.27      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.35374149659863946
Confusion Matrix of Random Forest is:
 [[103   0  15  22  35  18  17]
 [ 37   0  52   8  62  10  41]
 [ 24   0 124   6  42   0  14]
 [ 46   0  25  41  65   6  27]
 [ 34   0  36  27  83   7  23]
 [ 50   0  10  16  11  22 101]
 [ 42   0   2   3   4  12 147]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.31      0.49      0.38       210
           2       0.00      0.00      0.00       210
           3       0.47      0.59      0.52       210
           4       0.33      0.20      0.25       210
           5       0.27      0.40      0.32       210
           6       0.29      0.10      0.15       210
           7       0.40      0.70      0.51       210

    accuracy                           0.35      1470
   macro avg       0.30      0.35      0.30      1470
weighted avg       0.30      0.35      0.30      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.37006802721088433
Confusion Matrix of Random Forest is:
 [[102   2  15  25  31  20  15]
 [ 37  15  51  11  44  12  40]
 [ 24  13 133   8  27   2   3]
 [ 43   2  25  53  54   6  27]
 [ 33   8  36  32  71   9  21]
 [ 49   1  10  18   9  38  85]
 [ 40   0   2   3   4  29 132]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.31      0.49      0.38       210
           2       0.37      0.07      0.12       210
           3       0.49      0.63      0.55       210
           4       0.35      0.25      0.29       210
           5       0.30      0.34      0.32       210
           6       0.33      0.18      0.23       210
           7       0.41      0.63      0.50       210

    accuracy                           0.37      1470
   macro avg       0.36      0.37      0.34      1470
weighted avg       0.36      0.37      0.34      1470

Random Forest with 5 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4421768707482993
Confusion Matrix of Random Forest is:
 [[ 84  17   7  30  38  11  23]
 [ 26  90   7  15  43  12  17]
 [  6  10 158  12  18   3   3]
 [ 27   6   8  59  67  11  32]
 [ 28  22   7  42  80   9  22]
 [ 23  10   4  25  12  64  72]
 [ 21   8   1   7   8  50 115]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.40      0.40       210
           2       0.55      0.43      0.48       210
           3       0.82      0.75      0.79       210
           4       0.31      0.28      0.30       210
           5       0.30      0.38      0.34       210
           6       0.40      0.30      0.35       210
           7       0.40      0.55      0.47       210

    accuracy                           0.44      1470
   macro avg       0.45      0.44      0.44      1470
weighted avg       0.45      0.44      0.44      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.41904761904761906
Confusion Matrix of Random Forest is:
 [[ 75  16  11  36  34  18  20]
 [ 25  92  10  22  33  15  13]
 [  7   9 163  12  13   3   3]
 [ 29  15  11  64  51  15  25]
 [ 33  36   6  38  57  22  18]
 [ 22  20   6  22  13  66  61]
 [ 16  10   3  17  12  53  99]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.36      0.36      0.36       210
           2       0.46      0.44      0.45       210
           3       0.78      0.78      0.78       210
           4       0.30      0.30      0.30       210
           5       0.27      0.27      0.27       210
           6       0.34      0.31      0.33       210
           7       0.41      0.47      0.44       210

    accuracy                           0.42      1470
   macro avg       0.42      0.42      0.42      1470
weighted avg       0.42      0.42      0.42      1470

Accuracy of Random Forest after PC

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_ndisbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],2)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3258503401360544
Confusion Matrix of Logistic Regression is:
 [[ 93  35  44   0   0   0  38]
 [ 17 110  31   3   0   0  49]
 [ 62  47  87   0   0   0  14]
 [ 84  38  48   0   0   0  40]
 [ 60  53  62   0   0   0  35]
 [ 18  42   6   0   0   0 144]
 [  0  21   0   0   0   0 189]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.28      0.44      0.34       210
           2       0.32      0.52      0.40       210
           3       0.31      0.41      0.36       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.00      0.00      0.00       210
           7       0.37      0.90      0.53       210

    accuracy                           0.33      1470
   macro avg       0.18      0.33      0.23      1470
weighted avg       0.18      0.33      0.23      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[110  19  15  27  24  10   5]
 [ 26 115   9  19  22  12   7]
 [ 30  10 145  14   9   2   0]
 [ 73  37  12  48  26   7   7]
 [ 64  42  15  37  41   5   6]
 [ 32  33   9  19  10  42  65]
 [ 11  33   4   5   7  35 115]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.32      0.52      0.40       210
           2       0.40      0.55      0.46       210
           3       0.69      0.69      0.69       210
           4       0.28      0.23      0.25       210
           5       0.29      0.20      0.23       210
           6       0.37      0.20      0.26       210
           7       0.56      0.55      0.55       210

    accuracy                           0.42      1470
   macro avg       0.42      0.42      0.41      1470
weighted avg       0.42      0.42      0.41      1470

KNN with 4 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.4204081632653061
Confusion Matrix of KNN Mo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.35034013605442177
Confusion Matrix of SVM is:
 [[ 91  38  56   0   0   1  24]
 [ 25 120  36   0   0   0  29]
 [ 49  20 132   0   0   1   8]
 [ 85  42  54   0   0   1  28]
 [ 63  62  70   1   0   1  13]
 [ 20  57   7   0   0   2 124]
 [  0  40   0   0   0   0 170]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.27      0.43      0.34       210
           2       0.32      0.57      0.41       210
           3       0.37      0.63      0.47       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.33      0.01      0.02       210
           7       0.43      0.81      0.56       210

    accuracy                           0.35      1470
   macro avg       0.25      0.35      0.26      1470
weighted avg       0.25      0.35      0.26      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3843537414965986
Confusion Matrix of SVM is:
 [[ 62  29  35  60   7  11   6]
 [ 17  93  31  36   6  16  11]
 [ 21  16 112  33  23   3   2]
 [ 45  33  35  65  13  13   6]
 [ 31  40  68  47  13   7   4]
 [ 14  36   0  28   0  51  81]
 [  3  12   0   0   0  26 169]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.32      0.30      0.31       210
           2       0.36      0.44      0.40       210
           3       0.40      0.53      0.46       210
           4       0.24      0.31      0.27       210
           5       0.21      0.06      0.10       210
           6       0.40      0.24      0.30       210
           7       0.61      0.80      0.69       210

    accuracy                           0.38      1470
   macro avg       0.36      0.38      0.36      1470
weighted avg       0.36      0.38      0.36      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 82  41   9  30  30  12   6]
 [  9 104   6  16  44  16  15]
 [ 22  21 122  16  27   2   0]
 [ 56  39   8  44  42  10  11]
 [ 30  61  14  21  73   3   8]
 [ 24  27   6  10   4  40  99]
 [  1   8   4   2   2  14 179]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.37      0.39      0.38       210
           2       0.35      0.50      0.41       210
           3       0.72      0.58      0.64       210
           4       0.32      0.21      0.25       210
           5       0.33      0.35      0.34       210
           6       0.41      0.19      0.26       210
           7       0.56      0.85      0.68       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.42      1470
weighted avg       0.44      0.44      0.42      1470

Decision Tree with 8 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.42517006802721086
Con

Confusion Matrix of Decision Tree is:
 [[ 82  23  13  38  36  13   5]
 [  9  93  11  28  33  24  12]
 [ 15   8 154  14  14   3   2]
 [ 51  23  21  53  41  15   6]
 [ 19  33  24  48  62  16   8]
 [ 16  28   6  23  12  66  59]
 [  3  18   5   4   4  69 107]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.42      0.39      0.40       210
           2       0.41      0.44      0.43       210
           3       0.66      0.73      0.69       210
           4       0.25      0.25      0.25       210
           5       0.31      0.30      0.30       210
           6       0.32      0.31      0.32       210
           7       0.54      0.51      0.52       210

    accuracy                           0.42      1470
   macro avg       0.42      0.42      0.42      1470
weighted avg       0.42      0.42      0.42      1470

Decision Tree with 19 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.42585034013605444
Co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.391156462585034
Confusion Matrix of Random Forest is:
 [[ 85  40  67   0   0   8  10]
 [ 13 125  41   0   0  14  17]
 [ 30  17 158   0   0   3   2]
 [ 82  43  67   0   0   7  11]
 [ 55  60  81   0   0   9   5]
 [ 35  36   9   0   0  24 106]
 [  1   9   0   0   0  17 183]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.28      0.40      0.33       210
           2       0.38      0.60      0.46       210
           3       0.37      0.75      0.50       210
           4       0.00      0.00      0.00       210
           5       0.00      0.00      0.00       210
           6       0.29      0.11      0.16       210
           7       0.55      0.87      0.67       210

    accuracy                           0.39      1470
   macro avg       0.27      0.39      0.30      1470
weighted avg       0.27      0.39      0.30      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3952380952380952
Confusion Matrix of Random Forest is:
 [[ 83  41  60   0   5  12   9]
 [ 12 132  36   0   3  10  17]
 [ 30  23 149   0   3   3   2]
 [ 80  47  59   1   3   8  12]
 [ 53  62  74   0   6  10   5]
 [ 31  44   6   3   2  22 102]
 [  1  14   0   0   0   7 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.29      0.40      0.33       210
           2       0.36      0.63      0.46       210
           3       0.39      0.71      0.50       210
           4       0.25      0.00      0.01       210
           5       0.27      0.03      0.05       210
           6       0.31      0.10      0.16       210
           7       0.56      0.90      0.69       210

    accuracy                           0.40      1470
   macro avg       0.35      0.40      0.31      1470
weighted avg       0.35      0.40      0.31      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.4598639455782313
Confusion Matrix of Random Forest is:
 [[ 88  19   8  37  34  19   5]
 [  9  95   5  31  38  17  15]
 [ 17   4 140  29  17   3   0]
 [ 48  15  11  69  45  13   9]
 [ 33  26  11  48  69  16   7]
 [ 12  22   3  20  14  55  84]
 [  2   8   0   2   1  37 160]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.42      0.42       210
           2       0.50      0.45      0.48       210
           3       0.79      0.67      0.72       210
           4       0.29      0.33      0.31       210
           5       0.32      0.33      0.32       210
           6       0.34      0.26      0.30       210
           7       0.57      0.76      0.65       210

    accuracy                           0.46      1470
   macro avg       0.46      0.46      0.46      1470
weighted avg       0.46      0.46      0.46      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4435374149659864
Confusion Matrix of Random Forest is:
 [[ 76  16  11  50  34  17   6]
 [ 12  96   7  23  37  23  12]
 [ 12   2 154  19  18   5   0]
 [ 48  21  18  61  40  12  10]
 [ 30  36  15  44  61  14  10]
 [ 13  25   3  17  13  65  74]
 [  2  12   1   3   5  48 139]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.39      0.36      0.38       210
           2       0.46      0.46      0.46       210
           3       0.74      0.73      0.74       210
           4       0.28      0.29      0.29       210
           5       0.29      0.29      0.29       210
           6       0.35      0.31      0.33       210
           7       0.55      0.66      0.60       210

    accuracy                           0.44      1470
   macro avg       0.44      0.44      0.44      1470
weighted avg       0.44      0.44      0.44      1470

Random Forest with 20 max_depth
Acc

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_vbert.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")

# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4435374149659864
Confusion Matrix of Logistic Regression is:
 [[130   5   9   0  42   1  23]
 [ 17  65  88   0  18   3  19]
 [  7   0 193   0   8   1   1]
 [ 44  16  33   0  61   2  54]
 [ 53   9  22   0  81   0  45]
 [ 18  20  27   0  15   2 128]
 [  2  19   1   0   7   0 181]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.48      0.62      0.54       210
           2       0.49      0.31      0.38       210
           3       0.52      0.92      0.66       210
           4       0.00      0.00      0.00       210
           5       0.35      0.39      0.37       210
           6       0.22      0.01      0.02       210
           7       0.40      0.86      0.55       210

    accuracy                           0.44      1470
   macro avg       0.35      0.44      0.36      1470
weighted avg       0.35      0.44      0.36      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of KNN Model after PCA and ICA is: 0.5292517006802722
Confusion Matrix of KNN Model is:
 [[134  14   3  31  22   5   1]
 [  7 149  10  23   9   6   6]
 [  3  15 174  10   4   4   0]
 [ 34  27  19  64  34  26   6]
 [ 61  17  14  36  69  10   3]
 [ 22  33  13  35   5  47  55]
 [  8  10   0  15  10  26 141]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.50      0.64      0.56       210
           2       0.56      0.71      0.63       210
           3       0.75      0.83      0.79       210
           4       0.30      0.30      0.30       210
           5       0.45      0.33      0.38       210
           6       0.38      0.22      0.28       210
           7       0.67      0.67      0.67       210

    accuracy                           0.53      1470
   macro avg       0.51      0.53      0.51      1470
weighted avg       0.51      0.53      0.51      1470

KNN with 4 Neighbors
Accuracy of KNN Model afte

Accuracy of SVM after PCA and ICA is: 0.4197278911564626
Confusion Matrix of SVM is:
 [[ 84  21  10   0  12  35  48]
 [  3 112  64   0   2  10  19]
 [  1   7 190   0   0  10   2]
 [  5  65  34   0  11  26  69]
 [ 24  62  17   0  30  13  64]
 [  2  46  17   0   0  19 126]
 [  0  27   0   0   0   1 182]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.71      0.40      0.51       210
           2       0.33      0.53      0.41       210
           3       0.57      0.90      0.70       210
           4       0.00      0.00      0.00       210
           5       0.55      0.14      0.23       210
           6       0.17      0.09      0.12       210
           7       0.36      0.87      0.51       210

    accuracy                           0.42      1470
   macro avg       0.38      0.42      0.35      1470
weighted avg       0.38      0.42      0.35      1470

Working on SVM Kernal: poly


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.5210884353741496
Confusion Matrix of SVM is:
 [[ 91   3   1  49  52  12   2]
 [  9 119   9  41  14  13   5]
 [  1   3 158  34   9   5   0]
 [ 17   7  10  98  49  24   5]
 [ 17   4   2  79  95   9   4]
 [ 10  12  12  48  12  62  54]
 [  0   5   0  10   8  44 143]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.63      0.43      0.51       210
           2       0.78      0.57      0.66       210
           3       0.82      0.75      0.79       210
           4       0.27      0.47      0.34       210
           5       0.40      0.45      0.42       210
           6       0.37      0.30      0.33       210
           7       0.67      0.68      0.68       210

    accuracy                           0.52      1470
   macro avg       0.56      0.52      0.53      1470
weighted avg       0.56      0.52      0.53      1470

Working on SVM Kernal: rbf
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[126   4   4  22  41  11   2]
 [  7 121  11  38  16  13   4]
 [  3   6 167  25   6   3   0]
 [ 29  14   7  78  49  24   9]
 [ 48   9   6  42  83  12  10]
 [ 11  12  10  37  20  61  59]
 [  7   9   1  10  11  23 149]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.55      0.60      0.57       210
           2       0.69      0.58      0.63       210
           3       0.81      0.80      0.80       210
           4       0.31      0.37      0.34       210
           5       0.37      0.40      0.38       210
           6       0.41      0.29      0.34       210
           7       0.64      0.71      0.67       210

    accuracy                           0.53      1470
   macro avg       0.54      0.53      0.53      1470
weighted avg       0.54      0.53      0.53      1470

Decision Tree with 9 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5231292517006803
Conf

Confusion Matrix of Decision Tree is:
 [[107   4   6  29  41  18   5]
 [  9 122  12  32   9  17   9]
 [  4  10 171  10   7   8   0]
 [ 30  15  10  57  49  38  11]
 [ 46  16   9  36  75  18  10]
 [ 10  15  10  29  22  56  68]
 [  3  11   1   5  14  46 130]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.51      0.51      0.51       210
           2       0.63      0.58      0.61       210
           3       0.78      0.81      0.80       210
           4       0.29      0.27      0.28       210
           5       0.35      0.36      0.35       210
           6       0.28      0.27      0.27       210
           7       0.56      0.62      0.59       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.49      1470
weighted avg       0.48      0.49      0.49      1470

Decision Tree with 19 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.49387755102040815
Co

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4421768707482993
Confusion Matrix of Random Forest is:
 [[134   5   7   0  34   0  30]
 [  9  95  57   0  26   0  23]
 [  4   3 162   1  36   0   4]
 [ 34  27  24   0  62   1  62]
 [ 51  14   4   0  83   0  58]
 [  9  35  20   1  16   0 129]
 [  0  30   1   0   3   0 176]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.56      0.64      0.59       210
           2       0.45      0.45      0.45       210
           3       0.59      0.77      0.67       210
           4       0.00      0.00      0.00       210
           5       0.32      0.40      0.35       210
           6       0.00      0.00      0.00       210
           7       0.37      0.84      0.51       210

    accuracy                           0.44      1470
   macro avg       0.33      0.44      0.37      1470
weighted avg       0.33      0.44      0.37      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5789115646258504
Confusion Matrix of Random Forest is:
 [[122   6   3  23  44   9   3]
 [  3 145   8  21  15   9   9]
 [  3   6 175  11  11   4   0]
 [ 21  12   9  82  53  21  12]
 [ 36  11   5  35 107   9   7]
 [ 13  16  11  23  17  57  73]
 [  5   6   0   6   5  25 163]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.60      0.58      0.59       210
           2       0.72      0.69      0.70       210
           3       0.83      0.83      0.83       210
           4       0.41      0.39      0.40       210
           5       0.42      0.51      0.46       210
           6       0.43      0.27      0.33       210
           7       0.61      0.78      0.68       210

    accuracy                           0.58      1470
   macro avg       0.57      0.58      0.57      1470
weighted avg       0.57      0.58      0.57      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.564625850340136
Confusion Matrix of Random Forest is:
 [[120   5   3  25  42  14   1]
 [  3 143   6  24  13  12   9]
 [  5   7 177  10   9   2   0]
 [ 22  14   8  76  51  28  11]
 [ 41   9   6  40  93  14   7]
 [  9  18  12  24  14  66  67]
 [  4   6   0   7   7  31 155]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.57      0.58       210
           2       0.71      0.68      0.69       210
           3       0.83      0.84      0.84       210
           4       0.37      0.36      0.37       210
           5       0.41      0.44      0.42       210
           6       0.40      0.31      0.35       210
           7       0.62      0.74      0.67       210

    accuracy                           0.56      1470
   macro avg       0.56      0.56      0.56      1470
weighted avg       0.56      0.56      0.56      1470

Random Forest with 19 max_depth
Accu

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//gpt_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4095238095238095
Confusion Matrix of Logistic Regression is:
 [[ 79   7  23  12  38   7  44]
 [ 18  75  60   8  24   2  23]
 [ 17   0 178   3   1   3   8]
 [ 35  27  34  17  27   3  67]
 [ 52  24   8  15  64   1  46]
 [ 18  21  33   7   4   5 122]
 [  2  16   5   1   0   2 184]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.36      0.38      0.37       210
           2       0.44      0.36      0.39       210
           3       0.52      0.85      0.65       210
           4       0.27      0.08      0.12       210
           5       0.41      0.30      0.35       210
           6       0.22      0.02      0.04       210
           7       0.37      0.88      0.52       210

    accuracy                           0.41      1470
   macro avg       0.37      0.41      0.35      1470
weighted avg       0.37      0.41      0.35      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.40272108843537413
Confusion Matrix of SVM is:
 [[ 83   6  20  11   8  36  46]
 [ 14  75  56  16   2  27  20]
 [ 15   0 173   1   0  13   8]
 [ 30  18  30  25   1  30  76]
 [ 60  27   4  19  12  31  57]
 [ 15  15  23   4   0  30 123]
 [  0   7   1   0   0   8 194]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.38      0.40      0.39       210
           2       0.51      0.36      0.42       210
           3       0.56      0.82      0.67       210
           4       0.33      0.12      0.17       210
           5       0.52      0.06      0.10       210
           6       0.17      0.14      0.16       210
           7       0.37      0.92      0.53       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.35      1470
weighted avg       0.41      0.40      0.35      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 70  13   4  21  52  33  17]
 [  5 107   1  36  36  15  10]
 [ 11   9 141  29   1  18   1]
 [ 17  21   9  68  30  31  34]
 [ 23  32   0  39  80  18  18]
 [ 10  27   6  23   7  55  82]
 [  3  22   1   5   2  38 139]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.50      0.33      0.40       210
           2       0.46      0.51      0.49       210
           3       0.87      0.67      0.76       210
           4       0.31      0.32      0.32       210
           5       0.38      0.38      0.38       210
           6       0.26      0.26      0.26       210
           7       0.46      0.66      0.54       210

    accuracy                           0.45      1470
   macro avg       0.47      0.45      0.45      1470
weighted avg       0.47      0.45      0.45      1470

Decision Tree with 7 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.47210884353741495
Con

Confusion Matrix of Decision Tree is:
 [[ 90   9   2  34  36  20  19]
 [ 12 111  10  28  26  12  11]
 [  9   8 169   8   5   9   2]
 [ 29  19  21  48  44  21  28]
 [ 29  32   5  40  75  10  19]
 [ 18  16  11  30  19  58  58]
 [  6  15   2  16   9  54 108]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.47      0.43      0.45       210
           2       0.53      0.53      0.53       210
           3       0.77      0.80      0.79       210
           4       0.24      0.23      0.23       210
           5       0.35      0.36      0.35       210
           6       0.32      0.28      0.29       210
           7       0.44      0.51      0.47       210

    accuracy                           0.45      1470
   macro avg       0.44      0.45      0.45      1470
weighted avg       0.44      0.45      0.45      1470

Decision Tree with 16 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4496598639455782
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3727891156462585
Confusion Matrix of Random Forest is:
 [[ 34   1  22   0  94   0  59]
 [  0  30  37   0 112   0  31]
 [  5   3 157   0  29   1  15]
 [  3   4  19   0 102   0  82]
 [  8   2   1   0 142   0  57]
 [  3   3  28   0  39   1 136]
 [  0   2   9   0  15   0 184]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.64      0.16      0.26       210
           2       0.67      0.14      0.24       210
           3       0.58      0.75      0.65       210
           4       0.00      0.00      0.00       210
           5       0.27      0.68      0.38       210
           6       0.50      0.00      0.01       210
           7       0.33      0.88      0.48       210

    accuracy                           0.37      1470
   macro avg       0.43      0.37      0.29      1470
weighted avg       0.43      0.37      0.29      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4238095238095238
Confusion Matrix of Random Forest is:
 [[ 80  11   7   2  56  12  42]
 [  2  92  17   4  67   1  27]
 [  9  24 148  14   2   2  11]
 [ 18  25  16  11  62   6  72]
 [ 33  27   1   0  99   1  49]
 [ 11  31  18   1  16  15 118]
 [  1  20   2   0   4   5 178]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.38      0.44       210
           2       0.40      0.44      0.42       210
           3       0.71      0.70      0.71       210
           4       0.34      0.05      0.09       210
           5       0.32      0.47      0.38       210
           6       0.36      0.07      0.12       210
           7       0.36      0.85      0.50       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.38      1470
weighted avg       0.43      0.42      0.38      1470

Random Forest with 4 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.5034013605442177
Confusion Matrix of Random Forest is:
 [[ 89   1   5  19  59  18  19]
 [  2 121   5  19  33  17  13]
 [  8   5 168  14   3  11   1]
 [ 20  17  16  59  32  24  42]
 [ 31  24   0  31  88  12  24]
 [ 11  15  13  17  12  57  85]
 [  3   6   0   9   4  30 158]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.54      0.42      0.48       210
           2       0.64      0.58      0.61       210
           3       0.81      0.80      0.81       210
           4       0.35      0.28      0.31       210
           5       0.38      0.42      0.40       210
           6       0.34      0.27      0.30       210
           7       0.46      0.75      0.57       210

    accuracy                           0.50      1470
   macro avg       0.50      0.50      0.50      1470
weighted avg       0.50      0.50      0.50      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.4931972789115646
Confusion Matrix of Random Forest is:
 [[ 92   2   4  20  55  19  18]
 [  3 122   4  23  26  19  13]
 [ 10   5 168   9   6  10   2]
 [ 21  18  15  55  40  23  38]
 [ 35  22   0  35  83  13  22]
 [ 11  16   9  19  13  67  75]
 [  3   6   1  11   4  47 138]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.44      0.48       210
           2       0.64      0.58      0.61       210
           3       0.84      0.80      0.82       210
           4       0.32      0.26      0.29       210
           5       0.37      0.40      0.38       210
           6       0.34      0.32      0.33       210
           7       0.45      0.66      0.53       210

    accuracy                           0.49      1470
   macro avg       0.50      0.49      0.49      1470
weighted avg       0.50      0.49      0.49      1470

Random Forest with 20 max_depth
Acc

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//xlm_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.5204081632653061
Confusion Matrix of Logistic Regression is:
 [[139   0   4   5  32  13  17]
 [  9  84  51  20  17  19  10]
 [ 14   2 181   1   0   8   4]
 [ 26  12  34  33  32  22  51]
 [ 52  22   9   8  93   1  25]
 [ 13   5  20   8   5  50 109]
 [  2   1   0   5   1  16 185]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.55      0.66      0.60       210
           2       0.67      0.40      0.50       210
           3       0.61      0.86      0.71       210
           4       0.41      0.16      0.23       210
           5       0.52      0.44      0.48       210
           6       0.39      0.24      0.29       210
           7       0.46      0.88      0.61       210

    accuracy                           0.52      1470
   macro avg       0.51      0.52      0.49      1470
weighted avg       0.51      0.52      0.49      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.5108843537414965
Confusion Matrix of SVM is:
 [[134   0   3   7  31  15  20]
 [  6  62  51  41  22  20   8]
 [ 12   2 180   1   2  10   3]
 [ 14  11  38  34  32  27  54]
 [ 44  22   8   8  91   3  34]
 [  5   5  16   7   6  60 111]
 [  0   1   0   1   0  18 190]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.62      0.64      0.63       210
           2       0.60      0.30      0.40       210
           3       0.61      0.86      0.71       210
           4       0.34      0.16      0.22       210
           5       0.49      0.43      0.46       210
           6       0.39      0.29      0.33       210
           7       0.45      0.90      0.60       210

    accuracy                           0.51      1470
   macro avg       0.50      0.51      0.48      1470
weighted avg       0.50      0.51      0.48      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[126   8   2  19  24  18  13]
 [  4 132   4  38  10  15   7]
 [ 14  15 164   9   1   4   3]
 [ 10  30   7  86  14  42  21]
 [ 36  39   2  40  77   6  10]
 [  9  17   5  23   7  74  75]
 [  0   6   0   1   4  51 148]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.63      0.60      0.62       210
           2       0.53      0.63      0.58       210
           3       0.89      0.78      0.83       210
           4       0.40      0.41      0.40       210
           5       0.56      0.37      0.44       210
           6       0.35      0.35      0.35       210
           7       0.53      0.70      0.61       210

    accuracy                           0.55      1470
   macro avg       0.56      0.55      0.55      1470
weighted avg       0.56      0.55      0.55      1470

Decision Tree with 8 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5408163265306123
Conf

Confusion Matrix of Decision Tree is:
 [[111   5   4  18  50  14   8]
 [  8 128  10  29  20  10   5]
 [  4   8 182   5   3   6   2]
 [ 19  27  12  76  19  35  22]
 [ 42  29   3  28  82  17   9]
 [ 17  13  10  32  13  56  69]
 [  7   6   1  15  15  58 108]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.53      0.53      0.53       210
           2       0.59      0.61      0.60       210
           3       0.82      0.87      0.84       210
           4       0.37      0.36      0.37       210
           5       0.41      0.39      0.40       210
           6       0.29      0.27      0.28       210
           7       0.48      0.51      0.50       210

    accuracy                           0.51      1470
   macro avg       0.50      0.51      0.50      1470
weighted avg       0.50      0.51      0.50      1470

Decision Tree with 18 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5054421768707483
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.46258503401360546
Confusion Matrix of Random Forest is:
 [[142  32   3   0   1   5  27]
 [  6 171   5   0   1   8  19]
 [ 12  37 149   0   0   6   6]
 [ 20 102  12   0   0  15  61]
 [ 61 126   0   0   6   1  16]
 [  6  33  12   0   0  23 136]
 [  0  16   0   0   0   5 189]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.57      0.68      0.62       210
           2       0.33      0.81      0.47       210
           3       0.82      0.71      0.76       210
           4       0.00      0.00      0.00       210
           5       0.75      0.03      0.06       210
           6       0.37      0.11      0.17       210
           7       0.42      0.90      0.57       210

    accuracy                           0.46      1470
   macro avg       0.47      0.46      0.38      1470
weighted avg       0.47      0.46      0.38      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.5054421768707483
Confusion Matrix of Random Forest is:
 [[139  13   3   0  23  14  18]
 [  6 162   4   0  10  18  10]
 [ 12  37 150   0   0   9   2]
 [ 20  66  12   0  36  32  44]
 [ 58  74   0   0  61   2  15]
 [  6  31   4   0   2  58 109]
 [  0   8   0   0   8  21 173]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.58      0.66      0.62       210
           2       0.41      0.77      0.54       210
           3       0.87      0.71      0.78       210
           4       0.00      0.00      0.00       210
           5       0.44      0.29      0.35       210
           6       0.38      0.28      0.32       210
           7       0.47      0.82      0.60       210

    accuracy                           0.51      1470
   macro avg       0.45      0.51      0.46      1470
weighted avg       0.45      0.51      0.46      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.527891156462585
Confusion Matrix of Random Forest is:
 [[135   8   3   3  29  18  14]
 [  5 153   4   5  15  22   6]
 [ 13  37 149   0   0  10   1]
 [ 16  51  11  14  44  40  34]
 [ 53  49   0   1  91   6  10]
 [  4  27   3   2   6  72  96]
 [  0   7   0   0   9  32 162]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.60      0.64      0.62       210
           2       0.46      0.73      0.56       210
           3       0.88      0.71      0.78       210
           4       0.56      0.07      0.12       210
           5       0.47      0.43      0.45       210
           6       0.36      0.34      0.35       210
           7       0.50      0.77      0.61       210

    accuracy                           0.53      1470
   macro avg       0.55      0.53      0.50      1470
weighted avg       0.55      0.53      0.50      1470

Random Forest with 5 max_depth
Accur

Accuracy of Random Forest after PCA and ICA is: 0.5870748299319728
Confusion Matrix of Random Forest is:
 [[130   3   1  12  35  17  12]
 [  2 135   3  35  13  19   3]
 [  4  11 175  10   2   8   0]
 [ 10  15  10  92  18  36  29]
 [ 35  28   1  24 104   8  10]
 [  9   6   7  23   7  75  83]
 [  0   3   0   5   5  45 152]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.68      0.62      0.65       210
           2       0.67      0.64      0.66       210
           3       0.89      0.83      0.86       210
           4       0.46      0.44      0.45       210
           5       0.57      0.50      0.53       210
           6       0.36      0.36      0.36       210
           7       0.53      0.72      0.61       210

    accuracy                           0.59      1470
   macro avg       0.59      0.59      0.59      1470
weighted avg       0.59      0.59      0.59      1470

Random Forest with 13 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5782312925170068
Confusion Matrix of Random Forest is:
 [[130   3   1  10  38  17  11]
 [  2 137   5  30  16  19   1]
 [  3   6 181  11   3   6   0]
 [  9  21  11  85  20  37  27]
 [ 36  26   3  19 106  12   8]
 [  9   8   8  22   7  74  82]
 [  1   5   0   7   5  55 137]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.68      0.62      0.65       210
           2       0.67      0.65      0.66       210
           3       0.87      0.86      0.86       210
           4       0.46      0.40      0.43       210
           5       0.54      0.50      0.52       210
           6       0.34      0.35      0.34       210
           7       0.52      0.65      0.58       210

    accuracy                           0.58      1470
   macro avg       0.58      0.58      0.58      1470
weighted avg       0.58      0.58      0.58      1470

Accuracy of Random Forest after PCA

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//bert_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],7)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")



Accuracy of Logistic Regression after PCA and ICA is: 0.3578231292517007
Confusion Matrix of Logistic Regression is:
 [[ 31  42  11   1  71   7  47]
 [  4  87  44   0  61   3  11]
 [ 13  33 142   0  14   3   5]
 [  1  52  19   1  61   5  71]
 [  2  31  10   1 126   0  40]
 [  5  39  17   1  52  10  86]
 [  1  19   5   0  49   7 129]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.54      0.15      0.23       210
           2       0.29      0.41      0.34       210
           3       0.57      0.68      0.62       210
           4       0.25      0.00      0.01       210
           5       0.29      0.60      0.39       210
           6       0.29      0.05      0.08       210
           7       0.33      0.61      0.43       210

    accuracy                           0.36      1470
   macro avg       0.37      0.36      0.30      1470
weighted avg       0.37      0.36      0.30      1470

KNN with 3 Neighb

Accuracy of SVM after PCA and ICA is: 0.3122448979591837
Confusion Matrix of SVM is:
 [[ 12  91   0   2  73   0  32]
 [  0 128  14   0  62   1   5]
 [  9 100  80   1  18   0   2]
 [  0  88   3   0  69   0  50]
 [  1  40   0   0 136   0  33]
 [  4  82   1   0  58   1  64]
 [  0  50   0   0  58   0 102]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.46      0.06      0.10       210
           2       0.22      0.61      0.32       210
           3       0.82      0.38      0.52       210
           4       0.00      0.00      0.00       210
           5       0.29      0.65      0.40       210
           6       0.50      0.00      0.01       210
           7       0.35      0.49      0.41       210

    accuracy                           0.31      1470
   macro avg       0.38      0.31      0.25      1470
weighted avg       0.38      0.31      0.25      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Decision Tree after PCA and ICA is: 0.36054421768707484
Confusion Matrix of Decision Tree is:
 [[ 17  69   8   3  55  16  42]
 [  0 132   5   2  39   6  26]
 [  0  52 135   0  10   3  10]
 [  1  67   8   5  43  23  63]
 [  0  51   5   2  96  10  46]
 [  0  56  10   8  32  31  73]
 [  0  29   2   8  25  32 114]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.94      0.08      0.15       210
           2       0.29      0.63      0.40       210
           3       0.78      0.64      0.70       210
           4       0.18      0.02      0.04       210
           5       0.32      0.46      0.38       210
           6       0.26      0.15      0.19       210
           7       0.30      0.54      0.39       210

    accuracy                           0.36      1470
   macro avg       0.44      0.36      0.32      1470
weighted avg       0.44      0.36      0.32      1470

Decision Tree with 7 max_depth
Acc

Accuracy of Decision Tree after PCA and ICA is: 0.3925170068027211
Confusion Matrix of Decision Tree is:
 [[ 57  16   4  35  38  31  29]
 [ 15 101   8  33  32  16   5]
 [ 10   6 161  11   6  11   5]
 [ 20  18   8  58  33  32  41]
 [ 24  15   9  30  91  18  23]
 [ 19  11   7  47  34  43  49]
 [ 13   8   6  34  26  57  66]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.36      0.27      0.31       210
           2       0.58      0.48      0.52       210
           3       0.79      0.77      0.78       210
           4       0.23      0.28      0.25       210
           5       0.35      0.43      0.39       210
           6       0.21      0.20      0.21       210
           7       0.30      0.31      0.31       210

    accuracy                           0.39      1470
   macro avg       0.40      0.39      0.40      1470
weighted avg       0.40      0.39      0.40      1470

Decision Tree with 17 max_depth
Acc

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.34625850340136055
Confusion Matrix of Random Forest is:
 [[ 19  36  12   0  77   0  66]
 [  2  99  23   0  64   0  22]
 [  0  32 147   0  23   0   8]
 [  1  41  22   0  68   1  77]
 [  1  37   7   0 118   0  47]
 [  1  33  19   0  47   0 110]
 [  0  19   1   0  64   0 126]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.09      0.16       210
           2       0.33      0.47      0.39       210
           3       0.64      0.70      0.67       210
           4       0.00      0.00      0.00       210
           5       0.26      0.56      0.35       210
           6       0.00      0.00      0.00       210
           7       0.28      0.60      0.38       210

    accuracy                           0.35      1470
   macro avg       0.33      0.35      0.28      1470
weighted avg       0.33      0.35      0.28      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3625850340136054
Confusion Matrix of Random Forest is:
 [[ 27  40   8   0  70   6  59]
 [  2 113  12   0  62   4  17]
 [  0  42 139   0  21   2   6]
 [  1  52  11   0  71   9  66]
 [  1  40   5   0 121   0  43]
 [  1  32  20   0  52  12  93]
 [  1  13   7   0  63   5 121]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.82      0.13      0.22       210
           2       0.34      0.54      0.42       210
           3       0.69      0.66      0.67       210
           4       0.00      0.00      0.00       210
           5       0.26      0.58      0.36       210
           6       0.32      0.06      0.10       210
           7       0.30      0.58      0.39       210

    accuracy                           0.36      1470
   macro avg       0.39      0.36      0.31      1470
weighted avg       0.39      0.36      0.31      1470

Random Forest with 4 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.3891156462585034
Confusion Matrix of Random Forest is:
 [[ 27  39   9   3  62  11  59]
 [  1 115  14   1  57   4  18]
 [  0  39 143   1  13   2  12]
 [  3  50  11   6  50  16  74]
 [  1  41   4   0 117   2  45]
 [  2  35  16   1  38  22  96]
 [  0  15   3   5  36   9 142]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.79      0.13      0.22       210
           2       0.34      0.55      0.42       210
           3       0.71      0.68      0.70       210
           4       0.35      0.03      0.05       210
           5       0.31      0.56      0.40       210
           6       0.33      0.10      0.16       210
           7       0.32      0.68      0.43       210

    accuracy                           0.39      1470
   macro avg       0.45      0.39      0.34      1470
weighted avg       0.45      0.39      0.34      1470

Random Forest with 5 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.46190476190476193
Confusion Matrix of Random Forest is:
 [[ 53  15   2  23  53  26  38]
 [  9 116   6  18  36  11  14]
 [  5  11 166  10   9   7   2]
 [  6  18   7  55  38  39  47]
 [  9  23   6  16 108   8  40]
 [ 15  13   6  27  22  55  72]
 [  4   5   0  12  22  41 126]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.25      0.34       210
           2       0.58      0.55      0.56       210
           3       0.86      0.79      0.82       210
           4       0.34      0.26      0.30       210
           5       0.38      0.51      0.43       210
           6       0.29      0.26      0.28       210
           7       0.37      0.60      0.46       210

    accuracy                           0.46      1470
   macro avg       0.48      0.46      0.46      1470
weighted avg       0.48      0.46      0.46      1470

Random Forest with 13 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.4598639455782313
Confusion Matrix of Random Forest is:
 [[ 58  20   2  23  49  31  27]
 [ 12 114   7  20  36  11  10]
 [  4   9 166   5  11  11   4]
 [  8  16  13  58  38  32  45]
 [ 18  21   7  16 102  10  36]
 [ 16  12   7  22  21  65  67]
 [  5   4   0  19  23  46 113]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.48      0.28      0.35       210
           2       0.58      0.54      0.56       210
           3       0.82      0.79      0.81       210
           4       0.36      0.28      0.31       210
           5       0.36      0.49      0.42       210
           6       0.32      0.31      0.31       210
           7       0.37      0.54      0.44       210

    accuracy                           0.46      1470
   macro avg       0.47      0.46      0.46      1470
weighted avg       0.47      0.46      0.46      1470

Accuracy of Random Forest after PCA

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.4142857142857143
Confusion Matrix of Logistic Regression is:
 [[  1  67  33   0  85   0  24]
 [  0 103  20   0  54   1  32]
 [  1  15 172   1  21   0   0]
 [  0  32  21   1 127   2  27]
 [  0  26   4   0 141   1  38]
 [  0  45  28   0  19   1 117]
 [  0  16   1   0   2   1 190]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.50      0.00      0.01       210
           2       0.34      0.49      0.40       210
           3       0.62      0.82      0.70       210
           4       0.50      0.00      0.01       210
           5       0.31      0.67      0.43       210
           6       0.17      0.00      0.01       210
           7       0.44      0.90      0.60       210

    accuracy                           0.41      1470
   macro avg       0.41      0.41      0.31      1470
weighted avg       0.41      0.41      0.31      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3802721088435374
Confusion Matrix of SVM is:
 [[  1 100   5   0  96   0   8]
 [  0 139   6   0  57   0   8]
 [  1  25 134   2  48   0   0]
 [  0  55   6   0 135   1  13]
 [  0  36   0   0 150   0  24]
 [  0 103  13   0  24   1  69]
 [  0  72   1   0   3   0 134]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.50      0.00      0.01       210
           2       0.26      0.66      0.38       210
           3       0.81      0.64      0.71       210
           4       0.00      0.00      0.00       210
           5       0.29      0.71      0.41       210
           6       0.50      0.00      0.01       210
           7       0.52      0.64      0.58       210

    accuracy                           0.38      1470
   macro avg       0.41      0.38      0.30      1470
weighted avg       0.41      0.38      0.30      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 50  23  39  25  34  33   6]
 [ 30  97  19   7  19  27  11]
 [ 10   0 176  15   2   7   0]
 [ 35  13  20  54  49  26  13]
 [ 38  24  10  50  60  13  15]
 [ 24  18  22  13   9  61  63]
 [ 12   5   1   3   3  49 137]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.25      0.24      0.24       210
           2       0.54      0.46      0.50       210
           3       0.61      0.84      0.71       210
           4       0.32      0.26      0.29       210
           5       0.34      0.29      0.31       210
           6       0.28      0.29      0.29       210
           7       0.56      0.65      0.60       210

    accuracy                           0.43      1470
   macro avg       0.42      0.43      0.42      1470
weighted avg       0.42      0.43      0.42      1470

Decision Tree with 8 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4414965986394558
Conf

Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.35      0.34      0.34       210
           2       0.45      0.43      0.44       210
           3       0.84      0.80      0.82       210
           4       0.24      0.29      0.26       210
           5       0.33      0.32      0.33       210
           6       0.28      0.26      0.27       210
           7       0.50      0.52      0.51       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.42      1470
weighted avg       0.43      0.42      0.42      1470

Decision Tree with 17 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4272108843537415
Confusion Matrix of Decision Tree is:
 [[ 73  15   7  41  36  31   7]
 [ 18 100   6  22  27  23  14]
 [  5   5 169  13   6  11   1]
 [ 38  26   9  57  45  20  15]
 [ 36  23   9  48  65  13  16]
 [ 26  27   6  28  12  52  59]
 [ 19  13   0  14  10  42 112]]
Cla

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.4231292517006803
Confusion Matrix of Random Forest is:
 [[ 21  95   7   0  67   2  18]
 [  5 141   9   0  23   2  30]
 [ 27  26 138   0  19   0   0]
 [ 11  61   8   1 108   1  20]
 [  7  43   0   0 130   0  30]
 [  6  61  15   1  17   3 107]
 [  0  17   1   0   3   1 188]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.27      0.10      0.15       210
           2       0.32      0.67      0.43       210
           3       0.78      0.66      0.71       210
           4       0.50      0.00      0.01       210
           5       0.35      0.62      0.45       210
           6       0.33      0.01      0.03       210
           7       0.48      0.90      0.62       210

    accuracy                           0.42      1470
   macro avg       0.43      0.42      0.34      1470
weighted avg       0.43      0.42      0.34      1470

Random Forest with 3 max_depth
Accu

Accuracy of Random Forest after PCA and ICA is: 0.49183673469387756
Confusion Matrix of Random Forest is:
 [[ 61  23  11  38  38  31   8]
 [ 17 120   6  10  23  18  16]
 [  8   2 171  14   5  10   0]
 [ 24  10  12  59  65  30  10]
 [ 17  22   4  44  93  13  17]
 [ 18  16   8  14  10  58  86]
 [  4   5   0   1   3  36 161]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.29      0.34       210
           2       0.61      0.57      0.59       210
           3       0.81      0.81      0.81       210
           4       0.33      0.28      0.30       210
           5       0.39      0.44      0.42       210
           6       0.30      0.28      0.29       210
           7       0.54      0.77      0.63       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.48      1470
weighted avg       0.48      0.49      0.48      1470

Random Forest with 11 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.4897959183673469
Confusion Matrix of Random Forest is:
 [[ 68  19  11  43  37  24   8]
 [ 13 116   7  15  27  18  14]
 [  7   1 174  10   7  10   1]
 [ 29  15   9  62  60  27   8]
 [ 24  22   4  42  89   9  20]
 [ 21  18   8  19   8  59  77]
 [  5   6   0   6   3  38 152]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.41      0.32      0.36       210
           2       0.59      0.55      0.57       210
           3       0.82      0.83      0.82       210
           4       0.31      0.30      0.30       210
           5       0.39      0.42      0.40       210
           6       0.32      0.28      0.30       210
           7       0.54      0.72      0.62       210

    accuracy                           0.49      1470
   macro avg       0.48      0.49      0.48      1470
weighted avg       0.48      0.49      0.48      1470

Random Forest with 19 max_depth
Acc

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],4)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Logistic Regression after PCA and ICA is: 0.4272108843537415
Confusion Matrix of Logistic Regression is:
 [[ 95   3  22   0  47   8  35]
 [ 11  91  61   0  30   1  16]
 [ 16   5 176   0   5   2   6]
 [ 33  25  41   0  35   7  69]
 [ 62  13   5   0  85   2  43]
 [ 23  21  30   0  19  10 107]
 [  1  17   4   0   6  11 171]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.39      0.45      0.42       210
           2       0.52      0.43      0.47       210
           3       0.52      0.84      0.64       210
           4       0.00      0.00      0.00       210
           5       0.37      0.40      0.39       210
           6       0.24      0.05      0.08       210
           7       0.38      0.81      0.52       210

    accuracy                           0.43      1470
   macro avg       0.35      0.43      0.36      1470
weighted avg       0.35      0.43      0.36      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.38571428571428573
Confusion Matrix of SVM is:
 [[ 63   5  31  29  17  33  32]
 [  1  49 114  16   3  10  17]
 [  1   1 190   3   1   9   5]
 [  6  19  53  27   3  43  59]
 [ 33  18  22  37  30  18  52]
 [  5  13  33  17   0  39 103]
 [  0   7   5   1   0  28 169]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.58      0.30      0.39       210
           2       0.44      0.23      0.30       210
           3       0.42      0.90      0.58       210
           4       0.21      0.13      0.16       210
           5       0.56      0.14      0.23       210
           6       0.22      0.19      0.20       210
           7       0.39      0.80      0.52       210

    accuracy                           0.39      1470
   macro avg       0.40      0.39      0.34      1470
weighted avg       0.40      0.39      0.34      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[108   7   0  34  38   5  18]
 [  6 103   9  49  22  14   7]
 [ 11  13 161   9   4  10   2]
 [ 20  21  17  77  19  22  34]
 [ 41  11   3  37  88   3  27]
 [ 13   9   8  53   8  52  67]
 [  1   3   0  25  11  22 148]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.54      0.51      0.53       210
           2       0.62      0.49      0.55       210
           3       0.81      0.77      0.79       210
           4       0.27      0.37      0.31       210
           5       0.46      0.42      0.44       210
           6       0.41      0.25      0.31       210
           7       0.49      0.70      0.58       210

    accuracy                           0.50      1470
   macro avg       0.51      0.50      0.50      1470
weighted avg       0.51      0.50      0.50      1470

Decision Tree with 8 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.5006802721088436
Conf

Confusion Matrix of Decision Tree is:
 [[ 93  11   6  24  46  20  10]
 [ 10 110   7  33  20  25   5]
 [  5   8 166  12   5  11   3]
 [ 20  31  12  53  27  43  24]
 [ 38  32   4  28  74  18  16]
 [ 15  17  10  36  14  59  59]
 [ 10  15   1  20   8  40 116]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.49      0.44      0.46       210
           2       0.49      0.52      0.51       210
           3       0.81      0.79      0.80       210
           4       0.26      0.25      0.25       210
           5       0.38      0.35      0.37       210
           6       0.27      0.28      0.28       210
           7       0.50      0.55      0.52       210

    accuracy                           0.46      1470
   macro avg       0.46      0.46      0.46      1470
weighted avg       0.46      0.46      0.46      1470

Decision Tree with 19 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.4557823129251701
Con

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.38979591836734695
Confusion Matrix of Random Forest is:
 [[120  12  18   0   3   0  57]
 [ 20  81  44   0  17   0  48]
 [ 21   3 164   0   4   0  18]
 [ 43  28  18   0   5   0 116]
 [ 97  24   1   0  16   0  72]
 [ 20  22  16   0   0   0 152]
 [  4  14   0   0   0   0 192]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.37      0.57      0.45       210
           2       0.44      0.39      0.41       210
           3       0.63      0.78      0.70       210
           4       0.00      0.00      0.00       210
           5       0.36      0.08      0.13       210
           6       0.00      0.00      0.00       210
           7       0.29      0.91      0.44       210

    accuracy                           0.39      1470
   macro avg       0.30      0.39      0.30      1470
weighted avg       0.30      0.39      0.30      1470

Random Forest with 3 max_depth


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.45374149659863944
Confusion Matrix of Random Forest is:
 [[124  13   2   2  15   5  49]
 [ 15 123  17   5  11   7  32]
 [ 21   5 163   0   6   3  12]
 [ 35  34  15   6  11  17  92]
 [ 77  24   0   2  42   1  64]
 [ 19  18  15   2   1  13 142]
 [  4   8   0   0   0   2 196]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.42      0.59      0.49       210
           2       0.55      0.59      0.57       210
           3       0.77      0.78      0.77       210
           4       0.35      0.03      0.05       210
           5       0.49      0.20      0.28       210
           6       0.27      0.06      0.10       210
           7       0.33      0.93      0.49       210

    accuracy                           0.45      1470
   macro avg       0.45      0.45      0.39      1470
weighted avg       0.45      0.45      0.39      1470

Random Forest with 4 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5476190476190477
Confusion Matrix of Random Forest is:
 [[109   6   0  25  39  19  12]
 [  3 135   8  20  21  17   6]
 [  7   7 171  11   6   7   1]
 [ 18  30  12  54  22  46  28]
 [ 37  14   0  31 103   9  16]
 [  8  18   9  27   6  80  62]
 [  2   6   0   8   5  36 153]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.59      0.52      0.55       210
           2       0.62      0.64      0.63       210
           3       0.85      0.81      0.83       210
           4       0.31      0.26      0.28       210
           5       0.51      0.49      0.50       210
           6       0.37      0.38      0.38       210
           7       0.55      0.73      0.63       210

    accuracy                           0.55      1470
   macro avg       0.54      0.55      0.54      1470
weighted avg       0.54      0.55      0.54      1470

Random Forest with 12 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5306122448979592
Confusion Matrix of Random Forest is:
 [[107   6   0  26  41  21   9]
 [  4 137   9  15  22  18   5]
 [  7   7 173  10   5   7   1]
 [ 25  29  12  57  23  37  27]
 [ 46  17   1  24  95  11  16]
 [ 10  18  11  27   9  70  65]
 [  3   5   0  13   8  40 141]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.53      0.51      0.52       210
           2       0.63      0.65      0.64       210
           3       0.84      0.82      0.83       210
           4       0.33      0.27      0.30       210
           5       0.47      0.45      0.46       210
           6       0.34      0.33      0.34       210
           7       0.53      0.67      0.59       210

    accuracy                           0.53      1470
   macro avg       0.52      0.53      0.53      1470
weighted avg       0.52      0.53      0.53      1470

Random Forest with 20 max_depth
Acc

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],3)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")

Accuracy of Logistic Regression after PCA and ICA is: 0.3802721088435374
Confusion Matrix of Logistic Regression is:
 [[ 81   0  19   0  62   0  48]
 [ 20  13  86   0  45   1  45]
 [ 28   0 164   0   5   0  13]
 [ 28   6  24   0  66   0  86]
 [ 46   1   6   0 101   1  55]
 [  7   6  16   0  34   1 146]
 [  2   5   0   0   4   0 199]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.38      0.39      0.38       210
           2       0.42      0.06      0.11       210
           3       0.52      0.78      0.62       210
           4       0.00      0.00      0.00       210
           5       0.32      0.48      0.38       210
           6       0.33      0.00      0.01       210
           7       0.34      0.95      0.50       210

    accuracy                           0.38      1470
   macro avg       0.33      0.38      0.29      1470
weighted avg       0.33      0.38      0.29      1470

KNN with 3 Neighb

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of KNN Model is:
 [[109  13   6  22  39  11  10]
 [ 30 117  15  20   6  13   9]
 [ 19  17 159   5   2   8   0]
 [ 36  40  17  42  38  20  17]
 [ 71  21   4  22  69  11  12]
 [ 28  29  13  28  20  36  56]
 [ 17  24   2  19  10  37 101]]
Classification Report of KNN Model is:
               precision    recall  f1-score   support

           1       0.35      0.52      0.42       210
           2       0.45      0.56      0.50       210
           3       0.74      0.76      0.75       210
           4       0.27      0.20      0.23       210
           5       0.38      0.33      0.35       210
           6       0.26      0.17      0.21       210
           7       0.49      0.48      0.49       210

    accuracy                           0.43      1470
   macro avg       0.42      0.43      0.42      1470
weighted avg       0.42      0.43      0.42      1470

KNN with 4 Neighbors
Accuracy of KNN Model after PCA and ICA is: 0.4387755102040816
Confusion Matrix of KNN Mo

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of SVM after PCA and ICA is: 0.3625850340136054
Confusion Matrix of SVM is:
 [[ 49   2  13  20  43  10  73]
 [  7  29  30  38  17   3  86]
 [ 14   0 138  15   6   7  30]
 [  9   5  14  40  23   8 111]
 [ 20   1   2  35  66   5  81]
 [  3   5  11  19   8   1 163]
 [  0   0   0   0   0   0 210]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.48      0.23      0.31       210
           2       0.69      0.14      0.23       210
           3       0.66      0.66      0.66       210
           4       0.24      0.19      0.21       210
           5       0.40      0.31      0.35       210
           6       0.03      0.00      0.01       210
           7       0.28      1.00      0.44       210

    accuracy                           0.36      1470
   macro avg       0.40      0.36      0.32      1470
weighted avg       0.40      0.36      0.32      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is:

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.46      0.47      0.46       210
           2       0.52      0.54      0.53       210
           3       0.78      0.75      0.76       210
           4       0.26      0.22      0.24       210
           5       0.35      0.45      0.40       210
           6       0.31      0.19      0.24       210
           7       0.51      0.64      0.57       210

    accuracy                           0.47      1470
   macro avg       0.46      0.47      0.46      1470
weighted avg       0.46      0.47      0.46      1470

Decision Tree with 10 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.47346938775510206
Confusion Matrix of Decision Tree is:
 [[ 96   6   5  28  53  12  10]
 [ 15 111  14  34   9  19   8]
 [ 13  16 167   3   5   6   0]
 [ 19  25  16  59  47  14  30]
 [ 44  15   3  31  88  13  16]
 [ 12  21  10  41  19  44  63]
 [  9   7   0  18  11  34 131]]
Cl

Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.42      0.43      0.43       210
           2       0.53      0.48      0.51       210
           3       0.72      0.79      0.75       210
           4       0.26      0.26      0.26       210
           5       0.34      0.32      0.33       210
           6       0.27      0.25      0.26       210
           7       0.48      0.52      0.50       210

    accuracy                           0.44      1470
   macro avg       0.43      0.44      0.43      1470
weighted avg       0.43      0.44      0.43      1470

Accuracy of Decision Tree after PCA and ICA is: 0.4340136054421769
Confusion Matrix of Decision Tree is:
 [[ 87  15   6  28  39  19  16]
 [ 16  98  17  23  13  32  11]
 [  9   9 169   7   8   8   0]
 [ 20  23  19  51  38  29  30]
 [ 53  14   7  33  68  19  16]
 [ 17  20  14  40  18  53  48]
 [  8  13   1  22  11  43 112]]
Classification Report of Decision T

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy of Random Forest after PCA and ICA is: 0.36394557823129253
Confusion Matrix of Random Forest is:
 [[102   3  13  11  44   1  36]
 [ 28  64  54  14  33   2  15]
 [ 36   4 154   7   3   1   5]
 [ 37  34  18  25  50   0  46]
 [ 79  13   3  13  75   0  27]
 [ 14  44  16  11  27   0  98]
 [  3  67   0   5  20   0 115]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.34      0.49      0.40       210
           2       0.28      0.30      0.29       210
           3       0.60      0.73      0.66       210
           4       0.29      0.12      0.17       210
           5       0.30      0.36      0.32       210
           6       0.00      0.00      0.00       210
           7       0.34      0.55      0.42       210

    accuracy                           0.36      1470
   macro avg       0.31      0.36      0.32      1470
weighted avg       0.31      0.36      0.32      1470

Random Forest with 3 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.5190476190476191
Confusion Matrix of Random Forest is:
 [[ 93   6   5  16  61  15  14]
 [  6 118  12  30  16  16  12]
 [  9  13 172   3   6   7   0]
 [ 16  25  11  55  56  10  37]
 [ 32   5   3  21 114   8  27]
 [ 11  21  11  25  20  43  79]
 [  3   4   2   8   5  20 168]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.55      0.44      0.49       210
           2       0.61      0.56      0.59       210
           3       0.80      0.82      0.81       210
           4       0.35      0.26      0.30       210
           5       0.41      0.54      0.47       210
           6       0.36      0.20      0.26       210
           7       0.50      0.80      0.61       210

    accuracy                           0.52      1470
   macro avg       0.51      0.52      0.50      1470
weighted avg       0.51      0.52      0.50      1470

Random Forest with 11 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.482312925170068
Confusion Matrix of Random Forest is:
 [[ 90   9   6  24  56  12  13]
 [ 11 107  16  26  16  21  13]
 [  8  10 172   7   6   7   0]
 [ 15  19  17  48  51  25  35]
 [ 38   7   5  25 102  12  21]
 [ 10  17  10  32  19  46  76]
 [  5   6   1  12   6  36 144]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.43      0.47       210
           2       0.61      0.51      0.56       210
           3       0.76      0.82      0.79       210
           4       0.28      0.23      0.25       210
           5       0.40      0.49      0.44       210
           6       0.29      0.22      0.25       210
           7       0.48      0.69      0.56       210

    accuracy                           0.48      1470
   macro avg       0.47      0.48      0.47      1470
weighted avg       0.47      0.48      0.47      1470

Random Forest with 19 max_depth
Accu

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//xlm_base_finetuned_vectorized_kabita_dataset.csv")

x_train,x_test,y_train,y_test = scale_pca_ica(x_df,labels_df['kabita_labels'],5)

# Logistic regression
tv_lr_model = LogisticRegression()
ml_training(tv_lr_model,x_train,x_test,y_train,y_test,"Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_train,x_test,y_train,y_test,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_train,x_test,y_train,y_test,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_train,x_test,y_train,y_test,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_train,x_test,y_train,y_test,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
tv_dt_model = DecisionTreeClassifier(random_state=3)
ml_training(tv_dt_model,x_train,x_test,y_train,y_test,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
tv_rf_model = RandomForestClassifier(random_state=3)
ml_training(tv_rf_model,x_train,x_test,y_train,y_test,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_train=mms_scale.fit_transform(x_train)
m_test=mms_scale.fit_transform(x_test)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_train,m_test,y_train,y_test,"Multinomial Naive Bayes")



Accuracy of Logistic Regression after PCA and ICA is: 0.35306122448979593
Confusion Matrix of Logistic Regression is:
 [[ 56  26  19  18  56   5  30]
 [ 19  66  57  23  26   1  18]
 [ 10  20 165   6   6   0   3]
 [ 24  30  22  32  47   2  53]
 [ 16  26  17  23  83   1  44]
 [ 17  41  13  18  43   9  69]
 [ 14  22   2  13  45   6 108]]
Classification Report of Logistic Regression is:
               precision    recall  f1-score   support

           1       0.36      0.27      0.31       210
           2       0.29      0.31      0.30       210
           3       0.56      0.79      0.65       210
           4       0.24      0.15      0.19       210
           5       0.27      0.40      0.32       210
           6       0.38      0.04      0.08       210
           7       0.33      0.51      0.40       210

    accuracy                           0.35      1470
   macro avg       0.35      0.35      0.32      1470
weighted avg       0.35      0.35      0.32      1470

KNN with 3 Neigh

Accuracy of SVM after PCA and ICA is: 0.291156462585034
Confusion Matrix of SVM is:
 [[ 58  31   0  25  73   0  23]
 [ 28  94   7  35  39   0   7]
 [ 17  97  66  13  16   0   1]
 [ 28  47   0  30  75   0  30]
 [ 15  31   0  25 108   0  31]
 [ 18  44   0  26  76   4  42]
 [ 17  22   0  18  82   3  68]]
Classification Report of SVM is:
               precision    recall  f1-score   support

           1       0.32      0.28      0.30       210
           2       0.26      0.45      0.33       210
           3       0.90      0.31      0.47       210
           4       0.17      0.14      0.16       210
           5       0.23      0.51      0.32       210
           6       0.57      0.02      0.04       210
           7       0.34      0.32      0.33       210

    accuracy                           0.29      1470
   macro avg       0.40      0.29      0.28      1470
weighted avg       0.40      0.29      0.28      1470

Working on SVM Kernal: poly
Accuracy of SVM after PCA and ICA is: 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Confusion Matrix of Decision Tree is:
 [[ 42  22   4  19  71  16  36]
 [ 14  99   9  19  34  16  19]
 [  1  36 146   9  12   2   4]
 [ 10  33   4  14  67  31  51]
 [ 15  23   3  20  94  10  45]
 [  8  36   5  16  46  30  69]
 [  3  18   1  19  49  23  97]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.45      0.20      0.28       210
           2       0.37      0.47      0.42       210
           3       0.85      0.70      0.76       210
           4       0.12      0.07      0.09       210
           5       0.25      0.45      0.32       210
           6       0.23      0.14      0.18       210
           7       0.30      0.46      0.37       210

    accuracy                           0.36      1470
   macro avg       0.37      0.36      0.34      1470
weighted avg       0.37      0.36      0.34      1470

Decision Tree with 8 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.35306122448979593
Con

Confusion Matrix of Decision Tree is:
 [[ 63  21   4  36  29  28  29]
 [ 23  96  19  18  20  18  16]
 [ 10  16 156  13   5   9   1]
 [ 35  26   7  41  28  37  36]
 [ 28  24  11  34  47  31  35]
 [ 27  15   9  29  29  55  46]
 [ 20  19   5  27  29  42  68]]
Classification Report of Decision Tree is:
               precision    recall  f1-score   support

           1       0.31      0.30      0.30       210
           2       0.44      0.46      0.45       210
           3       0.74      0.74      0.74       210
           4       0.21      0.20      0.20       210
           5       0.25      0.22      0.24       210
           6       0.25      0.26      0.26       210
           7       0.29      0.32      0.31       210

    accuracy                           0.36      1470
   macro avg       0.36      0.36      0.36      1470
weighted avg       0.36      0.36      0.36      1470

Decision Tree with 18 max_depth
Accuracy of Decision Tree after PCA and ICA is: 0.35578231292517004
Co

Accuracy of Random Forest after PCA and ICA is: 0.35714285714285715
Confusion Matrix of Random Forest is:
 [[ 40  19  16  50  48   9  28]
 [ 13  59  39  49  30   6  14]
 [  1  12 165  25   4   0   3]
 [ 10  17  17  62  43  19  42]
 [  5  11  10  65  76   4  39]
 [  4  26  11  45  39  26  59]
 [  4   6   1  41  46  15  97]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.52      0.19      0.28       210
           2       0.39      0.28      0.33       210
           3       0.64      0.79      0.70       210
           4       0.18      0.30      0.23       210
           5       0.27      0.36      0.31       210
           6       0.33      0.12      0.18       210
           7       0.34      0.46      0.39       210

    accuracy                           0.36      1470
   macro avg       0.38      0.36      0.35      1470
weighted avg       0.38      0.36      0.35      1470

Random Forest with 5 max_depth
Acc

Accuracy of Random Forest after PCA and ICA is: 0.41836734693877553
Confusion Matrix of Random Forest is:
 [[ 68  11   6  35  50  15  25]
 [  8 104  12  31  25  14  16]
 [  5  12 161  22   6   1   3]
 [ 26  15   5  51  49  31  33]
 [ 13  19   4  31  94  15  34]
 [ 10  21   5  29  36  43  66]
 [  4   9   1  25  48  29  94]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.51      0.32      0.40       210
           2       0.54      0.50      0.52       210
           3       0.83      0.77      0.80       210
           4       0.23      0.24      0.24       210
           5       0.31      0.45      0.36       210
           6       0.29      0.20      0.24       210
           7       0.35      0.45      0.39       210

    accuracy                           0.42      1470
   macro avg       0.44      0.42      0.42      1470
weighted avg       0.44      0.42      0.42      1470

Random Forest with 13 max_depth
Ac

Accuracy of Random Forest after PCA and ICA is: 0.40272108843537413
Confusion Matrix of Random Forest is:
 [[ 66  12   6  34  46  15  31]
 [ 13 105  13  26  22  12  19]
 [  3  13 162  18   8   3   3]
 [ 26  12   8  49  45  30  40]
 [ 24  23   2  30  77  21  33]
 [ 21  23   9  21  30  44  62]
 [ 13  11   1  26  38  32  89]]
Classification Report of Random Forest is:
               precision    recall  f1-score   support

           1       0.40      0.31      0.35       210
           2       0.53      0.50      0.51       210
           3       0.81      0.77      0.79       210
           4       0.24      0.23      0.24       210
           5       0.29      0.37      0.32       210
           6       0.28      0.21      0.24       210
           7       0.32      0.42      0.37       210

    accuracy                           0.40      1470
   macro avg       0.41      0.40      0.40      1470
weighted avg       0.41      0.40      0.40      1470

Accuracy of Random Forest after PC