In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.model_selection import KFold
    from sklearn.model_selection import cross_val_score
except Exception as e:
    print("Error is due to",e)

In [2]:
# getting path of file
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Nisha//Input//Nisha_dataset_labels.csv")
#converting datframe to numpy array
labels = labels_df.to_numpy().ravel()

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_fold, y_fold, model_name):
    kfold = KFold(n_splits=10, random_state=7, shuffle=True)
    results = cross_val_score(ml_model, x_fold, y_fold, cv=kfold)
    print("Accuracies for K-Fold for "+model_name+" :", results)
    print("Mean Accuracy of K-Fold for "+model_name+" :", results.mean()*100.0)
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tfidf_500_vectors.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.71836735 0.74285714 0.73673469 0.70408163 0.71632653 0.72653061
 0.74897959 0.74081633 0.74489796 0.70204082]
Mean Accuracy of K-Fold for Logistic Regression : 72.81632653061226
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.54081633 0.52244898 0.54489796 0.5122449  0.53061224 0.53673469
 0.53877551 0.52653061 0.53673469 0.52040816]
Mean Accuracy of K-Fold for KNN Model : 53.102040816326536
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.56530612 0.53877551 0.52040816 0.52857143 0.5244898  0.50408163
 0.56938776 0.5244898  0.56122449 0.49183673]
Mean Accuracy of K-Fold for KNN Model : 53.28571428571428
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.56326531 0.52857143 0.53469388 0.52244898 0.53265306 0.5244898
 0.55918367 0.53265306 0.55714286 0.49795918]
Mean Accuracy of K-Fold for KNN Model : 53.53061224489796
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.54693878 0.52040816

Accuracies for K-Fold for Decision Tree : [0.52244898 0.51428571 0.55510204 0.51632653 0.53877551 0.52040816
 0.53877551 0.53265306 0.56530612 0.5244898 ]
Mean Accuracy of K-Fold for Decision Tree : 53.28571428571428
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.53469388 0.52244898 0.56530612 0.5244898  0.54693878 0.52244898
 0.55102041 0.55306122 0.5755102  0.53877551]
Mean Accuracy of K-Fold for Decision Tree : 54.3469387755102
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.53469388 0.52040816 0.55918367 0.53061224 0.54693878 0.53061224
 0.56938776 0.55510204 0.57346939 0.53061224]
Mean Accuracy of K-Fold for Decision Tree : 54.51020408163265
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.54489796 0.52857143 0.56326531 0.52244898 0.56530612 0.53265306
 0.56326531 0.55714286 0.5755102  0.53673469]
Mean Accuracy of K-Fold for Decision Tree : 54.897959183673464
Decision Tree with 19 max_depth
Accu

Accuracies for K-Fold for Multinomial Naive Bayes : [0.67142857 0.69591837 0.66938776 0.65918367 0.66122449 0.67755102
 0.69795918 0.70612245 0.6755102  0.66530612]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 67.79591836734694


In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//cv_500_vectors.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.73061224 0.73469388 0.74489796 0.68979592 0.71428571 0.73265306
 0.75306122 0.7244898  0.73469388 0.70816327]
Mean Accuracy of K-Fold for Logistic Regression : 72.6734693877551
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.51836735 0.51020408 0.52857143 0.51428571 0.49183673 0.52857143
 0.52040816 0.54081633 0.53265306 0.5       ]
Mean Accuracy of K-Fold for KNN Model : 51.85714285714285
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.55714286 0.54693878 0.52040816 0.53673469 0.54489796 0.52244898
 0.52244898 0.54693878 0.56938776 0.5122449 ]
Mean Accuracy of K-Fold for KNN Model : 53.79591836734694
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.55102041 0.54693878 0.53673469 0.53469388 0.51020408 0.51428571
 0.51020408 0.54489796 0.5755102  0.48571429]
Mean Accuracy of K-Fold for KNN Model : 53.102040816326536
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.54693878 0.55306122

Accuracies for K-Fold for Decision Tree : [0.56734694 0.55306122 0.59591837 0.54693878 0.56938776 0.55306122
 0.56326531 0.56734694 0.55510204 0.54693878]
Mean Accuracy of K-Fold for Decision Tree : 56.18367346938775
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.56734694 0.54489796 0.60204082 0.54693878 0.56530612 0.56122449
 0.56122449 0.57959184 0.54489796 0.54897959]
Mean Accuracy of K-Fold for Decision Tree : 56.224489795918366
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.56122449 0.55918367 0.59591837 0.54693878 0.56530612 0.56938776
 0.5755102  0.57755102 0.55306122 0.54693878]
Mean Accuracy of K-Fold for Decision Tree : 56.51020408163265
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.5755102  0.55918367 0.59183673 0.55102041 0.56734694 0.56938776
 0.57755102 0.58367347 0.56938776 0.55714286]
Mean Accuracy of K-Fold for Decision Tree : 57.020408163265316
Decision Tree with 19 max_depth
Ac

Accuracies for K-Fold for Multinomial Naive Bayes : [0.67959184 0.69387755 0.66734694 0.64489796 0.63265306 0.68571429
 0.67346939 0.69591837 0.65918367 0.65918367]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 66.91836734693878


In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//BagOfWords//tf_500_vectors.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.69795918 0.7        0.7244898  0.67755102 0.69795918 0.73061224
 0.74285714 0.72857143 0.7244898  0.68163265]
Mean Accuracy of K-Fold for Logistic Regression : 71.06122448979593
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.56530612 0.52244898 0.54285714 0.5244898  0.52857143 0.53265306
 0.54285714 0.56938776 0.52857143 0.53061224]
Mean Accuracy of K-Fold for KNN Model : 53.87755102040816
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.58163265 0.56122449 0.5755102  0.52040816 0.54489796 0.54693878
 0.56530612 0.56734694 0.5755102  0.55102041]
Mean Accuracy of K-Fold for KNN Model : 55.89795918367347
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.57346939 0.56938776 0.56938776 0.54489796 0.54285714 0.5244898
 0.55510204 0.58163265 0.57755102 0.54081633]
Mean Accuracy of K-Fold for KNN Model : 55.79591836734693
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.52857143 0.56734694 

Accuracies for K-Fold for Decision Tree : [0.50408163 0.54081633 0.55510204 0.5244898  0.54489796 0.53061224
 0.54693878 0.55510204 0.55306122 0.54081633]
Mean Accuracy of K-Fold for Decision Tree : 53.95918367346939
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.51428571 0.54693878 0.55510204 0.52653061 0.55714286 0.52653061
 0.54693878 0.56326531 0.55510204 0.55102041]
Mean Accuracy of K-Fold for Decision Tree : 54.42857142857142
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.53265306 0.55714286 0.56122449 0.53265306 0.56734694 0.54285714
 0.57142857 0.56938776 0.58163265 0.55510204]
Mean Accuracy of K-Fold for Decision Tree : 55.714285714285715
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.52857143 0.55918367 0.57142857 0.52653061 0.56530612 0.54489796
 0.56530612 0.56530612 0.5755102  0.55714286]
Mean Accuracy of K-Fold for Decision Tree : 55.59183673469388
Decision Tree with 19 max_depth
Acc

Accuracies for K-Fold for Multinomial Naive Bayes : [0.69183673 0.70612245 0.67959184 0.66938776 0.66122449 0.68571429
 0.69591837 0.72244898 0.68367347 0.65714286]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 68.53061224489795


### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.74081633 0.72040816 0.77142857 0.74693878 0.75714286 0.76326531
 0.72653061 0.7122449  0.74285714 0.73061224]
Mean Accuracy of K-Fold for Logistic Regression : 74.12244897959185
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.65306122 0.62040816 0.65714286 0.65102041 0.63469388 0.66530612
 0.64489796 0.64693878 0.64693878 0.64693878]
Mean Accuracy of K-Fold for KNN Model : 64.6734693877551
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.65102041 0.61632653 0.66326531 0.65510204 0.63061224 0.67755102
 0.66122449 0.65918367 0.67142857 0.66530612]
Mean Accuracy of K-Fold for KNN Model : 65.51020408163264
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.65510204 0.65306122 0.66326531 0.63469388 0.63061224 0.66326531
 0.67142857 0.65510204 0.66530612 0.67755102]
Mean Accuracy of K-Fold for KNN Model : 65.6938775510204
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.64897959 0.65102041 0

Accuracies for K-Fold for Decision Tree : [0.56938776 0.53265306 0.57142857 0.56938776 0.62244898 0.56530612
 0.56326531 0.53265306 0.55306122 0.54081633]
Mean Accuracy of K-Fold for Decision Tree : 56.20408163265307
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.57755102 0.54489796 0.5877551  0.56734694 0.61632653 0.54081633
 0.55510204 0.53265306 0.54081633 0.53673469]
Mean Accuracy of K-Fold for Decision Tree : 56.00000000000001
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.5755102  0.54285714 0.56734694 0.5755102  0.61020408 0.55306122
 0.54693878 0.54285714 0.52244898 0.53469388]
Mean Accuracy of K-Fold for Decision Tree : 55.714285714285715
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.57346939 0.51632653 0.56734694 0.56938776 0.6122449  0.54693878
 0.54081633 0.53673469 0.53061224 0.54285714]
Mean Accuracy of K-Fold for Decision Tree : 55.367346938775505
Decision Tree with 19 max_depth
Ac

Accuracies for K-Fold for Multinomial Naive Bayes : [0.529 0.516 0.52  0.553 0.565 0.52  0.545 0.529 0.484 0.527]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 52.877551020408156


In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_gkb.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.549 0.547 0.567 0.557 0.561 0.524 0.555 0.578 0.555 0.531]
Mean Accuracy of K-Fold for Logistic Regression : 55.24489795918367
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.461 0.465 0.457 0.482 0.486 0.422 0.465 0.451 0.455 0.427]
Mean Accuracy of K-Fold for KNN Model : 45.714285714285715
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.443 0.48  0.482 0.5   0.48  0.414 0.467 0.459 0.482 0.443]
Mean Accuracy of K-Fold for KNN Model : 46.48979591836735
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.455 0.48  0.459 0.478 0.486 0.422 0.467 0.463 0.482 0.457]
Mean Accuracy of K-Fold for KNN Model : 46.48979591836735
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.467 0.465 0.467 0.465 0.465 0.435 0.449 0.476 0.48  0.449]
Mean Accuracy of K-Fold for KNN Model : 46.183673469387756
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.459 0.445 0.476 0.473 0.48  0.451 0.459 0.

Accuracies for K-Fold for Decision Tree : [0.471 0.469 0.437 0.455 0.48  0.398 0.471 0.494 0.473 0.439]
Mean Accuracy of K-Fold for Decision Tree : 45.87755102040816
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.353 0.38  0.32  0.322 0.333 0.316 0.349 0.369 0.337 0.335]
Mean Accuracy of K-Fold for Random Forest : 34.14285714285714
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.376 0.404 0.41  0.369 0.361 0.316 0.376 0.39  0.367 0.367]
Mean Accuracy of K-Fold for Random Forest : 37.36734693877551
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.416 0.433 0.453 0.431 0.398 0.369 0.42  0.435 0.406 0.416]
Mean Accuracy of K-Fold for Random Forest : 41.775510204081634
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.447 0.461 0.484 0.463 0.449 0.437 0.453 0.451 0.471 0.453]
Mean Accuracy of K-Fold for Random Forest : 45.6938775510204
Random Forest with 5 max_depth
Accuracies for 

In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_ndisbert.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.718 0.731 0.724 0.751 0.753 0.718 0.751 0.733 0.72  0.741]
Mean Accuracy of K-Fold for Logistic Regression : 73.40816326530611
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.622 0.651 0.6   0.643 0.614 0.578 0.608 0.627 0.61  0.633]
Mean Accuracy of K-Fold for KNN Model : 61.857142857142854
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.629 0.647 0.637 0.692 0.631 0.586 0.643 0.649 0.633 0.633]
Mean Accuracy of K-Fold for KNN Model : 63.775510204081634
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.606 0.68  0.618 0.684 0.643 0.602 0.639 0.639 0.622 0.618]
Mean Accuracy of K-Fold for KNN Model : 63.51020408163265
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.631 0.669 0.633 0.688 0.645 0.61  0.645 0.643 0.622 0.624]
Mean Accuracy of K-Fold for KNN Model : 64.10204081632654
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.631 0.682 0.614 0.694 0.639 0.61  0.641 0.

Accuracies for K-Fold for Decision Tree : [0.51  0.529 0.541 0.549 0.506 0.488 0.545 0.537 0.549 0.476]
Mean Accuracy of K-Fold for Decision Tree : 52.28571428571429
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.312 0.316 0.273 0.263 0.284 0.343 0.3   0.284 0.222 0.28 ]
Mean Accuracy of K-Fold for Random Forest : 28.775510204081634
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.486 0.473 0.429 0.478 0.482 0.406 0.469 0.473 0.416 0.445]
Mean Accuracy of K-Fold for Random Forest : 45.57142857142857
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.58  0.573 0.529 0.58  0.58  0.52  0.551 0.578 0.531 0.551]
Mean Accuracy of K-Fold for Random Forest : 55.714285714285715
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.627 0.6   0.569 0.604 0.598 0.555 0.586 0.594 0.586 0.592]
Mean Accuracy of K-Fold for Random Forest : 59.10204081632653
Random Forest with 5 max_depth
Accuracies fo

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//bert_vectorized_Nisha_dataset_vbert.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.757 0.751 0.747 0.78  0.79  0.771 0.767 0.755 0.759 0.737]
Mean Accuracy of K-Fold for Logistic Regression : 76.14285714285714
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.633 0.667 0.659 0.647 0.645 0.637 0.643 0.676 0.637 0.602]
Mean Accuracy of K-Fold for KNN Model : 64.44897959183673
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.643 0.665 0.676 0.651 0.657 0.647 0.647 0.663 0.671 0.62 ]
Mean Accuracy of K-Fold for KNN Model : 65.40816326530613
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.639 0.676 0.657 0.661 0.647 0.663 0.669 0.682 0.676 0.614]
Mean Accuracy of K-Fold for KNN Model : 65.83673469387755
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.655 0.682 0.659 0.669 0.665 0.651 0.649 0.68  0.667 0.629]
Mean Accuracy of K-Fold for KNN Model : 66.06122448979592
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.647 0.676 0.663 0.673 0.653 0.657 0.661 0.68

Accuracies for K-Fold for Decision Tree : [0.484 0.547 0.527 0.553 0.508 0.504 0.496 0.533 0.543 0.48 ]
Mean Accuracy of K-Fold for Decision Tree : 51.73469387755103
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.414 0.439 0.378 0.41  0.414 0.353 0.441 0.412 0.304 0.384]
Mean Accuracy of K-Fold for Random Forest : 39.48979591836735
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.504 0.51  0.506 0.5   0.529 0.447 0.547 0.52  0.461 0.482]
Mean Accuracy of K-Fold for Random Forest : 50.061224489795904
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.545 0.543 0.533 0.553 0.567 0.512 0.573 0.586 0.506 0.541]
Mean Accuracy of K-Fold for Random Forest : 54.59183673469388
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.588 0.586 0.576 0.582 0.606 0.541 0.608 0.594 0.557 0.571]
Mean Accuracy of K-Fold for Random Forest : 58.08163265306122
Random Forest with 5 max_depth
Accuracies for

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//gpt_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.739 0.735 0.739 0.724 0.763 0.753 0.745 0.749 0.72  0.714]
Mean Accuracy of K-Fold for Logistic Regression : 73.81632653061226
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.598 0.643 0.614 0.614 0.612 0.62  0.612 0.639 0.622 0.618]
Mean Accuracy of K-Fold for KNN Model : 61.93877551020408
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.596 0.647 0.639 0.606 0.618 0.608 0.627 0.633 0.606 0.622]
Mean Accuracy of K-Fold for KNN Model : 62.02040816326531
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.6   0.624 0.645 0.624 0.627 0.616 0.624 0.643 0.596 0.624]
Mean Accuracy of K-Fold for KNN Model : 62.244897959183675
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.604 0.641 0.624 0.62  0.598 0.624 0.633 0.647 0.602 0.624]
Mean Accuracy of K-Fold for KNN Model : 62.183673469387756
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.629 0.667 0.637 0.631 0.608 0.637 0.635 0.

Accuracies for K-Fold for Decision Tree : [0.5   0.535 0.484 0.498 0.478 0.473 0.506 0.488 0.476 0.492]
Mean Accuracy of K-Fold for Decision Tree : 49.28571428571429
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.429 0.427 0.392 0.394 0.453 0.369 0.435 0.439 0.343 0.402]
Mean Accuracy of K-Fold for Random Forest : 40.816326530612244
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.52  0.524 0.514 0.494 0.533 0.461 0.531 0.531 0.461 0.494]
Mean Accuracy of K-Fold for Random Forest : 50.632653061224495
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.535 0.584 0.549 0.533 0.565 0.516 0.541 0.588 0.508 0.522]
Mean Accuracy of K-Fold for Random Forest : 54.40816326530612
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.569 0.61  0.573 0.567 0.596 0.555 0.598 0.614 0.549 0.553]
Mean Accuracy of K-Fold for Random Forest : 57.857142857142854
Random Forest with 5 max_depth
Accuracies f

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//SentenceTransformers//xlm_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.749 0.765 0.763 0.745 0.765 0.731 0.741 0.747 0.743 0.735]
Mean Accuracy of K-Fold for Logistic Regression : 74.83673469387755
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.645 0.637 0.612 0.647 0.667 0.633 0.631 0.671 0.665 0.647]
Mean Accuracy of K-Fold for KNN Model : 64.55102040816327
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.637 0.678 0.618 0.663 0.68  0.655 0.649 0.676 0.669 0.641]
Mean Accuracy of K-Fold for KNN Model : 65.6530612244898
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.659 0.68  0.655 0.678 0.686 0.665 0.657 0.688 0.673 0.665]
Mean Accuracy of K-Fold for KNN Model : 67.06122448979592
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.667 0.686 0.631 0.657 0.686 0.663 0.639 0.69  0.676 0.673]
Mean Accuracy of K-Fold for KNN Model : 66.6734693877551
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.655 0.667 0.645 0.653 0.686 0.657 0.641 0.702 

Accuracies for K-Fold for Decision Tree : [0.563 0.569 0.559 0.573 0.59  0.512 0.573 0.576 0.59  0.573]
Mean Accuracy of K-Fold for Decision Tree : 56.79591836734694
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.476 0.494 0.447 0.459 0.492 0.427 0.486 0.494 0.429 0.476]
Mean Accuracy of K-Fold for Random Forest : 46.775510204081634
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.531 0.551 0.504 0.52  0.535 0.488 0.537 0.533 0.486 0.518]
Mean Accuracy of K-Fold for Random Forest : 52.020408163265316
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.58  0.608 0.551 0.547 0.588 0.527 0.561 0.576 0.531 0.567]
Mean Accuracy of K-Fold for Random Forest : 56.3469387755102
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.588 0.635 0.596 0.578 0.618 0.551 0.588 0.61  0.565 0.604]
Mean Accuracy of K-Fold for Random Forest : 59.3265306122449
Random Forest with 5 max_depth
Accuracies for 

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//bert_base_finetuned_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.692 0.651 0.702 0.682 0.667 0.692 0.643 0.688 0.657 0.653]
Mean Accuracy of K-Fold for Logistic Regression : 67.26530612244898
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.522 0.537 0.567 0.539 0.545 0.49  0.502 0.504 0.555 0.516]
Mean Accuracy of K-Fold for KNN Model : 52.775510204081634
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.543 0.555 0.578 0.529 0.545 0.5   0.498 0.549 0.553 0.522]
Mean Accuracy of K-Fold for KNN Model : 53.714285714285715
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.539 0.557 0.582 0.533 0.559 0.514 0.484 0.541 0.551 0.51 ]
Mean Accuracy of K-Fold for KNN Model : 53.69387755102041
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.547 0.555 0.578 0.533 0.557 0.533 0.486 0.531 0.555 0.512]
Mean Accuracy of K-Fold for KNN Model : 53.85714285714285
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.531 0.561 0.573 0.529 0.569 0.522 0.504 0.

Accuracies for K-Fold for Decision Tree : [0.39  0.4   0.412 0.404 0.41  0.431 0.429 0.437 0.431 0.394]
Mean Accuracy of K-Fold for Decision Tree : 41.367346938775505
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.308 0.351 0.263 0.296 0.276 0.21  0.282 0.292 0.237 0.322]
Mean Accuracy of K-Fold for Random Forest : 28.367346938775505
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.386 0.424 0.373 0.384 0.424 0.376 0.416 0.429 0.349 0.406]
Mean Accuracy of K-Fold for Random Forest : 39.6734693877551
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.396 0.473 0.418 0.414 0.457 0.412 0.467 0.459 0.402 0.433]
Mean Accuracy of K-Fold for Random Forest : 43.3265306122449
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.431 0.465 0.433 0.461 0.471 0.457 0.461 0.488 0.429 0.451]
Mean Accuracy of K-Fold for Random Forest : 45.46938775510204
Random Forest with 5 max_depth
Accuracies for 

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.663 0.635 0.68  0.645 0.631 0.665 0.647 0.659 0.622 0.631]
Mean Accuracy of K-Fold for Logistic Regression : 64.77551020408163
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.545 0.598 0.545 0.537 0.549 0.567 0.573 0.551 0.543 0.582]
Mean Accuracy of K-Fold for KNN Model : 55.897959183673464
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.563 0.594 0.578 0.547 0.578 0.573 0.598 0.573 0.557 0.571]
Mean Accuracy of K-Fold for KNN Model : 57.326530612244895
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.563 0.614 0.584 0.553 0.582 0.578 0.6   0.588 0.555 0.58 ]
Mean Accuracy of K-Fold for KNN Model : 57.95918367346938
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.565 0.614 0.604 0.547 0.598 0.571 0.61  0.576 0.545 0.592]
Mean Accuracy of K-Fold for KNN Model : 58.224489795918366
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.58  0.604 0.594 0.547 0.59  0.571 0.604 0

Accuracies for K-Fold for Decision Tree : [0.467 0.496 0.437 0.459 0.457 0.447 0.42  0.494 0.459 0.435]
Mean Accuracy of K-Fold for Decision Tree : 45.714285714285715
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.353 0.396 0.355 0.369 0.388 0.329 0.371 0.373 0.331 0.357]
Mean Accuracy of K-Fold for Random Forest : 36.224489795918366
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.416 0.441 0.418 0.422 0.443 0.388 0.447 0.418 0.388 0.431]
Mean Accuracy of K-Fold for Random Forest : 42.12244897959184
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.49  0.5   0.482 0.498 0.52  0.467 0.52  0.508 0.459 0.504]
Mean Accuracy of K-Fold for Random Forest : 49.489795918367356
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.529 0.545 0.543 0.529 0.549 0.535 0.531 0.551 0.52  0.535]
Mean Accuracy of K-Fold for Random Forest : 53.6530612244898
Random Forest with 5 max_depth
Accuracies fo

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_base_finetuned_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.708 0.714 0.735 0.731 0.724 0.729 0.749 0.769 0.731 0.731]
Mean Accuracy of K-Fold for Logistic Regression : 73.20408163265306
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.443 0.437 0.445 0.459 0.443 0.408 0.41  0.453 0.441 0.455]
Mean Accuracy of K-Fold for KNN Model : 43.938775510204074
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.433 0.433 0.467 0.473 0.467 0.445 0.433 0.469 0.469 0.457]
Mean Accuracy of K-Fold for KNN Model : 45.46938775510204
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.453 0.447 0.467 0.463 0.457 0.441 0.451 0.465 0.469 0.465]
Mean Accuracy of K-Fold for KNN Model : 45.795918367346935
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.463 0.455 0.459 0.484 0.478 0.447 0.445 0.469 0.453 0.467]
Mean Accuracy of K-Fold for KNN Model : 46.20408163265306
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.451 0.48  0.478 0.467 0.486 0.437 0.473 0.

Accuracies for K-Fold for Decision Tree : [0.478 0.42  0.484 0.471 0.473 0.443 0.476 0.486 0.478 0.469]
Mean Accuracy of K-Fold for Decision Tree : 46.775510204081634
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.359 0.386 0.365 0.351 0.408 0.339 0.396 0.378 0.312 0.355]
Mean Accuracy of K-Fold for Random Forest : 36.48979591836735
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.42  0.449 0.42  0.412 0.478 0.398 0.461 0.459 0.378 0.435]
Mean Accuracy of K-Fold for Random Forest : 43.10204081632653
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.488 0.543 0.506 0.476 0.533 0.49  0.533 0.529 0.467 0.508]
Mean Accuracy of K-Fold for Random Forest : 50.7142857142857
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.508 0.559 0.512 0.518 0.582 0.531 0.541 0.563 0.504 0.522]
Mean Accuracy of K-Fold for Random Forest : 53.40816326530613
Random Forest with 5 max_depth
Accuracies for 

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.751 0.755 0.753 0.751 0.757 0.741 0.745 0.782 0.731 0.755]
Mean Accuracy of K-Fold for Logistic Regression : 75.20408163265306
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.447 0.439 0.447 0.449 0.469 0.435 0.469 0.447 0.471 0.449]
Mean Accuracy of K-Fold for KNN Model : 45.224489795918366
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.459 0.42  0.476 0.461 0.455 0.48  0.467 0.457 0.471 0.467]
Mean Accuracy of K-Fold for KNN Model : 46.14285714285714
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.459 0.422 0.48  0.453 0.465 0.461 0.471 0.451 0.467 0.455]
Mean Accuracy of K-Fold for KNN Model : 45.857142857142854
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.451 0.447 0.473 0.459 0.459 0.433 0.459 0.455 0.471 0.443]
Mean Accuracy of K-Fold for KNN Model : 45.51020408163266
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.457 0.449 0.482 0.449 0.48  0.443 0.457 0.

Accuracies for K-Fold for Decision Tree : [0.447 0.488 0.443 0.539 0.478 0.465 0.498 0.486 0.498 0.473]
Mean Accuracy of K-Fold for Decision Tree : 48.142857142857146
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.384 0.416 0.308 0.386 0.408 0.331 0.322 0.386 0.241 0.38 ]
Mean Accuracy of K-Fold for Random Forest : 35.61224489795919
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.437 0.49  0.445 0.461 0.48  0.427 0.496 0.461 0.449 0.461]
Mean Accuracy of K-Fold for Random Forest : 46.06122448979592
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.492 0.543 0.484 0.5   0.547 0.496 0.522 0.531 0.478 0.527]
Mean Accuracy of K-Fold for Random Forest : 51.18367346938775
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.553 0.576 0.527 0.565 0.592 0.533 0.567 0.561 0.504 0.555]
Mean Accuracy of K-Fold for Random Forest : 55.32653061224491
Random Forest with 5 max_depth
Accuracies for

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Nisha//FineTunedTransformers//xlm_base_finetuned_vectorized_Nisha_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.512 0.516 0.508 0.531 0.543 0.504 0.482 0.524 0.531 0.502]
Mean Accuracy of K-Fold for Logistic Regression : 51.530612244897966
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.4   0.418 0.445 0.427 0.437 0.371 0.408 0.453 0.404 0.431]
Mean Accuracy of K-Fold for KNN Model : 41.938775510204074
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.427 0.422 0.449 0.435 0.449 0.404 0.418 0.449 0.433 0.447]
Mean Accuracy of K-Fold for KNN Model : 43.32653061224489
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.443 0.429 0.463 0.422 0.447 0.382 0.416 0.457 0.414 0.437]
Mean Accuracy of K-Fold for KNN Model : 43.10204081632653
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.439 0.435 0.447 0.457 0.451 0.4   0.427 0.463 0.435 0.449]
Mean Accuracy of K-Fold for KNN Model : 44.0204081632653
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.449 0.443 0.453 0.449 0.451 0.412 0.424 0.4

Accuracies for K-Fold for Decision Tree : [0.382 0.349 0.347 0.367 0.351 0.32  0.355 0.357 0.339 0.335]
Mean Accuracy of K-Fold for Decision Tree : 35.02040816326531
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.243 0.259 0.267 0.257 0.286 0.21  0.273 0.29  0.227 0.282]
Mean Accuracy of K-Fold for Random Forest : 25.93877551020408
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.347 0.371 0.327 0.333 0.359 0.324 0.363 0.337 0.316 0.343]
Mean Accuracy of K-Fold for Random Forest : 34.20408163265306
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.371 0.378 0.361 0.384 0.4   0.353 0.408 0.392 0.343 0.369]
Mean Accuracy of K-Fold for Random Forest : 37.59183673469388
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.384 0.402 0.347 0.394 0.433 0.388 0.414 0.41  0.365 0.392]
Mean Accuracy of K-Fold for Random Forest : 39.28571428571429
Random Forest with 5 max_depth
Accuracies for 