In [1]:
try:
    import pandas as pd
    import numpy as np
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.naive_bayes import BernoulliNB
    from sklearn import svm
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.model_selection import KFold
    from sklearn.model_selection import cross_val_score
except Exception as e:
    print("Error is due to",e)

In [2]:
# getting path of file
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Kabita//Input//kabita_dataset_labels.csv")
#converting datframe to numpy array
labels = labels_df.to_numpy().ravel()

In [3]:
# Function for Modelling and extracting Metrics
def ml_training(ml_model, x_fold, y_fold, model_name):
    kfold = KFold(n_splits=10, random_state=7, shuffle=True)
    results = cross_val_score(ml_model, x_fold, y_fold, cv=kfold)
    print("Accuracies for K-Fold for "+model_name+" :", results)
    print("Mean Accuracy of K-Fold for "+model_name+" :", results.mean()*100.0)
    print(70*"=")

### Bag of words Models

In [4]:
# TFIDF vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tfidf_500_vectors.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.76122449 0.79387755 0.77142857 0.75306122 0.74693878 0.77142857
 0.77346939 0.76530612 0.75918367 0.75918367]
Mean Accuracy of K-Fold for Logistic Regression : 76.55102040816327
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.58571429 0.62244898 0.60816327 0.6        0.59183673 0.62040816
 0.60204082 0.58979592 0.62653061 0.58163265]
Mean Accuracy of K-Fold for KNN Model : 60.285714285714285
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.54285714 0.59795918 0.56326531 0.58367347 0.58979592 0.55918367
 0.56734694 0.56122449 0.63061224 0.56326531]
Mean Accuracy of K-Fold for KNN Model : 57.59183673469387
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.55510204 0.59591837 0.55714286 0.57346939 0.56938776 0.57142857
 0.55918367 0.56122449 0.61428571 0.55102041]
Mean Accuracy of K-Fold for KNN Model : 57.08163265306122
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.53265306 0.5755102

Accuracies for K-Fold for Decision Tree : [0.6122449  0.56938776 0.58367347 0.55714286 0.57346939 0.53673469
 0.53265306 0.54489796 0.58571429 0.56122449]
Mean Accuracy of K-Fold for Decision Tree : 56.57142857142857
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.60816327 0.5755102  0.58571429 0.56122449 0.57755102 0.53877551
 0.54897959 0.54693878 0.59591837 0.57346939]
Mean Accuracy of K-Fold for Decision Tree : 57.12244897959183
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.61428571 0.58367347 0.59387755 0.55714286 0.57755102 0.54285714
 0.54693878 0.60816327 0.59591837 0.58163265]
Mean Accuracy of K-Fold for Decision Tree : 58.0204081632653
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.63061224 0.60408163 0.59795918 0.56938776 0.58979592 0.56122449
 0.55102041 0.6122449  0.60408163 0.58367347]
Mean Accuracy of K-Fold for Decision Tree : 59.04081632653061
Decision Tree with 19 max_depth
Accur

Accuracies for K-Fold for Multinomial Naive Bayes : [0.71836735 0.77959184 0.73265306 0.68571429 0.71632653 0.7122449
 0.71020408 0.70408163 0.72857143 0.68979592]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 71.77551020408163


In [5]:
# Count Vectorizer vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//cv_500_vectors.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.75102041 0.78979592 0.78163265 0.75306122 0.75714286 0.7755102
 0.78979592 0.75714286 0.76122449 0.76326531]
Mean Accuracy of K-Fold for Logistic Regression : 76.79591836734694
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.59183673 0.63877551 0.62040816 0.63265306 0.61428571 0.63877551
 0.61020408 0.60612245 0.64489796 0.62653061]
Mean Accuracy of K-Fold for KNN Model : 62.24489795918367
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.57959184 0.6244898  0.56530612 0.58367347 0.60204082 0.60204082
 0.60204082 0.60408163 0.64693878 0.5877551 ]
Mean Accuracy of K-Fold for KNN Model : 59.97959183673469
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.56530612 0.65306122 0.59183673 0.59387755 0.6244898  0.6122449
 0.60408163 0.61428571 0.63877551 0.58367347]
Mean Accuracy of K-Fold for KNN Model : 60.81632653061225
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.54489796 0.62857143 0

Accuracies for K-Fold for Decision Tree : [0.59795918 0.58367347 0.57142857 0.55306122 0.58367347 0.5755102
 0.57142857 0.61428571 0.61632653 0.59591837]
Mean Accuracy of K-Fold for Decision Tree : 58.63265306122448
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.6        0.6        0.57755102 0.55918367 0.59183673 0.57755102
 0.56938776 0.61836735 0.6244898  0.6       ]
Mean Accuracy of K-Fold for Decision Tree : 59.183673469387756
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.60204082 0.6122449  0.57346939 0.56530612 0.59183673 0.59183673
 0.58367347 0.61836735 0.63061224 0.60612245]
Mean Accuracy of K-Fold for Decision Tree : 59.75510204081633
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.60612245 0.62244898 0.5877551  0.57142857 0.60204082 0.58979592
 0.58163265 0.63469388 0.63265306 0.61836735]
Mean Accuracy of K-Fold for Decision Tree : 60.46938775510205
Decision Tree with 19 max_depth
Accu

Accuracies for K-Fold for Multinomial Naive Bayes : [0.71428571 0.79183673 0.7122449  0.68571429 0.7122449  0.70816327
 0.72857143 0.72653061 0.70816327 0.7       ]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 71.87755102040818


In [6]:
# Term Frequency vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//BagOfWords//tf_500_vectors.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,x_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.75510204 0.78163265 0.76122449 0.74081633 0.75102041 0.76938776
 0.78571429 0.75306122 0.76326531 0.75306122]
Mean Accuracy of K-Fold for Logistic Regression : 76.14285714285714
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.6122449  0.67755102 0.64285714 0.64081633 0.62244898 0.67142857
 0.62244898 0.62244898 0.65918367 0.65102041]
Mean Accuracy of K-Fold for KNN Model : 64.22448979591836
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.5877551  0.65306122 0.61632653 0.62040816 0.62244898 0.61632653
 0.59591837 0.57959184 0.65510204 0.57346939]
Mean Accuracy of K-Fold for KNN Model : 61.20408163265305
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.57142857 0.65306122 0.63265306 0.61836735 0.6244898  0.59795918
 0.59591837 0.58367347 0.65918367 0.56530612]
Mean Accuracy of K-Fold for KNN Model : 61.020408163265294
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.56938776 0.6428571

Accuracies for K-Fold for Decision Tree : [0.58979592 0.57142857 0.57755102 0.54081633 0.55102041 0.54081633
 0.53877551 0.57346939 0.60612245 0.56326531]
Mean Accuracy of K-Fold for Decision Tree : 56.53061224489796
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.59183673 0.57142857 0.57959184 0.53673469 0.56326531 0.54897959
 0.54897959 0.57959184 0.59591837 0.56326531]
Mean Accuracy of K-Fold for Decision Tree : 56.79591836734693
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.60204082 0.57346939 0.58367347 0.55306122 0.56326531 0.55510204
 0.54489796 0.5877551  0.60816327 0.58979592]
Mean Accuracy of K-Fold for Decision Tree : 57.61224489795919
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.60408163 0.58367347 0.58367347 0.55306122 0.57142857 0.54693878
 0.56122449 0.58979592 0.60612245 0.58163265]
Mean Accuracy of K-Fold for Decision Tree : 57.81632653061226
Decision Tree with 19 max_depth
Accu

Accuracies for K-Fold for Multinomial Naive Bayes : [0.72857143 0.78979592 0.7244898  0.69591837 0.71632653 0.7244898
 0.73265306 0.7244898  0.72857143 0.70612245]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 72.71428571428571


### Sentence Transformer Models

In [7]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.7755102  0.79387755 0.77142857 0.79591837 0.78163265 0.80816327
 0.77755102 0.78163265 0.78163265 0.8       ]
Mean Accuracy of K-Fold for Logistic Regression : 78.67346938775512
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.70408163 0.72040816 0.66326531 0.67346939 0.66530612 0.72040816
 0.68367347 0.66326531 0.70408163 0.6755102 ]
Mean Accuracy of K-Fold for KNN Model : 68.73469387755101
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.69387755 0.72040816 0.68979592 0.69795918 0.7122449  0.7244898
 0.68163265 0.66122449 0.7        0.68979592]
Mean Accuracy of K-Fold for KNN Model : 69.71428571428572
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.69591837 0.71020408 0.7122449  0.68979592 0.72653061 0.73877551
 0.69183673 0.67755102 0.69795918 0.7       ]
Mean Accuracy of K-Fold for KNN Model : 70.40816326530613
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.68367347 0.7122449  

Accuracies for K-Fold for Decision Tree : [0.57755102 0.57346939 0.57142857 0.55510204 0.59591837 0.62857143
 0.60816327 0.57755102 0.59387755 0.57755102]
Mean Accuracy of K-Fold for Decision Tree : 58.59183673469387
Decision Tree with 16 max_depth
Accuracies for K-Fold for Decision Tree : [0.57755102 0.58571429 0.55714286 0.54489796 0.60204082 0.62244898
 0.6        0.5877551  0.58979592 0.57755102]
Mean Accuracy of K-Fold for Decision Tree : 58.448979591836746
Decision Tree with 17 max_depth
Accuracies for K-Fold for Decision Tree : [0.57959184 0.58979592 0.56122449 0.56326531 0.5755102  0.62040816
 0.60816327 0.59591837 0.59183673 0.55714286]
Mean Accuracy of K-Fold for Decision Tree : 58.42857142857143
Decision Tree with 18 max_depth
Accuracies for K-Fold for Decision Tree : [0.58979592 0.59387755 0.57755102 0.56734694 0.58163265 0.6122449
 0.60204082 0.5755102  0.57959184 0.56734694]
Mean Accuracy of K-Fold for Decision Tree : 58.46938775510204
Decision Tree with 19 max_depth
Accu

Accuracies for K-Fold for Multinomial Naive Bayes : [0.553 0.553 0.498 0.514 0.529 0.539 0.524 0.537 0.508 0.516]
Mean Accuracy of K-Fold for Multinomial Naive Bayes : 52.714285714285715


In [8]:
# GKB BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_gkb.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.569 0.549 0.563 0.557 0.522 0.573 0.545 0.547 0.545 0.553]
Mean Accuracy of K-Fold for Logistic Regression : 55.24489795918368
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.422 0.467 0.467 0.443 0.441 0.457 0.441 0.439 0.437 0.435]
Mean Accuracy of K-Fold for KNN Model : 44.48979591836735
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.435 0.502 0.469 0.459 0.439 0.465 0.455 0.455 0.469 0.445]
Mean Accuracy of K-Fold for KNN Model : 45.93877551020409
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.433 0.498 0.482 0.471 0.457 0.471 0.453 0.469 0.463 0.439]
Mean Accuracy of K-Fold for KNN Model : 46.367346938775505
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.429 0.473 0.476 0.463 0.463 0.465 0.455 0.473 0.467 0.443]
Mean Accuracy of K-Fold for KNN Model : 46.08163265306122
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.435 0.478 0.49  0.473 0.473 0.476 0.461 0.4

Accuracies for K-Fold for Decision Tree : [0.441 0.467 0.445 0.453 0.422 0.478 0.449 0.465 0.429 0.449]
Mean Accuracy of K-Fold for Decision Tree : 44.9795918367347
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.369 0.329 0.331 0.353 0.318 0.333 0.314 0.359 0.304 0.341]
Mean Accuracy of K-Fold for Random Forest : 33.51020408163265
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.427 0.367 0.367 0.373 0.357 0.365 0.341 0.394 0.327 0.369]
Mean Accuracy of K-Fold for Random Forest : 36.87755102040816
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.482 0.455 0.445 0.429 0.408 0.427 0.408 0.437 0.388 0.414]
Mean Accuracy of K-Fold for Random Forest : 42.91836734693878
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.492 0.476 0.478 0.459 0.433 0.455 0.437 0.457 0.427 0.435]
Mean Accuracy of K-Fold for Random Forest : 45.46938775510204
Random Forest with 5 max_depth
Accuracies for K

In [9]:
# N Distill BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_ndisbert.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.759 0.81  0.767 0.731 0.759 0.79  0.784 0.802 0.798 0.751]
Mean Accuracy of K-Fold for Logistic Regression : 77.51020408163265
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.665 0.637 0.635 0.663 0.665 0.665 0.653 0.657 0.661 0.639]
Mean Accuracy of K-Fold for KNN Model : 65.40816326530613
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.649 0.667 0.671 0.671 0.676 0.698 0.657 0.659 0.676 0.647]
Mean Accuracy of K-Fold for KNN Model : 66.71428571428571
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.684 0.649 0.678 0.676 0.663 0.704 0.669 0.641 0.655 0.655]
Mean Accuracy of K-Fold for KNN Model : 66.73469387755102
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.69  0.665 0.659 0.678 0.684 0.69  0.665 0.653 0.671 0.637]
Mean Accuracy of K-Fold for KNN Model : 66.91836734693878
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.69  0.667 0.661 0.671 0.68  0.696 0.663 0.66

Accuracies for K-Fold for Decision Tree : [0.561 0.549 0.555 0.559 0.531 0.51  0.541 0.598 0.547 0.508]
Mean Accuracy of K-Fold for Decision Tree : 54.59183673469388
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.318 0.322 0.273 0.286 0.278 0.276 0.306 0.322 0.288 0.243]
Mean Accuracy of K-Fold for Random Forest : 29.122448979591837
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.512 0.498 0.451 0.453 0.445 0.422 0.476 0.518 0.469 0.447]
Mean Accuracy of K-Fold for Random Forest : 46.918367346938766
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.563 0.596 0.551 0.567 0.545 0.535 0.598 0.578 0.557 0.559]
Mean Accuracy of K-Fold for Random Forest : 56.48979591836735
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.586 0.643 0.61  0.606 0.602 0.596 0.62  0.606 0.604 0.58 ]
Mean Accuracy of K-Fold for Random Forest : 60.530612244897966
Random Forest with 5 max_depth
Accuracies f

In [10]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_vbert.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

Accuracies for K-Fold for Logistic Regression : [0.776 0.802 0.8   0.788 0.796 0.784 0.784 0.784 0.82  0.81 ]
Mean Accuracy of K-Fold for Logistic Regression : 79.42857142857143
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.7   0.692 0.667 0.653 0.653 0.686 0.643 0.682 0.684 0.641]
Mean Accuracy of K-Fold for KNN Model : 67.0
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.712 0.722 0.688 0.663 0.655 0.684 0.639 0.7   0.686 0.653]
Mean Accuracy of K-Fold for KNN Model : 68.02040816326532
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.708 0.718 0.708 0.659 0.661 0.698 0.645 0.704 0.676 0.68 ]
Mean Accuracy of K-Fold for KNN Model : 68.57142857142857
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.712 0.72  0.71  0.655 0.663 0.712 0.653 0.704 0.667 0.673]
Mean Accuracy of K-Fold for KNN Model : 68.71428571428571
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.718 0.72  0.702 0.659 0.68  0.692 0.68  0.704 0.682 0.673

Accuracies for K-Fold for Decision Tree : [0.553 0.543 0.555 0.543 0.508 0.543 0.547 0.508 0.522 0.506]
Mean Accuracy of K-Fold for Decision Tree : 53.28571428571428
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.396 0.341 0.378 0.333 0.378 0.331 0.373 0.422 0.357 0.28 ]
Mean Accuracy of K-Fold for Random Forest : 35.87755102040816
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.58  0.52  0.494 0.467 0.504 0.476 0.512 0.522 0.467 0.48 ]
Mean Accuracy of K-Fold for Random Forest : 50.22448979591837
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.627 0.584 0.569 0.518 0.555 0.543 0.563 0.559 0.553 0.533]
Mean Accuracy of K-Fold for Random Forest : 56.04081632653062
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.643 0.627 0.598 0.557 0.586 0.596 0.6   0.598 0.588 0.567]
Mean Accuracy of K-Fold for Random Forest : 59.591836734693885
Random Forest with 5 max_depth
Accuracies for

In [11]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//gpt_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.731 0.778 0.798 0.747 0.771 0.769 0.751 0.749 0.8   0.78 ]
Mean Accuracy of K-Fold for Logistic Regression : 76.73469387755102
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.645 0.682 0.655 0.649 0.651 0.657 0.665 0.635 0.665 0.645]
Mean Accuracy of K-Fold for KNN Model : 65.48979591836734
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.661 0.692 0.673 0.649 0.649 0.665 0.665 0.649 0.667 0.682]
Mean Accuracy of K-Fold for KNN Model : 66.53061224489795
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.684 0.69  0.688 0.671 0.663 0.684 0.671 0.649 0.673 0.68 ]
Mean Accuracy of K-Fold for KNN Model : 67.53061224489795
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.673 0.69  0.69  0.671 0.684 0.671 0.682 0.639 0.663 0.686]
Mean Accuracy of K-Fold for KNN Model : 67.48979591836736
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.69  0.704 0.698 0.653 0.684 0.671 0.696 0.64

Accuracies for K-Fold for Decision Tree : [0.529 0.553 0.5   0.5   0.533 0.533 0.52  0.502 0.537 0.516]
Mean Accuracy of K-Fold for Decision Tree : 52.22448979591837
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.382 0.398 0.384 0.41  0.41  0.42  0.371 0.433 0.38  0.331]
Mean Accuracy of K-Fold for Random Forest : 39.183673469387756
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.547 0.508 0.496 0.461 0.471 0.486 0.49  0.514 0.461 0.473]
Mean Accuracy of K-Fold for Random Forest : 49.08163265306122
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.578 0.553 0.549 0.51  0.516 0.533 0.535 0.551 0.545 0.535]
Mean Accuracy of K-Fold for Random Forest : 54.04081632653062
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.606 0.567 0.59  0.547 0.549 0.576 0.582 0.584 0.578 0.584]
Mean Accuracy of K-Fold for Random Forest : 57.61224489795919
Random Forest with 5 max_depth
Accuracies for

In [12]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//xlm_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.751 0.773 0.776 0.786 0.761 0.778 0.788 0.776 0.78  0.769]
Mean Accuracy of K-Fold for Logistic Regression : 77.36734693877551
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.649 0.704 0.673 0.69  0.659 0.669 0.686 0.686 0.667 0.671]
Mean Accuracy of K-Fold for KNN Model : 67.55102040816328
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.651 0.698 0.667 0.647 0.673 0.714 0.708 0.678 0.678 0.657]
Mean Accuracy of K-Fold for KNN Model : 67.71428571428572
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.669 0.71  0.676 0.669 0.68  0.71  0.704 0.68  0.663 0.678]
Mean Accuracy of K-Fold for KNN Model : 68.38775510204081
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.678 0.704 0.68  0.69  0.663 0.724 0.714 0.678 0.671 0.661]
Mean Accuracy of K-Fold for KNN Model : 68.63265306122449
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.686 0.706 0.688 0.686 0.68  0.706 0.702 0.69

Accuracies for K-Fold for Decision Tree : [0.58  0.606 0.588 0.588 0.569 0.616 0.573 0.596 0.588 0.598]
Mean Accuracy of K-Fold for Decision Tree : 59.020408163265316
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.496 0.453 0.465 0.473 0.476 0.449 0.467 0.5   0.418 0.441]
Mean Accuracy of K-Fold for Random Forest : 46.38775510204082
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.573 0.541 0.518 0.545 0.527 0.527 0.543 0.561 0.522 0.512]
Mean Accuracy of K-Fold for Random Forest : 53.6938775510204
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.61  0.596 0.559 0.602 0.578 0.569 0.584 0.627 0.565 0.555]
Mean Accuracy of K-Fold for Random Forest : 58.448979591836746
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.649 0.629 0.6   0.618 0.614 0.602 0.631 0.635 0.596 0.602]
Mean Accuracy of K-Fold for Random Forest : 61.755102040816325
Random Forest with 5 max_depth
Accuracies fo

### Fine Tuned Transformers Models

In [13]:
# BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//bert_base_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.694 0.753 0.665 0.716 0.708 0.69  0.72  0.706 0.729 0.696]
Mean Accuracy of K-Fold for Logistic Regression : 70.77551020408163
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.553 0.567 0.565 0.569 0.543 0.555 0.527 0.571 0.557 0.537]
Mean Accuracy of K-Fold for KNN Model : 55.448979591836725
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.551 0.59  0.563 0.559 0.547 0.573 0.567 0.547 0.569 0.555]
Mean Accuracy of K-Fold for KNN Model : 56.22448979591837
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.571 0.602 0.551 0.58  0.561 0.584 0.561 0.565 0.555 0.547]
Mean Accuracy of K-Fold for KNN Model : 56.775510204081634
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.571 0.62  0.578 0.563 0.571 0.58  0.571 0.571 0.563 0.559]
Mean Accuracy of K-Fold for KNN Model : 57.48979591836735
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.586 0.622 0.578 0.567 0.573 0.58  0.549 0.

Accuracies for K-Fold for Decision Tree : [0.41  0.494 0.427 0.467 0.463 0.431 0.451 0.451 0.473 0.404]
Mean Accuracy of K-Fold for Decision Tree : 44.714285714285715
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.2   0.276 0.261 0.267 0.261 0.257 0.229 0.235 0.245 0.227]
Mean Accuracy of K-Fold for Random Forest : 24.57142857142857
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.461 0.443 0.422 0.4   0.422 0.404 0.427 0.439 0.371 0.398]
Mean Accuracy of K-Fold for Random Forest : 41.87755102040816
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.488 0.488 0.447 0.433 0.461 0.422 0.443 0.443 0.42  0.441]
Mean Accuracy of K-Fold for Random Forest : 44.857142857142854
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.5   0.496 0.471 0.445 0.492 0.461 0.473 0.492 0.473 0.48 ]
Mean Accuracy of K-Fold for Random Forest : 47.83673469387756
Random Forest with 5 max_depth
Accuracies fo

In [14]:
# Hinglish BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//vbert_hinglish_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.676 0.722 0.694 0.688 0.673 0.676 0.694 0.68  0.692 0.667]
Mean Accuracy of K-Fold for Logistic Regression : 68.61224489795919
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.627 0.643 0.62  0.62  0.61  0.614 0.59  0.596 0.629 0.592]
Mean Accuracy of K-Fold for KNN Model : 61.408163265306136
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.639 0.649 0.633 0.631 0.624 0.612 0.606 0.61  0.647 0.604]
Mean Accuracy of K-Fold for KNN Model : 62.55102040816327
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.651 0.647 0.635 0.647 0.622 0.633 0.608 0.622 0.659 0.622]
Mean Accuracy of K-Fold for KNN Model : 63.469387755102034
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.647 0.659 0.633 0.645 0.614 0.62  0.606 0.639 0.653 0.627]
Mean Accuracy of K-Fold for KNN Model : 63.42857142857143
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.659 0.667 0.641 0.653 0.606 0.631 0.608 0.

Accuracies for K-Fold for Decision Tree : [0.498 0.48  0.455 0.471 0.482 0.484 0.482 0.461 0.486 0.473]
Mean Accuracy of K-Fold for Decision Tree : 47.714285714285715
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.441 0.402 0.402 0.386 0.402 0.353 0.386 0.441 0.378 0.367]
Mean Accuracy of K-Fold for Random Forest : 39.57142857142857
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.494 0.449 0.445 0.416 0.443 0.435 0.435 0.488 0.408 0.439]
Mean Accuracy of K-Fold for Random Forest : 44.51020408163265
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.527 0.498 0.506 0.473 0.484 0.471 0.471 0.498 0.457 0.463]
Mean Accuracy of K-Fold for Random Forest : 48.48979591836734
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.578 0.571 0.565 0.529 0.527 0.535 0.533 0.563 0.527 0.531]
Mean Accuracy of K-Fold for Random Forest : 54.57142857142857
Random Forest with 5 max_depth
Accuracies for

In [15]:
# GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_base_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.751 0.782 0.78  0.733 0.767 0.749 0.782 0.765 0.802 0.788]
Mean Accuracy of K-Fold for Logistic Regression : 76.9795918367347
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.516 0.492 0.461 0.488 0.467 0.455 0.465 0.461 0.535 0.471]
Mean Accuracy of K-Fold for KNN Model : 48.122448979591844
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.533 0.502 0.49  0.478 0.484 0.471 0.488 0.492 0.524 0.451]
Mean Accuracy of K-Fold for KNN Model : 49.12244897959185
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.527 0.51  0.488 0.498 0.502 0.476 0.492 0.488 0.514 0.467]
Mean Accuracy of K-Fold for KNN Model : 49.61224489795918
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.52  0.498 0.476 0.518 0.49  0.471 0.49  0.496 0.545 0.467]
Mean Accuracy of K-Fold for KNN Model : 49.71428571428571
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.545 0.502 0.48  0.522 0.482 0.492 0.516 0.50

Accuracies for K-Fold for Decision Tree : [0.553 0.516 0.518 0.514 0.49  0.524 0.492 0.524 0.518 0.463]
Mean Accuracy of K-Fold for Decision Tree : 51.142857142857146
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.408 0.371 0.355 0.329 0.351 0.339 0.378 0.418 0.347 0.333]
Mean Accuracy of K-Fold for Random Forest : 36.285714285714285
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.543 0.471 0.459 0.416 0.467 0.433 0.48  0.502 0.433 0.484]
Mean Accuracy of K-Fold for Random Forest : 46.87755102040816
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.602 0.551 0.524 0.5   0.539 0.52  0.549 0.543 0.516 0.535]
Mean Accuracy of K-Fold for Random Forest : 53.79591836734694
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.627 0.578 0.565 0.551 0.567 0.545 0.58  0.559 0.545 0.557]
Mean Accuracy of K-Fold for Random Forest : 56.73469387755101
Random Forest with 5 max_depth
Accuracies fo

In [16]:
# Hinglish GPT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.786 0.796 0.8   0.769 0.806 0.761 0.802 0.782 0.806 0.814]
Mean Accuracy of K-Fold for Logistic Regression : 79.22448979591836
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.488 0.498 0.455 0.457 0.488 0.486 0.484 0.478 0.494 0.476]
Mean Accuracy of K-Fold for KNN Model : 48.0204081632653
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.498 0.524 0.469 0.455 0.5   0.496 0.463 0.482 0.522 0.48 ]
Mean Accuracy of K-Fold for KNN Model : 48.897959183673464
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.506 0.518 0.5   0.467 0.524 0.486 0.476 0.498 0.524 0.476]
Mean Accuracy of K-Fold for KNN Model : 49.755102040816325
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.51  0.506 0.476 0.471 0.529 0.512 0.476 0.476 0.518 0.494]
Mean Accuracy of K-Fold for KNN Model : 49.67346938775511
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.51  0.522 0.482 0.471 0.516 0.52  0.49  0.4

Accuracies for K-Fold for Decision Tree : [0.533 0.516 0.504 0.537 0.496 0.514 0.471 0.51  0.516 0.486]
Mean Accuracy of K-Fold for Decision Tree : 50.83673469387755
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.257 0.3   0.29  0.359 0.359 0.294 0.241 0.416 0.276 0.269]
Mean Accuracy of K-Fold for Random Forest : 30.612244897959183
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.547 0.494 0.449 0.449 0.476 0.459 0.49  0.508 0.443 0.463]
Mean Accuracy of K-Fold for Random Forest : 47.77551020408163
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.58  0.549 0.514 0.529 0.539 0.488 0.531 0.535 0.49  0.522]
Mean Accuracy of K-Fold for Random Forest : 52.755102040816325
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.622 0.592 0.561 0.537 0.569 0.557 0.551 0.578 0.557 0.578]
Mean Accuracy of K-Fold for Random Forest : 57.0204081632653
Random Forest with 5 max_depth
Accuracies for

In [17]:
# XLM vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//xlm_base_finetuned_vectorized_kabita_dataset.csv")

# Logistic regression
tv_lr_model = LogisticRegression(max_iter=1000)
ml_training(tv_lr_model, x_df, labels, "Logistic Regression")

# KNN Model
neighbors_list = [3, 4, 5, 6, 7, 8]
for x in neighbors_list:
    print("KNN with",x,"Neighbors")
    tv_knn_model = KNeighborsClassifier(n_neighbors=x)
    ml_training(tv_knn_model,x_df, labels,"KNN Model")
    
# Gaussian Naive Bayes
tv_gnb_model = GaussianNB()
ml_training(tv_gnb_model,x_df, labels,"Gaussian Naive Bayes")

# Bernoulli Naive Bayes
tv_bnb_model = BernoulliNB()
ml_training(tv_bnb_model,x_df, labels,"Bernoulli Naive Bayes")

# Support Vector Machine Classifier
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']
for a_kernel in svm_kernels:
    print("Working on SVM Kernal:", a_kernel)
    tv_svm_model = svm.SVC(kernel=a_kernel)
    ml_training(tv_svm_model,x_df, labels,"SVM")

# Decision Tree Classifier
for x in range(1,21):
    print("Decision Tree with",x,"max_depth")
    tv_dt_model = DecisionTreeClassifier(random_state=3, max_depth=x)
    ml_training(tv_dt_model,x_df, labels,"Decision Tree")
    
# Random Forest
for x in range(1,21):
    print("Random Forest with",x,"max_depth")
    tv_rf_model = RandomForestClassifier(max_depth=x, random_state=3)
    ml_training(tv_rf_model,x_df, labels,"Random Forest")
    
# scaling using MinMax scaler
mms_scale=MinMaxScaler(feature_range=(0,10))
m_df=mms_scale.fit_transform(x_df)
np.set_printoptions(precision=3)
# Multinomial Naive Bayes
tv_mnb_model = MultinomialNB()
ml_training(tv_mnb_model,m_df, labels,"Multinomial Naive Bayes")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Accuracies for K-Fold for Logistic Regression : [0.549 0.52  0.498 0.533 0.488 0.524 0.537 0.518 0.522 0.527]
Mean Accuracy of K-Fold for Logistic Regression : 52.16326530612244
KNN with 3 Neighbors
Accuracies for K-Fold for KNN Model : [0.473 0.476 0.439 0.492 0.427 0.439 0.453 0.441 0.512 0.461]
Mean Accuracy of K-Fold for KNN Model : 46.12244897959183
KNN with 4 Neighbors
Accuracies for K-Fold for KNN Model : [0.48  0.486 0.465 0.508 0.451 0.447 0.449 0.455 0.506 0.471]
Mean Accuracy of K-Fold for KNN Model : 47.183673469387756
KNN with 5 Neighbors
Accuracies for K-Fold for KNN Model : [0.48  0.504 0.48  0.514 0.463 0.471 0.461 0.449 0.51  0.469]
Mean Accuracy of K-Fold for KNN Model : 48.0204081632653
KNN with 6 Neighbors
Accuracies for K-Fold for KNN Model : [0.473 0.492 0.492 0.508 0.455 0.473 0.473 0.453 0.512 0.473]
Mean Accuracy of K-Fold for KNN Model : 48.061224489795926
KNN with 7 Neighbors
Accuracies for K-Fold for KNN Model : [0.473 0.504 0.488 0.522 0.455 0.473 0.473 0.4

Accuracies for K-Fold for Decision Tree : [0.363 0.416 0.369 0.378 0.349 0.38  0.394 0.371 0.402 0.339]
Mean Accuracy of K-Fold for Decision Tree : 37.612244897959194
Random Forest with 1 max_depth
Accuracies for K-Fold for Random Forest : [0.2   0.265 0.239 0.251 0.271 0.255 0.218 0.249 0.253 0.229]
Mean Accuracy of K-Fold for Random Forest : 24.306122448979593
Random Forest with 2 max_depth
Accuracies for K-Fold for Random Forest : [0.347 0.371 0.333 0.333 0.363 0.335 0.367 0.355 0.339 0.296]
Mean Accuracy of K-Fold for Random Forest : 34.38775510204082
Random Forest with 3 max_depth
Accuracies for K-Fold for Random Forest : [0.355 0.441 0.394 0.351 0.384 0.365 0.394 0.404 0.371 0.337]
Mean Accuracy of K-Fold for Random Forest : 37.95918367346939
Random Forest with 4 max_depth
Accuracies for K-Fold for Random Forest : [0.4   0.445 0.4   0.371 0.388 0.39  0.416 0.447 0.402 0.367]
Mean Accuracy of K-Fold for Random Forest : 40.26530612244898
Random Forest with 5 max_depth
Accuracies fo