# Imports

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder, PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
import numpy as np
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV

# Datasets

In [None]:
# Load the labelled training sentences and index them by their 'id' column
train = pd.read_csv('https://github.com/tcastrom/CEFR-French-/raw/main/Data/training_data.csv').set_index('id')
display(train.head())

# Load the unlabelled sentences the same way
unlabel = pd.read_csv('https://github.com/tcastrom/CEFR-French-/raw/main/Data/unlabelled_test_data.csv').set_index('id')
display(unlabel.head())

# Encoding of the Data

In [None]:
# Replace the 'difficulty' labels with integer codes; `le` is kept so the
# codes can be mapped back to the original labels later on
le = LabelEncoder()
train['difficulty'] = le.fit_transform(train['difficulty'])

# Show the learned classes and the label behind each integer code.
# The list of codes is derived from the fitted encoder rather than hard-coded,
# so the lookup cannot fail when the data holds a different number of classes.
display(le.classes_)
encoded_values = list(range(len(le.classes_)))
print(f'The values {encoded_values} are represented by {le.inverse_transform(encoded_values)}')

In [None]:
# Features are the raw sentences, target is the encoded difficulty
X, y = train['sentence'], train['difficulty']

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# TF-IDF representation of the sentences
vectorizer = TfidfVectorizer()

# Learn the vocabulary/weights on the training sentences only, then reuse the
# fitted vectorizer for the held-out sentences (right-hand side is evaluated
# left to right, so the fit happens before the test transform).
# Note: X_train / X_test are rebound from raw text to feature matrices here.
X_train, X_test = vectorizer.fit_transform(X_train), vectorizer.transform(X_test)

# Linear Regression

In [None]:
# Fit a linear regression on the TF-IDF features, treating the encoded
# difficulty as a numeric target
linear_regression = LinearRegression()
linear_regression.fit(X_train, y_train)

# Predict the difficulty on the testing data
y_pred = linear_regression.predict(X_test)

# Turn the continuous predictions into valid class codes: round to the nearest
# integer, then clamp to the codes the encoder knows (0 .. n_classes - 1).
# The upper bound comes from the fitted encoder instead of a hard-coded 5.
max_code = len(le.classes_) - 1
y_pred = np.round(y_pred)
y_pred[y_pred < 0] = 0
y_pred[y_pred > max_code] = max_code

# Reverse the encoding of the difficulty (predictions must be integers for the lookup)
y_test_unencoded = le.inverse_transform(y_test)
y_pred_unencoded = le.inverse_transform(y_pred.astype(int))

# Calculate the accuracy
accuracy_reg_lin = accuracy_score(y_test_unencoded, y_pred_unencoded)
print(f'Accuracy: {accuracy_reg_lin}')

# Calculate the Precision (class-frequency weighted average)
precision_reg_lin = precision_score(y_test_unencoded, y_pred_unencoded, average='weighted')
print(f'Precision: {precision_reg_lin}')

# Calculate the Recall (class-frequency weighted average)
recall_reg_lin = recall_score(y_test_unencoded, y_pred_unencoded, average='weighted')
print(f'Recall: {recall_reg_lin}')

# Calculate the F1 Score (class-frequency weighted average)
f1_reg_lin = f1_score(y_test_unencoded, y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_reg_lin}')

# Logistic Regression with Cross-Validation

In [None]:
# Logistic regression with built-in cross-validation
from sklearn.linear_model import LogisticRegressionCV

# 5-fold cross-validated model, fitted on the training features
# (fit returns the estimator itself, so construction and fitting are chained)
logistic_regression = LogisticRegressionCV(cv=5, max_iter=1000).fit(X_train, y_train)

# Class codes predicted for the held-out sentences
y_pred = logistic_regression.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_log_reg = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_log_reg}')

# Weighted precision
precision_log_reg = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_log_reg}')

# Weighted recall
recall_log_reg = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_log_reg}')

# Weighted F1
f1_log_reg = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_log_reg}')




# KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Hyper-parameters to search for the k-nearest-neighbours classifier.
# 'minkowski' was removed from the metrics: with the classifier's default
# p=2 it is the Euclidean distance, so it only repeated the 'euclidean'
# candidates and cost a third of the search time for no extra information.
param_grid_knn = {
    'n_neighbors': range(1, 11),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

# Create a base model
knn = KNeighborsClassifier()

# 5-fold cross-validated grid search over the grid above
grid_search_knn = GridSearchCV(estimator=knn, param_grid=param_grid_knn, cv=5)

# Fit the grid search to the training data
grid_search_knn.fit(X_train, y_train)

# Get the best parameters
best_params_knn = grid_search_knn.best_params_

print(f"The best parameters are : {best_params_knn}")

In [None]:
# Tuned estimator selected by the grid search
best_model_knn = grid_search_knn.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_knn.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_knn = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_knn}')

# Weighted precision
precision_knn = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_knn}')

# Weighted recall
recall_knn = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_knn}')

# Weighted F1
f1_knn = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_knn}')

In [None]:
# Decision Tree with GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Hyper-parameters to search for the decision tree.
# 'auto' was removed from max_features: for classifiers it was an alias of
# 'sqrt' (duplicate candidates) and current scikit-learn releases reject it.
param_grid_dt = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': range(1, 11),
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5, 10],
    'max_features': ['sqrt', 'log2'],
    'max_leaf_nodes': [None, 10, 20, 30]
}

# Base model; the seed makes the random splitter / feature sub-sampling
# repeatable, so the search result is the same after a kernel restart
dt = DecisionTreeClassifier(random_state=42)

# 5-fold cross-validated grid search over the grid above
grid_search_dt = GridSearchCV(estimator=dt, param_grid=param_grid_dt, cv=5)

# Fit the grid search to the training data
grid_search_dt.fit(X_train, y_train)

# Get the best parameters
best_params_dt = grid_search_dt.best_params_

print(f"The best parameters are : {best_params_dt}")

In [None]:
# Tuned estimator selected by the grid search
best_model_dt = grid_search_dt.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_dt.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_dt = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_dt}')

# Weighted precision
precision_dt = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_dt}')

# Weighted recall
recall_dt = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_dt}')

# Weighted F1
f1_dt = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_dt}')


# Random forest

In [None]:
# Random Forest with GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Hyper-parameters to search for the random forest.
# 'auto' was removed from max_features: for classifiers it was an alias of
# 'sqrt' (duplicate candidates) and current scikit-learn releases reject it.
param_grid_rf = {
    'n_estimators': [50, 100, 200],
    'criterion': ['gini', 'entropy'],
    'max_depth': range(1, 11),
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 5, 10],
    'max_features': ['sqrt', 'log2']
}

# Base model; the seed fixes the bootstrap samples and feature sub-sampling
# so the search result is the same after a kernel restart
rf = RandomForestClassifier(random_state=42)

# 5-fold cross-validated grid search over the grid above
grid_search_rf = GridSearchCV(estimator=rf, param_grid=param_grid_rf, cv=5)

# Fit the grid search to the training data
grid_search_rf.fit(X_train, y_train)

# Get the best parameters
best_params_rf = grid_search_rf.best_params_
print(f"The best parameters are : {best_params_rf}")

In [None]:
# Tuned estimator selected by the grid search
best_model_rf = grid_search_rf.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_rf.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_rf = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_rf}')

# Weighted precision
precision_rf = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_rf}')

# Weighted recall
recall_rf = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_rf}')

# Weighted F1
f1_rf = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_rf}')

# Support Vector Machine

In [None]:
# Support Vector Machine with GridSearchCV
from sklearn.svm import SVC

# Hyper-parameters to search, split per kernel so that only the parameters a
# kernel actually uses are varied: 'degree' only affects 'poly', and 'gamma'
# has no effect on 'linear'. The same values are explored as with one flat
# grid, but without refitting identical models (33 candidates instead of 72).
svm_c_values = [0.1, 1, 10]
svm_gamma_values = ['scale', 'auto']
param_grid_svm = [
    {'kernel': ['linear'], 'C': svm_c_values},
    {'kernel': ['poly'], 'C': svm_c_values, 'degree': [2, 3, 4], 'gamma': svm_gamma_values},
    {'kernel': ['rbf', 'sigmoid'], 'C': svm_c_values, 'gamma': svm_gamma_values},
]

# Create a base model
svm = SVC()

# 5-fold cross-validated grid search over the grids above
grid_search_svm = GridSearchCV(estimator=svm, param_grid=param_grid_svm, cv=5)

# Fit the grid search to the training data
grid_search_svm.fit(X_train, y_train)

# Get the best parameters (only the keys of the winning kernel's grid are reported)
best_params_svm = grid_search_svm.best_params_
print(f"The best parameters are : {best_params_svm}")

In [None]:
# Tuned estimator selected by the grid search
best_model_svm = grid_search_svm.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_svm.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_svm = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_svm}')

# Weighted precision
precision_svm = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_svm}')

# Weighted recall
recall_svm = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_svm}')

# Weighted F1
f1_svm = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_svm}')


# Naive Bayes

In [None]:
# Multinomial Naive Bayes tuned with a cross-validated grid search
from sklearn.naive_bayes import MultinomialNB

# Candidate smoothing strengths and whether to learn class priors
param_grid_nb = dict(
    alpha=[0.1, 1, 10],
    fit_prior=[True, False],
)

# Untuned estimator handed to the search
nb = MultinomialNB()

# Build the 5-fold search and fit it on the training features in one go
# (fit returns the search object itself)
grid_search_nb = GridSearchCV(nb, param_grid_nb, cv=5).fit(X_train, y_train)

# Report the winning combination
best_params_nb = grid_search_nb.best_params_
print(f"The best parameters are : {best_params_nb}")


In [None]:
# Tuned estimator selected by the grid search
best_model_nb = grid_search_nb.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_nb.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_nb = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_nb}')

# Weighted precision
precision_nb = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_nb}')

# Weighted recall
recall_nb = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_nb}')

# Weighted F1
f1_nb = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_nb}')


# Stochastic Gradient Descent

In [None]:
# Stochastic Gradient Descent with GridSearchCV
from sklearn.linear_model import SGDClassifier

# Hyper-parameters to search for the SGD classifier.
# The logistic loss is spelled 'log_loss': the former name 'log' was
# deprecated and is rejected by current scikit-learn releases.
param_grid_sgd = {
    'loss': ['hinge', 'log_loss', 'modified_huber', 'squared_hinge', 'perceptron'],
    'penalty': ['l2', 'l1', 'elasticnet'],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'max_iter': [1000, 2000, 5000]
}

# Base model; the seed fixes the data shuffling done during training so the
# search result is the same after a kernel restart
sgd = SGDClassifier(random_state=42)

# 5-fold cross-validated grid search over the grid above
grid_search_sgd = GridSearchCV(estimator=sgd, param_grid=param_grid_sgd, cv=5)

# Fit the grid search to the training data
grid_search_sgd.fit(X_train, y_train)

# Get the best parameters
best_params_sgd = grid_search_sgd.best_params_
print(f"The best parameters are : {best_params_sgd}")

In [None]:
# Tuned estimator selected by the grid search
best_model_sgd = grid_search_sgd.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_sgd.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_sgd = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_sgd}')

# Weighted precision
precision_sgd = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_sgd}')

# Weighted recall
recall_sgd = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_sgd}')

# Weighted F1
f1_sgd = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_sgd}')

# Gradient Boosting Machines (GBM)

In [None]:
# Gradient Boosting with GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier

# Hyper-parameters to search for the gradient boosting classifier.
# 'auto' was removed from max_features: for classifiers it was an alias of
# 'sqrt' (duplicate candidates) and current scikit-learn releases reject it.
param_grid_gb = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1],
    'max_depth': range(1, 11),
    'subsample': [0.5, 0.75, 1],
    'max_features': ['sqrt', 'log2']
}

# Base model; the seed fixes the row/feature sub-sampling so the search
# result is the same after a kernel restart
gb = GradientBoostingClassifier(random_state=42)

# 5-fold cross-validated grid search over the grid above
grid_search_gb = GridSearchCV(estimator=gb, param_grid=param_grid_gb, cv=5)

# Fit the grid search to the training data
grid_search_gb.fit(X_train, y_train)

# Get the best parameters
best_params_gb = grid_search_gb.best_params_
print(f"The best parameters are : {best_params_gb}")

In [None]:
# Tuned estimator selected by the grid search
best_model_gb = grid_search_gb.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_gb.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_gb = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_gb}')

# Weighted precision
precision_gb = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_gb}')

# Weighted recall
recall_gb = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_gb}')

# Weighted F1
f1_gb = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_gb}')

# AdaBoost

In [None]:
# AdaBoost with GridSearchCV
from sklearn.ensemble import AdaBoostClassifier

# Hyper-parameters to search for the AdaBoost classifier
# NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases —
# confirm it is still accepted by the installed version
param_grid_ab = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1],
    'algorithm': ['SAMME', 'SAMME.R']
}

# Base model; the seed makes the boosting run repeatable so the search
# result is the same after a kernel restart
ab = AdaBoostClassifier(random_state=42)

# 5-fold cross-validated grid search over the grid above
grid_search_ab = GridSearchCV(estimator=ab, param_grid=param_grid_ab, cv=5)

# Fit the grid search to the training data
grid_search_ab.fit(X_train, y_train)

# Get the best parameters
best_params_ab = grid_search_ab.best_params_
print(f"The best parameters are : {best_params_ab}")

In [None]:
# Tuned estimator selected by the grid search
best_model_ab = grid_search_ab.best_estimator_

# Class codes predicted for the held-out sentences
y_pred = best_model_ab.predict(X_test)

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_ab = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_ab}')

# Weighted precision
precision_ab = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_ab}')

# Weighted recall
recall_ab = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_ab}')

# Weighted F1
f1_ab = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_ab}')

# Quadratic Discriminant Analysis (QDA)

In [None]:
# Quadratic Discriminant Analysis tuned with a cross-validated grid search
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Candidate regularisation strengths
param_grid_qda = dict(reg_param=[0.0, 0.1, 0.5, 1.0])

# Untuned estimator handed to the search
qda = QuadraticDiscriminantAnalysis()

# Build the 5-fold search and fit it in one go (fit returns the search object
# itself); the sparse TF-IDF matrix is converted to a dense array for this model
grid_search_qda = GridSearchCV(qda, param_grid_qda, cv=5).fit(X_train.toarray(), y_train)

# Report the winning combination
best_params_qda = grid_search_qda.best_params_
print(f"The best parameters are : {best_params_qda}")

In [None]:
# Tuned estimator selected by the grid search
best_model_qda = grid_search_qda.best_estimator_

# Class codes predicted for the held-out sentences (dense input, as used for fitting)
y_pred = best_model_qda.predict(X_test.toarray())

# Map the integer codes back to the original labels (truth first, then predictions)
y_test_unencoded, y_pred_unencoded = (
    le.inverse_transform(codes) for codes in (y_test, y_pred)
)

# Accuracy
accuracy_qda = accuracy_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded)
print(f'Accuracy: {accuracy_qda}')

# Weighted precision
precision_qda = precision_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Precision: {precision_qda}')

# Weighted recall
recall_qda = recall_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'Recall: {recall_qda}')

# Weighted F1
f1_qda = f1_score(y_true=y_test_unencoded, y_pred=y_pred_unencoded, average='weighted')
print(f'F1 Score: {f1_qda}')

# Final Table

In [None]:
# Gather every model's scores into one comparison table
# (one row per model: name, accuracy, precision, recall, F1)
from IPython.display import display

results = pd.DataFrame(
    [
        ('Linear Regression', accuracy_reg_lin, precision_reg_lin, recall_reg_lin, f1_reg_lin),
        ('Logistic Regression', accuracy_log_reg, precision_log_reg, recall_log_reg, f1_log_reg),
        ('K-Nearest Neighbors', accuracy_knn, precision_knn, recall_knn, f1_knn),
        ('Decision Tree', accuracy_dt, precision_dt, recall_dt, f1_dt),
        ('Random Forest', accuracy_rf, precision_rf, recall_rf, f1_rf),
        ('Support Vector Machine', accuracy_svm, precision_svm, recall_svm, f1_svm),
        ('Naive Bayes', accuracy_nb, precision_nb, recall_nb, f1_nb),
        ('Stochastic Gradient Descent', accuracy_sgd, precision_sgd, recall_sgd, f1_sgd),
        ('Gradient Boosting', accuracy_gb, precision_gb, recall_gb, f1_gb),
        ('AdaBoost', accuracy_ab, precision_ab, recall_ab, f1_ab),
        ('Quadratic Discriminant Analysis', accuracy_qda, precision_qda, recall_qda, f1_qda),
    ],
    columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score'],
).round(2)  # keep two decimal places for the numeric columns

display(results)

def format_sigfig(x):
    """Format a float with exactly two significant figures; pass anything else through.

    The '#' flag keeps trailing zeros, so 0.4 is rendered as '0.40' rather
    than '0.4' and every score in the table shows the same number of digits.

    Parameters
    ----------
    x : object
        A table cell. Floats are converted to a string; other values
        (model names, ints, ...) are returned unchanged.

    Returns
    -------
    str or object
        The formatted string for floats, otherwise ``x`` itself.
    """
    if isinstance(x, float):
        return '{:#.2g}'.format(x)
    return x

# Format every cell of the table. This goes column by column through
# Series.map because DataFrame.applymap is deprecated in recent pandas
# releases; the element-wise result is the same.
formatted_results = results.apply(lambda column: column.map(format_sigfig))


# Use the .style attribute for better formatting: grey header, alternating
# row colours, centred bold text and a caption
styled_results = formatted_results.style.set_table_styles([
    {'selector': 'thead th', 'props': [('background-color', 'grey'), ('color', 'black'), ('font-weight', 'bold')]},
    {'selector': 'tbody tr:nth-child(even)', 'props': [('background-color', '#f2f2f2'),('color', 'black'), ('font-weight', 'bold')]},
    {'selector': 'tbody tr:nth-child(odd)', 'props': [('background-color', 'white'),('color', 'black'), ('font-weight', 'bold')]}
]).set_properties(**{'text-align': 'center'}).set_caption("Model Performance Comparison")

display(styled_results)

<style type="text/css">
#T_f4a83 thead th {
  background-color: grey;
  color: black;
  font-weight: bold;
}
#T_f4a83 tbody tr:nth-child(even) {
  background-color: #f2f2f2;
  color: black;
  font-weight: bold;
}
#T_f4a83 tbody tr:nth-child(odd) {
  background-color: white;
  color: black;
  font-weight: bold;
}
#T_f4a83_row0_col0, #T_f4a83_row0_col1, #T_f4a83_row0_col2, #T_f4a83_row0_col3, #T_f4a83_row0_col4, #T_f4a83_row1_col0, #T_f4a83_row1_col1, #T_f4a83_row1_col2, #T_f4a83_row1_col3, #T_f4a83_row1_col4, #T_f4a83_row2_col0, #T_f4a83_row2_col1, #T_f4a83_row2_col2, #T_f4a83_row2_col3, #T_f4a83_row2_col4, #T_f4a83_row3_col0, #T_f4a83_row3_col1, #T_f4a83_row3_col2, #T_f4a83_row3_col3, #T_f4a83_row3_col4, #T_f4a83_row4_col0, #T_f4a83_row4_col1, #T_f4a83_row4_col2, #T_f4a83_row4_col3, #T_f4a83_row4_col4, #T_f4a83_row5_col0, #T_f4a83_row5_col1, #T_f4a83_row5_col2, #T_f4a83_row5_col3, #T_f4a83_row5_col4, #T_f4a83_row6_col0, #T_f4a83_row6_col1, #T_f4a83_row6_col2, #T_f4a83_row6_col3, #T_f4a83_row6_col4, #T_f4a83_row7_col0, #T_f4a83_row7_col1, #T_f4a83_row7_col2, #T_f4a83_row7_col3, #T_f4a83_row7_col4, #T_f4a83_row8_col0, #T_f4a83_row8_col1, #T_f4a83_row8_col2, #T_f4a83_row8_col3, #T_f4a83_row8_col4, #T_f4a83_row9_col0, #T_f4a83_row9_col1, #T_f4a83_row9_col2, #T_f4a83_row9_col3, #T_f4a83_row9_col4, #T_f4a83_row10_col0, #T_f4a83_row10_col1, #T_f4a83_row10_col2, #T_f4a83_row10_col3, #T_f4a83_row10_col4 {
  text-align: center;
}
</style>
<table id="T_f4a83">
  <caption>Model Performance Comparison</caption>
  <thead>
    <tr>
      <th class="blank level0" >&nbsp;</th>
      <th id="T_f4a83_level0_col0" class="col_heading level0 col0" >Model</th>
      <th id="T_f4a83_level0_col1" class="col_heading level0 col1" >Accuracy</th>
      <th id="T_f4a83_level0_col2" class="col_heading level0 col2" >Precision</th>
      <th id="T_f4a83_level0_col3" class="col_heading level0 col3" >Recall</th>
      <th id="T_f4a83_level0_col4" class="col_heading level0 col4" >F1 Score</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th id="T_f4a83_level0_row0" class="row_heading level0 row0" >0</th>
      <td id="T_f4a83_row0_col0" class="data row0 col0" >Linear Regression</td>
      <td id="T_f4a83_row0_col1" class="data row0 col1" >0.26</td>
      <td id="T_f4a83_row0_col2" class="data row0 col2" >0.25</td>
      <td id="T_f4a83_row0_col3" class="data row0 col3" >0.26</td>
      <td id="T_f4a83_row0_col4" class="data row0 col4" >0.25</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row1" class="row_heading level0 row1" >1</th>
      <td id="T_f4a83_row1_col0" class="data row1 col0" >Logistic Regression</td>
      <td id="T_f4a83_row1_col1" class="data row1 col1" >0.47</td>
      <td id="T_f4a83_row1_col2" class="data row1 col2" >0.47</td>
      <td id="T_f4a83_row1_col3" class="data row1 col3" >0.47</td>
      <td id="T_f4a83_row1_col4" class="data row1 col4" >0.47</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row2" class="row_heading level0 row2" >2</th>
      <td id="T_f4a83_row2_col0" class="data row2 col0" >K-Nearest Neighbors</td>
      <td id="T_f4a83_row2_col1" class="data row2 col1" >0.36</td>
      <td id="T_f4a83_row2_col2" class="data row2 col2" >0.4</td>
      <td id="T_f4a83_row2_col3" class="data row2 col3" >0.36</td>
      <td id="T_f4a83_row2_col4" class="data row2 col4" >0.35</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row3" class="row_heading level0 row3" >3</th>
      <td id="T_f4a83_row3_col0" class="data row3 col0" >Decision Tree</td>
      <td id="T_f4a83_row3_col1" class="data row3 col1" >0.22</td>
      <td id="T_f4a83_row3_col2" class="data row3 col2" >0.24</td>
      <td id="T_f4a83_row3_col3" class="data row3 col3" >0.22</td>
      <td id="T_f4a83_row3_col4" class="data row3 col4" >0.16</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row4" class="row_heading level0 row4" >4</th>
      <td id="T_f4a83_row4_col0" class="data row4 col0" >Random Forest</td>
      <td id="T_f4a83_row4_col1" class="data row4 col1" >0.38</td>
      <td id="T_f4a83_row4_col2" class="data row4 col2" >0.37</td>
      <td id="T_f4a83_row4_col3" class="data row4 col3" >0.38</td>
      <td id="T_f4a83_row4_col4" class="data row4 col4" >0.34</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row5" class="row_heading level0 row5" >5</th>
      <td id="T_f4a83_row5_col0" class="data row5 col0" >Support Vector Machine</td>
      <td id="T_f4a83_row5_col1" class="data row5 col1" >0.48</td>
      <td id="T_f4a83_row5_col2" class="data row5 col2" >0.48</td>
      <td id="T_f4a83_row5_col3" class="data row5 col3" >0.48</td>
      <td id="T_f4a83_row5_col4" class="data row5 col4" >0.48</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row6" class="row_heading level0 row6" >6</th>
      <td id="T_f4a83_row6_col0" class="data row6 col0" >Naive Bayes</td>
      <td id="T_f4a83_row6_col1" class="data row6 col1" >0.45</td>
      <td id="T_f4a83_row6_col2" class="data row6 col2" >0.47</td>
      <td id="T_f4a83_row6_col3" class="data row6 col3" >0.45</td>
      <td id="T_f4a83_row6_col4" class="data row6 col4" >0.45</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row7" class="row_heading level0 row7" >7</th>
      <td id="T_f4a83_row7_col0" class="data row7 col0" >Stochastic Gradient Descent</td>
      <td id="T_f4a83_row7_col1" class="data row7 col1" >0.46</td>
      <td id="T_f4a83_row7_col2" class="data row7 col2" >0.46</td>
      <td id="T_f4a83_row7_col3" class="data row7 col3" >0.46</td>
      <td id="T_f4a83_row7_col4" class="data row7 col4" >0.45</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row8" class="row_heading level0 row8" >8</th>
      <td id="T_f4a83_row8_col0" class="data row8 col0" >Gradient Boosting</td>
      <td id="T_f4a83_row8_col1" class="data row8 col1" >0.44</td>
      <td id="T_f4a83_row8_col2" class="data row8 col2" >0.44</td>
      <td id="T_f4a83_row8_col3" class="data row8 col3" >0.44</td>
      <td id="T_f4a83_row8_col4" class="data row8 col4" >0.43</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row9" class="row_heading level0 row9" >9</th>
      <td id="T_f4a83_row9_col0" class="data row9 col0" >AdaBoost</td>
      <td id="T_f4a83_row9_col1" class="data row9 col1" >0.35</td>
      <td id="T_f4a83_row9_col2" class="data row9 col2" >0.36</td>
      <td id="T_f4a83_row9_col3" class="data row9 col3" >0.35</td>
      <td id="T_f4a83_row9_col4" class="data row9 col4" >0.35</td>
    </tr>
    <tr>
      <th id="T_f4a83_level0_row10" class="row_heading level0 row10" >10</th>
      <td id="T_f4a83_row10_col0" class="data row10 col0" >Quadratic Discriminant Analysis</td>
      <td id="T_f4a83_row10_col1" class="data row10 col1" >0.17</td>
      <td id="T_f4a83_row10_col2" class="data row10 col2" >0.2</td>
      <td id="T_f4a83_row10_col3" class="data row10 col3" >0.17</td>
      <td id="T_f4a83_row10_col4" class="data row10 col4" >0.15</td>
    </tr>
  </tbody>
</table>
