In [1]:
# data analysis and wrangling
import pandas as pd
import numpy as np
import random as rnd

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# machine learning
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier

In [2]:
import pandas as pd

# Read the training dataset
train_data = pd.read_csv('train.csv')

# Read the test dataset
test_data = pd.read_csv('test.csv')

# Display metadata for the training dataset
print("Training Dataset Summary:")
train_data.info()

# Preview the first few entries in the training dataset
print("\nSample Records from Training Data:")
print(train_data.head())

# Display metadata for the test dataset
print("\nTest Dataset Summary:")
test_data.info()

# Preview the first few entries in the test dataset
print("\nSample Records from Test Data:")
print(test_data.head())


Training Dataset Summary:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB

Sample Records from Training Data:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4 

In [6]:
def get_title(name):
    title_search = re.search(' ([A-Za-z]+)\.', name)
    if title_search:
        return title_search.group(1)
    return ""


def map_title(title):
    title_mapping = {
        "Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4,
        "Dr": 5, "Rev": 5, "Col": 5, "Major": 5, "Mlle": 2,
        "Countess": 5, "Ms": 2, "Lady": 5, "Jonkheer": 5,
        "Don": 5, "Dona": 5, "Mme": 3, "Capt": 5, "Sir": 5
    }
    return title_mapping.get(title, 5) 

  title_search = re.search(' ([A-Za-z]+)\.', name)


In [7]:
import re

# Extract title from the 'Name' column
print("Extracting titles from names...")
train_data["Title"] = train_data["Name"].apply(get_title)
test_data["Title"] = test_data["Name"].apply(get_title)

# Display unique titles found in each dataset
print(f"Unique titles in training data: {train_data['Title'].unique()}")
print(f"Unique titles in test data: {test_data['Title'].unique()}")

# Convert extracted titles to numerical categories
print("\nMapping titles to numeric values...")
train_data["Title"] = train_data["Title"].apply(map_title)
test_data["Title"] = test_data["Title"].apply(map_title)

# Preview the first few entries to ensure correct processing
print("\nTraining data sample with updated titles:")
print(train_data[["Name", "Title"]].head())

print("\nTest data sample with updated titles:")
print(test_data[["Name", "Title"]].head())

# Display the distribution of title categories
print("\nTitle distribution in training data:")
print(train_data["Title"].value_counts().sort_index())

print("\nTitle distribution in test data:")
print(test_data["Title"].value_counts().sort_index())

# Save the modified datasets to CSV files
print("\nSaving updated datasets...")
train_data.to_csv("train_updated.csv", index=False)
test_data.to_csv("test_updated.csv", index=False)

print("Files saved: 'train_updated.csv' and 'test_updated.csv'")


Extracting titles from names...
Unique titles in training data: ['Mr' 'Mrs' 'Miss' 'Master' 'Don' 'Rev' 'Dr' 'Mme' 'Ms' 'Major' 'Lady'
 'Sir' 'Mlle' 'Col' 'Capt' 'Countess' 'Jonkheer']
Unique titles in test data: ['Mr' 'Mrs' 'Miss' 'Master' 'Ms' 'Col' 'Rev' 'Dr' 'Dona']

Mapping titles to numeric values...

Training data sample with updated titles:
                                                Name  Title
0                            Braund, Mr. Owen Harris      1
1  Cumings, Mrs. John Bradley (Florence Briggs Th...      3
2                             Heikkinen, Miss. Laina      2
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)      3
4                           Allen, Mr. William Henry      1

Test data sample with updated titles:
                                           Name  Title
0                              Kelly, Mr. James      1
1              Wilkes, Mrs. James (Ellen Needs)      3
2                     Myles, Mr. Thomas Francis      1
3                             

In [8]:
# Function to encode 'Sex' column into numerical values
def sex_to_numeric(sex):
    return 1 if sex == 'male' else 0

# Transform 'Sex' column in both datasets
train_data["Sex"] = train_data["Sex"].apply(sex_to_numeric)
test_data["Sex"] = test_data["Sex"].apply(sex_to_numeric)

# Preview data after transformation
print("Preview of training data with encoded 'Sex' column:")
print(train_data[["PassengerId", "Sex", "Age", "Survived"]].head())

print("\nPreview of test data with encoded 'Sex' column:")
print(test_data[["PassengerId", "Sex", "Age"]].head())

# Display distribution of encoded 'Sex' values
print("\nSex distribution in training dataset:")
print(train_data["Sex"].value_counts())

print("\nSex distribution in test dataset:")
print(test_data["Sex"].value_counts())

# Confirm unique values in the 'Sex' column
print("\nDistinct values in 'Sex' column (train):", train_data["Sex"].unique())
print("Distinct values in 'Sex' column (test):", test_data["Sex"].unique())

# Save the processed datasets
train_data.to_csv("train_updated.csv", index=False)
test_data.to_csv("test_updated.csv", index=False)

print("\nProcessed datasets successfully saved as 'train_updated.csv' and 'test_updated.csv'")

Preview of training data with encoded 'Sex' column:
   PassengerId  Sex   Age  Survived
0            1    1  22.0         0
1            2    0  38.0         1
2            3    0  26.0         1
3            4    0  35.0         1
4            5    1  35.0         0

Preview of test data with encoded 'Sex' column:
   PassengerId  Sex   Age
0          892    1  34.5
1          893    0  47.0
2          894    1  62.0
3          895    1  27.0
4          896    0  22.0

Sex distribution in training dataset:
Sex
1    577
0    314
Name: count, dtype: int64

Sex distribution in test dataset:
Sex
1    266
0    152
Name: count, dtype: int64

Distinct values in 'Sex' column (train): [1 0]
Distinct values in 'Sex' column (test): [1 0]

Processed datasets successfully saved as 'train_updated.csv' and 'test_updated.csv'


In [9]:
# Function to compute the family size for each passenger
def calculate_family_size(row):
    return row["SibSp"] + row["Parch"] + 1  # Including the passenger themselves

# Generate the 'FamilySize' column for both datasets
train_data["FamilySize"] = train_data.apply(calculate_family_size, axis=1)
test_data["FamilySize"] = test_data.apply(calculate_family_size, axis=1)

# Preview data to verify the new column
print("Sample of training data with 'FamilySize' column added:")
print(train_data[["PassengerId", "SibSp", "Parch", "FamilySize"]].head())

print("\nSample of test data with 'FamilySize' column added:")
print(test_data[["PassengerId", "SibSp", "Parch", "FamilySize"]].head())

# Display the distribution of family sizes
print("\nFamily size distribution in the training dataset:")
print(train_data["FamilySize"].value_counts().sort_index())

print("\nFamily size distribution in the test dataset:")
print(test_data["FamilySize"].value_counts().sort_index())

# Save the processed datasets to CSV files
train_data.to_csv("train_updated.csv", index=False)
test_data.to_csv("test_updated.csv", index=False)

print("\nThe processed datasets have been successfully saved as 'train_updated.csv' and 'test_updated.csv'")


Sample of training data with 'FamilySize' column added:
   PassengerId  SibSp  Parch  FamilySize
0            1      1      0           2
1            2      1      0           2
2            3      0      0           1
3            4      1      0           2
4            5      0      0           1

Sample of test data with 'FamilySize' column added:
   PassengerId  SibSp  Parch  FamilySize
0          892      0      0           1
1          893      1      0           2
2          894      0      0           1
3          895      0      0           1
4          896      1      1           3

Family size distribution in the training dataset:
FamilySize
1     537
2     161
3     102
4      29
5      15
6      22
7      12
8       6
11      7
Name: count, dtype: int64

Family size distribution in the test dataset:
FamilySize
1     253
2      74
3      57
4      14
5       7
6       3
7       4
8       2
11      4
Name: count, dtype: int64

The processed datasets have been successfully 

In [10]:
# Display the original column names before modifications
print("Initial columns in training dataset:", train_data.columns.tolist())
print("Initial columns in test dataset:", test_data.columns.tolist())

# Specify columns to be dropped and remove them from both datasets
columns_to_drop = ["Cabin", "Ticket"]
train_data = train_data.drop(columns=columns_to_drop)
test_data = test_data.drop(columns=columns_to_drop)

# Confirm column removal by displaying the updated column lists
print("\nColumns remaining in training dataset after removal:", train_data.columns.tolist())
print("Columns remaining in test dataset after removal:", test_data.columns.tolist())

# Display a preview of the modified datasets
print("\nSample rows from training dataset post-modification:")
print(train_data.head())

print("\nSample rows from test dataset post-modification:")
print(test_data.head())

# Save the processed datasets to new CSV files
train_data.to_csv("train_updated.csv", index=False)
test_data.to_csv("test_updated.csv", index=False)

print("\nThe modified datasets have been successfully stored as 'train_updated.csv' and 'test_updated.csv'")


Initial columns in training dataset: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked', 'Title', 'FamilySize']
Initial columns in test dataset: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked', 'Title', 'FamilySize']

Columns remaining in training dataset after removal: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Title', 'FamilySize']
Columns remaining in test dataset after removal: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Title', 'FamilySize']

Sample rows from training dataset post-modification:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name  Sex 

In [11]:
# Check and display the count of missing values in the Age column before imputation
print("Missing values in Age column (train) before filling:", train_data["Age"].isna().sum())
print("Missing values in Age column (test) before filling:", test_data["Age"].isna().sum())

# Replace null values in the Age column with 29
train_data["Age"] = train_data["Age"].fillna(29)
test_data["Age"] = test_data["Age"].fillna(29)

# Confirm and display the count of missing values in Age column after imputation
print("\nMissing values in Age column (train) after filling:", train_data["Age"].isna().sum())
print("Missing values in Age column (test) after filling:", test_data["Age"].isna().sum())

# Preview the updated data
print("\nSample rows from training dataset after handling Age:")
print(train_data[["PassengerId", "Age"]].head())

print("\nSample rows from test dataset after handling Age:")
print(test_data[["PassengerId", "Age"]].head())

# Save the modified datasets
train_data.to_csv("train_updated.csv", index=False)
test_data.to_csv("test_updated.csv", index=False)

print("\nProcessed datasets have been successfully saved as 'train_updated.csv' and 'test_updated.csv'")


Missing values in Age column (train) before filling: 177
Missing values in Age column (test) before filling: 86

Missing values in Age column (train) after filling: 0
Missing values in Age column (test) after filling: 0

Sample rows from training dataset after handling Age:
   PassengerId   Age
0            1  22.0
1            2  38.0
2            3  26.0
3            4  35.0
4            5  35.0

Sample rows from test dataset after handling Age:
   PassengerId   Age
0          892  34.5
1          893  47.0
2          894  62.0
3          895  27.0
4          896  22.0

Processed datasets have been successfully saved as 'train_updated.csv' and 'test_updated.csv'


In [12]:
# Function to assign age category based on age range
def assign_age_category(age):
    if 0 <= age <= 25:
        return 1
    elif 26 <= age <= 50:
        return 2
    elif 51 <= age <= 75:
        return 3
    else:
        return 4

# Apply the function to both training and test datasets to categorize 'Age'
train_data['Age'] = train_data['Age'].apply(assign_age_category)
test_data['Age'] = test_data['Age'].apply(assign_age_category)

# Display the first few rows of both datasets to verify the new Age categories
print("First few rows of training data with Age categories:")
print(train_data[['PassengerId', 'Age']].head())

print("\nFirst few rows of test data with Age categories:")
print(test_data[['PassengerId', 'Age']].head())

# Show the distribution of the new Age categories in the training dataset
print("\nAge category distribution in training data:")
print(train_data['Age'].value_counts().sort_index())

# Show the distribution of the new Age categories in the test dataset
print("\nAge category distribution in test data:")
print(test_data['Age'].value_counts().sort_index())

# Save the modified datasets to CSV
train_data.to_csv('train_updated.csv', index=False)
test_data.to_csv('test_updated.csv', index=False)

print("\nUpdated datasets have been saved as 'train_updated.csv' and 'test_updated.csv'")


First few rows of training data with Age categories:
   PassengerId  Age
0            1    1
1            2    2
2            3    2
3            4    2
4            5    2

First few rows of test data with Age categories:
   PassengerId  Age
0          892    2
1          893    2
2          894    3
3          895    2
4          896    1

Age category distribution in training data:
Age
1    301
2    526
3     63
4      1
Name: count, dtype: int64

Age category distribution in test data:
Age
1    142
2    245
3     30
4      1
Name: count, dtype: int64

Updated datasets have been saved as 'train_updated.csv' and 'test_updated.csv'


In [13]:
# Display the original column names in both datasets
print("Original columns in the training dataset:", train_data.columns.tolist())
print("Original columns in the test dataset:", test_data.columns.tolist())

# Remove the 'Fare' column from both the training and test datasets
train_data.drop('Fare', axis=1, inplace=True)
test_data.drop('Fare', axis=1, inplace=True)

# Show the updated column names after removing 'Fare'
print("\nColumns in the training dataset after removing 'Fare':", train_data.columns.tolist())
print("Columns in the test dataset after removing 'Fare':", test_data.columns.tolist())

# Show the first few rows of both datasets to verify the removal
print("\nSample rows from the training dataset after column removal:")
print(train_data.head())

print("\nSample rows from the test dataset after column removal:")
print(test_data.head())

# Save the modified datasets to CSV files
train_data.to_csv('train_updated.csv', index=False)
test_data.to_csv('test_updated.csv', index=False)

print("\nThe datasets have been successfully saved as 'train_updated.csv' and 'test_updated.csv'")


Original columns in the training dataset: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Title', 'FamilySize']
Original columns in the test dataset: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Title', 'FamilySize']

Columns in the training dataset after removing 'Fare': ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked', 'Title', 'FamilySize']
Columns in the test dataset after removing 'Fare': ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked', 'Title', 'FamilySize']

Sample rows from the training dataset after column removal:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name  Sex  Age  SibSp  Parch  \
0          

In [14]:
# Print the original column names in both datasets
print("Initial columns in the training dataset:", train_data.columns.tolist())
print("Initial columns in the test dataset:", test_data.columns.tolist())

# Columns to be removed from both datasets
columns_to_remove = ['Fare', 'SibSp', 'Parch', 'TitleNum', 'AgeCategory']

# Remove the specified columns, ignoring errors if they don't exist
train_data.drop(columns=columns_to_remove, errors='ignore', inplace=True)
test_data.drop(columns=columns_to_remove, errors='ignore', inplace=True)

# Print the updated column names to verify the changes
print("\nColumns in the training dataset after removal:", train_data.columns.tolist())
print("Columns in the test dataset after removal:", test_data.columns.tolist())

# Show the first few rows to confirm the removal of columns
print("\nSample rows from the training dataset after column removal:")
print(train_data.head())

print("\nSample rows from the test dataset after column removal:")
print(test_data.head())

# Save the updated datasets to CSV files
train_data.to_csv('train_updated.csv', index=False)
test_data.to_csv('test_updated.csv', index=False)

print("\nUpdated datasets have been successfully saved as 'train_updated.csv' and 'test_updated.csv'")


Initial columns in the training dataset: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked', 'Title', 'FamilySize']
Initial columns in the test dataset: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked', 'Title', 'FamilySize']

Columns in the training dataset after removal: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'Embarked', 'Title', 'FamilySize']
Columns in the test dataset after removal: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'Embarked', 'Title', 'FamilySize']

Sample rows from the training dataset after column removal:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name  Sex  Age Embarked  \
0                            Braund, Mr. Owen Harris    1    1        S   
1  Cumings,

In [15]:
# Print initial column names from both training and test datasets
print("Initial columns in the training dataset:", train_data.columns.tolist())
print("Initial columns in the test dataset:", test_data.columns.tolist())

# Remove the 'Name' column from both datasets
train_data.drop(columns=['Name'], inplace=True)
test_data.drop(columns=['Name'], inplace=True)

# Print updated column names to verify the change
print("\nColumns in the training dataset after removal:", train_data.columns.tolist())
print("Columns in the test dataset after removal:", test_data.columns.tolist())

# Display the first few rows to ensure the column removal was successful
print("\nPreview of the training dataset after column removal:")
print(train_data.head())

print("\nPreview of the test dataset after column removal:")
print(test_data.head())

# Save the modified datasets to CSV files
train_data.to_csv('train_updated.csv', index=False)
test_data.to_csv('test_updated.csv', index=False)

print("\nModified datasets have been successfully saved as 'train_updated.csv' and 'test_updated.csv'")


Initial columns in the training dataset: ['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'Embarked', 'Title', 'FamilySize']
Initial columns in the test dataset: ['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'Embarked', 'Title', 'FamilySize']

Columns in the training dataset after removal: ['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'Embarked', 'Title', 'FamilySize']
Columns in the test dataset after removal: ['PassengerId', 'Pclass', 'Sex', 'Age', 'Embarked', 'Title', 'FamilySize']

Preview of the training dataset after column removal:
   PassengerId  Survived  Pclass  Sex  Age Embarked  Title  FamilySize
0            1         0       3    1    1        S      1           2
1            2         1       1    0    2        C      3           2
2            3         1       3    0    2        S      2           1
3            4         1       1    0    2        S      3           2
4            5         0       3    1    2        S      1           1

Preview o

In [16]:
# Function to map Embarked values to numeric codes
def map_embarked(value):
    mapping = {'S': 1, 'Q': 2, 'C': 3}
    return mapping.get(value, 0)  # Default to 0 for any unknown values

# Apply the mapping to the 'Embarked' column in both datasets
train_data['Embarked'] = train_data['Embarked'].map(map_embarked)
test_data['Embarked'] = test_data['Embarked'].map(map_embarked)

# Show the first few rows to confirm the changes
print("Preview of training data with updated 'Embarked' column:")
print(train_data[['PassengerId', 'Embarked']].head())

print("\nPreview of test data with updated 'Embarked' column:")
print(test_data[['PassengerId', 'Embarked']].head())

# Display the distribution of 'Embarked' values in the training dataset
print("\nFrequency of 'Embarked' values in the training dataset:")
print(train_data['Embarked'].value_counts().sort_index())

# Display the distribution of 'Embarked' values in the test dataset
print("\nFrequency of 'Embarked' values in the test dataset:")
print(test_data['Embarked'].value_counts().sort_index())

# Save the modified datasets to new CSV files
train_data.to_csv('train_updated.csv', index=False)
test_data.to_csv('test_updated.csv', index=False)

print("\nThe updated datasets have been saved as 'train_updated.csv' and 'test_updated.csv'")


Preview of training data with updated 'Embarked' column:
   PassengerId  Embarked
0            1         1
1            2         3
2            3         1
3            4         1
4            5         1

Preview of test data with updated 'Embarked' column:
   PassengerId  Embarked
0          892         2
1          893         1
2          894         2
3          895         1
4          896         1

Frequency of 'Embarked' values in the training dataset:
Embarked
0      2
1    644
2     77
3    168
Name: count, dtype: int64

Frequency of 'Embarked' values in the test dataset:
Embarked
1    270
2     46
3    102
Name: count, dtype: int64

The updated datasets have been saved as 'train_updated.csv' and 'test_updated.csv'


In [22]:
# Import necessary libraries
train_set = pd.read_csv('train_updated.csv')
test_set = pd.read_csv('test_updated.csv')

# Separate features and target for training dataset
X_train_set = train_set.drop('Survived', axis=1)
y_train_set = train_set['Survived']

# Prepare the test dataset
X_test_set = test_set.copy()

# Feature scaling
scaler_tool = StandardScaler()
X_train_scaled_set = scaler_tool.fit_transform(X_train_set)
X_test_scaled_set = scaler_tool.transform(X_test_set)

# Define the models
classification_models = {
    'SVM': SVC(),
    'KNN Classifier': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest Classifier': RandomForestClassifier(n_estimators=100),
    'Naive Bayes Classifier': GaussianNB(),
    'Perceptron Model': Perceptron(),
    'SGD Classifier': SGDClassifier(),
    'Linear SVC Model': LinearSVC(),
    'Decision Tree Classifier': DecisionTreeClassifier()
}

# Train the models and record the accuracies
model_accuracies = {}
for model_name, classifier in classification_models.items():
    classifier.fit(X_train_scaled_set, y_train_set)
    accuracy_score = round(classifier.score(X_train_scaled_set, y_train_set) * 100, 2)
    model_accuracies[model_name] = accuracy_score

# Sort the accuracy scores in descending order
sorted_model_accuracies = dict(sorted(model_accuracies.items(), key=lambda item: item[1], reverse=True))

# Display the sorted accuracies
print("\nAccuracies of the models (from highest to lowest):")
for model_name, accuracy in sorted_model_accuracies.items():
    print(f"{model_name}: {accuracy}%")

# Optional: Make predictions using the model with the highest accuracy (Random Forest assumed to be the best)
best_classifier = classification_models['Random Forest Classifier']
predicted_values = best_classifier.predict(X_test_scaled_set)

# Prepare submission DataFrame
submission_file = pd.DataFrame({
    'PassengerId': test_set['PassengerId'],
    'Survived': predicted_values
})


Accuracies of the models (from highest to lowest):
Decision Tree Classifier: 100.0%
Random Forest Classifier: 99.89%
KNN Classifier: 85.86%
SVM: 83.73%
Logistic Regression: 80.7%
Naive Bayes Classifier: 80.58%
Linear SVC Model: 80.36%
Perceptron Model: 79.35%
SGD Classifier: 74.97%


