# Selecting the best model for classifying the fuel type

### Importing the standard libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.keras.api import keras

### Importing the dataset

In [None]:
# Load the car-details CSV (relative path) into a DataFrame.
dataset = pd.read_csv('../assets/car-details-for-ml.csv')

# Preview the first rows to check that the file was read as expected.
dataset.head()

### Splitting the data into training and test sets

In [None]:
# Features: every column except the last one, as a NumPy array.
X = dataset.iloc[:, :-1].values
# Target: the last column only.
# NOTE(review): presumably the fuel-type label, per the notebook title - confirm against the CSV.
y = dataset.iloc[:, -1].values

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set. Rows are shuffled before splitting,
# and the fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=73)

### Feature scaling the data

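Feature scaling is a common practice in machine learning: some models are sensitive to the scale of their input features, so the features are normalized before training. The scaler is fitted on the training set only and then reused on the test set, so that no information about the test data leaks into training.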

In [None]:
from sklearn.preprocessing import StandardScaler

# One scaler instance is shared by both sets so they receive the same transformation.
X_std_scaler = StandardScaler()

# Fit the scaling statistics on the training set only, then apply those same
# statistics to the test set (transform, not fit_transform).
X_train_scaled = X_std_scaler.fit_transform(X_train)
X_test_scaled = X_std_scaler.transform(X_test)

# Sample the scaled values: first row, up to the first 12 columns, of each set
print(X_train_scaled[:1, :12])
print(X_test_scaled[:1, :12])

### Defining helpers for evaluating the models

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Confusion matrix = table of counts with one row per actual class and one
#                    column per predicted class
# Accuracy score   = fraction of predictions that match the actual values
#                    (a value between 0 and 1, not a percentage)

# Column names matching the [model, confusion matrix, accuracy] rows stored in `results`.
# NOTE(review): presumably used as headers for a summary table later on - confirm.
labels = ['Model', 'Conf Matrix', 'Accuracy']
# Shared accumulator: one row is appended per evaluated model.
results = []

def model_evaluation(model: str, y_test, y_pred):
    """Score one model's predictions and record them in the shared ``results`` list.

    Args:
        model: Display name of the model being evaluated.
        y_test: Actual target values.
        y_pred: Values predicted by the model for the same samples.

    Returns:
        A new four-element list ``[model, model, confusion matrix, accuracy]``.

    Side effects:
        Appends the three-element row ``[model, confusion matrix, accuracy]``
        to the module-level ``results`` list.
    """
    matrix = confusion_matrix(y_test, y_pred)
    score = accuracy_score(y_test, y_pred)

    row = [model, matrix, score]
    results.append(row)

    # NOTE(review): the returned list repeats the model name, so it has four
    # elements while the stored row (and `labels`) has three. This looks
    # unintentional, but it is kept as-is because callers may index into it.
    return [model, *row]