In [2]:
import sys
sys.path.append("..")
from models.optimisers import GridSearch
import numpy as np
from data_processing.preprocessing import *
from data_processing.cross_validation import *
from data_processing.train_test_split import *
from models.classification_tree import ClassificationTree
from models.logistic_regression import SoftmaxRegression
from models.knn import KNearestNeighbours

# Load the Titanic CSV into a feature matrix and a target vector.
# NOTE(review): target_col=1 presumably selects the label column by index — confirm against load_dataset.
X_titanic, y_titanic = load_dataset(
    file_path="../datasets/Titanic-Dataset.csv",
    target_col=1,
    sep=',',
    header=True,
)

# Each feature family gets its own column list and its own pipeline.
# NOTE(review): confirm whether these indices refer to the CSV columns or to X_titanic
# after the target column has been split off.

# --- categorical features: fill gaps with the most frequent value, then one-hot encode ---
categorical_columns = [3, 7, 9]
categorical_pipeline = PreprocessingPipeline(
    [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder()),
    ]
)

# --- numerical features: mean-impute, convert to numeric, then min-max scale ---
numerical_columns = [4, 5, 6, 8]
numerical_pipeline = PreprocessingPipeline(
    [
        ("imputer", SimpleImputer(strategy="mean")),
        ("converter", NumericConverter()),
        ("scaler", MinMaxScaler()),
    ]
)

# --- ordinal features: fill gaps with the most frequent value, then ordinal encode ---
ordinal_columns = [1]
ordinal_pipeline = PreprocessingPipeline(
    [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OrdinalEncoder()),
    ]
)

# Bundle the three (pipeline, columns) pairs into a single preprocessor.
# Keyword order is kept as categorical -> numerical -> ordinal in case it determines
# the layout of the transformed output.
preprocessor = CombinedPreprocessor(
    categorical=(categorical_pipeline, categorical_columns),
    numerical=(numerical_pipeline, numerical_columns),
    ordinal=(ordinal_pipeline, ordinal_columns),
)

# Split off a validation set used to pick hyperparameters.
# NOTE(review): 0.2 and 2233 are positional — presumably the validation fraction and the
# random seed; confirm against train_test_split's signature.
X_train_t, X_val_t, y_train_t, y_val_t = train_test_split(X_titanic, y_titanic, 0.2, 2233)

# One grid search per candidate model (constructed in the order KNN, tree, softmax).
knn_search, tree_search, softmax_search = (
    GridSearch(KNearestNeighbours()),
    GridSearch(ClassificationTree()),
    GridSearch(SoftmaxRegression()),
)

# Fit the preprocessor on the training split only, then reuse it for the validation split.
# The raw splits are overwritten by their transformed versions.
X_train_t = preprocessor.fit_transform(X_train_t)
X_val_t = preprocessor.transform(X_val_t)

# Run every search against the same preprocessed train/validation data.
_titanic_searches = (knn_search, tree_search, softmax_search)
for _search in _titanic_searches:
    _search.fit(X_train_t, y_train_t, X_val_t, y_val_t)

# Keep the best model found by each search under its own name.
knn_titanic, tree_titanic, softmax_titanic = [
    _search.best_model_ for _search in _titanic_searches
]

# Shared k-fold settings: every model is evaluated on the full raw dataset with the same
# fold count, seed and preprocessor.
_cv_kwargs = dict(X=X_titanic, y=y_titanic, k=5, seed=2108, preprocessor=preprocessor)

# Phase 1: collect (true labels, predictions) for each selected model.
true_labels_titanic_knn, predictions_titanic_knn = k_folds_predictions(knn_titanic, **_cv_kwargs)
true_labels_titanic_tree, predictions_titanic_tree = k_folds_predictions(tree_titanic, **_cv_kwargs)
true_labels_titanic_softmax, predictions_titanic_softmax = k_folds_predictions(softmax_titanic, **_cv_kwargs)

# Phase 2: convert labels and predictions with one shared converter.
# For each model the converter is fitted on the true labels first and then applied to the
# predictions; tuple elements are evaluated left to right, so that order is preserved.
numConvert = NumericConverter()

true_labels_titanic_knn, predictions_titanic_knn = (
    numConvert.fit_transform(true_labels_titanic_knn),
    numConvert.transform(predictions_titanic_knn),
)
true_labels_titanic_tree, predictions_titanic_tree = (
    numConvert.fit_transform(true_labels_titanic_tree),
    numConvert.transform(predictions_titanic_tree),
)
true_labels_titanic_softmax, predictions_titanic_softmax = (
    numConvert.fit_transform(true_labels_titanic_softmax),
    numConvert.transform(predictions_titanic_softmax),
)

# Phase 3: accuracy = fraction of positions where prediction equals the true label.
for _label, _truth, _pred in (
    ("KNN Accuracy: ", true_labels_titanic_knn, predictions_titanic_knn),
    ("CART Accuracy: ", true_labels_titanic_tree, predictions_titanic_tree),
    ("Softmax Accuracy: ", true_labels_titanic_softmax, predictions_titanic_softmax),
):
    print(_label, np.mean(_truth == _pred))



KNN Accuracy:  0.8148148148148148
CART Accuracy:  0.8204264870931538
Softmax Accuracy:  0.7878787878787878


In [3]:
from data_processing.metrics import calculate_metrics
# Report the metrics returned by calculate_metrics for each model's cross-validated predictions.
for _label, _truth, _pred in (
    ("KNN on Titanic metrics: ", true_labels_titanic_knn, predictions_titanic_knn),
    ("CART on Titanic metrics: ", true_labels_titanic_tree, predictions_titanic_tree),
    ("Softmax on Titanic metrics: ", true_labels_titanic_softmax, predictions_titanic_softmax),
):
    print(_label, calculate_metrics(_truth, _pred))


KNN on Titanic metrics:  {'precision': 0.8074637673148568, 'recall': 0.7962563512606653, 'f1_score': 0.8008426072055108, 'specificity': 0.7962563512606653}
CART on Titanic metrics:  {'precision': 0.8137953367875648, 'recall': 0.8019125683060109, 'f1_score': 0.8067538551629905, 'specificity': 0.8019125683060109}
Softmax on Titanic metrics:  {'precision': 0.7776785714285714, 'recall': 0.768334771354616, 'f1_score': 0.7721643835616439, 'specificity': 0.768334771354616}
