# Titanic Dataset

We started with the Titanic dataset from Kaggle. The dataset contains 891 rows and 12 columns. The columns are as follows:

- PassengerId: Unique ID of the passenger
- Survived: Whether the passenger survived or not (0 = No, 1 = Yes)
- Pclass: Ticket class (1 = 1st, 2 = 2nd, 3 = 3rd)
- Name: Name of the passenger
- Sex: Sex of the passenger (male or female)
- Age: Age of the passenger
- SibSp: Number of siblings/spouses aboard the Titanic
- Parch: Number of parents/children aboard the Titanic
- Ticket: Ticket number
- Fare: Passenger fare
- Cabin: Cabin number
- Embarked: Port of embarkation (C = Cherbourg, Q = Queenstown, S = Southampton)

Notably, the 'Survived' column is the target variable and the remaining columns are the features. Additionally, Age in the data is fractional if less than 1. If the age is estimated, it is in the form of xx.5. SibSp and Parch are the number of siblings/spouses and parents/children aboard the Titanic, respectively.

## Library Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
import shap
import statsmodels.api as sm

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, class_likelihood_ratios, RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn_pandas import DataFrameMapper
from shap import LinearExplainer, KernelExplainer, Explanation
from scipy.stats import pearsonr
import matplotlib.gridspec as gridspec


## Data Cleaning and Feature Engineering

In [None]:
# Load the training CSV into a DataFrame; the path is relative to the
# notebook's working directory.
df_titanic_raw = pd.read_csv('titanic/train.csv')

In [None]:
# Keep only the rows where age, fare and port of embarkation are all present,
# then remove the identifier-like columns that are not used afterwards.
required_columns = ['Age', 'Fare', 'Embarked']
df_titanic_raw = df_titanic_raw.dropna(subset=required_columns)
df_titanic_raw = df_titanic_raw.drop(columns=['PassengerId', 'Name', 'Ticket'])

In [None]:
# Integer-encode the categorical columns.
#
# The mapped column is assigned back to the frame instead of calling
# ``replace(..., inplace=True)`` on a selected column: that chained in-place
# form is warned about by recent pandas versions and does not modify the
# frame when copy-on-write is active.
sex_codes = {'male': 1, 'female': 0}
embarked_codes = {'C': 0, 'Q': 1, 'S': 2}

# Re-number the rows first so the assignments below operate on a fresh frame.
df_titanic_raw = df_titanic_raw.reset_index(drop=True)
df_titanic_raw['Sex'] = df_titanic_raw['Sex'].map(sex_codes)
df_titanic_raw['Embarked'] = df_titanic_raw['Embarked'].map(embarked_codes)
df_titanic_raw

In [None]:
# Display labels for the integer-encoded columns; the position of a label in
# its list is the integer code used in the data.
survived_labels = ['No', 'Yes']
age_labels = ['Child', 'Adult']
class_labels = ['1st', '2nd', '3rd']
embarked_labels = ['Cherbourg', 'Queenstown', 'Southampton']
sex_labels = ['Female', 'Male']
deck_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'T', 'U']

# Deck is the first character of the cabin value; a missing cabin becomes the
# placeholder deck 'U'.  Missing values are handled with ``fillna`` rather
# than an identity comparison against ``np.nan``, which only works when the
# missing value happens to be that exact object.
deck_codes = {label: code for code, label in enumerate(deck_labels)}
deck_letters = df_titanic_raw['Cabin'].fillna('U').astype(str).str[0]
# Assign the encoded column directly instead of a chained in-place replace.
df_titanic_raw['Deck'] = deck_letters.map(deck_codes)

# 0 = younger than 18, 1 = everyone else (matches ``age_labels``).
df_titanic_raw['Age_Group'] = np.where(df_titanic_raw['Age'] < 18, 0, 1)

df_titanic_engineered = df_titanic_raw.drop(['Cabin'], axis=1)
# Family_Size counts the relatives aboard (siblings/spouses + parents/children)
# and therefore excludes the passenger; hence the "+ 1" when splitting the fare.
df_titanic_engineered['Family_Size'] = df_titanic_engineered['SibSp'] + df_titanic_engineered['Parch']
df_titanic_engineered['Fare_Per_Person'] = df_titanic_engineered['Fare'] / (df_titanic_engineered['Family_Size'] + 1)
df_titanic = df_titanic_engineered

df_titanic

In [None]:
# --- 2-D PCA projection ------------------------------------------------------
# Only the feature columns are projected; the target is left out so that the
# label used to colour the points does not take part in the decomposition.
pca_features = df_titanic.drop(columns='Survived')
pca = PCA(n_components=2)
pca.fit(pca_features)
pca_df = pd.DataFrame(pca.transform(pca_features), columns=['PC1', 'PC2'])
# Attach the labels by position so the result does not depend on index alignment.
pca_df['Survived'] = df_titanic['Survived'].to_numpy()

# jointplot builds its own figure, so no separate plt.figure() call is made
# (one would only leave an empty figure behind).
jg = sns.jointplot(data=pca_df, x="PC1", y="PC2", hue="Survived", palette='coolwarm')
jg.fig.subplots_adjust(top=0.95)
jg.fig.suptitle('2-D Principal Component Analysis of Titanic Dataset')
# Relabel the legend on the joint (scatter) axes explicitly instead of relying
# on whichever axes happens to be current.
ax = jg.ax_joint
handles, _ = ax.get_legend_handles_labels()
ax.legend(handles=handles, labels=survived_labels, title='Survived?')


# --- Correlation heatmap -----------------------------------------------------
fig, ax0 = plt.subplots(figsize=(10, 5))

ax0.set_title('Titanic Attributes Correlation Heatmap')
corr = df_titanic.corr()
sns.heatmap(ax=ax0, data=corr.round(2), annot=True, cmap="coolwarm", fmt='.2f')

# --- Pairwise Pearson p-values -----------------------------------------------
# Cells are written with a single ``.loc`` call; the chained ``frame[r][c] = v``
# form assigns through a temporary and is not guaranteed to update the frame.
columns = df_titanic.columns
pvalues = pd.DataFrame(index=columns, columns=columns, dtype=float)
for r in columns:
    for c in columns:
        # Use only the rows where both columns are present.
        valid = df_titanic[r].notnull() & df_titanic[c].notnull()
        p_value = pearsonr(df_titanic.loc[valid, r], df_titanic.loc[valid, c])[1]
        pvalues.loc[c, r] = round(p_value, 4)

print(pvalues['Survived'].sort_values())

In [None]:
def percentage_above_bar_relative_to_xgroup(ax, rotation=0):
    """Annotate each bar with its percentage of the total of its x-group.

    Bars are matched across ``ax.containers`` by their position inside the
    container: the i-th bars of all containers form one x-group, and each
    bar is labelled with ``100 * height / sum(heights in the group)``.

    Args:
        ax: Object exposing ``containers`` (iterables of bar-like artists
            with ``get_height``/``get_x``/``get_width``) and ``annotate``.
        rotation: Rotation, in degrees, passed to ``ax.annotate``.

    Containers whose items are not bar-like are ignored, bars with a NaN
    height are treated as empty (not annotated, counted as 0 in the total),
    and groups whose total is 0 are left without annotations.
    """
    bar_groups = []
    for container in ax.containers:
        try:
            heights = [p.get_height() for p in container]
        except (AttributeError, TypeError):
            # Not a collection of bars (e.g. box-plot artists): skip it.
            continue
        bar_groups.append((container, heights))

    if not bar_groups:
        return

    def _as_count(height):
        # A NaN height marks a bar with no data; it must not poison the sum.
        return 0.0 if np.isnan(height) else height

    # Compute each group's total once instead of once per bar.
    # NOTE(review): containers are assumed to list bars in the same x order;
    # shorter containers are tolerated but simply contribute nothing to the
    # positions they lack.
    n_positions = max(len(heights) for _, heights in bar_groups)
    totals = [
        sum(_as_count(heights[i]) for _, heights in bar_groups if i < len(heights))
        for i in range(n_positions)
    ]

    for container, heights in bar_groups:
        for i, (p, height) in enumerate(zip(container, heights)):
            total = totals[i]
            if np.isnan(height) or not total:
                continue
            percentage = f'{(100 * height / total) :.1f}%'
            ax.annotate(percentage, (p.get_x() + p.get_width() / 2, height), size=11, ha='center', va='bottom', rotation=rotation)

In [None]:
# 2x2 overview of survival against age and sex.
fig, ((ax0, ax1), (ax2, ax3)) = plt.subplots(nrows=2, ncols=2, tight_layout=True, figsize=(10, 10))

# Top-left: survivor counts per age group, annotated with within-group shares.
ax0.set_title('Survival by Age Group')
sns.countplot(ax=ax0, data=df_titanic, x="Age_Group", hue="Survived")
percentage_above_bar_relative_to_xgroup(ax0)
ax0.set_xticklabels(age_labels)
ax0.legend(survived_labels, title='Survived?')

# Top-right: stacked age histogram split by survival.
ax1.set_title('Survival by Age')
sns.histplot(ax=ax1, data=df_titanic, x="Age", hue="Survived", multiple="stack")

# Bottom-left: survivor counts per sex, annotated with within-group shares.
ax2.set_title('Survival by Sex')
sns.countplot(ax=ax2, data=df_titanic, x="Sex", hue="Survived")
percentage_above_bar_relative_to_xgroup(ax2)
ax2.set_xticklabels(sex_labels)
ax2.legend(survived_labels, title='Survived?')

# Bottom-right: age distribution per sex and survival.  The percentage helper
# is intentionally not called here: a box plot has no bars to annotate, and
# the helper reads bar heights from the axes' containers.
ax3.set_title('Comparison of Age and Sex by Survival')
sns.boxplot(ax=ax3, data=df_titanic, x="Sex", y="Age", hue="Survived")
ax3.set_xticklabels(sex_labels)
handles, _ = ax3.get_legend_handles_labels()
ax3.legend(handles=handles, labels=survived_labels, title='Survived?')


In [None]:
# Two count plots over the port of embarkation: one split by ticket class,
# one split by survival.  Both panels share the same layout, so they are
# described as data and drawn in a single loop.
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, tight_layout=True, figsize=(10, 5))

embarked_panels = [
    (ax0, 'Class by Embarked Location', 'Pclass', class_labels, 'Class'),
    (ax1, 'Survival by Embarked Location', 'Survived', survived_labels, 'Survived?'),
]

for panel_ax, panel_title, hue_column, legend_labels, legend_title in embarked_panels:
    panel_ax.set_title(panel_title)
    sns.countplot(ax=panel_ax, data=df_titanic, x="Embarked", hue=hue_column)
    # Label each bar with its share of the passengers from that port.
    percentage_above_bar_relative_to_xgroup(panel_ax)
    panel_ax.set_xticklabels(embarked_labels, rotation=45)
    panel_ax.set_xlabel('Embarked Location')
    panel_ax.legend(legend_labels, title=legend_title)

In [None]:
# Fare and deck overview: two half-width panels on the first row, then two
# full-width panels below.
fig = plt.figure(tight_layout=True, figsize=(10, 15))
gs = gridspec.GridSpec(nrows=3, ncols=2)
ax0 = fig.add_subplot(gs[0, 0])
ax1 = fig.add_subplot(gs[0, 1])
ax2 = fig.add_subplot(gs[1, :])
ax3 = fig.add_subplot(gs[2, :])

# Deck codes in label order, so every deck gets a slot on the x axis even if
# it has no passengers in the filtered data.
deck_order = list(range(len(deck_labels)))

# Fare distribution per ticket class.  No percentage annotation here: a box
# plot has no bars for the helper to measure.
ax0.set_title('Fare by Class')
sns.boxplot(ax=ax0, data=df_titanic, x="Pclass", y="Fare")
ax0.set_xticklabels(class_labels)
ax0.set_xlabel('Class')
ax0.set_ylabel('Fare')

# Per-person fare distribution per deck.
ax1.set_title('Personal Fare by Deck')
sns.boxplot(ax=ax1, data=df_titanic, x="Deck", y="Fare_Per_Person", order=deck_order)
ax1.set_xticklabels(deck_labels)
ax1.set_xlabel('Deck')
ax1.set_ylabel('Personal Fare')

# Passenger counts per deck split by class, with within-deck shares.
ax2.set_title('Deck by Class')
sns.countplot(ax=ax2, data=df_titanic, x="Deck", hue="Pclass", order=deck_order)
percentage_above_bar_relative_to_xgroup(ax2, 90)
ax2.set_xticklabels(deck_labels)
ax2.set_xlabel('Deck')
ax2.legend(class_labels, title='Class')

# Passenger counts per deck split by survival, with within-deck shares.
ax3.set_title('Survival by Deck')
sns.countplot(ax=ax3, data=df_titanic, x="Deck", hue="Survived", order=deck_order)
percentage_above_bar_relative_to_xgroup(ax3, 90)
ax3.set_xticklabels(deck_labels)
ax3.set_xlabel('Deck')
ax3.legend(survived_labels, title='Survived?')

In [None]:
# 2x2 grid relating family size and ticket class to fare and survival.
fig, axes = plt.subplots(nrows=2, ncols=2, tight_layout=True, figsize=(10, 10))
(ax0, ax1), (ax2, ax3) = axes

# Top-left: fare distribution for each family size.
sns.boxplot(ax=ax0, data=df_titanic, x="Family_Size", y="Fare")
ax0.set_title('Fare by Family Size')
ax0.set_xlabel('Family Size')
ax0.set_ylabel('Fare')

# Top-right: family-size counts within each class (vertical percentage labels).
sns.countplot(ax=ax1, data=df_titanic, x="Pclass", hue="Family_Size")
percentage_above_bar_relative_to_xgroup(ax1, 90)
ax1.set_title('Family Size by Class')
ax1.set_xlabel('Class')

# Bottom-left: survival counts for each family size.
sns.countplot(ax=ax2, data=df_titanic, x="Family_Size", hue="Survived")
percentage_above_bar_relative_to_xgroup(ax2, 90)
ax2.set_title('Survival by Family Size')
ax2.set_xlabel('Family Size')
ax2.legend(survived_labels, title='Survived?')

# Bottom-right: survival counts for each class.
sns.countplot(ax=ax3, data=df_titanic, x="Pclass", hue="Survived")
percentage_above_bar_relative_to_xgroup(ax3)
ax3.set_title('Survival by Class')
ax3.set_xlabel('Class')
ax3.legend(survived_labels, title='Survived?')


## Dataset Splitting

In [None]:
# Training features/target come from the engineered training frame.
X_train = df_titanic.drop('Survived', axis=1)
y_train = df_titanic.Survived

# Build the evaluation frame by joining the test passengers with the labels
# file on PassengerId.
# NOTE(review): 'gender_submission.csv' is presumably Kaggle's example
# submission rather than ground-truth outcomes; if so, every metric computed
# against y_test measures agreement with that baseline, not real accuracy.
# Confirm, and consider a hold-out split of train.csv instead.
df_titanic_test = pd.read_csv('titanic/test.csv')
df_titanic_test_labels = pd.read_csv('titanic/gender_submission.csv')
df_titanic_test = df_titanic_test.merge(df_titanic_test_labels, on='PassengerId')

# Apply the same cleaning and feature engineering as for the training frame.
df_titanic_test = df_titanic_test.drop(['PassengerId', 'Name', 'Ticket'], axis=1)
df_titanic_test = df_titanic_test.dropna(subset=['Age', 'Fare', 'Embarked']).reset_index(drop=True)

# Encoded columns are assigned back to the frame; a chained
# ``replace(..., inplace=True)`` on a column of a filtered frame is warned
# about by recent pandas versions and is a no-op under copy-on-write.
df_titanic_test['Sex'] = df_titanic_test['Sex'].map({'male': 1, 'female': 0})
df_titanic_test['Embarked'] = df_titanic_test['Embarked'].map({'C': 0, 'Q': 1, 'S': 2})

# Deck = first character of the cabin value, 'U' when the cabin is missing.
test_deck_codes = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'T': 7, 'U': 8}
test_deck_letters = df_titanic_test['Cabin'].fillna('U').astype(str).str[0]
df_titanic_test['Deck'] = test_deck_letters.map(test_deck_codes)

# 0 = younger than 18, 1 = everyone else.
df_titanic_test['Age_Group'] = np.where(df_titanic_test['Age'] < 18, 0, 1)
df_titanic_test = df_titanic_test.drop(['Cabin'], axis=1)
# Relatives aboard, excluding the passenger; hence "+ 1" for the fare split.
df_titanic_test['Family_Size'] = df_titanic_test['SibSp'] + df_titanic_test['Parch']
df_titanic_test['Fare_Per_Person'] = df_titanic_test['Fare'] / (df_titanic_test['Family_Size'] + 1)

# Select the test features in the training column order so the estimators see
# identical layouts (raises KeyError if a training column is missing).
X_test = df_titanic_test.drop('Survived', axis=1)[X_train.columns]
y_test = df_titanic_test.Survived

## Naive Bayes

In [None]:
# Fit a Gaussian naive Bayes classifier and predict the test set.
model = GaussianNB().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scalar metrics from the hard predictions.
nb_acc = accuracy_score(y_test, y_pred)
nb_ratio = class_likelihood_ratios(y_test, y_pred)

# Curve displays, stored under nb_* names for the later comparison plots.
nb_roc, nb_det, nb_pr = (
    display_cls.from_estimator(model, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

print(nb_acc)

## Logistic Regression

In [None]:
# Fit a logistic regression with default settings and predict the test set.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scalar metrics from the hard predictions.
lr_acc = accuracy_score(y_test, y_pred)
lr_ratio = class_likelihood_ratios(y_test, y_pred)

# Curve displays, stored under lr_* names for the later comparison plots.
lr_roc, lr_det, lr_pr = (
    display_cls.from_estimator(model, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

print(lr_acc)

In [None]:
# Horizontal bar chart of the fitted coefficients, one bar per feature.
# Relies on ``model`` still being the logistic regression fitted above.
coef_fig, coef_ax = plt.subplots(figsize=(5, 5))
coefficients = model.coef_[0]
coef_ax.barh(X_train.columns, coefficients)
coef_ax.set_title("Logistic regression Coefficient values")
plt.show()

In [None]:
# Explain the fitted linear model: the training features serve as background
# data and SHAP values are computed for the test rows.
explainer = shap.LinearExplainer(model, X_train.astype(float), feature_perturbation="interventional")
shap_values = explainer.shap_values(X_test.astype(float))

# Both summaries must receive the frame the SHAP values were computed on;
# the values above have one row per X_test row, so X_test is passed to the
# bar summary as well (previously X_train, whose rows do not correspond).
shap.summary_plot(shap_values, X_test)
shap.summary_plot(shap_values, X_test, plot_type="bar")

## Decision Tree

In [None]:
# Fit a decision tree with a fixed random seed and predict the test set.
model = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scalar metrics from the hard predictions.
tree_acc = accuracy_score(y_test, y_pred)
tree_ratio = class_likelihood_ratios(y_test, y_pred)

# Curve displays, stored under tree_* names for the later comparison plots.
tree_roc, tree_det, tree_pr = (
    display_cls.from_estimator(model, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

print(tree_acc)

In [None]:
# Horizontal bar chart of the tree's feature importances in ascending order.
# Relies on ``model`` still being the decision tree fitted above.
importances = model.feature_importances_
features = X_train.columns
# A stable sort keeps tied features in their original column order; the
# index array is now actually used to order both labels and values.
sorted_idx = importances.argsort(kind='stable')
plt.barh(features[sorted_idx], importances[sorted_idx])

## Nearest Neighbours

In [None]:
# Fit a 3-nearest-neighbours classifier and predict the test set.
model = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scalar metrics from the hard predictions.
knn_acc = accuracy_score(y_test, y_pred)
knn_ratio = class_likelihood_ratios(y_test, y_pred)

# Curve displays, stored under knn_* names for the later comparison plots.
knn_roc, knn_det, knn_pr = (
    display_cls.from_estimator(model, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

print(knn_acc)

## Support Vector Machine

In [None]:
# Fit a linear support vector classifier with default settings and predict
# the test set.
model = LinearSVC().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scalar metrics from the hard predictions.
svc_acc = accuracy_score(y_test, y_pred)
svc_ratio = class_likelihood_ratios(y_test, y_pred)

# Curve displays, stored under svc_* names for the later comparison plots.
svc_roc, svc_det, svc_pr = (
    display_cls.from_estimator(model, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

print(svc_acc)

## Neural Network

In [None]:
# Fit a multi-layer perceptron with default settings and predict the test set.
model = MLPClassifier().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Scalar metrics from the hard predictions.
mlp_acc = accuracy_score(y_test, y_pred)
mlp_ratio = class_likelihood_ratios(y_test, y_pred)

# Curve displays, stored under mlp_* names for the later comparison plots.
mlp_roc, mlp_det, mlp_pr = (
    display_cls.from_estimator(model, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

print(mlp_acc)

In [None]:
# Model-agnostic explanation of the current ``model`` through its ``predict``
# function, with the training features as background data.
explainer = KernelExplainer(model.predict, X_train)
shap_values = explainer.shap_values(X_test, nsamples=100)
shap.summary_plot(shap_values, X_test)

# Wrap the raw values in an Explanation so the first test row can be drawn
# as a waterfall plot.
exp = Explanation(
    values=shap_values,
    base_values=explainer.expected_value,
    data=X_test.values,
    feature_names=X_test.columns,
)
first_row_explanation = exp[0]
shap.plots.waterfall(first_row_explanation)

## Comparison Plots

In [None]:
# Overlay the stored curve displays of all six classifiers, one figure per
# curve type.
curve_panels = [
    ('ROC Curve', [lr_roc, tree_roc, knn_roc, nb_roc, svc_roc, mlp_roc]),
    ('DET Curve', [lr_det, tree_det, knn_det, nb_det, svc_det, mlp_det]),
    ('Precision-Recall Curve', [lr_pr, tree_pr, knn_pr, nb_pr, svc_pr, mlp_pr]),
]
for curve_title, displays in curve_panels:
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(curve_title)
    for display in displays:
        display.plot(ax=ax)

# Bar labels, in the same order as the metric lists below.
model_labels = ['Logistic Regression', 'Decision Tree', 'KNN', 'Naive Bayes', 'SVC', 'MLP']

# Accuracy per classifier, with the value printed above each bar.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Accuracy')
ax.bar(model_labels, [lr_acc, tree_acc, knn_acc, nb_acc, svc_acc, mlp_acc])
ax.set_xlabel('Classifier')

for bars in ax.containers:
    ax.bar_label(bars)

# Positive (index 0) and negative (index 1) likelihood ratios side by side.
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, tight_layout=True, figsize=(10, 5))
ratios = [lr_ratio, tree_ratio, knn_ratio, nb_ratio, svc_ratio, mlp_ratio]
ratio_panels = [
    (ax0, 'Positive Likelihood Ratio', 0),
    (ax1, 'Negative Likelihood Ratio', 1),
]
for ratio_ax, ratio_title, ratio_index in ratio_panels:
    ratio_ax.set_title(ratio_title)
    ratio_ax.bar(model_labels, [ratio[ratio_index] for ratio in ratios])
    # Rotate the existing tick labels in place; re-setting the label texts
    # with set_xticklabels is unnecessary here and depends on tick positions.
    ratio_ax.tick_params(axis='x', labelrotation=45)
    ratio_ax.set_xlabel('Classifier')

## Hyperparameter Tuning

### Logistic Regression

In [None]:
# Exhaustive 5-fold grid search over logistic-regression settings, using all
# available cores.
model = LogisticRegression()

parameter_space = dict(
    penalty=['l1', 'l2'],
    C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    solver=['liblinear', 'saga'],
    max_iter=[100, 1000, 2500, 5000],
)

clf = GridSearchCV(estimator=model, param_grid=parameter_space, n_jobs=-1, cv=5)
clf.fit(X_train, y_train)

print('Best parameters found:\n', clf.best_params_)

# Evaluate the search result on the test set with the same metrics as the
# default model, stored under opt_lr_* names.
y_pred = clf.predict(X_test)
opt_lr_acc = accuracy_score(y_test, y_pred)
opt_lr_ratio = class_likelihood_ratios(y_test, y_pred)
opt_lr_roc, opt_lr_det, opt_lr_pr = (
    display_cls.from_estimator(clf, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

In [None]:
# Default vs tuned logistic regression: one figure per curve type, each
# overlaying the default display and the tuned display.
lr_curve_pairs = [
    ('Comparison of Optimal vs Default Logistic Regression ROC Curve', lr_roc, opt_lr_roc),
    ('Comparison of Optimal vs Default Logistic Regression DET Curve', lr_det, opt_lr_det),
    ('Comparison of Optimal vs Default Logistic Regression Precision-Recall Curve', lr_pr, opt_lr_pr),
]
for pair_title, default_display, tuned_display in lr_curve_pairs:
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(pair_title)
    default_display.plot(ax=ax)
    tuned_display.plot(ax=ax)

variant_labels = ['Default', 'Optimal']

# Accuracy of both variants.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Comparison of Optimal vs Default Logistic Regression Accuracy')
ax.bar(variant_labels, [lr_acc, opt_lr_acc])
ax.set_xlabel('Classifier')

# Likelihood ratios of both variants, positive on top and negative below.
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, tight_layout=True, figsize=(10, 5))
lr_ratio_panels = [
    (ax0, 'Comparison of Optimal vs Default Logistic Regression Positive Likelihood Ratio', 0),
    (ax1, 'Comparison of Optimal vs Default Logistic Regression Negative Likelihood Ratio', 1),
]
for ratio_ax, ratio_title, ratio_index in lr_ratio_panels:
    ratio_ax.set_title(ratio_title)
    ratio_ax.bar(variant_labels, [lr_ratio[ratio_index], opt_lr_ratio[ratio_index]])
    ratio_ax.set_xlabel('Classifier')

### Neural Network

In [None]:
# Exhaustive 5-fold grid search over MLP architectures and training settings,
# using all available cores.
model = MLPClassifier()

parameter_space = dict(
    hidden_layer_sizes=[(150, 100, 50), (120, 80, 40), (100, 50, 30), (10, 30, 10), (20,)],
    max_iter=[50, 100, 150],
    activation=['tanh', 'relu'],
    solver=['sgd', 'adam'],
    alpha=[0.0001, 0.05],
    learning_rate=['constant', 'adaptive'],
)

clf = GridSearchCV(estimator=model, param_grid=parameter_space, n_jobs=-1, cv=5)
clf.fit(X_train, y_train)

print('Best parameters found:\n', clf.best_params_)

# Evaluate the search result on the test set with the same metrics as the
# default model, stored under opt_mlp_* names.
y_pred = clf.predict(X_test)
opt_mlp_acc = accuracy_score(y_test, y_pred)
opt_mlp_ratio = class_likelihood_ratios(y_test, y_pred)
opt_mlp_roc, opt_mlp_det, opt_mlp_pr = (
    display_cls.from_estimator(clf, X_test, y_test)
    for display_cls in (RocCurveDisplay, DetCurveDisplay, PrecisionRecallDisplay)
)

In [None]:
# Default vs tuned MLP: one figure per curve type, each overlaying the
# default display and the tuned display.
mlp_curve_pairs = [
    ('Comparison of Optimal vs Default MLP ROC Curve', mlp_roc, opt_mlp_roc),
    ('Comparison of Optimal vs Default MLP DET Curve', mlp_det, opt_mlp_det),
    ('Comparison of Optimal vs Default MLP Precision-Recall Curve', mlp_pr, opt_mlp_pr),
]
for pair_title, default_display, tuned_display in mlp_curve_pairs:
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(pair_title)
    default_display.plot(ax=ax)
    tuned_display.plot(ax=ax)

variant_labels = ['Default', 'Optimal']

# Accuracy of both variants.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Comparison of Optimal vs Default MLP Accuracy')
ax.bar(variant_labels, [mlp_acc, opt_mlp_acc])
ax.set_xlabel('Classifier')

# Likelihood ratios of both variants, positive on top and negative below.
fig, (ax0, ax1) = plt.subplots(nrows=2, ncols=1, tight_layout=True, figsize=(10, 5))
mlp_ratio_panels = [
    (ax0, 'Comparison of Optimal vs Default MLP Positive Likelihood Ratio', 0),
    (ax1, 'Comparison of Optimal vs Default MLP Negative Likelihood Ratio', 1),
]
for ratio_ax, ratio_title, ratio_index in mlp_ratio_panels:
    ratio_ax.set_title(ratio_title)
    ratio_ax.bar(variant_labels, [mlp_ratio[ratio_index], opt_mlp_ratio[ratio_index]])
    ratio_ax.set_xlabel('Classifier')


### Comparison to Other Models

In [None]:
# Overlay the stored curve displays of the six default classifiers plus the
# two tuned ones, one figure per curve type.
curve_panels = [
    ('ROC Curve', [lr_roc, tree_roc, knn_roc, nb_roc, svc_roc, mlp_roc, opt_lr_roc, opt_mlp_roc]),
    ('DET Curve', [lr_det, tree_det, knn_det, nb_det, svc_det, mlp_det, opt_lr_det, opt_mlp_det]),
    ('Precision-Recall Curve', [lr_pr, tree_pr, knn_pr, nb_pr, svc_pr, mlp_pr, opt_lr_pr, opt_mlp_pr]),
]
for curve_title, displays in curve_panels:
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.set_title(curve_title)
    for display in displays:
        display.plot(ax=ax)

# Bar labels, in the same order as the metric lists below.
model_labels = ['Logistic Regression', 'Decision Tree', 'KNN', 'Naive Bayes', 'SVC', 'MLP', 'LR Optimal', 'MLP Optimal']

# Accuracy per classifier; values are printed above the bars, as in the
# comparison of the default models.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('Accuracy')
ax.bar(model_labels, [lr_acc, tree_acc, knn_acc, nb_acc, svc_acc, mlp_acc, opt_lr_acc, opt_mlp_acc])
ax.set_xlabel('Classifier')

for bars in ax.containers:
    ax.bar_label(bars)

# Positive (index 0) and negative (index 1) likelihood ratios side by side.
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2, tight_layout=True, figsize=(10, 5))
ratios = [lr_ratio, tree_ratio, knn_ratio, nb_ratio, svc_ratio, mlp_ratio, opt_lr_ratio, opt_mlp_ratio]
ratio_panels = [
    (ax0, 'Positive Likelihood Ratio', 0),
    (ax1, 'Negative Likelihood Ratio', 1),
]
for ratio_ax, ratio_title, ratio_index in ratio_panels:
    ratio_ax.set_title(ratio_title)
    ratio_ax.bar(model_labels, [ratio[ratio_index] for ratio in ratios])
    # Rotate the existing tick labels in place; re-setting the label texts
    # with set_xticklabels is unnecessary here and depends on tick positions.
    ratio_ax.tick_params(axis='x', labelrotation=45)
    ratio_ax.set_xlabel('Classifier')