#### Ex 15.0

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

from xgboost import XGBClassifier
from xgboost import plot_importance

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

# Custom function for plotting 1D plots
def plot_1d(x, c="Survived"):
    """Plot the mean of column ``c`` for each distinct value of column ``x``.

    The points are drawn on a single horizontal line: a temporary constant
    column named ``"stub"`` is added to the global ``df`` so that ``plot_2d``
    can be reused with a flat y axis. An existing ``"stub"`` column in ``df``
    would be overwritten and then removed.

    Args:
        x: Name of the ``df`` column to place on the x axis.
        c: Name of the ``df`` column whose group mean colors the points.
    """
    y = "stub"
    df[y] = 0
    try:
        # Forward `c` so a non-default color column is actually honored.
        plot_2d(x, y, c)
    finally:
        # Always drop the helper column, even if plotting raised, so the
        # shared DataFrame is not left polluted for later cells.
        del df[y]

# Custom function for plotting 2D plots
def plot_2d(x, y, c="Survived"):
    """Scatter-plot the mean of ``c`` over every (``x``, ``y``) combination.

    Reads the global ``df``, groups it by the two columns, averages ``c``
    inside each group and draws one point per group, colored by that mean.

    Args:
        x: Name of the ``df`` column used for the x axis.
        y: Name of the ``df`` column used for the y axis.
        c: Name of the ``df`` column that is averaged and mapped to color.
    """
    group_means = df.groupby([x, y])[c].mean().reset_index()
    group_means.plot(kind='scatter', x=x, y=y, c=c, colormap='viridis')

# Custom function for plotting 3D plots
def plot_3d(x, y, z, c="Survived"):
    """Draw a 3D scatter of the mean of ``c`` per (``x``, ``y``, ``z``) group.

    Reads the global ``df``. Each distinct combination of the three columns
    becomes one point whose color encodes the group mean of ``c``.

    Args:
        x: Name of the ``df`` column for the x axis.
        y: Name of the ``df`` column for the y axis.
        z: Name of the ``df`` column for the z axis.
        c: Name of the ``df`` column that is averaged and mapped to color.
    """
    # One row per (x, y, z) combination, holding the mean of `c`.
    means = df.groupby([x, y, z])[c].mean().reset_index()

    figure = plt.figure(figsize=(10, 7))
    ax = figure.add_subplot(111, projection='3d')

    points = ax.scatter(
        xs=means[x],
        ys=means[y],
        zs=means[z],
        c=means[c],
        cmap='viridis',
        edgecolor='k',
        s=40,
        alpha=0.7,
    )

    # Color bar explains the mapping from color to the mean of `c`.
    plt.colorbar(points, ax=ax, label=c)

    # Axis captions are simply the column names.
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.set_zlabel(z)
    ax.set_title('3D Scatter Plot')

    plt.show()

def plot_predictions(x, y, z, model, df_x, df_y):
    """Show a model's hits and misses in a 3D scatter plot.

    Samples are split into four groups by (actual label, predicted label) and
    each group is drawn with its own color/marker: circles for correct
    predictions, crosses for wrong ones.

    Args:
        x: Column of ``df_x`` for the x axis.
        y: Column of ``df_x`` for the y axis.
        z: Column of ``df_x`` for the z axis.
        model: Object whose ``predict(df_x)`` returns one 0/1 label per row.
        df_x: Feature table; it is indexed both by boolean mask and by the
            column names above, so presumably a pandas DataFrame.
        df_y: True 0/1 labels, one per row of ``df_x``.
    """
    # Initialize plot
    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')

    # Predict on the entire dataset
    predictions = model.predict(df_x)

    # (actual, predicted, color, marker, legend label).
    # The labels of the two error groups name the *predicted* class: a
    # survivor (actual 1) predicted as 0 was "predicted as Not Survived".
    groups = [
        (1, 1, 'green', 'o', 'Correctly Survived'),
        (0, 0, 'purple', 'o', 'Correctly Not Survived'),
        (1, 0, 'red', 'x', 'Incorrectly Predicted as Not Survived'),
        (0, 1, 'orange', 'x', 'Incorrectly Predicted as Survived'),
    ]
    for actual, predicted, color, marker, label in groups:
        subset = df_x[(df_y == actual) & (predictions == predicted)]
        ax.scatter(subset[x], subset[y], subset[z], c=color, marker=marker, label=label)

    # Set labels
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.set_zlabel(z)

    ax.legend()
    plt.show()

#### Ex 15.1

In [None]:
# Load the cleaned Titanic training set. `train_df` and `df` are two names
# for the same DataFrame object; the plotting helpers read the global `df`.
train_df = pd.read_csv("clean_train_titanic.csv")
df = train_df
df

#### Ex 15.2

In [None]:
# Target column the classifiers learn to predict.
Classes = 'Survived'

# Input columns fed to every model in this notebook.
Features = [
    'Parch',
    'Pclass',
    'SibSp',
    'CatSex',
    'CatEmbarked',
    'CatAge',
    'CatFare',
]

#### Ex 15.3

In [None]:
from sklearn.model_selection import train_test_split

# Pull the feature matrix and the label vector out of the DataFrame.
x, y = df[Features].values, df[Classes].values

# Hold out 30% of the rows for testing. The fixed seed makes the split
# repeatable and `stratify=y` keeps the label proportions in both parts.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=5,
    stratify=y,
)

#### Ex 15.4

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Build a decision tree with default settings and fit it on the training
# split (`fit` hands back the fitted estimator itself).
dt = DecisionTreeClassifier().fit(train_x, train_y)

# Accuracy on the held-out split.
dt_score = dt.score(test_x, test_y)
print("Decision Tree Classifier Score:", dt_score)

#### Ex 15.5

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Initialize and train Random Forest classifier (default hyper-parameters)
rf = RandomForestClassifier()
rf.fit(train_x, train_y)

# Score the classifier on the held-out split.
# The label names the random forest so the output is not confused with the
# decision tree score printed by the previous exercise.
rf_score = rf.score(test_x, test_y)
print("Random Forest Classifier Score:", rf_score)

#### Ex 15.6

In [None]:
from sklearn.svm import SVC

# Support vector classifier with default settings, fitted on the training
# split (`fit` hands back the fitted estimator itself).
svm = SVC().fit(train_x, train_y)

# Accuracy on the held-out split.
svm_score = svm.score(test_x, test_y)
print("SVM Classifier Score:", svm_score)

#### Ex 15.7

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier with default settings, fitted on the
# training split (`fit` hands back the fitted estimator itself).
knn = KNeighborsClassifier().fit(train_x, train_y)

# Accuracy on the held-out split.
knn_score = knn.score(test_x, test_y)
print("KNN Classifier Score:", knn_score)

#### Ex 15.8

In [None]:
import xgboost as xgb

# Initialize and train XGBoost classifier:
# 100 boosting rounds with a learning rate of 0.5.
xgb_clf = xgb.XGBClassifier(n_estimators=100, learning_rate=0.5)
xgb_clf.fit(train_x, train_y)
# Attach the column names to the fitted booster. `train_x` comes from
# `.values`, so it carries no column labels of its own; presumably this is
# what lets later plots show feature names instead of generic ones.
# NOTE(review): this must stay after `fit`, which creates the booster.
xgb_clf.get_booster().feature_names = Features

# Predict on the held-out split and report the accuracy
xgb_clf_score = xgb_clf.score(test_x, test_y)
print("XGBoost Classifier Score:", xgb_clf_score)

#### Ex 15.8.1

In [None]:
from matplotlib.pylab import rcParams
from xgboost import plot_tree

# Use a very large default figure size so the tree drawing is not squeezed
# into the standard canvas.
rcParams['figure.figsize'] = (80, 50)

# Draw the first boosted tree (index 0), laid out left-to-right.
plot_tree(
    xgb_clf,
    num_trees=0,
    rankdir='LR',
)

#### Ex 15.9

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the 0/1 labels of both splits into two-column arrays,
# the target format used with the softmax / categorical cross-entropy models.
train_y_cat, test_y_cat = (
    to_categorical(labels, num_classes=2) for labels in (train_y, test_y)
)

train_y_cat

#### Ex 15.10

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# The one-hot labels (`train_y_cat`, `test_y_cat`) are reused from the
# earlier conversion cell.

# Simplest possible network: a single dense layer with two softmax outputs,
# one per class.
model = Sequential()
model.add(Dense(2, activation='softmax'))

# Adam optimizer, cross-entropy over the one-hot labels, accuracy as metric.
model.compile(
    optimizer="adam",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit silently for 50 epochs, tracking the held-out split as validation data.
model.fit(
    train_x,
    train_y_cat,
    validation_data=(test_x, test_y_cat),
    epochs=50,
    verbose=0,
)

# `evaluate` returns the loss followed by the compiled metrics, so index 1
# is the accuracy.
score = model.evaluate(test_x, test_y_cat, verbose=0)
print(f'Model accuracy: {score[1]}')

#### 15.10.1

In [None]:
from tensorflow.keras.utils import plot_model

# Draw the layer graph of the currently bound `model`, annotating every
# layer with its name and its shapes.
plot_model(
    model,
    show_shapes=True,
    show_layer_names=True,
)

#### Ex 15.10.2

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Kernel matrix and bias vector of the model's first layer.
weights, biases = model.layers[0].get_weights()

# Heatmap of the kernel: one row per input feature, one column per output
# neuron, each cell annotated with the weight rounded to two decimals.
plt.figure(figsize=(10, 8))
sns.heatmap(weights, annot=True, fmt=".2f", cmap="viridis")
plt.ylabel("Input Features")
plt.xlabel("Output Neurons")
plt.title("Weights Visualization")
# Keep the tick positions chosen by the heatmap but show the feature names.
plt.yticks(ticks=plt.yticks()[0], labels=Features)
plt.show()


#### Ex 15.11

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

# A Conv2D layer expects image-like input, so every sample's feature vector
# is laid out as a 3x3 single-channel "image". 3 * 3 = 9 is the smallest
# square that can hold all len(Features) values; the unused trailing cells
# are filled with zeros.
pad_size = 9 - len(Features)

# Pad the features (columns only, no extra rows) to 9 values per sample
x_padded = np.pad(x, [(0, 0), (0, pad_size)], mode='constant')
x_padded = x_padded.astype("float32")

# Reshape to a 3x3 structure, adding an additional dimension for channels
x_reshaped = x_padded.reshape(-1, 3, 3, 1)

# Split the data
cnn_train_x, cnn_test_x, cnn_train_y, cnn_test_y = train_test_split(x_reshaped, y, test_size=0.3, random_state=5, stratify=y)

# One-hot encode the labels produced by THIS split. Reusing the label arrays
# of the earlier split would only be correct while both splits happen to use
# the same seed, test size and stratification.
cnn_train_y_cat = to_categorical(cnn_train_y, num_classes=2)
cnn_test_y_cat = to_categorical(cnn_test_y, num_classes=2)

# Define the CNN model: one 2x2 convolution with 32 filters (no activation
# is passed, so the layer's default applies), flattened into a two-way
# softmax output.
model = Sequential([
    Conv2D(32, kernel_size=(2, 2)),
    Flatten(),
    Dense(2, activation='softmax'),
])

# Compile the model
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(cnn_train_x, cnn_train_y_cat, validation_data=(cnn_test_x, cnn_test_y_cat), epochs=20, verbose=0)

# Evaluate the model; index 1 of the result is the accuracy metric
score = model.evaluate(cnn_test_x, cnn_test_y_cat, verbose=0)
print(f'Model accuracy: {score[1]}')

#### 15.11.1

In [None]:
from tensorflow.keras.utils import plot_model

# Render the architecture diagram of whatever `model` currently refers to,
# with layer names and shapes shown on each node.
plot_model(
    model,
    show_layer_names=True,
    show_shapes=True,
)

#### 15.12

In [None]:
# Switch to the "extra clean" variant of the training set. `train_df` and
# `df` both point at the same newly loaded DataFrame, replacing the previous
# one.
train_df = pd.read_csv("extra_clean_train_titanic.csv")
df = train_df
df

#### 15.12.1

In [None]:
# Target column and input columns for the newly loaded dataset.
Classes = 'Survived'
Features = [
    'Parch',
    'Pclass',
    'SibSp',
    'CatSex',
    'CatEmbarked',
    'CatAge',
    'CatFare',
]

# Feature matrix and label vector taken from the current `df`.
x, y = df[Features].values, df[Classes].values

# Same 70/30 stratified split with the same seed as before.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=5,
    stratify=y,
)

# Categoricals needed for deep-learning
train_y_cat, test_y_cat = (
    to_categorical(labels, num_classes=2) for labels in (train_y, test_y)
)