#### Ex 13.0

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline

# Custom function for plotting 2D plots
def plot_2d(x, y, c="Survived", data=None):
    """Scatter-plot the mean of column ``c`` for every (``x``, ``y``) group.

    Parameters
    ----------
    x, y : str
        Columns to group by; they become the horizontal and vertical axes.
    c : str, default "Survived"
        Column averaged inside each group and mapped to the point colour.
    data : DataFrame, optional
        Frame to aggregate and plot. When omitted, the notebook-level ``df``
        is looked up at call time, so existing ``plot_2d(x, y)`` calls keep
        working unchanged.

    Returns
    -------
    None
        The plot is drawn as a side effect.
    """
    frame = df if data is None else data
    group_means = frame.groupby([x, y])[c].mean().reset_index()
    group_means.plot(kind='scatter', x=x, y=y, c=c, colormap='viridis')

# Custom function for plotting 3D plots
def plot_3d(x, y, z, c="Survived", data=None):
    """Draw a 3D scatter plot of the mean of ``c`` per (``x``, ``y``, ``z``) group.

    Parameters
    ----------
    x, y, z : str
        Columns to group by; they become the three plot axes.
    c : str, default "Survived"
        Column averaged inside each group and mapped to the point colour.
    data : DataFrame, optional
        Frame to aggregate and plot. When omitted, the notebook-level ``df``
        is looked up at call time, so existing ``plot_3d(x, y, z)`` calls keep
        working unchanged.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    frame = df if data is None else data

    # Aggregate first: if a column name is wrong this raises before any
    # figure has been created, so no empty figure is left behind.
    dfm = frame.groupby([x, y, z])[c].mean().reset_index()

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')

    # One point per group, coloured by the group mean of `c`
    sc = ax.scatter(xs=dfm[x], ys=dfm[y], zs=dfm[z], c=dfm[c], cmap='viridis', edgecolor='k', s=40, alpha=0.7)

    # Colour bar labelled with the averaged column's name
    plt.colorbar(sc, ax=ax, label=c)

    # Axis labels are the grouping column names
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.set_zlabel(z)

    ax.set_title('3D Scatter Plot')

    plt.show()

#### Ex 13.1

In [None]:
# Load titanic_dataset.csv into `df`, the frame every later exercise works on.
df = pd.read_csv("titanic_dataset.csv")
# Bare last expression: the notebook renders the frame for a first look.
df

#### Ex 13.2

In [None]:
# Load test_dataset.csv into its own frame, kept separate from `df`.
test_df = pd.read_csv("test_dataset.csv")

# Row counts of the two frames, displayed as a (len(df), len(test_df)) tuple.
(len(df), len(test_df))

____

## (13.3) Feature Engineering: Missing "Embarked" values


#### Ex 13.3.1

In [None]:
# Show the rows whose "Embarked" value is missing.
df.loc[df["Embarked"].isna()]

#### Ex 13.3.2

In [None]:
# Bar chart of how often each "Embarked" value occurs; dropna=False keeps a
# separate bar for the missing entries.
df['Embarked'].value_counts(dropna=False).plot.bar()

#### Ex 13.3.3

In [None]:
# Possible values are 'S', 'C' or 'Q'.
# Replace the blank with the code to assign to every passenger whose
# "Embarked" is missing (the bar chart in Ex 13.3.2 shows how common each is).
df['Embarked'] = df['Embarked'].fillna(value=_______)

#### Ex 13.3.4

In [None]:
# Number of rows still missing "Embarked"; this should be 0 once the blank in
# Ex 13.3.3 has been filled in. Counting the missing flags directly avoids
# going through the unrelated "Name" column, whose own gaps would otherwise
# be left out of the count.
df["Embarked"].isna().sum()

____

## (13.4) Feature Engineering: Missing "Age" values

#### Ex 13.4.1

In [None]:
# Show the rows whose "Age" value is missing.
df.loc[df["Age"].isna()]


#### Ex 13.4.2

In [None]:
# You can use any valid dataframe column on the x-axis; which ones are most correlated with age?
# E.g. you may use 'Name', 'Sex', 'Pclass', 'Fare', etc...
df.plot(kind='scatter', x=_______, y='Age')

#### Ex 13.4.3

In [None]:
plt.figure(figsize=(4,10))
# Encode every non-numeric column as integer category codes so .corr() can
# include it (missing values get code -1). Testing "is not numeric" instead of
# "is object" also covers non-object text/categorical dtypes, which would
# otherwise reach .corr() unencoded. apply() builds a new frame, so `df`
# itself is left untouched and no explicit copy is needed.
encoded = df.apply(
    lambda col: col if pd.api.types.is_numeric_dtype(col) else col.astype('category').cat.codes
)
# Keep only the "Age" column of the correlation matrix: one row per feature.
sns.heatmap(encoded.corr()[["Age"]], vmax=0.6, square=True, annot=True)

#### Ex 13.4.4

In [None]:
# Mean Age for each (SibSp, Pclass) combination, drawn as a colour-coded scatter plot.
plot_2d(x='SibSp', y='Pclass', c='Age')

#### Ex 13.4.5

In [None]:
# Average Age per (SibSp, Pclass) pair, shown as a table indexed by both columns.
df.groupby(['SibSp', 'Pclass'])['Age'].mean()

#### Ex 13.4.6

In [None]:
# Per-row mean Age of the row's own (SibSp, Pclass) group; transform() keeps
# the result aligned with df's index so it can be used directly as fill values.
sibsp_pclass_averages = (
    df.groupby(['SibSp', 'Pclass'])['Age']
      .transform('mean')
)
# Only the missing ages are replaced; existing values are kept.
df['Age'] = df['Age'].fillna(value=sibsp_pclass_averages)

#### Ex 13.4.7

In [None]:
# Rows that still have no "Age" after the group-mean fill.
df.loc[df["Age"].isna()]

#### Ex 13.4.8

In [None]:
# Any rows still listed in Ex 13.4.7 belong to (SibSp, Pclass) groups whose
# group mean was itself missing, so they need one fallback value.
# We need to insert a float, but which float will you pick? 25.6? 19.1? Some other number?
df['Age'] = df['Age'].fillna(value=_______)

#### Ex 13.4.9

In [None]:
# Number of rows still missing "Age"; this should be 0 once the blank in
# Ex 13.4.8 has been filled in. Counting the missing flags directly avoids
# going through the unrelated "Name" column, whose own gaps would otherwise
# be left out of the count.
df["Age"].isna().sum()

____

## (13.5) Feature Engineering: Categorization


#### Ex 13.5.1

In [None]:
# Integer encodings of the two text columns. Series.map leaves NaN for any
# value that is not a key of the dict, so an unexpected category (or a
# still-missing "Embarked") shows up as NaN in the new column.
df['CatSex'] = df['Sex'].map({'male': 0, 'female': 1})
df['CatEmbarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q':2})

# Display the frame with the two new columns.
df

#### Ex 13.5.2

In [None]:
# np.linspace(0, 80, n) returns n evenly spaced bucket edges from 0 to 80
# inclusive, i.e. n - 1 age buckets. Replace the blank with the number of edges.
age_buckets = np.linspace(0, 80, _______)

# Display the edges to check the bucket width.
age_buckets

#### Ex 13.5.3

In [None]:
# Replace each age by the integer index of its bucket (labels=False).
# pd.cut bins are open on the left by default, so a value exactly equal to the
# lowest edge (0) would fall outside every bucket and become NaN;
# include_lowest=True closes the first bucket on the left to keep such rows.
df['CatAge'] = pd.cut(df['Age'], age_buckets, labels=False, include_lowest=True)

df

#### Ex 13.5.4

In [None]:
# np.linspace(0, 520, n) returns n evenly spaced bucket edges from 0 to 520
# inclusive, i.e. n - 1 fare buckets. Replace the blank with the number of edges.
fare_buckets = np.linspace(0, 520, _______)

# Display the edges to check the bucket width.
fare_buckets

#### Ex 13.5.5

In [None]:
# Replace each fare by the integer index of its bucket (labels=False).
# pd.cut bins are open on the left by default, so a fare exactly equal to the
# lowest edge (0, i.e. a free ticket) would fall outside every bucket and
# become NaN, which also turns the whole column into floats.
# include_lowest=True closes the first bucket on the left to keep such rows.
df['CatFare'] = pd.cut(df['Fare'], fare_buckets, labels=False, include_lowest=True)

df

____

## (13.6) Feature Engineering: Visualising Linear Correlations

#### Ex 13.6.1

In [None]:
# Build the frame used for the correlation plots: drop the raw
# Sex/Age/Fare/Embarked columns (their Cat* encodings stay) together with
# PassengerId, Name, Ticket and Cabin. `df` itself is not modified.
train_df = df.drop(
    columns=[
        'PassengerId', 'Name', 'Sex', 'Age',
        'Ticket', 'Fare', 'Cabin', 'Embarked',
    ],
)
train_df

#### Ex 13.6.2

In [None]:
# Pairwise correlations between all remaining columns of train_df, annotated
# with their values; the colour scale is capped at 0.6 (vmax).
plt.figure(figsize=(8,6))
sns.heatmap(train_df.corr(), vmax=0.6, square=True, annot=True)

#### Ex 13.6.3

In [None]:
# Mean of the default colour column ("Survived") for each (Parch, CatFare) pair.
plot_2d(x='Parch', y='CatFare')

#### Ex 13.6.4

In [None]:
# Mean of the default colour column ("Survived") for each (SibSp, CatAge) pair.
plot_2d(x='SibSp', y='CatAge')

#### Ex 13.6.5

In [None]:
# Mean of the default colour column ("Survived") for each (Pclass, CatSex) pair.
plot_2d(x='Pclass', y='CatSex')

#### Ex 13.6.6

In [None]:
# Side-by-side survival counts per passenger class, one panel per sex.
fig, axs = plt.subplots(ncols=2, figsize=(15,5))
# CatSex holds the integer codes assigned in Ex 13.5.1 (female -> 1, male -> 0),
# so select rows by those codes rather than by comparing integers to True/False.
for ax, (label, code) in zip(axs, [('female', 1), ('male', 0)]):
    ax.set_title(label)
    sns.countplot(x='Pclass', hue='Survived', data=train_df.loc[train_df['CatSex'] == code], ax=ax)

#### Ex 13.6.7

In [None]:
# Mean of the default colour column ("Survived") per (Parch, CatEmbarked, CatFare) group.
plot_3d("Parch", "CatEmbarked", "CatFare")

#### Ex 13.6.8

In [None]:
# Mean of the default colour column ("Survived") per (Parch, SibSp, CatFare) group.
plot_3d("Parch", "SibSp", "CatFare")

#### Ex 13.6.9

In [None]:
# Replace the three blanks with column names of `df` (as strings) for the
# x, y and z axes; points are coloured by the mean of "Survived" per group.
plot_3d(_______, _______, _______)