**NOTE: This notebook is written for the Google Colab platform. However, it can also be run (possibly with minor modifications) as a standard Jupyter notebook.**



In [None]:
#@title -- Installation of Packages -- { display-mode: "form" }
import sys
# Invoke pip through the interpreter that runs this kernel (sys.executable)
# so the package is installed into the same environment the notebook uses.
!{sys.executable} -m pip install git+https://github.com/michalgregor/class_utils.git

In [None]:
#@title -- Import of Necessary Packages -- { display-mode: "form" }
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from class_utils.statsmodels import SMLogisticRegression
from class_utils.sklearn import (
    make_pd_column_transformer, transformer_extensions
)
from class_utils.plots import ColGrid, smoothscatter
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from statsmodels.graphics.tsaplots import plot_acf

In [None]:
#@title -- Downloading Data -- { display-mode: "form" }
import os

from class_utils.download import download_file_maybe_extract

# URL template for the course data files; "{}" is replaced by the file name.
DATA_HOME = "https://github.com/michalgregor/ml_notebooks/blob/main/data/{}?raw=1"

# Fetch the Titanic archive into data/titanic.
titanic_archive_url = DATA_HOME.format("titanic.zip")
download_file_maybe_extract(titanic_archive_url, directory="data/titanic")

# Make sure the directory for any generated outputs exists
# (no error if it is already there).
os.makedirs("output", exist_ok=True)

In [None]:
# Load the Titanic training set from the CSV file under data/titanic.
df = pd.read_csv("data/titanic/train.csv")

In [None]:
# Column roles: categorical features, numeric features and the target column.
categorical_inputs = ["Pclass", "Sex", "Embarked"]
numeric_inputs = ["Age", "SibSp", "Parch", "Fare"]
output = "Survived"

# Categorical branch: replace missing entries with the constant "MISSING",
# then encode each category as an ordinal code.
categorical_pipeline = make_pipeline(
    SimpleImputer(strategy="constant", fill_value="MISSING"),
    OrdinalEncoder(),
)

# Numeric branch: impute with SimpleImputer's default strategy,
# then standardize the columns.
numeric_pipeline = make_pipeline(SimpleImputer(), StandardScaler())

# Route each group of columns through its own pipeline.
input_preproc = make_pd_column_transformer(
    (categorical_pipeline, categorical_inputs),
    (numeric_pipeline, numeric_inputs),
)

# Fit the preprocessing on the selected feature columns and transform them;
# the target is extracted as a flat 1-D array.
feature_columns = categorical_inputs + numeric_inputs
X = input_preproc.fit_transform(df[feature_columns])
Y = df[output].to_numpy().ravel()

In [None]:
# Fit a logistic regression model on the preprocessed inputs X and targets Y.
# NOTE(review): SMLogisticRegression comes from class_utils.statsmodels —
# presumably a statsmodels-backed estimator; confirm against that package.
model = SMLogisticRegression()
model.fit(X, Y)

In [None]:
# Show the fitted model's summary; as the last expression in the cell,
# its return value is rendered by the notebook.
# NOTE(review): presumably a statsmodels-style coefficient table — confirm.
model.summary()