# Building models from Scratch

with PyTorch and fastai

## Setup

In [None]:
from pathlib import Path
import torch
import numpy as np
import pandas as pd
from fastai.data.transforms import RandomSplitter
import torch.nn.functional as F

In [None]:
# The project root is taken to be the parent of the current working directory;
# the Titanic CSV files are looked up in <root>/data/titanic.
ROOT_PATH = Path.cwd().parent
DATA_PATH = ROOT_PATH.joinpath("data", "titanic")

## Load data

In [None]:
# Read the training set from DATA_PATH/train.csv into a DataFrame.
train_df = pd.read_csv(DATA_PATH / "train.csv")

In [None]:
train_df.info()

## Process data

In [None]:
def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, model-ready copy of ``df``.

    Steps:
      1. Missing values are replaced by the most frequent value of their
         column.
      2. ``Fare`` is replaced by ``log(Fare + 1)``.
      3. ``Sex``, ``Pclass`` and ``Embarked`` are one-hot encoded.

    The input frame is left untouched. Calling this function repeatedly on
    the same frame (for example when re-running a notebook cell) therefore
    always gives the same result instead of log-transforming ``Fare`` once
    more on every call.

    Args:
        df: Raw data containing at least the columns ``Fare``, ``Sex``,
            ``Pclass`` and ``Embarked``.

    Returns:
        A new DataFrame with the transformations above applied.
    """
    # Row 0 of ``mode()`` holds the most frequent value of every column.
    modes = df.mode().iloc[0]
    # Without ``inplace`` a new frame is returned, so the caller's data and
    # the ``Fare`` assignment below never touch the original object.
    filled = df.fillna(modes)
    filled["Fare"] = np.log(filled["Fare"] + 1)

    return pd.get_dummies(filled, columns=["Sex", "Pclass", "Embarked"])

In [None]:
processed_df = preprocess(train_df)

In [None]:
processed_df.info()

In [None]:
# Name of the column the model is trained to predict.
target = "Survived"

# Every column of the processed frame is used as a model input except the
# ones listed below.
# NOTE(review): "Fare" is log-transformed in preprocess() but excluded here;
# confirm that leaving it out of the features is intended.
features = [
    name
    for name in processed_df.columns
    if name
    not in {"PassengerId", "Survived", "Name", "Ticket", "Fare", "Cabin"}
]

In [None]:
# Target column as a 1-D float tensor (one entry per row of processed_df).
t_target = torch.tensor(processed_df[target], dtype=torch.float)

In [None]:
# Feature matrix as a 2-D float tensor: one row per passenger, one column per
# entry in ``features``. The columns are first converted to a single float64
# NumPy array (presumably because the frame mixes numeric and dummy columns).
t_features = torch.tensor(
    processed_df[features].to_numpy(dtype=np.float64), dtype=torch.float
)

## Modeling

### Linear model

In [None]:
torch.manual_seed(1337)

In [None]:
def calc_preds(coeffs: torch.Tensor, features: torch.Tensor) -> torch.Tensor:
    """Predict one value in the range [0, 1] for every row of ``features``.

    The prediction is the sigmoid of the matrix product ``features @ coeffs``.

    Args:
        coeffs: Coefficients whose first dimension matches the number of
            columns of ``features``.
        features: Matrix with one row per sample.

    Returns:
        The sigmoid-activated linear combination, one entry per sample.
    """
    weighted_sum = torch.matmul(features, coeffs)
    return weighted_sum.sigmoid()

In [None]:
def calc_loss(
    coeffs: torch.Tensor, features: torch.Tensor, target: torch.Tensor
) -> torch.Tensor:
    """Return the mean absolute error between predictions and ``target``.

    ``calc_preds`` returns a column vector of shape ``(n, 1)`` when the
    coefficients are two-dimensional, while ``target`` may be a flat vector of
    shape ``(n,)``. Subtracting those directly would broadcast to an
    ``(n, n)`` matrix and average over every prediction/target pairing, so
    the predictions are reshaped to the target's shape first. A mismatch in
    the number of elements raises an error instead of being broadcast.

    Args:
        coeffs: Model coefficients passed through to ``calc_preds``.
        features: Matrix with one row per sample.
        target: Expected value for each sample.

    Returns:
        A scalar tensor holding the mean absolute error.
    """
    preds = calc_preds(coeffs, features)
    return torch.abs(preds.reshape(target.shape) - target).mean()

#### Get random coefficients for our features

In [None]:
# One coefficient per feature column, drawn uniformly from [-0.5, 0.5).
coeffs = torch.rand(t_features.shape[1]) - 0.5

In [None]:
coeffs

#### Scale features

In [None]:
# Column-wise maximum (``indices`` holds the row of each maximum and is not
# used afterwards).
vals, indices = t_features.max(dim=0)
# Divide every column by its maximum so that each column's largest value is 1.
# NOTE(review): a column whose maximum is 0 would divide by zero here.
t_features = t_features / vals

#### Create predictions from the linear model by multiplying features with coefficients and adding them up

In [None]:
preds = (t_features * coeffs).sum(axis=1)

In [None]:
preds[:10]

#### Define the loss function as the mean absolute difference between predictions and target

In [None]:
loss = torch.abs(preds - t_target).mean()

In [None]:
loss

#### Gradient descent step

In [None]:
coeffs.requires_grad_()

In [None]:
# Forward pass: loss for the current coefficients.
loss = calc_loss(coeffs, t_features, t_target)
# Backward pass: fills coeffs.grad with the gradient of the loss.
loss.backward()
# The parameter update itself must not be tracked by autograd.
with torch.no_grad():
    # Step against the gradient with a learning rate of 0.1.
    coeffs.sub_(coeffs.grad * 0.1)
    # Reset the gradient so a later backward() does not add to this one.
    coeffs.grad.zero_()
    # Loss after the step, for comparison with the loss before it.
    print(calc_loss(coeffs, t_features, t_target))

#### Train the linear model

In [None]:
def init_coeffs():
    """Create random starting coefficients for the linear model.

    Returns:
        A column vector with one row per column of the global ``t_features``,
        filled with values drawn uniformly from [0, 0.1) and with gradient
        tracking switched on.
    """
    n_inputs = t_features.shape[1]
    return (torch.rand(n_inputs, 1) * 0.1).requires_grad_()

In [None]:
def update_coeffs(coeffs: torch.Tensor, lr: float):
    """Apply one gradient-descent step to ``coeffs`` in place.

    Subtracts ``lr * coeffs.grad`` from ``coeffs`` and then zeroes the
    gradient so the next ``backward()`` call starts from a clean slate.

    The update runs under ``torch.no_grad()`` so the function is safe to call
    on its own; in-place changes to a leaf tensor that requires grad are
    rejected by autograd otherwise. Calling it from inside an existing
    ``no_grad`` block keeps working.

    Args:
        coeffs: Tensor with a populated ``.grad`` attribute.
        lr: Learning rate, i.e. the step size.
    """
    with torch.no_grad():
        coeffs.sub_(coeffs.grad * lr)
        coeffs.grad.zero_()

In [None]:
def train_one_epoch(coeffs: torch.Tensor, lr: float, train_features, train_target):
    """Run a single full-batch gradient-descent step.

    Computes the loss on the given training data, back-propagates it, updates
    ``coeffs`` in place via ``update_coeffs`` and prints the loss that was
    measured before the update.
    """
    epoch_loss = calc_loss(coeffs, train_features, train_target)
    epoch_loss.backward()

    # The coefficient update must not become part of the autograd graph.
    with torch.no_grad():
        update_coeffs(coeffs, lr)

    print(f"Loss: {epoch_loss:.3f}")

In [None]:
def train_model(epochs: int, lr: float, train_features, train_target):
    """Train freshly initialised coefficients and return them.

    The random seed is fixed to 42 before initialisation, so every call
    starts from the same coefficients. ``train_one_epoch`` is then invoked
    ``epochs`` times on the full training data.
    """
    torch.manual_seed(42)
    params = init_coeffs()

    for _ in range(epochs):
        train_one_epoch(coeffs=params, lr=lr, train_features=train_features, train_target=train_target)

    return params

In [None]:
def calc_acc(coeffs, features, target):
    """Return the fraction of samples whose thresholded prediction is correct.

    A prediction above 0.5 counts as ``True`` and is compared with
    ``target.bool()``.

    ``calc_preds`` can return a column vector of shape ``(n, 1)`` while
    ``target`` is flat with shape ``(n,)``. Comparing those directly would
    broadcast to an ``(n, n)`` matrix and report a meaningless accuracy, so
    the predictions are reshaped to the target's shape before the comparison.
    """
    preds = calc_preds(coeffs, features).reshape(target.shape)
    return (target.bool() == (preds > 0.5)).float().mean()

In [None]:
# Randomly split the row indices into a training and a validation part using a
# fixed seed (the validation share is fastai's default; see RandomSplitter).
train_split, val_split = RandomSplitter(seed=1337)(processed_df)

In [None]:
# Select the rows of each split. The validation tensors must be indexed with
# ``val_split``; indexing them with ``train_split`` would evaluate the model
# on the very rows it was trained on.
train_features, val_features = t_features[train_split], t_features[val_split]
train_target, val_target = t_target[train_split], t_target[val_split]

In [None]:
coeffs = train_model(
    epochs=18, lr=0.2, train_features=train_features, train_target=train_target
)

#### Getting predictions

In [None]:
preds = calc_preds(coeffs, val_features)

In [None]:
# ``preds`` can be a column vector of shape (n, 1) while ``val_target`` is
# flat with shape (n,). Reshape before comparing so the result has one entry
# per sample instead of a broadcast (n, n) matrix.
results = val_target.bool() == (preds.reshape(val_target.shape) > 0.5)

In [None]:
results[:10]

Average accuracy

In [None]:
results.float().mean()

Preds contain values smaller than 0 and bigger than 1 - use the sigmoid function to get around that

In [None]:
(preds < 0).any() and (preds > 1.0).any()

#### Train new model using sigmoid function to calculate predictions

In [None]:
new_coeffs = train_model(
    epochs=30, lr=100, train_features=train_features, train_target=train_target
)

In [None]:
calc_acc(coeffs=new_coeffs, features=val_features, target=val_target)

### Neural network

In [None]:
def init_coeffs(n_hidden: int = 20):
    """Create random starting parameters for the one-hidden-layer network.

    Args:
        n_hidden: Number of units in the hidden layer.

    Returns:
        A tuple ``(l1, l2, const)`` of tensors with gradient tracking on:
        ``l1`` maps the columns of the global ``t_features`` to ``n_hidden``
        units (uniform values in [-0.5, 0.5) divided by ``n_hidden``), ``l2``
        maps the hidden units to one output (standard-normal values shifted
        by -0.3) and ``const`` is a scalar drawn uniformly from [0, 1).
    """
    n_inputs = t_features.shape[1]

    # Keep the creation order: it determines which random numbers each
    # parameter receives for a given seed.
    hidden_weights = (torch.rand(n_inputs, n_hidden) - 0.5) / n_hidden
    output_weights = torch.randn(n_hidden, 1) - 0.3
    bias = torch.rand(1)[0]

    return tuple(
        param.requires_grad_() for param in (hidden_weights, output_weights, bias)
    )

In [None]:
def calc_preds(coeffs, features):
    """Run the one-hidden-layer network and return values in [0, 1].

    Args:
        coeffs: Tuple ``(l1, l2, const)`` of hidden-layer weights,
            output-layer weights and an additive constant.
        features: Matrix with one row per sample.

    Returns:
        ``sigmoid(relu(features @ l1) @ l2 + const)``.
    """
    hidden_weights, output_weights, bias = coeffs
    hidden = F.relu(torch.matmul(features, hidden_weights))
    logits = torch.matmul(hidden, output_weights) + bias
    return logits.sigmoid()

In [None]:
def update_coeffs(coeffs, lr):
    """Apply one gradient-descent step to every tensor in ``coeffs``.

    For each tensor, ``lr * tensor.grad`` is subtracted in place and the
    gradient is zeroed afterwards.

    The update runs under ``torch.no_grad()`` so the function is safe to call
    on its own; in-place changes to a leaf tensor that requires grad are
    rejected by autograd otherwise. Calling it from inside an existing
    ``no_grad`` block keeps working.

    Args:
        coeffs: Iterable of tensors, each with a populated ``.grad``.
        lr: Learning rate, i.e. the step size.
    """
    with torch.no_grad():
        for layer in coeffs:
            layer.sub_(layer.grad * lr)
            layer.grad.zero_()

In [None]:
coeffs_nn = train_model(
    epochs=18, lr=0.2, train_features=train_features, train_target=train_target
)

In [None]:
calc_acc(coeffs=coeffs_nn, features=val_features, target=val_target)