# NN Model

In [5]:
import torch, numpy as np, pandas as pd
from torch import tensor
from fastai.data.transforms import RandomSplitter
import zipfile

zipName = 'titanic.zip'

# Open the archive and each member in a context manager so every file
# handle is released as soon as its CSV has been parsed, instead of staying
# open for the lifetime of the kernel.
with zipfile.ZipFile(zipName) as zf:
    with zf.open('gender_submission.csv') as member:
        df_gs = pd.read_csv(member)
    with zf.open('train.csv') as member:
        df_train = pd.read_csv(member)
    with zf.open('test.csv') as member:
        df_test = pd.read_csv(member)

In [6]:
# First row of DataFrame.mode(): one most-frequent value per column.
# `modes` is reused later to fill the test set the same way.
modes = df_train.mode().iloc[0]
# Replace missing values column-by-column with that column's mode.
df_train.fillna(value=modes, inplace=True)

In [7]:
# log(Fare + 1): the +1 keeps a fare of 0 finite under the logarithm.
df_train['logfare'] = df_train['Fare'].add(1).pipe(np.log)

In [8]:
# Replace the three categorical columns with one indicator column per level.
df_train = pd.get_dummies(data=df_train, columns=['Sex', 'Pclass', 'Embarked'])

# Indicator columns expected from the call above, grouped by source column.
added_cols = (
    ['Sex_female', 'Sex_male']
    + ['Pclass_1', 'Pclass_2', 'Pclass_3']
    + ['Embarked_C', 'Embarked_Q', 'Embarked_S']
)

In [9]:
# Model inputs: four numeric columns followed by the indicator columns.
indep_cols = ['Age','SibSp','Parch','logfare'] + added_cols

# Target: the Survived column.
t_dep = tensor(df_train.Survived)
# Cast to float explicitly. pandas >= 2.0 emits bool indicator columns from
# get_dummies, and a frame mixing float and bool columns yields an
# object-dtype array from `.values`, which torch cannot turn into a tensor.
t_indep = tensor(df_train[indep_cols].to_numpy(dtype=float), dtype=torch.float)
# Scale every column by its maximum; `vals` is reused to scale the test
# set with the same factors.
vals, indices = t_indep.max(dim=0)
t_indep = t_indep / vals

In [20]:
# Random train/validation index split with a fixed seed.
trn_split, val_split = RandomSplitter(seed=42)(df_train)

# Slice features and targets with the same indices. Targets get a trailing
# unit axis so they line up with the (n, 1) output of calc_preds.
trn_indep, trn_dep = t_indep[trn_split], t_dep[trn_split].unsqueeze(1)
val_indep, val_dep = t_indep[val_split], t_dep[val_split].unsqueeze(1)
len(trn_indep), len(val_indep)

(713, 178)

In [21]:
# Instructions for Linear Model Machine Learning

import torch.nn.functional as F

# Free-form notes kept as a bare string expression: it is evaluated and
# discarded, so it has no effect on the functions defined below.
'Gather: \
- dependent variables, independent variables \
- use pd.get_dummies(df, columns=[]) to create dummy columns \
- use tensor(df_train[indep_cols].values, dtype=torch.float) to get tensor \
- import from fastai.data.transforms import RandomSplitter \
-- use trn_split, val_split = RandomSplitter()(df) to get splits \
-- split test, train by using t_dep/indep[trn/val_split] \
-- index column dims with special value None trn/val_dep = trn/val_dep[:, None]'

def init_coeffs(n_hidden=20):
    """Draw random starting parameters for a one-hidden-layer network.

    Reads the module-level ``n_coeff`` for the number of input features.

    Returns a 3-tuple, each element flagged as requiring gradients:
      * hidden weights, shape (n_coeff, n_hidden): uniform in [-0.5, 0.5)
        divided by ``n_hidden``
      * output weights, shape (n_hidden, 1): uniform in [-0.3, 0.7)
      * scalar bias: uniform in [0, 1)
    """
    # The three draws must stay in this order so a given seed yields the
    # same parameters.
    hidden_w = torch.rand(n_coeff, n_hidden).sub(0.5).div(n_hidden)
    out_w = torch.rand(n_hidden, 1).sub(0.3)
    bias = torch.rand(1)[0]
    return tuple(p.requires_grad_() for p in (hidden_w, out_w, bias))

def calc_preds(coeffs, indeps):
    """Forward pass of the one-hidden-layer network.

    ``coeffs`` unpacks to (hidden weights, output weights, bias). The inputs
    are multiplied by the hidden weights and passed through ReLU, then
    multiplied by the output weights with the bias added, and finally
    squashed into (0, 1) with a sigmoid.
    """
    hidden_w, out_w, bias = coeffs
    hidden = torch.relu(indeps @ hidden_w)
    logits = hidden @ out_w + bias
    return torch.sigmoid(logits)

def calc_loss(coeffs, indeps, deps):
    """Mean absolute error between calc_preds(coeffs, indeps) and deps."""
    residual = calc_preds(coeffs, indeps) - deps
    return residual.abs().mean()

def update_coeffs(coeffs, lr):
    """Apply one gradient-descent step to every tensor in ``coeffs``.

    Each tensor is modified in place (``tensor -= tensor.grad * lr``) and
    its gradient is then zeroed, ready for the next backward pass.
    """
    for param in coeffs:
        step = param.grad * lr
        param.sub_(step)
        param.grad.zero_()
    
def one_epoch(coeffs, lr):
    """Run one full-batch training step and print the loss.

    Computes the loss on the module-level training tensors
    (``trn_indep`` / ``trn_dep``), back-propagates it, updates ``coeffs`` in
    place, and prints the pre-update loss followed by ';'.
    """
    epoch_loss = calc_loss(coeffs, trn_indep, trn_dep)
    epoch_loss.backward()
    # The parameter update itself must not be recorded by autograd.
    with torch.no_grad():
        update_coeffs(coeffs, lr)
    print(f"{epoch_loss:.3f}", end=";")
    
def train_model(epochs=30, lr=0.01):
    """Initialise fresh coefficients, train them for ``epochs`` steps at
    learning rate ``lr``, and return them."""
    params = init_coeffs()
    for _ in range(epochs):
        one_epoch(params, lr=lr)
    return params

def show_coeffs():
    """Map each independent-variable name to its learned weight(s).

    Reads the module-level ``coeffs`` and ``indep_cols``. When ``coeffs`` is
    the (hidden weights, output weights, bias) tuple, each column name maps
    to its row of the hidden-layer weights. When ``coeffs`` is a single
    tensor, each name maps to the matching element.

    The returned tensors are detached copies of the graph references, so
    calling this does not switch off gradient tracking on the live
    parameters.
    """
    # A tuple or list has no requires_grad_/detach of its own; the per-input
    # weights live in its first element.
    weights = coeffs[0] if isinstance(coeffs, (tuple, list)) else coeffs
    return dict(zip(indep_cols, weights.detach()))

def acc(coeffs):
    """Validation accuracy: the fraction of rows in the module-level
    validation split whose prediction, thresholded at 0.5, matches the
    boolean target."""
    predicted = calc_preds(coeffs, val_indep) > 0.5
    hits = val_dep.bool() == predicted
    return hits.float().mean()

In [34]:
# Seed torch's RNG so the random initial coefficients, and therefore the
# training run below, are repeatable.
torch.manual_seed(442)

# Number of input features (columns of t_indep); init_coeffs reads this
# global.
n_coeff = t_indep.shape[1]
# NOTE(review): this result is overwritten by train_model() below, but the
# call still draws from the seeded RNG. Removing it would change the
# coefficients train_model() starts from and hence the recorded output.
coeffs = init_coeffs()

# Train with the default 30 epochs at learning rate 5, then report
# validation accuracy as the cell output.
coeffs = train_model(lr=5);
acc(coeffs)

0.554;0.529;0.481;0.388;0.296;0.256;0.236;0.227;0.222;0.219;0.216;0.214;0.212;0.210;0.208;0.207;0.205;0.204;0.203;0.202;0.202;0.201;0.200;0.200;0.199;0.199;0.199;0.198;0.198;0.198;

tensor(0.8258)

In [None]:
# Apply the training-set preprocessing steps to the test set.
# Missing fares become 0 first; remaining gaps take the training-set modes.
df_test['Fare'] = df_test.Fare.fillna(0)
df_test.fillna(modes, inplace=True)
df_test['logfare'] = np.log(df_test['Fare']+1)
df_test = pd.get_dummies(df_test, columns=["Sex","Pclass","Embarked"])

# Cast to float explicitly. pandas >= 2.0 emits bool indicator columns from
# get_dummies, and a frame mixing float and bool columns yields an
# object-dtype array from `.values`, which torch cannot turn into a tensor.
tst_indep = tensor(df_test[indep_cols].to_numpy(dtype=float), dtype=torch.float)
# Scale with the training-set column maxima so both sets share one scale.
tst_indep = tst_indep / vals

In [None]:
# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    tst_preds = calc_preds(coeffs, tst_indep)
# calc_preds returns an (n, 1) column. Threshold at 0.5, drop the unit axis
# and hand pandas a plain 1-D integer array rather than a 2-D torch tensor.
df_test['Survived'] = (tst_preds > 0.5).int()[:, 0].numpy()

In [None]:
# Keep only the two columns the submission needs, then write them to CSV
# without the DataFrame index.
sub_df = df_test.loc[:, ['PassengerId', 'Survived']]
sub_df.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
!head sub.csv