# Trying to fit a multilayer deep learning model with pytorch

## Preparing data first in the most direct way

In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [4]:
from matplotlib import pyplot as plt

df_train = pd.read_csv('/kaggle/input/titanic/train.csv')
df_test = pd.read_csv('/kaggle/input/titanic/test.csv')
df_all = pd.concat([df_train, df_test])
df_train.name = 'Training Set'
df_test.name = 'Test Set'
df_all.name = 'All Set'
df_all.sample(5)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
355,1247,,1,"Julian, Mr. Henry Forbes",male,50.0,0,0,113044,26.0,E60,S
192,1084,,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S
594,595,0.0,2,"Chapman, Mr. John Henry",male,37.0,1,0,SC/AH 29037,26.0,,S
804,805,1.0,3,"Hedman, Mr. Oskar Arvid",male,27.0,0,0,347089,6.975,,S
367,368,1.0,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C


In [5]:
def prepare_features_basic(data):
  # missing values
  data.loc[data['Age'].isna(), 'Age'] = data['Age'].mode()[0].item()
  data.loc[data['Embarked'].isna(), 'Embarked'] = data['Embarked'].mode().item()
  data['Fare'].fillna(data['Fare'].mode().item())
  # Cabin: no need as we do delete it
  # categorical
  data = pd.get_dummies(data, columns = ['Sex', 'Pclass', 'Embarked'])
  # useless columns
  data = data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId'], axis = 1)
  # numerical
  data['Age'] /= data['Age'].max() 
  data['SibSp'] /= data['SibSp'].max()
  data['Parch'] /= data['Parch'].max()
  data['Fare'] = np.log1p(data['Fare'])
  data['Fare'] /= data['Fare'].max()
  return data

In [6]:
def prepare_features_initial(data):
  # missing values
  f_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'] = f_age_mean  
  m_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'] = m_age_mean  
  data['Age'] = data['Age'].astype(float)
  
  data.loc[data['PassengerId'] == 62, 'Embarked'] = 'S'
  data.loc[data['PassengerId'] == 830, 'Embarked'] = 'S'
  
  r = data.loc[(data['Pclass'] == 3) & (data['Embarked'].str.contains('S')) & (data['SibSp'] == 0) & (data['Parch'] == 0), 'Fare'].mode().item()
  data.loc[data['PassengerId'] == 1044, 'Fare'] = r
  # Cabin: no need as we do delete it
  # categorical
  data = pd.get_dummies(data, columns = ['Sex', 'Pclass', 'Embarked'])
  # useless columns
  data = data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId'], axis = 1)
  # numerical
  data['Age'] /= data['Age'].max() 
  data['SibSp'] /= data['SibSp'].max()
  data['Parch'] /= data['Parch'].max()
  data['Fare'] = np.log1p(data['Fare'])
  data['Fare'] /= data['Fare'].max()
  return data

In [7]:
def prepare_features_medium(data):
  # missing values
  f_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'] = f_age_mean  
  m_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'] = m_age_mean  
  data['Age'] = data['Age'].astype(float)
  
  data.loc[data['PassengerId'] == 62, 'Embarked'] = 'S'
  data.loc[data['PassengerId'] == 830, 'Embarked'] = 'S'
  
  r = data.loc[(data['Pclass'] == 3) & (data['Embarked'].str.contains('S')) & (data['SibSp'] == 0) & (data['Parch'] == 0), 'Fare'].mode().item()
  data.loc[data['PassengerId'] == 1044, 'Fare'] = r
  # Cabin: no need as we do delete it

  # we combine SibSp and Parch to measure the family size including the passenger
  data['Family'] = 1 + data['SibSp'] + data['Parch']
  # categorical
  data = pd.get_dummies(data, columns = ['Sex', 'Pclass', 'Embarked'])
  # useless columns
  data = data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId', 'SibSp', 'Parch'], axis = 1)
  # numerical
  data['Age'] /= data['Age'].max() 
  data['Family'] /= data['Family'].max()
  data['Fare'] = np.log1p(data['Fare'])
  data['Fare'] /= data['Fare'].max()
  return data

In [8]:
df_all = prepare_features_medium(df_all)

In [9]:
df_train = df_all.iloc[:891]
df_train_y = df_train['Survived']
df_train_x = df_train.drop(['Survived'], axis=1)

df_test = df_all.iloc[891:]
df_train.tail(5)

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
886,0.0,0.3375,0.422864,0.090909,0,1,0,1,0,0,0,1
887,1.0,0.2375,0.550238,0.090909,1,0,1,0,0,0,0,1
888,0.0,0.358589,0.512205,0.363636,1,0,0,0,1,0,0,1
889,1.0,0.325,0.550238,0.090909,0,1,1,0,0,1,0,0
890,0.0,0.4,0.347554,0.090909,0,1,0,0,1,0,1,0


In [10]:
df_test.head()

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,,0.43125,0.348997,0.090909,0,1,0,0,1,0,1,0
1,,0.5875,0.333195,0.181818,1,0,0,0,1,0,0,1
2,,0.775,0.379604,0.090909,0,1,0,1,0,0,1,0
3,,0.3375,0.363449,0.090909,0,1,0,0,1,0,0,1
4,,0.275,0.414494,0.272727,1,0,0,0,1,0,0,1


## Now fitting a deep learning model 

In [41]:
import torch
import torch.nn.functional as F

train = torch.tensor(df_train_x.values, dtype=torch.float)
labels = torch.tensor(df_train_y.values, dtype=torch.float)
# no bias because we do hot encoding rather than dummy 
train.shape



torch.Size([891, 11])

In [42]:
def compute_preds(coeffs, x):
    layer1, layer2, bias = coeffs
    a = F.relu(x @ layer1)
    b = F.relu(a @ layer2 + bias)
    return torch.sigmoid(b)

In [43]:
def compute_loss(coeffs, x, y):
    #preds = compute_preds(coeffs, x)
    #return torch.where(y == 1.0, 1.0 - pracosheds, preds).sum()
    return torch.abs(compute_preds(coeffs, x) - y).mean()
    #return torch.square(compute_preds(coeffs, x) - y).mean()

In [58]:
def upgrade_coeffs(coeffs, rate):
    layer1, layer2, bias = coeffs
    layer1.sub_(layer1.grad * rate)
    layer2.sub_(layer2.grad * rate)
    layer1.grad.zero_()
    layer2.grad.zero_()

In [59]:
def run_epoch(weights, x, y, rate):
    loss = compute_loss(weights, x, y)
    loss.backward()
    with torch.no_grad():
        upgrade_coeffs(weights, rate)
    print(f"{loss:.3f}", end="; ")

In [64]:
def init_coeffs(n_coeff=11, n_hidden=20):
    layer1 = (torch.rand(n_coeff, n_hidden) - 0.5) / n_hidden
    layer1.requires_grad_()
    layer2 = (torch.rand(n_hidden) - 0.5)
    layer2.requires_grad_()
    bias = torch.rand(1)
    return layer1, layer2, bias

In [86]:
def train_model(epochs=30, lr=0.01):
    weights = init_coeffs(11, 6)
    #weights.retain_grad()
    for e in range(epochs):
            run_epoch(weights, train, labels, lr)
    return weights

In [87]:
def accuracy(coeffs, x, y):
    with torch.no_grad():
        preds = compute_preds(coeffs, x)
    return ((preds > 0.5) == y.bool()).float().mean()

In [102]:
parameters = train_model(100, 2)

0.503; 0.495; 0.494; 0.494; 0.494; 0.494; 0.494; 0.493; 0.493; 0.493; 0.491; 0.490; 0.488; 0.487; 0.485; 0.483; 0.480; 0.477; 0.473; 0.470; 0.465; 0.461; 0.456; 0.452; 0.447; 0.443; 0.440; 0.436; 0.434; 0.431; 0.429; 0.428; 0.426; 0.425; 0.424; 0.423; 0.422; 0.421; 0.421; 0.420; 0.420; 0.420; 0.419; 0.419; 0.419; 0.418; 0.418; 0.418; 0.418; 0.418; 0.417; 0.417; 0.417; 0.417; 0.417; 0.417; 0.417; 0.417; 0.417; 0.417; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.416; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 0.415; 

In [103]:
accuracy(parameters, train, labels)

tensor(0.7856)

* (60, 1.5) gives 0.7789 accuracy
* (100, 2) with 6 at layer2 gives 0.7856 accuracy

In [104]:
def show_coeffs():
    with torch.no_grad():
        s = dict(zip(df_train_x.columns, parameters))
    return s
show_coeffs()

{'Age': tensor([[-1.4518e-02,  2.0056e-01, -2.9394e-02,  1.4943e-02, -2.2983e-03,
           2.1093e-01],
         [-8.8498e-02,  1.4028e-01, -5.9613e-03,  7.3443e-02,  8.8617e-02,
           5.2600e-01],
         [-2.7841e-02,  1.4723e-01, -3.7652e-02,  8.0281e-02,  5.7484e-02,
          -4.0435e-02],
         [ 5.3613e-04, -3.5030e-01, -4.1163e-02, -6.7125e-02, -1.5121e-01,
           1.6079e+00],
         [-6.7696e-03,  6.2205e-01,  1.3476e-02, -4.3515e-02,  3.3518e-01,
          -6.9345e-01],
         [ 5.8206e-02, -8.8332e-03,  4.4444e-02, -3.8001e-02,  5.6577e-02,
           3.5334e-01],
         [-5.4524e-02, -4.2010e-02,  2.9480e-02, -5.7256e-02,  3.7865e-02,
           4.3434e-01],
         [-4.0818e-02,  1.9211e-01,  7.7691e-02, -4.4162e-02,  9.6307e-02,
           1.1297e-02],
         [-3.8121e-02,  1.2631e-01,  4.8272e-02, -6.2979e-02,  2.9331e-03,
           2.9491e-01],
         [ 6.6174e-02,  2.3335e-02,  2.0228e-02, -5.6469e-02,  1.9343e-02,
           2.8014e-01],
   

In [105]:
with torch.no_grad():
    df_test_x = torch.tensor(df_test.drop(['Survived'], axis=1).values, dtype=torch.float)
    result = compute_preds(parameters, df_test_x)
result

tensor([0.5000, 0.9873, 0.5000, 0.5000, 0.9870, 0.5000, 0.9887, 0.5000, 0.9875,
        0.5000, 0.5000, 0.5000, 0.9959, 0.5000, 0.9961, 0.9960, 0.5000, 0.5000,
        0.9867, 0.9884, 0.5000, 0.5000, 0.9955, 0.5000, 0.9969, 0.5000, 0.9959,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.9880, 0.9880, 0.5000, 0.5000,
        0.9871, 0.9869, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.9957, 0.9959,
        0.5000, 0.5000, 0.5000, 0.9967, 0.9879, 0.5000, 0.5000, 0.9958, 0.9963,
        0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.9970, 0.5000, 0.5000, 0.5000,
        0.9883, 0.5000, 0.9960, 0.9881, 0.5000, 0.5000, 0.9966, 0.9884, 0.5000,
        0.9870, 0.5000, 0.9969, 0.5147, 0.5000, 0.9956, 0.5000, 0.9884, 0.5000,
        0.5158, 0.5000, 0.5000, 0.5000, 0.5000, 0.9886, 0.9866, 0.9887, 0.5000,
        0.9874, 0.5000, 0.9955, 0.5000, 0.5000, 0.5000, 0.9965, 0.5000, 0.9867,
        0.5000, 0.9962, 0.5000, 0.5000, 0.5000, 0.9872, 0.5000, 0.5000, 0.5000,
        0.5000, 0.5000, 0.5000, 0.9887, 

In [106]:
results = torch.where(result <= 0.5, 0.0, 1.0)
results


tensor([0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0.,
        1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0.,
        1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0.,
        1., 0., 1., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 1., 0.,
        1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 1.,
        0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0.,
        1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1.,
        0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1.,
        0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 

In [107]:
results = pd.DataFrame(results)
results.columns = ['Survived']
results.head()

Unnamed: 0,Survived
0,0.0
1,1.0
2,0.0
3,0.0
4,1.0


In [108]:
df_submit = df_test.fillna(results)
df_submit.head()

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,0.0,0.43125,0.348997,0.090909,0,1,0,0,1,0,1,0
1,1.0,0.5875,0.333195,0.181818,1,0,0,0,1,0,0,1
2,0.0,0.775,0.379604,0.090909,0,1,0,1,0,0,1,0
3,0.0,0.3375,0.363449,0.090909,0,1,0,0,1,0,0,1
4,1.0,0.275,0.414494,0.272727,1,0,0,0,1,0,0,1


In [109]:
df_submit = df_submit.drop(['Age', 'Family', 'Fare', 'Sex_female', 'Sex_male', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S'], axis=1)
df_submit['Survived'] = df_submit['Survived'].astype('int')
df_submit['PassengerId'] = (892 + df_submit.index) 
df_submit.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 418 entries, 0 to 417
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Survived     418 non-null    int64
 1   PassengerId  418 non-null    int64
dtypes: int64(2)
memory usage: 9.8 KB


In [114]:
df_submit.to_csv('/kaggle/working/20230525_titanic_preds_deepml_mae.csv', columns=['PassengerId', 'Survived'], header=True, index=False, sep=',')

In [116]:
!tail /kaggle/working/20230525_titanic_preds_deepml_mae.csv
!tail /kaggle/input/titanic/gender_submission.csv

1300,1
1301,1
1302,1
1303,1
1304,1
1305,0
1306,1
1307,0
1308,0
1309,0
1300,1
1301,1
1302,1
1303,1
1304,1
1305,0
1306,1
1307,0
1308,0
1309,0


scored **0.7488** worst than single layer model