# Trying to fit a multilayer deep learning model with pytorch

## Preparing data first in the most direct way

In [8]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [9]:
from matplotlib import pyplot as plt

df_train = pd.read_csv('/kaggle/input/titanic/train.csv')
df_test = pd.read_csv('/kaggle/input/titanic/test.csv')
df_all = pd.concat([df_train, df_test])
df_train.name = 'Training Set'
df_test.name = 'Test Set'
df_all.name = 'All Set'
df_all.sample(5)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
46,938,,1,"Chevre, Mr. Paul Romaine",male,45.0,0,0,PC 17594,29.7,A9,C
60,61,0.0,3,"Sirayanian, Mr. Orsen",male,22.0,0,0,2669,7.2292,,C
242,1134,,1,"Spedden, Mr. Frederic Oakley",male,45.0,1,1,16966,134.5,E34,C
444,445,1.0,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S
264,1156,,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30.0,0,0,C.A. 34644,12.7375,,C


In [10]:
def prepare_features_basic(data):
  # missing values
  data.loc[data['Age'].isna(), 'Age'] = data['Age'].mode()[0].item()
  data.loc[data['Embarked'].isna(), 'Embarked'] = data['Embarked'].mode().item()
  data['Fare'].fillna(data['Fare'].mode().item())
  # Cabin: no need as we do delete it
  # categorical
  data = pd.get_dummies(data, columns = ['Sex', 'Pclass', 'Embarked'])
  # useless columns
  data = data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId'], axis = 1)
  # numerical
  data['Age'] /= data['Age'].max() 
  data['SibSp'] /= data['SibSp'].max()
  data['Parch'] /= data['Parch'].max()
  data['Fare'] = np.log1p(data['Fare'])
  data['Fare'] /= data['Fare'].max()
  return data

In [11]:
def prepare_features_initial(data):
  # missing values
  f_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'] = f_age_mean  
  m_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'] = m_age_mean  
  data['Age'] = data['Age'].astype(float)
  
  data.loc[data['PassengerId'] == 62, 'Embarked'] = 'S'
  data.loc[data['PassengerId'] == 830, 'Embarked'] = 'S'
  
  r = data.loc[(data['Pclass'] == 3) & (data['Embarked'].str.contains('S')) & (data['SibSp'] == 0) & (data['Parch'] == 0), 'Fare'].mode().item()
  data.loc[data['PassengerId'] == 1044, 'Fare'] = r
  # Cabin: no need as we do delete it
  # categorical
  data = pd.get_dummies(data, columns = ['Sex', 'Pclass', 'Embarked'])
  # useless columns
  data = data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId'], axis = 1)
  # numerical
  data['Age'] /= data['Age'].max() 
  data['SibSp'] /= data['SibSp'].max()
  data['Parch'] /= data['Parch'].max()
  data['Fare'] = np.log1p(data['Fare'])
  data['Fare'] /= data['Fare'].max()
  return data

In [12]:
def prepare_features_medium(data):
  # missing values
  f_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'] = f_age_mean  
  m_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'] = m_age_mean  
  data['Age'] = data['Age'].astype(float)
  
  data.loc[data['PassengerId'] == 62, 'Embarked'] = 'S'
  data.loc[data['PassengerId'] == 830, 'Embarked'] = 'S'
  
  r = data.loc[(data['Pclass'] == 3) & (data['Embarked'].str.contains('S')) & (data['SibSp'] == 0) & (data['Parch'] == 0), 'Fare'].mode().item()
  data.loc[data['PassengerId'] == 1044, 'Fare'] = r
  # Cabin: no need as we do delete it

  # we combine SibSp and Parch to measure the family size including the passenger
  data['Family'] = 1 + data['SibSp'] + data['Parch']
  # categorical
  data = pd.get_dummies(data, columns = ['Sex', 'Pclass', 'Embarked'])
  # useless columns
  data = data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId', 'SibSp', 'Parch'], axis = 1)
  # numerical
  data['Age'] /= data['Age'].max() 
  data['Family'] /= data['Family'].max()
  data['Fare'] = np.log1p(data['Fare'])
  data['Fare'] /= data['Fare'].max()
  return data

In [13]:
df_all = prepare_features_medium(df_all)

In [14]:
df_train = df_all.iloc[:891]
df_train_y = df_train['Survived']
df_train_x = df_train.drop(['Survived'], axis=1)

df_test = df_all.iloc[891:]
df_train.tail(5)

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
886,0.0,0.3375,0.422864,0.090909,0,1,0,1,0,0,0,1
887,1.0,0.2375,0.550238,0.090909,1,0,1,0,0,0,0,1
888,0.0,0.358589,0.512205,0.363636,1,0,0,0,1,0,0,1
889,1.0,0.325,0.550238,0.090909,0,1,1,0,0,1,0,0
890,0.0,0.4,0.347554,0.090909,0,1,0,0,1,0,1,0


In [15]:
df_test.head()

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,,0.43125,0.348997,0.090909,0,1,0,0,1,0,1,0
1,,0.5875,0.333195,0.181818,1,0,0,0,1,0,0,1
2,,0.775,0.379604,0.090909,0,1,0,1,0,0,1,0
3,,0.3375,0.363449,0.090909,0,1,0,0,1,0,0,1
4,,0.275,0.414494,0.272727,1,0,0,0,1,0,0,1


## Now fitting a deep learning model 

In [16]:
import torch
import torch.nn.functional as F

train = torch.tensor(df_train_x.values, dtype=torch.float)
labels = torch.tensor(df_train_y.values, dtype=torch.float)
# no bias because we do hot encoding rather than dummy 
train.shape



torch.Size([891, 11])

In [17]:
def compute_preds(coeffs, x):
    layer1, layer2, bias = coeffs
    a = F.relu(x @ layer1)
    return torch.sigmoid(a @ layer2 + bias)

In [18]:
def compute_loss(coeffs, x, y):
    #preds = compute_preds(coeffs, x)
    #return torch.where(y == 1.0, 1.0 - pracosheds, preds).sum()
    return torch.abs(compute_preds(coeffs, x) - y).mean()
    #return torch.square(compute_preds(coeffs, x) - y).mean()

In [19]:
def upgrade_coeffs(coeffs, rate):
    layer1, layer2, bias = coeffs
    layer1.sub_(layer1.grad * rate)
    layer2.sub_(layer2.grad * rate)
    layer1.grad.zero_()
    layer2.grad.zero_()

In [20]:
def run_epoch(weights, x, y, rate):
    loss = compute_loss(weights, x, y)
    loss.backward()
    with torch.no_grad():
        upgrade_coeffs(weights, rate)
    print(f"{loss:.3f}", end="; ")

In [21]:
def init_coeffs(n_coeff=11, n_hidden=20):
    layer1 = (torch.rand(n_coeff, n_hidden) - 0.5) / n_hidden
    layer1.requires_grad_()
    layer2 = (torch.rand(n_hidden) - 0.3)
    layer2.requires_grad_()
    bias = torch.rand(1)[0]
    return layer1, layer2, bias

In [22]:
def train_model(epochs=30, lr=0.01):
    weights = init_coeffs(11, 6)
    #weights.retain_grad()
    for e in range(epochs):
            run_epoch(weights, train, labels, lr)
    return weights

In [23]:
def accuracy(coeffs, x, y):
    with torch.no_grad():
        preds = compute_preds(coeffs, x)
    return ((preds > 0.5) == y.bool()).float().mean()

In [24]:
parameters = train_model(30, 20)

0.517; 0.483; 0.282; 0.245; 0.215; 0.214; 0.212; 0.206; 0.201; 0.204; 0.196; 0.197; 0.193; 0.190; 0.190; 0.190; 0.190; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 0.189; 

In [25]:
accuracy(parameters, train, labels)

tensor(0.8114)

* (60, 1.5) gives 0.7789 accuracy
* (100, 2) with 6 at layer2 gives 0.7856 accuracy
* (30, 20) with 6 at layer2 gives 0.8114 accuracy

In [34]:
def show_coeffs():
    with torch.no_grad():
        s = dict(zip(df_train_x.columns, parameters))
    return s
show_coeffs()

{'Age': tensor([[-1.4323e-02, -2.9022e-02,  3.4873e-02,  4.6406e-03,  3.7359e-01,
           4.3627e-01],
         [-5.8772e-02,  4.2331e-01,  1.9351e-01, -5.1008e-02,  1.8011e-01,
           2.0542e-01],
         [ 6.1150e-02, -2.1003e-01, -1.3281e-01,  3.6732e-02,  3.9257e-01,
           4.5976e-01],
         [-4.9601e-02,  2.0249e+00,  8.2987e-01, -7.0441e-02, -1.0872e+00,
          -1.2184e+00],
         [-3.4694e-02, -1.1837e+00, -6.4436e-01, -1.2772e-02,  1.6602e+00,
           1.8430e+00],
         [-7.7039e-02,  7.2195e-01,  4.4201e-01, -1.8166e-02, -3.0351e-01,
          -3.0585e-01],
         [-7.2480e-02,  6.2737e-01,  3.0788e-01,  8.4944e-02, -3.1845e-01,
          -3.5587e-01],
         [-5.0681e-02, -5.5741e-01, -5.8352e-01, -2.9140e-02,  1.2298e+00,
           1.3528e+00],
         [ 2.5018e-02,  5.7611e-01,  2.2140e-01, -4.5541e-02, -1.1120e-01,
          -1.9117e-01],
         [ 4.7014e-04,  5.3119e-01,  4.9335e-02, -5.1553e-03, -3.6559e-01,
          -4.0114e-01],
   

In [35]:
with torch.no_grad():
    df_test_x = torch.tensor(df_test.drop(['Survived'], axis=1).values, dtype=torch.float)
    result = compute_preds(parameters, df_test_x)
result

tensor([1.5030e-06, 9.3238e-03, 1.8216e-03, 1.6067e-09, 1.3652e-02, 2.1642e-09,
        9.9562e-01, 1.8696e-06, 9.9261e-01, 1.1249e-09, 1.5222e-09, 1.4597e-06,
        9.9931e-01, 9.5134e-07, 9.9878e-01, 9.9992e-01, 3.5242e-03, 7.1585e-07,
        1.5048e-02, 9.8597e-01, 8.2163e-04, 2.3124e-09, 9.9923e-01, 1.8583e-03,
        9.9996e-01, 7.2960e-10, 9.9995e-01, 6.9102e-07, 1.6061e-06, 3.5087e-07,
        1.2920e-06, 1.9374e-06, 8.4999e-03, 9.4666e-03, 1.4942e-03, 7.5918e-07,
        1.8235e-02, 2.1906e-02, 1.6641e-09, 1.1645e-09, 3.6106e-07, 2.1313e-06,
        1.1703e-09, 9.9874e-01, 9.9883e-01, 1.7034e-09, 1.2842e-03, 1.6811e-06,
        9.9996e-01, 9.8924e-03, 1.8202e-06, 1.6427e-03, 9.9866e-01, 9.9814e-01,
        1.5353e-03, 9.1127e-07, 1.3468e-09, 1.7111e-09, 1.1574e-09, 9.9996e-01,
        2.0573e-09, 2.5838e-06, 2.0142e-09, 9.9589e-01, 1.1811e-03, 9.9884e-01,
        9.9591e-01, 1.3319e-06, 1.7932e-03, 9.9599e-01, 9.9588e-01, 1.8725e-09,
        1.8091e-02, 1.9016e-03, 9.9996e-

In [36]:
results = torch.where(result <= 0.5, 0.0, 1.0)
results


tensor([0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0.,
        0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 1., 1.,
        0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0.,
        0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0.,
        0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0.,
        0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0.,
        1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1.,
        0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 1., 0., 1., 0., 1., 0., 

In [37]:
results = pd.DataFrame(results)
results.columns = ['Survived']
results.head()

Unnamed: 0,Survived
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [38]:
df_submit = df_test.fillna(results)
df_submit.head()

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,0.0,0.43125,0.348997,0.090909,0,1,0,0,1,0,1,0
1,0.0,0.5875,0.333195,0.181818,1,0,0,0,1,0,0,1
2,0.0,0.775,0.379604,0.090909,0,1,0,1,0,0,1,0
3,0.0,0.3375,0.363449,0.090909,0,1,0,0,1,0,0,1
4,0.0,0.275,0.414494,0.272727,1,0,0,0,1,0,0,1


In [39]:
df_submit = df_submit.drop(['Age', 'Family', 'Fare', 'Sex_female', 'Sex_male', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S'], axis=1)
df_submit['Survived'] = df_submit['Survived'].astype('int')
df_submit['PassengerId'] = (892 + df_submit.index) 
df_submit.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 418 entries, 0 to 417
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Survived     418 non-null    int64
 1   PassengerId  418 non-null    int64
dtypes: int64(2)
memory usage: 9.8 KB


In [40]:
df_submit.to_csv('/kaggle/working/20230526_titanic_preds_deepml_mae.csv', columns=['PassengerId', 'Survived'], header=True, index=False, sep=',')

In [41]:
!tail /kaggle/working/20230526_titanic_preds_deepml_mae.csv
!tail /kaggle/input/titanic/gender_submission.csv

1300,1
1301,0
1302,1
1303,1
1304,0
1305,0
1306,1
1307,0
1308,0
1309,0
1300,1
1301,1
1302,1
1303,1
1304,1
1305,0
1306,1
1307,0
1308,0
1309,0


scored **0.7488** worst than single layer model

scored **0.77751** with one relu one sigmoid