# Trying to fit a multilayer deep learning model with pytorch

## Preparing data first in the most direct way

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [2]:
from matplotlib import pyplot as plt

df_train = pd.read_csv('/kaggle/input/titanic/train.csv')
df_test = pd.read_csv('/kaggle/input/titanic/test.csv')
df_all = pd.concat([df_train, df_test])
df_train.name = 'Training Set'
df_test.name = 'Test Set'
df_all.name = 'All Set'
df_all.sample(5)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
193,194,1.0,2,"Navratil, Master. Michel M",male,3.0,1,1,230080,26.0,F2,S
315,316,1.0,3,"Nilsson, Miss. Helmina Josefina",female,26.0,0,0,347470,7.8542,,S
699,700,0.0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42.0,0,0,348121,7.65,F G63,S
296,297,0.0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C
827,828,1.0,2,"Mallet, Master. Andre",male,1.0,0,2,S.C./PARIS 2079,37.0042,,C


In [3]:
def prepare_features_medium(data):
  # missing values
  f_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("female")), 'Age'] = f_age_mean  
  m_age_mean = data.loc[(~data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'].mean()
  data.loc[(data['Age'].isna()) & (data['Sex'].str.contains("male")), 'Age'] = m_age_mean  
  data['Age'] = data['Age'].astype(float)
  
  data.loc[data['PassengerId'] == 62, 'Embarked'] = 'S'
  data.loc[data['PassengerId'] == 830, 'Embarked'] = 'S'
  
  r = data.loc[(data['Pclass'] == 3) & (data['Embarked'].str.contains('S')) & (data['SibSp'] == 0) & (data['Parch'] == 0), 'Fare'].mode().item()
  data.loc[data['PassengerId'] == 1044, 'Fare'] = r
  # Cabin: no need as we do delete it

  # we combine SibSp and Parch to measure the family size including the passenger
  data['Family'] = 1 + data['SibSp'] + data['Parch']
  # categorical
  data = pd.get_dummies(data, columns = ['Sex', 'Pclass', 'Embarked'])
  # useless columns
  data = data.drop(['Cabin', 'Name', 'Ticket', 'PassengerId', 'SibSp', 'Parch'], axis = 1)
  # numerical
  data['Age'] /= data['Age'].max() 
  data['Family'] /= data['Family'].max()
  data['Fare'] = np.log1p(data['Fare'])
  data['Fare'] /= data['Fare'].max()
  return data

In [4]:
df_all = prepare_features_medium(df_all)

In [5]:
df_train = df_all.iloc[:891]
df_train_y = df_train['Survived']
df_train_x = df_train.drop(['Survived'], axis=1)

df_test = df_all.iloc[891:]
df_train.tail(5)

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
886,0.0,0.3375,0.422864,0.090909,0,1,0,1,0,0,0,1
887,1.0,0.2375,0.550238,0.090909,1,0,1,0,0,0,0,1
888,0.0,0.358589,0.512205,0.363636,1,0,0,0,1,0,0,1
889,1.0,0.325,0.550238,0.090909,0,1,1,0,0,1,0,0
890,0.0,0.4,0.347554,0.090909,0,1,0,0,1,0,1,0


In [6]:
df_test.head()

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,,0.43125,0.348997,0.090909,0,1,0,0,1,0,1,0
1,,0.5875,0.333195,0.181818,1,0,0,0,1,0,0,1
2,,0.775,0.379604,0.090909,0,1,0,1,0,0,1,0
3,,0.3375,0.363449,0.090909,0,1,0,0,1,0,0,1
4,,0.275,0.414494,0.272727,1,0,0,0,1,0,0,1


## Now fitting a deep learning model 

In [118]:
import torch
import torch.nn.functional as F
import torch.nn as nn

train = torch.tensor(df_train_x.values, dtype=torch.float)
labels = torch.tensor(df_train_y.values, dtype=torch.float)
labels = torch.unsqueeze(labels, 0)
# no bias because we do hot encoding rather than dummy 
train.shape
labels.shape



torch.Size([1, 891])

In [119]:
labels = torch.transpose(labels, 0, 1)
labels.shape

torch.Size([891, 1])

In [120]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [142]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(11, 20, bias=False),
            nn.ReLU(),
            nn.Linear(20, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits

In [143]:
model = NeuralNetwork().to(device) # or model = NeuralNetwork()
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=11, out_features=20, bias=False)
    (1): ReLU()
    (2): Linear(in_features=20, out_features=1, bias=True)
    (3): Sigmoid()
  )
)


In [144]:
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=11, out_features=20, bias=False)
    (1): ReLU()
    (2): Linear(in_features=20, out_features=1, bias=True)
    (3): Sigmoid()
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([20, 11]) | Values : tensor([[ 0.1821,  0.0054,  0.1197, -0.0731, -0.1398,  0.0907,  0.0704,  0.2359,
         -0.2704,  0.0292, -0.2669],
        [ 0.2274,  0.0520, -0.2240, -0.1461, -0.0134,  0.2984,  0.2548, -0.1724,
          0.1908,  0.2677, -0.0265]], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([1, 20]) | Values : tensor([[ 0.1969, -0.1514,  0.0049, -0.0760, -0.1208,  0.1441,  0.0421,  0.0531,
          0.0847, -0.1933, -0.1504,  0.0168, -0.1393, -0.2008,  0.2190, -0.0312,
         -0.2114,  0.1658,  0.0791, -0.1464]], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Size: torch.Size([1]) | Values : tensor([0.0646], grad_fn=<SliceBackward0>) 



In [145]:
loss_fn = nn.L1Loss(reduction='mean')

In [146]:
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [147]:
def train_loop(x, y, model, loss_fn, optimizer):
    pred = model(x)
    loss = loss_fn(pred, y)
    # Backpropagation
    loss.backward()
    optimizer.step()
    # optimizer.zero_grad()
    print(f"{loss:.3f}", end="; ")

In [148]:
#    layer1 = (torch.rand(n_coeff, n_hidden) - 0.5) / n_hidden
#    layer2 = (torch.rand(n_hidden) - 0.3)
#    bias = torch.rand(1)[0]


In [149]:
def train_model(epochs=30, lr=0.1):
    for e in range(epochs):
        train_loop(train, labels, model, loss_fn, optimizer)
    
def accuracy():
    with torch.no_grad():
        preds = model(train)
        loss = loss_fn(preds, labels)
        acc = ((preds > 0.5) == labels.bool()).float().mean()
        print(f"accuracy: {acc:.5f}", end="\n")

In [150]:
learning_rate = 20
epochs = 30
parameters = train_model(30, 20)

0.479; 0.353; 0.327; 0.215; 0.239; 0.214; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 0.213; 

In [151]:
accuracy()

accuracy: 0.78676


* (60, 1.5) gives 0.7789 accuracy
* (100, 2) with 6 at layer2 gives 0.7856 accuracy
* (30, 20) with 6 at layer2 gives 0.8114 accuracy

In [155]:
with torch.no_grad():
    df_test_x = torch.tensor(df_test.drop(['Survived'], axis=1).values, dtype=torch.float)
    result = model(df_test_x)
result.shape

torch.Size([418, 1])

In [156]:
results = torch.where(result <= 0.5, 0.0, 1.0)
results = pd.DataFrame(results)
results.columns = ['Survived']
results.head()

Unnamed: 0,Survived
0,0.0
1,1.0
2,0.0
3,0.0
4,1.0


In [157]:
df_submit = df_test.fillna(results)
df_submit.head()

Unnamed: 0,Survived,Age,Fare,Family,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,0.0,0.43125,0.348997,0.090909,0,1,0,0,1,0,1,0
1,1.0,0.5875,0.333195,0.181818,1,0,0,0,1,0,0,1
2,0.0,0.775,0.379604,0.090909,0,1,0,1,0,0,1,0
3,0.0,0.3375,0.363449,0.090909,0,1,0,0,1,0,0,1
4,1.0,0.275,0.414494,0.272727,1,0,0,0,1,0,0,1


In [158]:
df_submit = df_submit.drop(['Age', 'Family', 'Fare', 'Sex_female', 'Sex_male', 'Pclass_1', 'Pclass_2', 'Pclass_3', 'Embarked_C', 'Embarked_Q', 'Embarked_S'], axis=1)
df_submit['Survived'] = df_submit['Survived'].astype('int')
df_submit['PassengerId'] = (892 + df_submit.index) 
df_submit.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 418 entries, 0 to 417
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Survived     418 non-null    int64
 1   PassengerId  418 non-null    int64
dtypes: int64(2)
memory usage: 9.8 KB


In [159]:
df_submit.to_csv('/kaggle/working/20230526_titanic_preds_allpytorch.csv', columns=['PassengerId', 'Survived'], header=True, index=False, sep=',')

In [160]:
!tail /kaggle/working/20230526_titanic_preds_allpytorch.csv
!tail /kaggle/input/titanic/gender_submission.csv

1300,1
1301,1
1302,1
1303,1
1304,1
1305,0
1306,1
1307,0
1308,0
1309,0
1300,1
1301,1
1302,1
1303,1
1304,1
1305,0
1306,1
1307,0
1308,0
1309,0


scored **0.7488** worst than single layer model

scored **0.77751** with one relu one sigmoid