
# Check GPU

In [1]:
!nvidia-smi

Thu Aug 18 04:04:28 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Download Data

In [2]:
!gdown --id '1g5SMqm-NSBZcu33N8oz4zNkJSfU7hLUG' --output train.tsv
!gdown --id '1Co8kV8yvhccf37r_umd40Xc6Sp0WCHID' --output test.tsv
!gdown --id '1UUaP7yAQqnxl7hzx7gyOeHuJ0mrHzDvN' --output gender_submission.tsv
!ls

Downloading...
From: https://drive.google.com/uc?id=1g5SMqm-NSBZcu33N8oz4zNkJSfU7hLUG
To: /content/train.tsv
100% 61.2k/61.2k [00:00<00:00, 69.8MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Co8kV8yvhccf37r_umd40Xc6Sp0WCHID
To: /content/test.tsv
100% 28.6k/28.6k [00:00<00:00, 27.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1UUaP7yAQqnxl7hzx7gyOeHuJ0mrHzDvN
To: /content/gender_submission.tsv
100% 3.26k/3.26k [00:00<00:00, 5.67MB/s]
gender_submission.tsv  sample_data     test.tsv
model.pt	       submission.csv  train.tsv


# DataSet & Library Loading

In [22]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

df_train = pd.read_csv('train.tsv')
df_test  = pd.read_csv('test.tsv')
df_sub   = pd.read_csv('gender_submission.tsv')


# Making the dataset ready for the model

- let's drop the unnecessary columns
- encode the categorical (no details)
- impute the necessary columns (again no details)
- scale both the train and test data for linear models
- split the data for the model

In [23]:
# drop the unnecessary
df_train.drop(['Name','Ticket','Cabin'],axis=1,inplace=True)
df_test.drop( ['Name','Ticket','Cabin'],axis=1,inplace=True)

# one hot encoding
sex   = pd.get_dummies(df_train['Sex'],drop_first=True)
embark  = pd.get_dummies(df_train['Embarked'],drop_first=True)
df_train = pd.concat([df_train,sex,embark],axis=1)
df_train.drop(['Sex','Embarked'],axis=1,inplace=True)
sex   = pd.get_dummies(df_test['Sex'],drop_first=True)
embark  = pd.get_dummies(df_test['Embarked'],drop_first=True)
df_test = pd.concat([df_test,sex,embark],axis=1)
df_test.drop(['Sex','Embarked'],axis=1,inplace=True)

# fill Nan
df_train.fillna(df_train.mean(),inplace=True)
df_test.fillna(df_test.mean(),inplace=True)

# split label
y_train = df_train.loc[:, 'Survived'].values

# scale data
Scaler1 = StandardScaler()
Scaler2 = StandardScaler()

train_columns = df_train.columns
test_columns  = df_test.columns

df_train = pd.DataFrame(Scaler1.fit_transform(df_train))
df_test  = pd.DataFrame(Scaler2.fit_transform(df_test))

df_train.columns = train_columns
df_test.columns  = test_columns

# drop id & label
X_train = df_train.iloc[:,2:].values

# Pytorch

In [5]:
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.autograd import Variable

# Pytorch Logistic Regression Model

In [6]:
#thank you very much https://www.kaggle.com/mburakergenc/ttianic-minimal-pytorch-mlp
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(8, 512)
        self.fc2 = nn.Linear(512, 512)
        self.fc3 = nn.Linear(512, 2)
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x
model = Net()
print(model)

Net(
  (fc1): Linear(in_features=8, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


# Pytorch Loss Function (Cross Entropy CE)

In [7]:
criterion = nn.CrossEntropyLoss()

# Pytorch Optimizer (Stochastic Gradient Descent SGD)

In [8]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [9]:
import random
def same_seeds(seed):
    # Python built-in random module
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Torch
    torch.manual_seed(seed)
    # Cuda
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Pytorch Training

In [25]:
same_seeds(63)
batch_size = 64
n_epochs = 500
batch_no = len(X_train) // batch_size

train_loss = 0
train_loss_min = np.Inf
for epoch in range(n_epochs):
    for i in range(batch_no):
        start = i * batch_size
        end   = start + batch_size
        x_var = Variable(torch.FloatTensor(X_train[start:end]))
        y_var = Variable(torch.LongTensor(y_train[start:end])) 
        
        optimizer.zero_grad()
        output = model(x_var)
        loss   = criterion(output,y_var)
        loss.backward()
        optimizer.step()
        
        values, labels = torch.max(output, 1)
        num_right   = np.sum(labels.data.numpy() == y_train[start:end])
        train_loss += loss.item()*batch_size
    
    train_loss = train_loss / len(X_train)

    print(f"Epoch: {epoch+1} \tTrain Loss: {train_loss:5f} \tTrain Accuracy: {num_right / len(y_train[start:end]):.5f}")

    # if the loss dereases then save the model
    if train_loss <= train_loss_min: 
        torch.save(model.state_dict(), "model.pt")
        train_loss_min = train_loss


Epoch: 1 	Train Loss: 0.329174 	Train Accuracy: 0.89062
Epoch: 2 	Train Loss: 0.330248 	Train Accuracy: 0.87500
Epoch: 3 	Train Loss: 0.334576 	Train Accuracy: 0.89062
Epoch: 4 	Train Loss: 0.328976 	Train Accuracy: 0.87500
Epoch: 5 	Train Loss: 0.339663 	Train Accuracy: 0.85938
Epoch: 6 	Train Loss: 0.335437 	Train Accuracy: 0.90625
Epoch: 7 	Train Loss: 0.334802 	Train Accuracy: 0.89062
Epoch: 8 	Train Loss: 0.332801 	Train Accuracy: 0.89062
Epoch: 9 	Train Loss: 0.337600 	Train Accuracy: 0.90625
Epoch: 10 	Train Loss: 0.334990 	Train Accuracy: 0.89062
Epoch: 11 	Train Loss: 0.332472 	Train Accuracy: 0.85938
Epoch: 12 	Train Loss: 0.333294 	Train Accuracy: 0.87500
Epoch: 13 	Train Loss: 0.331614 	Train Accuracy: 0.87500
Epoch: 14 	Train Loss: 0.334494 	Train Accuracy: 0.87500
Epoch: 15 	Train Loss: 0.334243 	Train Accuracy: 0.87500
Epoch: 16 	Train Loss: 0.330864 	Train Accuracy: 0.87500
Epoch: 17 	Train Loss: 0.331356 	Train Accuracy: 0.87500
Epoch: 18 	Train Loss: 0.333821 	Train A

# predictions

In [26]:
X_test     = df_test.iloc[:,1:].values
X_test_var = Variable(torch.FloatTensor(X_test), requires_grad=False) 
with torch.no_grad():
    test_result = model(X_test_var)
values, labels = torch.max(test_result, 1)
survived = labels.data.numpy()

# submission

In [27]:
submission = pd.DataFrame({'PassengerId': df_sub['PassengerId'], 'Survived': survived})
submission.to_csv('submission.csv', index=False)