In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import random

In [2]:
!kaggle competitions download -c titanic

titanic.zip: Skipping, found more recently modified local copy (use --force to force download)


In [3]:
import zipfile

zp = zipfile.ZipFile('titanic.zip') 
df = pd.read_csv(zp.open('train.csv'))

In [4]:
df = df.drop(['PassengerId', 'Name', 'Parch', 'Ticket', 'Cabin'], axis = 1)

df.loc[df['SibSp'] >= 1, 'SibSp'] = 1
df['Embarked'] = df['Embarked'].fillna('S')

df = df.dropna(subset = ['Age'])
df['Age'] = df['Age'].astype(int)

df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Fare,Embarked
0,0,3,male,22,1,7.25,S
1,1,1,female,38,1,71.2833,C
2,1,3,female,26,0,7.925,S
3,1,1,female,35,1,53.1,S
4,0,3,male,35,0,8.05,S


In [5]:
df = pd.concat([df, pd.get_dummies(df['Sex'], prefix='Sex')],axis=1)
df = pd.concat([df, pd.get_dummies(df['Pclass'], prefix='Pclass')],axis=1)
df = pd.concat([df, pd.get_dummies(df['Embarked'], prefix='Embarked')],axis=1)
  
df = df.drop(['Sex', 'Pclass', 'Embarked'], axis = 1)
df.head()

Unnamed: 0,Survived,Age,SibSp,Fare,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3,Embarked_C,Embarked_Q,Embarked_S
0,0,22,1,7.25,0,1,0,0,1,0,0,1
1,1,38,1,71.2833,1,0,1,0,0,1,0,0
2,1,26,0,7.925,1,0,0,0,1,0,0,1
3,1,35,1,53.1,1,0,1,0,0,0,0,1
4,0,35,0,8.05,0,1,0,0,1,0,0,1


In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df.drop(['Survived'], axis = 1), df['Survived'], test_size=0.2)
y_train

52     1
595    0
227    0
293    0
117    0
      ..
157    0
625    0
665    0
817    0
519    0
Name: Survived, Length: 571, dtype: int64

In [7]:
class NN:
    def __init__(self, activation_func = 'relu', n_hidden_units = 100, learning_rate = 0.03):
        self.learning_rate = learning_rate
        self.n_hidden_units = n_hidden_units
        
        self.sigm = lambda x: 1 / (1 + np.exp(-x))
        self.sigm_slope = lambda x: self.sigm(x) * (1 - self.sigm(x)) #don`t need it
        
        if activation_func == 'sigm':
            self.activation_func = self.sigm
            self.activation_func_slope = self.sigm_slope
        elif activation_func == 'tanh':
            self.activation_func = lambda x: np.tanh(x)
            self.activation_func_slope = lambda x:  1 - np.tanh(x)**2
        elif activation_func == 'relu':
            self.activation_func = lambda x: np.maximum(x, 0)
            self.activation_func_slope = lambda x: np.where(x <= 0, 0, 1)
            
    def fit(self, X_train, y_train):
        self.X = X_train.to_numpy().T
        self.y = y_train.to_numpy().reshape((1, -1))
        
        self.W1 = np.random.normal(0, 0.01, (self.n_hidden_units, self.X.shape[0]))
        self.b1 = np.ones((self.n_hidden_units, 1))
        self.W2 = np.random.normal(0, 0.01, (1, self.n_hidden_units))
        self.b2 = np.ones((1, 1))
        
        for i in range(1001):
            #Forward propagation
            z1 = self.W1.dot(self.X) + self.b1
            a1 = self.activation_func(z1)
            z2 = self.W2.dot(a1) + self.b2
            a2 = self.sigm(z2)
            
            #Backpropagation
            m = self.y.shape[1]
            dz2 = a2 - self.y
            dW2 = dz2.dot(a1.T) / m
            db2 = np.sum(dz2, axis = 1, keepdims = True) / m
            
            dz1 = np.dot(self.W2.T, dz2) * self.activation_func_slope(z1)
            dW1 = dz1.dot(self.X.T) / m
            db1 = np.sum(dz1, axis = 1, keepdims = True) / m
            
            self.W1 -= self.learning_rate * dW1
            self.W2 -= self.learning_rate * dW2
            self.b1 -= self.learning_rate * db1
            self.b2 -= self.learning_rate * db2
            
            if (i)%100 == 0:
                J = -np.sum(self.y * np.log(a2) + (1 - self.y) * np.log(1-a2)) / len(self.y)
                print('cost function at step', i, ': ', J)
            
    def predict_proba(self, X_test):
        return self.sigm(self.W2.dot(self.activation_func(self.W1.dot(X_test.T) + self.b1)) + self.b2)
    
    def predict(self, X_test):
        y = self.predict_proba(X_test)
        
        y[y > 0.5] = 1
        y[y <= 0.5] = 0
           
        return y.T

In [8]:
clf = NN(activation_func = 'tanh')
print('sigmoid', clf.sigm(np.array([0.2, 0.5, -0.1])))
print('sigmoid derivative', clf.sigm_slope(np.array([0.2, 0.5, -0.1])))

print('\ntanh', clf.activation_func(np.array([0.2, 0.5, -0.1])))
print('tanh derivative', clf.activation_func_slope(np.array([0.2, 0.5, -0.1])))

clf = NN()
print('\nrelu', clf.activation_func(np.array([0.2, 0.5, -0.1])))
print('relu derivative', clf.activation_func_slope(np.array([0.2, 0.5, -0.1])))

sigmoid [0.549834   0.62245933 0.47502081]
sigmoid derivative [0.24751657 0.23500371 0.24937604]

tanh [ 0.19737532  0.46211716 -0.09966799]
tanh derivative [0.96104298 0.78644773 0.99006629]

relu [0.2 0.5 0. ]
relu derivative [1 1 0]


In [9]:
clf.fit(X_train, y_train)

from sklearn.metrics import accuracy_score
accuracy_score(y_test, clf.predict(X_test))

cost function at step 0 :  518.9542403904859
cost function at step 100 :  347.0797873917912
cost function at step 200 :  342.13770011508063
cost function at step 300 :  336.3550179578936
cost function at step 400 :  334.00530578072295
cost function at step 500 :  332.9345121238141
cost function at step 600 :  326.70454047786393
cost function at step 700 :  320.67940349275693
cost function at step 800 :  316.84785939794824
cost function at step 900 :  353.647014074562
cost function at step 1000 :  302.0075597165543


0.8041958041958042

In [10]:
clf = NN(activation_func = 'tanh')
clf.fit(X_train, y_train)
accuracy_score(y_test, clf.predict(X_test))

cost function at step 0 :  516.8758876426131
cost function at step 100 :  350.2354059000677
cost function at step 200 :  343.7835470525414
cost function at step 300 :  339.8118815589106
cost function at step 400 :  336.76612796264374
cost function at step 500 :  333.1323505980099
cost function at step 600 :  329.2952078902314
cost function at step 700 :  324.758475583276
cost function at step 800 :  321.5570592443346
cost function at step 900 :  317.84960298962056
cost function at step 1000 :  314.3191628823252


0.7552447552447552

In [11]:
clf = NN(activation_func = 'sigm')
clf.fit(X_train, y_train)
accuracy_score(y_test, clf.predict(X_test))

cost function at step 0 :  504.8204316307859
cost function at step 100 :  354.3979121469538
cost function at step 200 :  351.845890551782
cost function at step 300 :  350.6556166120705
cost function at step 400 :  349.41005015547785
cost function at step 500 :  348.1563932166241
cost function at step 600 :  346.95430125763806
cost function at step 700 :  345.82363254266807
cost function at step 800 :  344.7675616019955
cost function at step 900 :  343.78280733992835
cost function at step 1000 :  342.8458971352273


0.7482517482517482

In [12]:
from sklearn.neural_network import MLPClassifier

clf2 = MLPClassifier(solver = 'sgd')
clf2.fit(X_train, y_train)

accuracy_score(y_test, clf2.predict(X_test))

0.7692307692307693