In [1]:
import numpy as np
import pandas as pd
train_df= pd.read_csv('titanic/train.csv')
test_df= pd.read_csv('titanic/test.csv')

def clean(df):
    df['Age'].fillna(train_df["Age"].median(skipna=True), inplace= True)
    df["Embarked"].fillna(train_df["Embarked"].value_counts().idxmax(), inplace=True)
    df.drop("Cabin", axis=1, inplace=True)
    df.drop("Name", axis=1, inplace=True)
    df.drop("Ticket", axis=1, inplace=True)
    df.drop("PassengerId",axis=1,inplace=True)
    
    return df
a= train_df.copy()
b= test_df.copy()

train= clean(a)
test= clean(b)

li= [round(num) for num in train['Fare']]
train['Fare']=li

li= [round(num) for num in train['Age']]
train['Age']=li

train['Sex'] =train['Sex'].map({'male':1,'female':0})
train['Embarked'] =train['Embarked'].map({'S':0,'C':1,'Q':2})

#-------------------------
test= test.fillna(train['Fare'].median())

lu= [round(num) for num in test['Fare']]
test['Fare']=lu

lu= [round(num) for num in test['Age']]
test['Age']=lu

test['Sex'] =test['Sex'].map({'male':1,'female':0})
test['Embarked'] =test['Embarked'].map({'S':0,'C':1,'Q':2})

from sklearn.model_selection import train_test_split
X= train.drop('Survived',axis=1).to_numpy().T # 7,891 -- (input,n_examples)
Y= pd.DataFrame(train['Survived']).to_numpy()

##  Structure
    def initialize_parameters_deep(layers_dims):
        ...
        return parameters 
    def L_model_forward(X, parameters):
        ...
        return AL, caches
    def compute_cost(AL, Y):
        ...
        return cost
    def L_model_backward(AL, Y, caches):
        ...
        return grads
    def update_parameters(parameters, grads, learning_rate):
        ...
        return parameters
    def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
        parameters = initialize_parameters_deep(layers_dims)

        for i in range(0, num_iterations):

            AL, caches = L_model_forward(X,parameters)
            cost = compute_cost(AL,Y)

            grads = L_model_backward(AL,Y,caches)

            parameters = update_parameters(parameters,grads,learning_rate)

            if print_cost and i % 100 == 0:
                print ("Cost after iteration %i: %f" %(i, cost))
            if print_cost and i % 100 == 0:
                costs.append(cost)
            

In [2]:
X

array([[ 3,  1,  3, ...,  3,  1,  3],
       [ 1,  0,  0, ...,  0,  1,  1],
       [22, 38, 26, ..., 28, 26, 32],
       ...,
       [ 0,  0,  0, ...,  2,  0,  0],
       [ 7, 71,  8, ..., 23, 30,  8],
       [ 0,  1,  0, ...,  0,  1,  2]], dtype=int64)

In [3]:
Y.shape

(891, 1)

In [4]:
def initialize_parameters_deep(layers_dims):
    parameters={}
    L= len(layers_dims)
    for l in range(1,L):
        parameters['W'+str(l)]= np.random.randn(layers_dims[l],layers_dims[l-1])*0.01
        parameters['b'+str(l)]= np.zeros((layers_dims[l],1))        
        
    return parameters 

# layers_dims=[7,12,6,4,1]
# parameters=initialize_parameters_deep(layers_dims)
# parameters

In [5]:
def linear_forward(A,W,b):
    Z= np.dot(W,A) +b
    cache={
        'A':A,
        'W':W,
        'b':b
    }
    return Z,cache

def relu(Z):
    A= np.maximum(0.1,Z)
    cache= Z
    return A,cache

def sigmoid(Z):
    A= 1 / (1 + np.exp(-Z))
    return A,Z


def linear_activation_forward(A,W,b,activation):
    if activation == 'relu':
        Z,linear_cache = linear_forward(A,W,b)
        A,activation_cache= relu(Z)    
    elif activation =='sigmoid':
        Z,linear_cache= linear_forward(A,W,b)
        A,activation_cache= sigmoid(Z)
        
    cache= (linear_cache,activation_cache)
    return A,cache # cache contain A,W,b,Z


def L_model_forward(A, parameters):
    L= len(parameters)//2
    caches=[]
    for l in range(1,L):
        A_prev= A
        
        A, cache = linear_activation_forward(A_prev, parameters['W'+str(l)],parameters['b'+str(l)],'relu')
        caches.append(cache)


    AL,cache= linear_activation_forward(A,parameters['W'+str(L)],parameters['b'+str(L)],'sigmoid')
    caches.append(cache)
    
    return AL, caches
# AL,caches = L_model_forward(X.T, parameters)
# print(Y.shape)


In [6]:
def compute_cost(AL,Y):
    m= Y.shape[0]
    cost= -(1/m) * ( np.dot(np.log(AL),Y) + np.dot(np.log(1-AL), 1-Y)  )
    cost= float(np.squeeze(cost))
    return cost               

# cost= compute_cost(AL,Y)
# cost

In [7]:
def linear_backward(dZ,cache):
    A_prev= cache['A']
    W = cache['W']
    b = cache['b']
    
    dW= np.dot(dZ,A_prev.T)
    dB= np.sum(dZ,axis=1,keepdims=True)
    dA_prev= np.dot(W.T,dZ)
    
    return dA_prev,dW,dB

def sigmoid_backward(dAL,Z):
    A = 1/ (1+ np.exp(-Z))
    g_prime= A * (1-A)
    dZ= dAL * g_prime
    return dZ

def relu_backward(dAL,Z):
    A= np.maximum(0,Z)
    g_prime= np.where(A <= 0, 0, 1)
    dZ= A * g_prime

    return dZ
    
def linear_activation_backward(dAL, caches, activation):
    linear_cache,activation_cache = caches
    
    if activation == 'relu':
        dZ= relu_backward(dAL,activation_cache)
        dA_prev,dW,dB= linear_backward(dZ,linear_cache)
        
    elif activation == 'sigmoid':
        dZ= sigmoid_backward(dAL,activation_cache)
        dA_prev,dW,dB= linear_backward(dZ,linear_cache)
        
    return dA_prev,dW,dB
        
def L_model_backward(AL,Y,caches):
    L= len(caches)
    grads={}
    dAL= - np.divide(Y.T,AL)- np.divide(1-Y.T,1-AL)
    grads['dA'+str(L)],grads['dW'+str(L)],grads['dB'+str(L)] =linear_activation_backward(dAL,caches[L-1],'sigmoid')
    
    for l in reversed(range(1,L)):
        dA_prev,dW,dB= linear_activation_backward(grads['dA'+str(l+1)],caches[l-1],'relu')
        grads['dA'+str(l)]= dA_prev
        grads['dW'+str(l)]= dW
        grads['dB'+str(l)]= dB
    return grads

    
    
# grads= L_model_backward(AL,Y,caches)
# grads

In [6]:
def update_parameters(parameters,grads,learning_rate):
    L= len(parameters)//2
    for l in range(L-1):
        parameters['W'+str(l+1)] = parameters['W'+str(l+1)] - learning_rate * grads['dW'+str(l+1)]
        parameters['b'+str(l+1)] = parameters['b'+str(l+1)] - learning_rate * grads['dB'+str(l+1)]
    return parameters


# update_parameters(parameters,grads,2)

In [11]:
%xmode Plain

Exception reporting mode: Plain


In [7]:
def L_layer_model(X,Y,layer_dims,learning_rate,num_iteration):
    parameters= initialize_parameters_deep(layer_dims)
    
    for i in range(num_iteration):
        AL,caches= L_model_forward(X,parameters)
        
        cost= compute_cost(AL,Y)
        
        grads= L_model_backward(AL,Y,caches)
        parameters= update_parameters(parameters,grads,learning_rate)
        print(i,cost)
#         if i ==1:
#             print('parameters :',parameters.keys())
#             print('grads :',grads.keys())
#             break
#         else:

#             print('parameters :',parameters.keys())
#             print('grads :',grads.keys())
#             print('-'*100)
            
    

layer_dims=[7,12,6,4,1]
L_layer_model(X,Y,layer_dims,2.1,10)

0 0.692686444239014
1 0.692686444239014
2 0.692686444239014
3 0.692686444239014
4 0.692686444239014
5 0.692686444239014
6 0.692686444239014
7 0.692686444239014
8 0.692686444239014
9 0.692686444239014
