this is classification using reusable code

In [21]:
import pandas as pd
import numpy as np
data=pd.read_csv('train_X.csv')
data.dropna()
data2=pd.read_csv('train_Y.csv')
data2.dropna()
data.info()
X=data.to_numpy()
Y=data2.to_numpy()
Y=np.delete(Y,0,axis=1) #deleting the id column
X=np.delete(X,0,axis=1) #deleting the id column

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Id        891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   Sex       891 non-null    int64  
 3   Age       891 non-null    float64
 4   SibSp     891 non-null    int64  
 5   Parch     891 non-null    int64  
 6   Fare      891 non-null    float64
 7   Embarked  891 non-null    int64  
dtypes: float64(2), int64(6)
memory usage: 55.8 KB


In [12]:
def relu(x):
    return np.maximum(0, x)
def derrelu(x):
    return np.where(x > 0, 1, 0)
def tanh(x):
    return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))
def dertanh(x):
    return 1-np.power(np.tanh(x),2)
def sig(x):
    x = np.clip(x, -500, 500)
    return 1/(1+np.exp(-x))
def softmax(x):
    expX = np.exp(x)
    return expX/np.sum(expX, axis = 1,keepdims=True)

In [13]:
def initializeparameters(n0,n1,n2):
    W1=np.random.random((n1,n0))
    B1 = np.zeros((n1, 1))
    W2=np.random.random((n2,n1))
    B2 = np.zeros((n2, 1))
    parameters={
        'W1' :W1,
        'B1' :B1,
        'W2' :W2,
        'B2' :B2
    }
    return parameters
def forwardpropogation(X,parameters):
    W1=parameters['W1']
    W2=parameters['W2']
    B1=parameters['B1']
    B2=parameters['B2']
    Z1=(np.dot(W1,X.T)+B1).T
    A1=relu(Z1)
    Z2=(np.dot(W2,A1.T)+B2).T
    A2=sig(Z2)
    A2 = np.clip(A2, 1e-10, 1 - 1e-10)
    A2 = np.nan_to_num(A2, nan=1e-10)
    forward_cache={
        'Z1' :Z1,
        'A1' :A1,
        'Z2' :Z2,
        'A2' :A2

    }
    return forward_cache
def costfunction(Y,A2):
    m=Y.shape[0]
    cost = -(1/m)*np.sum(Y*np.log(A2)+(1-Y)*np.log(1-A2))
    return cost
def backwardpropagation(X,Y,parameters,forward_cache):
    W1=parameters['W1']
    W2=parameters['W2']
    B1=parameters['B1']
    B2=parameters['B2']
    Z1=forward_cache['Z1']
    A1=forward_cache['A1']
    Z2=forward_cache['Z2']
    A2=forward_cache['A2']
    m=Y.shape[0]
    dZ2=A2-Y
    dW2=(1/m)*np.dot(dZ2.T,A1)
    dB2=(1/m)*np.sum(dZ2,axis=0,keepdims=True).T
    dA1=np.dot(dZ2,W2)
    dZ1=dA1*derrelu(Z1)
    dW1=(1/m)*np.dot(dZ1.T,X)
    dB1=(1/m)*np.sum(dZ1,axis=0,keepdims=True).T
    gradients={
        'dW1' :dW1,
        'dB1' :dB1,
        'dW2' :dW2,
        'dB2' :dB2
    }
    return gradients
def updateparameters(parameters,gradients,learningrate):
    W1=parameters['W1']
    W2=parameters['W2']
    B1=parameters['B1']
    B2=parameters['B2']
    dW1=gradients['dW1']
    dW2=gradients['dW2']
    dB1=gradients['dB1']
    dB2=gradients['dB2']
    W1-=learningrate*dW1
    B1-=learningrate*dB1
    W2-=learningrate*dW2
    B2-=learningrate*dB2
    parameters={
        'W1' :W1,
        'B1' :B1,
        'W2' :W2,
        'B2' :B2
    }
    return parameters
def model(X,Y,n1,learningrate,iterations):
    n0=X.shape[1]
    n2=Y.shape[1]
    costlist=[]
    parameters=initializeparameters(n0,n1,n2)
    for i in range(iterations):
        forward_cache=forwardpropogation(X,parameters)
        cost=costfunction(Y,forward_cache['A2'])
        gradients=backwardpropagation(X,Y,parameters,forward_cache)
        parameters=updateparameters(parameters,gradients,learningrate)
        if(i%(iterations/10)==0):
            costlist.append(cost)
            print('cost after ',i,'th iterations is ',cost)
    return parameters,costlist

In [14]:
parameters,costlist=model(X,Y,1000,0.05,900)


cost after  0 th iterations is  14.18764547154551
cost after  90 th iterations is  0.8588512078245093
cost after  180 th iterations is  0.5845212743534993
cost after  270 th iterations is  0.5674319326477093
cost after  360 th iterations is  0.5497062148383618
cost after  450 th iterations is  0.5296028811094453
cost after  540 th iterations is  0.5225452409781578
cost after  630 th iterations is  0.5146653516530769
cost after  720 th iterations is  0.5126694609751375
cost after  810 th iterations is  0.5144049492346676


In [19]:
def accuracy(X,Y,parameters):
    forward_cache=forwardpropogation(X,parameters)
    A2=forward_cache['A2']
    for i in range(0,A2.shape[0]):
        if A2[i]<0.5:
            A2[i]=0
        else:
            A2[i]=1
    correct=0
    for i in range(0,A2.shape[0]):
        if(A2[i]==Y[i]):
            correct+=1
    return (correct/A2.shape[0])*100

print('The accuracy of the model on training set is ',round(accuracy(X,Y,parameters),2),'%')

The accuracy of the model on training set is  78.79 %


In [20]:
data_t=pd.read_csv('test_X.csv')
data_t.dropna()
data2_t=pd.read_csv('test_Y.csv')
data2_t.dropna()
X_test=data_t.to_numpy()
Y_test=data2_t.to_numpy()
Y_test=np.delete(Y_test,0,axis=1) 
X_test=np.delete(X_test,0,axis=1)  
forward_cache=forwardpropogation(X_test,parameters)
A2=forward_cache['A2']
for i in range(0,A2.shape[0]):
    if A2[i]<0.5:
        A2[i]=0
    else:
        A2[i]=1
print('The accuracy of the model on test set is ',round(accuracy(X_test,Y_test,parameters),2),'%')

The accuracy of the model on test set is  76.08 %
