In [195]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [196]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Task 1

In [238]:
class MLP:
    def __init__(self, activation='relu', hidden_layer_shape=(4,), learning_rate=0.1, max_iter=10000):
        self.X = None
        self.y = None
        self.parameters = None
        
        self.n_h = hidden_layer_shape
        self.activation = activation #sigmoid, relu
        self.learning_rate = learning_rate
        self.max_iter = max_iter
    
    def __sigmoid(self, Z):
        A = 1 / (1 + np.exp(-Z))
        cache = Z

        return A, cache

    def __relu(self, Z):
        A = np.maximum(0, Z)
        cache = Z 
        
        return A, cache
    
    def __sigmoid_backward(self, dA, cache):
        Z = cache
        A = 1 / (1 + np.exp(-Z))
        dZ = dA * A * (1 - A)

        return dZ

    def __relu_backward(self, dA, cache):
        Z = cache
        dZ = np.array(dA, copy=True)
        dZ[Z <= 0] = 0

        return dZ
    
    def __initialize_parameters(self, layer_dims):
        parameters = {}
        L = len(layer_dims)

        for l in range(1, L):
            parameters["W" + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
            parameters["b" + str(l)] = np.zeros((layer_dims[l], 1))

        return parameters
    
    def __forward_propagation(self, X, parameters):
        
        def linear_forward(A, W, b):
            Z = np.dot(W, A) + b
            cache = (A, W, b)

            return Z, cache
    
        def linear_activation_forward(A_prev, W, b, activation):
            if activation == "sigmoid":
                Z, linear_cache = linear_forward(A_prev, W, b)
                A, activation_cache = self.__sigmoid(Z)

            if activation == "relu":
                Z, linear_cache = linear_forward(A_prev, W, b)
                A, activation_cache = self.__relu(Z)

            cache = (linear_cache, activation_cache)

            return A, cache
    
        caches = []
        A = X.T
        L = len(parameters) // 2

        for l in range(1, L):
            A_prev = A 

            A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], 'relu')
            caches.append(cache)

        AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], 'sigmoid')
        caches.append(cache)

        return AL, caches
    
    def __compute_cost(self, AL, Y):
        m = Y.shape[0]
        
        cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
        cost = np.squeeze(cost)
        
        return cost
    
    def __backward_propagation(self, AL, Y, caches):

        def linear_backward(dZ, cache):
            A_prev, W, b = cache
            m = A_prev.shape[0]

            dW = np.dot(dZ, A_prev.T) / m
            db = np.sum(dZ, axis=1, keepdims=True) / m
            dA_prev = np.dot(W.T, dZ)

            return dA_prev, dW, db
    
        def linear_activation_backward(dA, cache, activation):
            linear_cache, activation_cache = cache

            if activation == "relu":
                dZ = self.__relu_backward(dA, activation_cache)
                dA_prev, dW, db = linear_backward(dZ, linear_cache)

            elif activation == "sigmoid":
                dZ = self.__sigmoid_backward(dA, activation_cache)
                dA_prev, dW, db = linear_backward(dZ, linear_cache)

            return dA_prev, dW, db
        
        grads = {}
        L = len(caches)
        m = AL.shape[0]
        Y = Y.reshape(AL.shape)        

        dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

        current_cache = caches[L-1]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dAL, current_cache, 'sigmoid')
        grads["dA" + str(L-1)] = dA_prev_temp
        grads["dW" + str(L)] = dW_temp
        grads["db" + str(L)] = db_temp

        for l in reversed(range(L-1)):
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, 'relu')
            grads["dA" + str(l)] = dA_prev_temp
            grads["dW" + str(l + 1)] = dW_temp
            grads["db" + str(l + 1)] = db_temp

        return grads
    
    def __update_parameters(self, params, grads):
        parameters = params.copy()
        L = len(parameters) // 2 # number of layers in the neural network

        for l in range(L):
            parameters["W" + str(l+1)] = parameters['W' + str(l+1)] - self.learning_rate * grads['dW' + str(l+1)]
            parameters["b" + str(l+1)] = parameters['b' + str(l+1)] - self.learning_rate * grads['db' + str(l+1)]

        return parameters
    
    def fit(self, X, y, verbose=False):
        self.X = X
        self.y = y
        
        self.n_x = X.shape[1]
        self.n_y = y.shape[1]
        
        parameters = self.__initialize_parameters([self.n_x, *self.n_h, self.n_y])
        
        for i in range(self.max_iter):
            AL, caches = self.__forward_propagation(X, parameters)
            cost = self.__compute_cost(AL, y)
            grads = self.__backward_propagation(AL, y, caches)
            parameters = self.__update_parameters(parameters, grads)
            
            if verbose: print ("Cost after iteration %i: %f" %(i, cost))
            
        self.parameters = parameters
    
    def predict(self, X):
        AL, cache = self.__forward_propagation(X, self.parameters)
        predictions = (AL > 0.5)

        return predictions

    def get_parameters(self):
        return self.parameters

In [231]:
titanic_df_train = pd.read_csv("../data/train.csv")
titanic_df_test = pd.read_csv("../data/test.csv")

iris_names = ['sepal length in cm', 'sepal width in cm', 'petal length in cm', 'petal width in cm', 'class']
iris_df = pd.read_csv("../data/iris.csv", names=iris_names)

In [199]:
titanic_df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [200]:
titanic_df_train.isnull().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [201]:
titanic_df = titanic_df_train.fillna(titanic_df_train.mean()).dropna()

  titanic_df = titanic_df_train.fillna(titanic_df_train.mean()).dropna()


In [202]:
titanic_df = titanic_df.drop(columns=["Name", "Ticket", "Cabin"])

In [203]:
titanic_df['Sex'] = pd.Categorical(titanic_df.Sex).codes
titanic_df['Embarked'] = pd.Categorical(titanic_df.Embarked).codes

In [204]:
titanic_df

Unnamed: 0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
1,2,1,1,0,38.0,1,0,71.2833,0
3,4,1,1,0,35.0,1,0,53.1000,2
6,7,0,1,1,54.0,0,0,51.8625,2
10,11,1,3,0,4.0,1,1,16.7000,2
11,12,1,1,0,58.0,0,0,26.5500,2
...,...,...,...,...,...,...,...,...,...
871,872,1,1,0,47.0,1,1,52.5542,2
872,873,0,1,1,33.0,0,0,5.0000,2
879,880,1,1,0,56.0,0,1,83.1583,0
887,888,1,1,0,19.0,0,0,30.0000,2


In [245]:
X = titanic_df.drop(columns=["Survived"]).values
y = titanic_df["Survived"].values.reshape(-1, 1)

In [246]:
X.shape

(202, 8)

In [247]:
y.reshape(y.shape[0], -1).shape

(202, 1)

In [248]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [249]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [261]:
model = MLP(max_iter=150, learning_rate=0.1)
model.fit(X_train, y_train)

In [262]:
model.get_parameters()

{'W1': array([[ 0.18673325, -0.04535739, -0.43497418,  0.07825206,  0.21340303,
         -0.41565209,  0.27722727, -0.12015795],
        [ 0.42942834, -0.23572337, -1.42097177,  0.24339099,  0.01546947,
         -0.56208501,  0.3015441 , -0.56991861],
        [-0.17808746, -0.11924571,  0.21197937, -0.60312165, -0.10709167,
          0.69004087, -1.27442768, -0.14075304],
        [-0.12711274,  0.21382724,  0.51965707,  0.70452361, -0.73847232,
          0.0552335 ,  0.03402168, -0.35768595]]),
 'b1': array([[ 0.30254501],
        [ 0.86184599],
        [-0.13546544],
        [ 0.30045981]]),
 'W2': array([[ 0.65144395,  1.35956055,  1.20995567, -1.17584818]]),
 'b2': array([[-0.63452546]])}

In [263]:
model.predict(X_test)

array([[ True,  True, False, False, False,  True, False,  True,  True,
         True, False,  True,  True,  True,  True, False,  True,  True,
        False,  True,  True, False,  True,  True, False,  True, False,
        False,  True, False, False,  True, False, False,  True,  True,
         True,  True, False,  True,  True]])

In [268]:
iris_df["class"] = pd.Categorical(iris_df["class"]).codes

In [269]:
iris_df

Unnamed: 0,sepal length in cm,sepal width in cm,petal length in cm,petal width in cm,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [295]:
X = iris_df.drop(columns=["class"]).values
y = iris_df["class"].values.reshape(-1, 1)

In [296]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

In [299]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [300]:
model = MLP()
model.fit(X_train, y_train)

  cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
  cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  dZ = dA * A * (1 - A)


In [301]:
model.predict(X_test)

array([[False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False, False,
        False, False, False]])

In [302]:
print(y_test)

[[0]
 [2]
 [1]
 [0]
 [2]
 [2]
 [1]
 [2]
 [0]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [1]
 [2]
 [2]
 [1]
 [2]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [2]
 [1]
 [2]
 [2]]


# Task 2

In [212]:
water_df = pd.read_csv("../data/water_potability_preprocessed.csv").iloc[:, 1:]
auto_df = pd.read_csv('../data/automobile_preprocessed.csv').iloc[:, 1:]