# Divorce predictor
The data is from: https://archive.ics.uci.edu/ml/datasets/Divorce+Predictors+data+set, project created for #30Days10AIprojects by: https://doingai.tech/2020/01/30days10aiprojects/

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import train_test_split

In [5]:
# Importing data
X = np.array(pd.read_csv('data.csv', sep=';'))
Y = X[:, 54] # Classes 

X = np.delete(X, 54, 1)

# Number of training examples
m = X.shape[0]
# Number of features
n = X.shape[1]

print(f'm={m}, n={n}, Y={Y.shape}')

m=170, n=54, Y=(170,)


### Deep Neural Network Architecture
The model is trained with a Deep Neural Network with 3 hidden layers.

In [6]:
# Split training and test set
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

In [98]:
# Initializing parameters
class Model():
    def __init__(self, n, n_epochs = 10):
        self.parameters = {}
        self.n_hidden_layers = 3
        self.n_neurons = [54, 4, 3, 1]
        self.gradients = {}
        self.n_epochs = n_epochs
        np.random.seed(0)
        
        # Xavier initialization
        for i in range(1, self.n_hidden_layers + 1):
            self.parameters["W"+str(i)] = np.random.randn(self.n_neurons[i-1], self.n_neurons[i]) * np.sqrt(1 / self.n_neurons[i-1])
            self.parameters["B"+str(i)] = np.zeros([1, self.n_neurons[i]])
        
    # Activation for output layer    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    def sigmoid_backward(self, x):
        return x * (1 - x)
    
    # Activation for neurons
    def relu(self, x):
        return np.maximum(0, x)
    def relu_backward(self, x):
        return 1.0 * (x > 0)
    
    # Cost function
    def cost(self, Y, y_hat):
        m = Y.shape[0]
        result = np.sum((Y * np.log(y_hat) + (1 - Y) * np.log(1 - y_hat))) / -m
        result = np.squeeze(result)
        return result
    
    # Forward propagation
    def forward_propagation(self, X, parameters = None):
        if parameters is None:
            parameters = self.parameters
            
        self.A1 = np.dot(X, parameters["W1"]) + parameters["B1"]
        self.Z1 = self.relu(self.A1)
        
        self.A2 = np.dot(self.Z1, parameters["W2"]) + parameters["B2"]
        self.Z2 = self.relu(self.A2)
        
        self.A3 = np.dot(self.Z2, parameters["W3"]) + parameters["B3"]
        self.y_hat = self.sigmoid(self.A3)
        return self.y_hat
    
    # Backward propagation
    def backward_propagation(self, X, Y, parameters = None):
        if parameters is None:
            parameters = self.parameters 
      
        self.forward_propagation(X, parameters)
        
        self.gradients["dA3"] = self.y_hat - Y
        self.gradients["dW3"] = np.dot(self.Z2.T, self.gradients["dA3"])
        self.gradients["dB3"] = np.sum(self.gradients["dA3"], axis=0).reshape(1, -1)
        
        self.gradients["dZ2"] = np.dot(self.gradients["dA3"], parameters["W3"].T)
        self.gradients["dA2"] = np.multiply(self.gradients["dZ2"], self.relu_backward(self.Z2))
        self.gradients["dW2"] = np.dot(self.Z1.T, self.gradients["dA2"])
        self.gradients["dB2"] = np.sum(self.gradients["dA2"], axis=0).reshape(1, -1)
        
        self.gradients["dZ1"] = np.dot(self.gradients["dA2"], parameters["W2"].T)
        self.gradients["dA1"] = np.multiply(self.gradients["dZ1"], self.relu_backward(self.Z1))
        self.gradients["dW1"] = np.dot(X.T, self.gradients["dA1"])
        self.gradients["dB1"] = np.sum(self.gradients["dA1"], axis=0).reshape(1, -1)
        
    # Gradient descent
    def fit(self, X, Y, learning_rate = 0.1):
        costs = {}
        Y_pred = self.predict(X)
        costs[0] = self.cost(np.argmax(Y, axis=1), Y_pred)
        
        
        print(f"Cost after iteration 0: {costs[0]}")
        for num_epoch in range(self.n_epochs):
            m = X.shape[0]
            self.backward_propagation(X, Y)
            # Parameters update
            for i in range(1, self.n_hidden_layers + 1):
                self.parameters["W"+str(i)] -= learning_rate * (self.gradients["dW"+str(i)] / m)
                self.parameters["B"+str(i)] -= learning_rate * (self.gradients["dB"+str(i)] / m)

            # Calculate cost    
            Y_pred = self.predict(X)
            costs[num_epoch + 1] = self.cost(np.argmax(Y, axis=1), Y_pred)

            # Plot cost
            print(f"Cost after iteration {num_epoch + 1}: {costs[num_epoch + 1]}")
    
    # Predict labels
    def predict(self, X):
        prediction = self.forward_propagation(X)
        return prediction.squeeze()

In [99]:
classifier = Model(n, 119)

In [100]:
Y_train = Y_train.reshape(119, 1)
classifier.fit(X_train, Y_train)

Cost after iteration 0: 0.30782763387843276
Cost after iteration 1: 0.7056250570255529
Cost after iteration 2: 0.7055087851954146
Cost after iteration 3: 0.705395194953746
Cost after iteration 4: 0.7052842130031091
Cost after iteration 5: 0.7051757677972126
Cost after iteration 6: 0.7050697894926569
Cost after iteration 7: 0.704966209901875
Cost after iteration 8: 0.7048649624472407
Cost after iteration 9: 0.704765982116293
Cost after iteration 10: 0.7046690723150812
Cost after iteration 11: 0.7045529277457385
Cost after iteration 12: 0.7044379131186833
Cost after iteration 13: 0.7043302921074259
Cost after iteration 14: 0.7042237907247869
Cost after iteration 15: 0.7041152424496074
Cost after iteration 16: 0.7040127219309423
Cost after iteration 17: 0.7039073719374822
Cost after iteration 18: 0.7038085154349942
Cost after iteration 19: 0.7037055583427185
Cost after iteration 20: 0.7036078384706278
Cost after iteration 21: 0.7035112978848026
Cost after iteration 22: 0.7034099659993149


In [101]:
# Training accuracy
predictions = classifier.predict(X_train)
print("Training accuracy: ", accuracy_score(Y_train, predictions > 0.5) * 100, '%')

Training accuracy:  97.47899159663865 %


In [102]:
# Test accuracy
predictions = classifier.predict(X_test)
print("Test accuracy: ", accuracy_score(Y_test, predictions > 0.5) * 100, '%')

Test accuracy:  94.11764705882352 %
