In [232]:
import numpy as np
from sklearn.model_selection import train_test_split
import copy
import pandas as pd

In [233]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income'
]

df = pd.read_csv(url, names=column_names, sep=', ', na_values='?', engine='python')

df.dropna(inplace=True)

X_raw = df.drop('income', axis=1)

y = (df['income'] == '>50K').astype(int).values

X_processed = pd.get_dummies(X_raw, drop_first=True)
X = X_processed.values

y = y.reshape(-1, 1)

X = X.astype(float)
y = y.astype(float)
X = (X-X.mean())/X.std()
cache = {}
parameters = {}
grads = {}
X, X_test, y, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

X = X.T
y = y.T
X_test = X_test.T
y_test = y_test.T
y.shape, X.shape

((1, 24129), (96, 24129))

In [234]:
'''
Layers' grads:
To change the amounth of layers and their sizes, apply it to return tuple
'''
def layer_sizes(X, y):
  n_x = X.shape[0]
  n_y = y.shape[0]
  return (n_x, 500, 100, 10, n_y)

In [235]:
'''
Initialization of parameteres:
The function generates dictionary parameters, where key is the name of the variable used in computations and value is randomly generated np.array.
Weight parameters are scaled by 0.01 to asure that Gradient Descent won't receive extreme values, and will work optimally fast.
'''
def initialize_parameters(layers_grads):
  size = len(layers_grads)
  for i in range(1, size):
    wkey = f'W{i}'
    parameters[wkey] = np.random.randn(layers_grads[i], layers_grads[i-1]) * 0.01
    bkey = f'b{i}'
    parameters[bkey] = np.zeros((layers_grads[i], 1))
  return parameters

In [236]:
'''
Activation functions:
'''
def sigmoid(Z):
  s = 1 / (1 + np.exp(-Z))
  return s
def relu(Z):
  return np.maximum(0, Z)
def leaky_relu(Z):
  return np.maximum(0.01*Z, Z)

In [237]:
'''
Derivatives of Activation functions:
'''
def sigmoid_derivative(A):
  return A*(1-A)
def tanh_derivation(A):
  return 1 - A*A
def relu_derivative(A):
    dA = np.array(A, copy=True)
    dA[A <= 0] = 0
    dA[A > 0] = 1
    return dA
def leaky_relu_derivative(A):
    dA = np.array(A, copy=True)
    dA[A <= 0] = 0.01
    dA[A > 0] = 1
    return dA

In [238]:
'''
Forward Propagation:
Zl = np.dot(Wl, A{l-1}) + bl
Al = gl(Zl)
'''
def forward_propagation(X, parameters):
  L = len(parameters)//2
  A_curr = X
  for l in range(1, L):
    W = parameters[f'W{l}']
    b = parameters[f'b{l}']
    Z = np.dot(W, A_curr) + b
    cache[f'W{l}'] = W
    cache[f'b{l}'] = b
    cache[f'Z{l}'] = Z
    A_curr = relu(Z)
    cache[f'A{l}'] = A_curr
  W = parameters[f'W{L}']
  b = parameters[f'b{L}']
  Z = np.dot(W, A_curr) + b
  A_curr = sigmoid(Z)
  cache[f'W{L}'] = W
  cache[f'b{L}'] = b
  cache[f'Z{L}'] = Z
  cache[f'A{L}'] = A_curr
  return cache[f'A{L}'], cache


In [239]:
'''
Computation of cost function:
Current formula is: -(y*log(y_pred)+ (1-y)*log(1-y_pred))/m
'''
def compute_cost(y_pred, y):
  m = y_pred.shape[1]
  logprobs = -(np.sum(np.multiply(np.log(y_pred), y)) + np.sum(np.multiply(np.log(1-y_pred),(1-y))))
  cost = logprobs/m
  cost = float(np.squeeze(cost))
  return cost

In [240]:
'''
Backward Propagation:
GD
'''

def backward_propagation(parameters, cache, X, y):
  m = X.shape[1]
  L = len(parameters)//2
  cache["A0"] = X
  A_prev = cache [f'A{L-1}']
  A = cache[f'A{L}']
  Z = cache[f'Z{L}']
  W = cache[f'W{L}']
  dA = -y/A + (1-y)/(1-A)
  dZ = dA * sigmoid_derivative(Z)
  dW = np.dot(dZ, A_prev.T) / m ## (1,30162) x (8, 30162).T
  db = np.sum(dZ, axis=1, keepdims=True)/m
  grads[f'dA{L}'] = dA
  grads[f'dZ{L}'] = dZ
  grads[f'dW{L}'] = dW
  grads[f'db{L}'] = db
  dA = np.dot(W.T, dZ)
  for l in range(L-1, 0, -1):
    A_prev = cache [f'A{l-1}']
    W = cache[f'W{l}']
    Z = cache[f'Z{l}']
    dZ = dA*relu_derivative(Z)
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    grads[f'dW{l}'] = dW
    grads[f'db{l}'] = db
    grads[f'dA{L}'] = dA
    grads[f'dZ{L}'] = dZ
    dA = np.dot(W.T, dZ)
  return grads

In [241]:
def update_parameters(parameters, grads, learning_rate=1.2):
  L = (len(parameters)//2)+1
  for l in range(1, L):
    W = copy.deepcopy(parameters[f'W{l}'])
    b = copy.deepcopy(parameters[f'b{l}'])
    dW = grads[f'dW{l}']
    db = grads[f'db{l}']
    W = W - learning_rate * dW
    b = b - learning_rate * db
    parameters[f'W{l}']=W
    parameters[f'b{l}']=b
  return parameters

In [242]:
def nn_model(X, y, layers, num_iterations=2000, print_cost=False):
  np.random.seed(3)
  parameters = initialize_parameters(layers)

  for i in range(0, num_iterations):
    A_last, cache = forward_propagation(X, parameters)
    cost = compute_cost(A_last, y)
    grads = backward_propagation(parameters, cache, X, y)
    parameters = update_parameters(parameters, grads, learning_rate=1.2)
    if print_cost and i % 10 == 0:
      print("Cost after iteration %i: %f" %(i, cost))

  return parameters

In [243]:
def predict(parameters, X_test):
  A_last, cache = forward_propagation(X_test, parameters)
  predictions = (A_last>=0.5).astype(int)
  return predictions

In [244]:
parameters = nn_model(X, y, layer_sizes(X, y))
predictions = predict(parameters, X_test)

In [245]:
print ('Accuracy: %d' % float((np.dot(y_test, predictions.T) + np.dot(1 - y_test, 1 - predictions.T)) / float(y_test.size) * 100) + '%')

Accuracy: 46%


  print ('Accuracy: %d' % float((np.dot(y_test, predictions.T) + np.dot(1 - y_test, 1 - predictions.T)) / float(y_test.size) * 100) + '%')
