In [None]:
import tensorflow as tf
import numpy as np

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


# UCI Adult training split; the file ships without a header row, so the
# column names are supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
column_names = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income'
]

# Fields are separated by ", " and '?' marks a missing value. The
# multi-character separator is why the python parsing engine is requested.
df = pd.read_csv(url, names=column_names, sep=', ', na_values='?', engine='python')
df = df.dropna()

# Binary target: 1 when income is '>50K', 0 otherwise.
y = (df['income'] == '>50K').astype(np.int32).values
X_raw = df.drop('income', axis=1)

# One-hot encode the categorical columns (first level dropped).
X_processed = pd.get_dummies(X_raw, drop_first=True)
# Note: X is the *unnormalised* float32 feature matrix.
X = X_processed.values.astype(np.float32)

# Split BEFORE computing normalisation statistics so that no information
# from the held-out rows leaks into the preprocessing of the training rows.
X_train_np, X_test_np, y_train_np, y_test_np = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise with statistics fitted on the training rows only.
epsilon = 1e-7
X_mean = X_train_np.mean(axis=0)
X_std = X_train_np.std(axis=0)
# A column that is constant in the training rows (e.g. a one-hot level that
# only occurs in the test rows) has std 0; dividing by epsilon alone would
# turn its test values into ~1e7, so such columns are left unscaled.
X_scale = np.where(X_std > 0, X_std + epsilon, np.float32(1.0))
X_train_np = (X_train_np - X_mean) / X_scale
X_test_np = (X_test_np - X_mean) / X_scale

X_train = tf.constant(X_train_np, dtype=tf.float32)
X_test = tf.constant(X_test_np, dtype=tf.float32)

# Labels are stored as float32 column vectors of shape (n_examples, 1).
y_train = tf.constant(y_train_np.reshape(-1, 1), dtype=tf.float32)
y_test = tf.constant(y_test_np.reshape(-1, 1), dtype=tf.float32)

In [None]:
'''
def normalize(image):
  image = tf.cast(image, tf.float32) / 256
  image = tf.reshape(image, [-1,])
  return image
'''

'\ndef normalize(image):\n  image = tf.cast(image, tf.float32) / 256\n  image = tf.reshape(image, [-1,])\n  return image\n'

In [None]:
def create_layers(k):
  """Validate a layer-size specification and hand it back unchanged.

  Args:
    k: candidate specification; must be a ``list`` of ``int`` with at least
      two entries.

  Returns:
    The very same list object that was passed in.

  Raises:
    AssertionError: if ``k`` is not a list, contains a non-int, or has fewer
      than two entries.
  """
  is_int_list = isinstance(k, list) and all(isinstance(size, int) for size in k)
  assert is_int_list and len(k) > 1, "input must be list of integers of size bigger than 1"
  return k

In [None]:
def initialize_parameters(layers_list, seed=1):
  """Create Glorot-normal initialised weights and biases for a dense network.

  Args:
    layers_list: layer sizes, input layer first. Layer ``i`` (1-based) gets a
      weight matrix of shape ``(layers_list[i], layers_list[i-1])`` and a
      bias of shape ``(layers_list[i], 1)``.
    seed: base integer seed. Every parameter receives its own seed derived
      from it, so the result is reproducible.

  Returns:
    dict mapping ``'W1', 'b1', ..., 'WL', 'bL'`` to ``tf.Variable`` objects,
    where ``L == len(layers_list) - 1``.
  """
  parameters = {}
  num_layers = len(layers_list)
  for layer in range(1, num_layers):
    # In current Keras releases a seeded initializer returns the same values
    # on every call with the same shape, so sharing one seeded instance would
    # give identically shaped layers identical starting weights. A distinct
    # seed per parameter avoids that while staying deterministic.
    w_init = tf.keras.initializers.GlorotNormal(seed=seed + 2 * (layer - 1))
    b_init = tf.keras.initializers.GlorotNormal(seed=seed + 2 * (layer - 1) + 1)
    parameters['W' + str(layer)] = tf.Variable(
        w_init(shape=(layers_list[layer], layers_list[layer - 1])))
    parameters['b' + str(layer)] = tf.Variable(
        b_init(shape=(layers_list[layer], 1)))
  # Sanity check: exactly one W and one b per non-input layer.
  assert len(parameters) == (num_layers - 1) * 2
  return parameters


In [None]:
def forward_step_sig(W, A, b):
  """Apply one dense layer followed by a sigmoid.

  Computes ``Z = W @ A + b`` and ``sigmoid(Z)``.

  Returns:
    Tuple ``(Z, A)``: the pre-activation and the activated output.
  """
  pre_activation = tf.matmul(W, A) + b
  return pre_activation, tf.keras.activations.sigmoid(pre_activation)
def forward_step_relu(W, A, b):
  """Apply one dense layer followed by a ReLU.

  Computes ``Z = W @ A + b`` and ``relu(Z)``.

  Returns:
    Tuple ``(Z, A)``: the pre-activation and the activated output.
  """
  pre_activation = tf.matmul(W, A) + b
  return pre_activation, tf.keras.activations.relu(pre_activation)

In [None]:
def forward_propagation(X, parameters):
  """Run the network forward and return the output-layer logits.

  All layers except the last use ReLU. The last layer's pre-activation is
  returned as-is: the loss in this notebook is built with
  ``from_logits=True`` and ``predict`` applies its own sigmoid, so no
  activation is computed here.

  Args:
    X: input activations; they are left-multiplied by ``W1``, so examples are
      laid out in columns (features x batch).
    parameters: dict holding ``'W1', 'b1', ..., 'WL', 'bL'``.

  Returns:
    ``Z`` of the last layer (logits).
  """
  num_layers = len(parameters) // 2
  A = X
  for layer in range(1, num_layers):
    _, A = forward_step_relu(
        parameters['W' + str(layer)], A, parameters['b' + str(layer)])
  # Output layer: affine transform only, no sigmoid (see docstring).
  W_out = parameters['W' + str(num_layers)]
  b_out = parameters['b' + str(num_layers)]
  return tf.math.add(tf.matmul(W_out, A), b_out)


In [None]:
# def compute_loss(logits, labels):
#   loss = tf.keras.losses.categorical_crossentropy(tf.transpose(labels), tf.transpose(logits), from_logits = True)
#   loss = tf.reduce_sum(loss)
#   return loss
def compute_loss(logits, labels):
  """Binary cross-entropy between ``labels`` and raw ``logits``.

  The Keras loss object is built with ``from_logits=True``, so ``logits``
  must not have been passed through a sigmoid. With the loss object's default
  reduction the value is already averaged over the batch; the trailing
  ``reduce_sum`` then acts on that reduced value.

  Returns:
    Scalar loss tensor.
  """
  bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  return tf.reduce_sum(bce(labels, logits))

In [None]:
def _evaluate_cost(minibatches, parameters, num_examples):
  """Return the per-example mean loss over all batches in ``minibatches``."""
  total = 0.
  for (minibatch_X, minibatch_Y) in minibatches:
    Z = forward_propagation(tf.transpose(minibatch_X), parameters)
    batch_loss = compute_loss(Z, tf.transpose(minibatch_Y))
    # compute_loss yields a batch mean; weight it by the batch size so a
    # smaller final batch does not distort the average.
    total += batch_loss * tf.cast(tf.shape(minibatch_X)[0], batch_loss.dtype)
  return total / num_examples


def model(X_train, Y_train, X_test, Y_test, layers, learning_rate = 0.0001, num_epochs = 1000, minibatch_size = 32, print_cost = True):
  """Train a fully connected binary classifier with Adam on minibatches.

  Args:
    X_train, X_test: feature tensors with one example per row.
    Y_train, Y_test: label tensors with one row per example.
    layers: list of layer sizes passed to ``create_layers``; the first entry
      must equal the number of feature columns.
    learning_rate: Adam learning rate.
    num_epochs: number of passes over the training set.
    minibatch_size: number of examples per minibatch.
    print_cost: when truthy, print the train and test cost every 10 epochs.

  Returns:
    dict of trained parameters (``'W1', 'b1', ...``).
  """
  parameters = initialize_parameters(create_layers(layers))
  # The set of variables never changes, so build the list once.
  trainable_variables = list(parameters.values())

  optimizer = tf.keras.optimizers.Adam(learning_rate)

  # Pair every feature row with its label row.
  dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
  test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))

  # Number of (features, label) pairs in each split.
  m = int(dataset.cardinality().numpy())
  m_test = int(test_dataset.cardinality().numpy())

  minibatches = dataset.batch(minibatch_size).prefetch(8)
  test_minibatches = test_dataset.batch(minibatch_size).prefetch(8)

  for epoch in range(num_epochs):
    epoch_total_loss = 0.
    for (minibatch_X, minibatch_Y) in minibatches:
      # Only the forward pass needs to be recorded on the tape.
      with tf.GradientTape() as tape:
        # The network expects examples in columns, hence the transposes.
        Z = forward_propagation(tf.transpose(minibatch_X), parameters)
        loss = compute_loss(Z, tf.transpose(minibatch_Y))
      # Differentiate and update outside the recording context so these
      # operations are not themselves traced by the tape.
      grads = tape.gradient(loss, trainable_variables)
      optimizer.apply_gradients(zip(grads, trainable_variables))
      # loss is a batch mean; weight by batch size for an exact epoch mean.
      epoch_total_loss += loss * tf.cast(tf.shape(minibatch_X)[0], loss.dtype)
    epoch_total_loss /= m

    if print_cost and epoch % 10 == 0:
      print("Cost after epoch %i: %f" % (epoch, epoch_total_loss))
      test_cost = _evaluate_cost(test_minibatches, parameters, m_test)
      print("Test cost after epoch %i: %f" % (epoch, test_cost))

  return parameters

In [None]:
# The input width is read from the data rather than hard-coded, so this cell
# keeps working if the one-hot encoding yields a different number of columns.
parameters = model(X_train, y_train, X_test, y_test, [int(X_train.shape[1]), 20, 10, 50, 10, 1], learning_rate = 0.01, num_epochs = 100, minibatch_size = 512, print_cost = True)

Cost after epoch 0: 0.460544
tf.Tensor(0.38501754, shape=(), dtype=float32)
Cost after epoch 10: 0.312791
tf.Tensor(0.3385814, shape=(), dtype=float32)
Cost after epoch 20: 0.295781
tf.Tensor(0.34277514, shape=(), dtype=float32)
Cost after epoch 30: 0.288914
tf.Tensor(0.34843925, shape=(), dtype=float32)
Cost after epoch 40: 0.284098
tf.Tensor(0.3522492, shape=(), dtype=float32)
Cost after epoch 50: 0.279097
tf.Tensor(0.3625417, shape=(), dtype=float32)
Cost after epoch 60: 0.274955
tf.Tensor(0.36333913, shape=(), dtype=float32)
Cost after epoch 70: 0.267152
tf.Tensor(0.3722102, shape=(), dtype=float32)
Cost after epoch 80: 0.267665
tf.Tensor(0.37839615, shape=(), dtype=float32)
Cost after epoch 90: 0.264053
tf.Tensor(0.38691053, shape=(), dtype=float32)


In [None]:
def predict(X, parameters):
  """Predict binary labels for ``X`` with the trained ``parameters``.

  Args:
    X: feature tensor with one example per row; it is transposed before being
      fed to ``forward_propagation``.
    parameters: dict of network parameters as returned by ``model``.

  Returns:
    float32 tensor of 0./1. predictions with one column per example
    (1. where the sigmoid of the logit is at least 0.5).
  """
  # Use the X argument, not the notebook-level X_test, so the function works
  # for any input.
  Z_final = forward_propagation(tf.transpose(X), parameters)
  A_final = tf.nn.sigmoid(Z_final)
  predictions = tf.cast(A_final >= 0.5, dtype=tf.float32)
  return predictions

In [None]:
# Hard 0/1 predictions for the held-out examples.
pred = predict(X=X_test, parameters=parameters)

In [None]:
def calculate_accuracy(predictions, labels):
    """Return the fraction of entries where ``predictions`` equals ``labels``.

    The element-wise comparison is cast to float32 and averaged, giving a
    scalar tensor.
    """
    hits = tf.cast(tf.equal(predictions, labels), tf.float32)
    return tf.reduce_mean(hits)


In [None]:
# y_test is stored as a column, while pred has one column per example, so the
# labels are transposed to line up with the predictions.
accuracy = calculate_accuracy(predictions=pred, labels=tf.transpose(y_test))

In [None]:
# Show the test-set accuracy; it is a scalar tf.Tensor, so the tensor repr is printed.
print(accuracy)

tf.Tensor(0.84535056, shape=(), dtype=float32)
