In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tempfile
import os
import sklearn.metrics

from matplotlib import pyplot as plt


In [None]:
# Load the raw table from CSV; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='../data/Wisconsin_Breast_Cancer_Data.csv')


In [None]:
# Sample 75% of the rows for training (fixed seed, so the split is repeatable);
# every row that was not sampled goes to the test partition.
train_df = df.sample(frac=0.75, random_state=23)
test_df = df.drop(index=train_df.index)

# The 'id' column is removed from both partitions.
train_df = train_df.drop(columns=['id'])
test_df = test_df.drop(columns=['id'])

# With 'id' gone, column 0 is taken as the target and all remaining columns
# as the features.
X_train = train_df.iloc[:, 1:]
y_train = train_df.iloc[:, 0]
X_test = test_df.iloc[:, 1:]
y_test = test_df.iloc[:, 0]


In [None]:
# Encode the string labels numerically: 'B' -> 0, 'M' -> 1.
# NOTE: y_train / y_test are rebound here, so this cell is meant to run once
# per kernel session; Series.map turns any value absent from the mapping into NaN.
label_codes = {'B': 0, 'M': 1}
y_train = y_train.map(label_codes)
y_test = y_test.map(label_codes)

# float32 tensor copies of the features ...
X_train_tf, X_test_tf = (
    tf.convert_to_tensor(features, dtype=tf.float32)
    for features in (X_train, X_test)
)

# ... and of the encoded labels.
y_train_tf, y_test_tf = (
    tf.convert_to_tensor(labels, dtype=tf.float32)
    for labels in (y_train, y_test)
)


In [None]:
class Normalize(tf.Module):
    """Standardise data using statistics captured from a reference tensor.

    The mean and standard deviation are reduced over axis 0 of the tensor
    passed to the constructor and stored as `tf.Variable`s named 'mean' and
    'std'.
    """

    def __init__(self, x):
        """Record the axis-0 mean and standard deviation of `x`."""
        self.mean = tf.Variable(tf.reduce_mean(x, axis=0), name='mean')
        self.std = tf.Variable(tf.math.reduce_std(x, axis=0), name='std')

    def norm(self, x):
        """Return `x` shifted by the stored mean and divided by the stored std."""
        centered = x - self.mean
        return centered / self.std

    def unnorm(self, x):
        """Invert `norm`: multiply by the stored std, then add the stored mean."""
        rescaled = x * self.std
        return rescaled + self.mean


In [None]:
# The normaliser takes its statistics from the training features only; the
# same statistics are then applied to both the training and the test features.
norm_x = Normalize(X_train_tf)
X_train_norm, X_test_norm = norm_x.norm(X_train_tf), norm_x.norm(X_test_tf)


In [None]:
def log_loss(labels, logits):
    """Mean sigmoid cross-entropy over all elements.

    Args:
        labels: target values, passed through as `labels`.
        logits: raw (pre-sigmoid) model outputs, passed through as `logits`.

    Returns:
        A scalar tensor: the mean of the element-wise cross-entropy.
    """
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits))


class LogisticRegression(tf.Module):
    """Logistic-regression model whose parameters are created on first call."""

    def __init__(self):
        # The weight shape depends on the input width, so variable creation is
        # deferred until the first __call__.
        self.built = False

    @tf.function
    def __call__(self, x, train=True):
        """Apply the linear model to `x`.

        Args:
            x: input tensor; its last dimension fixes the number of weights
                the first time the model is called.
            train: when truthy the raw logits are returned, otherwise their
                sigmoid.

        Returns:
            A tensor with axis 1 of the `x @ w + b` result squeezed away.
        """
        if not self.built:
            # Uniform random initial values, both drawn with op seed 23.
            init_w = tf.random.uniform(shape=[x.shape[-1], 1], seed=23)
            init_b = tf.random.uniform(shape=[], seed=23)

            self.w = tf.Variable(init_w, name='rand_w')
            self.b = tf.Variable(init_b, name='rand_b')

            self.built = True

        logits = tf.squeeze(tf.matmul(x, self.w) + self.b, axis=1)

        if train:
            return logits

        return tf.sigmoid(logits)


In [None]:
def predict_class(y_pred, threshold=0.5):
    """Return float32 1.0 where `y_pred` is strictly above `threshold`, else 0.0."""
    above_threshold = tf.greater(y_pred, threshold)
    return tf.cast(above_threshold, tf.float32)


def accuracy(y_pred, y):
    """Fraction of predictions that match the targets `y`.

    `y_pred` is passed through a sigmoid and then thresholded by
    `predict_class` (default threshold) before being compared with `y`.
    A prediction counts as a match when it differs from the target by less
    than 0.0001.
    """
    hard_preds = predict_class(tf.sigmoid(y_pred))
    is_match = tf.less(tf.abs(hard_preds - y), 0.0001)
    return tf.reduce_mean(tf.cast(is_match, tf.float32))


In [None]:
# Number of examples per mini-batch, shared by both pipelines.
batch_size = 64

# Each pipeline pairs a normalised feature row with its label, shuffles with a
# buffer as large as the whole split, and then groups the rows into batches.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train_norm, y_train_tf))
    .shuffle(buffer_size=X_train_norm.shape[0])
    .batch(batch_size=batch_size)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test_norm, y_test_tf))
    .shuffle(buffer_size=X_test_norm.shape[0])
    .batch(batch_size=batch_size)
)


In [None]:
epochs = 200
learning_rate = 0.01

# Per-epoch metric histories, filled in by the loop below.
train_losses, test_losses = [], []
train_accs, test_accs = [], []
train_f1s, test_f1s = [], []

log_reg = LogisticRegression()


def _batch_f1(labels, logits):
    """Weighted F1 score of one batch, computed on hard class predictions.

    The logits are passed through a sigmoid and thresholded by
    `predict_class` first. Scoring int-cast raw logits instead would hand
    sklearn arbitrary truncated integers as "class labels".
    """
    predicted_classes = predict_class(tf.sigmoid(logits))
    return sklearn.metrics.f1_score(
        y_true=tf.cast(labels, tf.int32).numpy(),
        y_pred=tf.cast(predicted_classes, tf.int32).numpy(),
        average='weighted')


for epoch in range(epochs):
    batch_train_losses = []
    batch_test_losses = []
    batch_train_accs = []
    batch_test_accs = []
    batch_train_f1s = []
    batch_test_f1s = []

    # Training pass: one plain gradient-descent step per mini-batch.
    for x_train_fit, y_train_fit in train_dataset:
        with tf.GradientTape() as tape:
            y_preds = log_reg(x_train_fit)
            batch_train_loss = log_loss(
                labels=y_train_fit, logits=y_preds)

        batch_train_acc = accuracy(y_pred=y_preds, y=y_train_fit)
        grads = tape.gradient(batch_train_loss, log_reg.variables)

        for g, v in zip(grads, log_reg.variables):
            v.assign_sub(learning_rate * g)

        batch_train_f1s.append(_batch_f1(labels=y_train_fit, logits=y_preds))

        batch_train_losses.append(batch_train_loss)
        batch_train_accs.append(batch_train_acc)

    # Evaluation pass: same metrics on the test batches, no parameter updates.
    for x_test_fit, y_test_fit in test_dataset:
        y_preds = log_reg(x_test_fit)

        batch_test_loss = log_loss(labels=y_test_fit, logits=y_preds)
        batch_test_acc = accuracy(y_pred=y_preds, y=y_test_fit)

        batch_test_f1s.append(_batch_f1(labels=y_test_fit, logits=y_preds))

        batch_test_losses.append(batch_test_loss)
        batch_test_accs.append(batch_test_acc)

    # Epoch-level metrics are the unweighted means of the per-batch values.
    train_loss = tf.reduce_mean(batch_train_losses)
    train_acc = tf.reduce_mean(batch_train_accs)
    train_f1 = tf.reduce_mean(batch_train_f1s)

    test_loss = tf.reduce_mean(batch_test_losses)
    test_acc = tf.reduce_mean(batch_test_accs)
    test_f1 = tf.reduce_mean(batch_test_f1s)

    train_losses.append(train_loss)
    train_accs.append(train_acc)
    train_f1s.append(train_f1)

    test_losses.append(test_loss)
    test_accs.append(test_acc)
    test_f1s.append(test_f1)

    if epoch % 10 == 0:
        print(
            f'Epoch: {epoch} Train Loss {train_loss} Train Accuracy {train_acc} Train F1 {train_f1}')
        print(
            f'Epoch: {epoch} Test Loss {test_loss} Test Accuracy {test_acc} Test F1 {test_f1}')

# Loss and accuracy curves for both splits, drawn on shared axes.
plt.plot(range(epochs), train_losses, label='Train Loss')
plt.plot(range(epochs), test_losses, label='Test Loss')

plt.plot(range(epochs), train_accs, label='Train Accuracy')
plt.plot(range(epochs), test_accs, label='Test Accuracy')

plt.legend()


In [None]:
# Probabilities for each split (train=False makes the model apply the sigmoid).
y_pred_train, y_pred_test = log_reg(X_train_norm, train=False), log_reg(X_test_norm, train=False)
# Threshold the probabilities into hard 0/1 class predictions.
y_train_classes, y_test_classes = predict_class(y_pred_train), predict_class(y_pred_test)

# The confusion matrices must be built from the thresholded classes: casting
# the probabilities themselves to int truncates almost every value to 0.
print(sklearn.metrics.confusion_matrix(
    y_true=tf.cast(y_train, tf.int32).numpy(),
    y_pred=tf.cast(y_train_classes, tf.int32).numpy()))
print(sklearn.metrics.confusion_matrix(
    y_true=tf.cast(y_test, tf.int32).numpy(),
    y_pred=tf.cast(y_test_classes, tf.int32).numpy()))


In [None]:
class ExportModule(tf.Module):
    """Chain normalisation, the model and class prediction into one callable."""

    def __init__(self, model, norm_x, class_pred):
        """Store the three pipeline stages.

        Args:
            model: callable invoked as `model(x, train=False)`.
            norm_x: object whose `norm` method is applied to the raw input.
            class_pred: callable applied to the model output.
        """
        self.model, self.norm_x, self.class_pred = model, norm_x, class_pred

    @tf.function
    def __call__(self, x):
        """Normalise `x`, run the model with train=False, and apply `class_pred`."""
        normalised = self.norm_x.norm(x)
        model_output = self.model(normalised, train=False)
        return self.class_pred(model_output)


# Bundle the trained model with its normaliser and the thresholding function,
# so the exported callable accepts raw, un-normalised features.
log_reg_export = ExportModule(
    model=log_reg,
    norm_x=norm_x,
    class_pred=predict_class,
)

In [None]:
# Accuracy of the exported pipeline on the raw test features.
# NOTE: accuracy() applies a sigmoid and a 0.5 threshold to its first argument.
# The exported module already returns 0/1 classes, which survive that step
# unchanged: sigmoid(0) = 0.5 is not above the threshold, sigmoid(1) is.
export_preds = log_reg_export(X_test_tf)
accuracy(y_pred=export_preds, y=y_test_tf)