In [1]:
import os
# Set before TensorFlow is imported so the value is already in the environment
# when the library loads. TF_CPP_MIN_LOG_LEVEL filters TensorFlow's native
# (C++) log output; '3' hides INFO, WARNING and ERROR messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf



In [2]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

n = 10000           # number of generated samples
d = 5               # number of (all informative) features
noise_factor = 0.   # fraction of labels that are randomly flipped (flip_y)

# Fixed seed so that "Restart Kernel -> Run All" reproduces the same samples
# and the same train/test split.
RANDOM_SEED = 42

# Create (optionally noisy) synthetic data for binary classification.
# class_sep scales the hypercube on which the class centres are placed. Keep it
# positive: a negative value merely mirrors the centres and is rejected by the
# parameter validation of recent scikit-learn releases.
X, y = make_classification(
    n_samples=n,
    n_features=d,
    n_informative=d,
    n_redundant=0,
    n_classes=2,
    class_sep=1.0,
    flip_y=noise_factor,
    random_state=RANDOM_SEED,
)

# We will work with label values -1, +1 and not 0, +1 (convert)
y[y == 0] = -1

# Split the data into training (90%) and testing (10%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=RANDOM_SEED
)


In [3]:

# Turn the four NumPy splits into float32 TensorFlow constants in one go.
# The generator's loop variable stays local to the expression, so no extra
# name is left behind in the notebook namespace.
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    tf.constant(split, dtype=tf.float32)
    for split in (X_train, y_train, X_test, y_test)
)

In [4]:

# Pair features with labels: each dataset yields one (x, y) example at a time.
train_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices(features_and_labels)
    for features_and_labels in (
        (X_train_tensor, y_train_tensor),
        (X_test_tensor, y_test_tensor),
    )
)

In [5]:

NUM_CLIENTS = 10        # not referenced by the cells visible in this notebook
NUM_EPOCHS = 5          # how many times preprocess() repeats a dataset
BATCH_SIZE = 5          # examples per mini-batch
SHUFFLE_BUFFER = 1000   # buffer size handed to Dataset.shuffle
PREFETCH_BUFFER = 32    # number of batches prepared ahead of the consumer


def preprocess(dataset):
    """Build the input pipeline: repeat -> shuffle -> batch -> prefetch.

    The returned dataset already contains ``NUM_EPOCHS`` passes over the
    input, so iterating over it once visits every example ``NUM_EPOCHS``
    times.

    Args:
        dataset: a ``tf.data.Dataset`` (or any object offering the same
            ``repeat``/``shuffle``/``batch``/``prefetch`` methods).

    Returns:
        The transformed dataset, yielding batches of ``BATCH_SIZE`` examples.
    """
    repeated = dataset.repeat(NUM_EPOCHS)
    shuffled = repeated.shuffle(SHUFFLE_BUFFER)
    batched = shuffled.batch(BATCH_SIZE)
    return batched.prefetch(PREFETCH_BUFFER)

In [6]:

# Preprocess the training dataset (repeat for NUM_EPOCHS, shuffle, batch, prefetch)
preprocessed_train_dataset = preprocess(train_dataset)

# The testing dataset is only evaluated, so one in-order pass is enough:
# repeating it NUM_EPOCHS times and shuffling it would multiply the evaluation
# work without changing the resulting accuracy.
preprocessed_test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(PREFETCH_BUFFER)

## PA-Classifiers (binary classification)

![PA](images/PA_binary_classifiers.png)

In [7]:
def _squared_norms(x_batch):
    """Return ||x||^2 for every row x of ``x_batch``, shape (batchsize, 1)."""
    return tf.reduce_sum(tf.square(x_batch), axis=1, keepdims=True)


def _t_pa1(x_batch, loss_batch, C):
    """PA-I step size per instance: t = min(C, loss / ||x||^2)."""
    # C is an upper bound (aggressiveness cap) on the step, hence the minimum.
    # A zero-loss instance therefore gets t = 0 and leaves the model untouched.
    return tf.minimum(C, tf.divide(loss_batch, _squared_norms(x_batch)))


def _t_pa2(x_batch, loss_batch, C):
    """PA-II step size per instance: t = loss / (||x||^2 + 1 / (2C))."""
    return tf.divide(loss_batch, tf.add(0.5 / C, _squared_norms(x_batch)))


# Maps the `variant` argument of train_on_batch to its step-size rule.
_STEP_SIZE_RULES = {'pa1': _t_pa1, 'pa2': _t_pa2}


@tf.function
def train_on_batch(model, batch, C=0.01, variant='pa1'):
    """Apply one mini-batch Passive-Aggressive update to ``model`` in place.

    For every instance the hinge loss ``max(0, 1 - y * <w, x>)`` is computed,
    turned into a step size ``t`` by the selected PA rule, and the per-instance
    updates ``t * y * x`` are averaged over the batch before being added to
    the weights.

    Args:
        model: ``tf.Variable`` holding the weight vector, shape (d, 1).
        batch: tuple ``(x_batch, y_batch)`` with features of shape
            (batchsize, d) and labels in {-1, +1} of shape (batchsize,).
        C: aggressiveness parameter of the PA-I / PA-II rules (must be > 0).
        variant: Python string, ``'pa1'`` (default) or ``'pa2'``, selecting
            the step-size rule.

    Returns:
        None. ``model`` is updated through ``assign_add``.

    Raises:
        ValueError: if ``variant`` is not a known rule name.
    """
    # `variant` is a plain Python value, so this check runs at trace time.
    if variant not in _STEP_SIZE_RULES:
        raise ValueError(
            f"Unknown PA variant {variant!r}; expected one of {sorted(_STEP_SIZE_RULES)}"
        )
    step_size_rule = _STEP_SIZE_RULES[variant]

    x_batch, y_batch = batch

    # from shape (batchsize,) make it (batchsize, 1)
    y_batch = tf.expand_dims(y_batch, axis=1)

    # dot(w, x) for each instance x in x_batch, shape=(batchsize, 1)
    weights_dot_x_batch = tf.matmul(x_batch, model)

    # Hinge loss suffered on each instance, shape=(batchsize, 1)
    loss_batch = tf.maximum(0., 1. - tf.multiply(y_batch, weights_dot_x_batch))

    # Learning rate t for each instance x, shape=(batchsize, 1)
    t_batch = step_size_rule(x_batch, loss_batch, C)

    # each instance is y*t*x, where y,t scalars and x in x_batch. shape=(batchsize, d)
    t_y_x_batch = tf.multiply(t_batch, tf.multiply(y_batch, x_batch))

    # Mini-batch adaptation: average the per-instance updates, shape=(d, 1)
    t_y_x_update = tf.expand_dims(tf.reduce_mean(t_y_x_batch, axis=0), axis=1)

    # Update
    model.assign_add(t_y_x_update)
    

In [8]:
#%%timeit -n 1 -r 1

In [None]:
# Weight vector of the linear classifier, shape (d, 1), initialised to zeros.
model = tf.Variable(tf.zeros(shape=(d, 1)), trainable=True, name='weights', dtype=tf.float32)

# preprocess() already repeats the training data NUM_EPOCHS times, so a single
# pass over the preprocessed dataset covers all epochs. Wrapping this in an
# additional `range(NUM_EPOCHS)` loop would train for NUM_EPOCHS**2 epochs.
for batch in preprocessed_train_dataset:
    train_on_batch(model, batch, C=0.1)

In [None]:

# Running accuracy over all evaluated test batches
test_accuracy = tf.keras.metrics.Accuracy()

# Walk through the preprocessed testing dataset batch by batch
for x_batch, y_batch in preprocessed_test_dataset:
    # labels: from shape (batchsize,) to (batchsize, 1) to line up with the predictions
    y_batch = tf.expand_dims(y_batch, axis=1)

    # Predicted label is the sign of dot(w, x), shape=(batchsize, 1)
    scores = tf.matmul(x_batch, model)
    predictions = tf.sign(scores)

    test_accuracy.update_state(y_batch, predictions)

# Report the accuracy accumulated over every batch
print(f'Test accuracy: {test_accuracy.result().numpy()}')