In [20]:
import numpy as np
import sklearn
import torch.optim

from rlace import solve_adv_game

def create_synthetic_binary_classification_dataset(n_samples, dimensionality, noise=1.0):
    """Sample a toy two-class dataset from label-dependent Gaussians.

    Labels are drawn with ``np.random.randint(2, ...)``, i.e. each is 0 or 1.
    Every feature of a sample is then drawn from a normal distribution whose
    mean is that sample's label and whose standard deviation is ``noise``.

    All draws come from NumPy's global random state, so the result is only
    reproducible if the caller seeds that state beforehand.

    Args:
        n_samples: number of samples (rows) to generate.
        dimensionality: number of features per sample.
        noise: standard deviation of the Gaussian noise around the label mean.

    Returns:
        A tuple ``(X, y)``: ``X`` stacks one length-``dimensionality`` feature
        vector per sample, ``y`` is the length-``n_samples`` label array.
    """
    labels = np.random.randint(2, size=n_samples)
    # One normal draw per sample, in label order, so the global RNG stream is
    # consumed sample by sample.
    features = np.array([
        np.random.normal(loc=label, scale=noise, size=dimensionality)
        for label in labels
    ])
    return features, labels

def apply_random_reversible_nonlinear_transformation(X):
    """Mix the features of ``X`` with a fixed, seeded random square matrix.

    The matrix size is taken from the last axis of ``X`` instead of a
    notebook-level global, so the function works for any feature width and
    does not depend on cell execution order.

    NOTE(review): despite the function name, the value returned is the
    *linear* map ``X @ W2``. Earlier revisions also computed a cubic hidden
    representation ``(X @ W1) ** 3`` but never used it; that dead computation
    is dropped here. Feeding it into ``W2`` would change every downstream
    number in the notebook, so that decision is left to the author.

    Side effect: reseeds NumPy's global random state with 0 on every call.

    Args:
        X: array-like whose last axis holds the features.

    Returns:
        ``X @ W2`` where ``W2`` is a standard-normal square matrix over the
        feature axis.
    """
    n_features = np.shape(X)[-1]
    np.random.seed(0)
    # W1 is not used in the output, but it must still be drawn first so that
    # W2 comes from the same position in the seeded stream as before.
    np.random.normal(loc=0, scale=1, size=(n_features, n_features))
    W2 = np.random.normal(loc=0, scale=1, size=(n_features, n_features))
    return X @ W2

def train_linear_classifier_and_report_accuracy(X,y):
    """Fit a logistic-regression classifier on (X, y) and return its accuracy.

    The classifier is scored on the same samples it was fitted on, so the
    returned value is a *training* accuracy (a measure of how linearly
    separable the labels are in ``X``), not a held-out estimate.

    Args:
        X: feature matrix passed to ``LogisticRegression.fit``.
        y: target labels, one per row of ``X``.

    Returns:
        Fraction of samples in ``X`` whose predicted label equals ``y``.
    """
    # Import the submodule explicitly: a bare `import sklearn` does not
    # guarantee that `sklearn.linear_model` has been loaded, so attribute
    # access only worked when another module had already imported it.
    from sklearn.linear_model import LogisticRegression

    # train a linear classifier
    clf = LogisticRegression(max_iter=5000)
    clf.fit(X, y)
    # report accuracy on the data the classifier was fitted on
    predicted_y = clf.predict(X)
    return np.mean(predicted_y == y)

## Generate a synthetic dataset

In [21]:
dim = 16
# Seed NumPy's global RNG before sampling so the dataset (and therefore the
# accuracies reported below) is the same on every Restart & Run All.
np.random.seed(42)
# Keep the raw samples under their own name so the untransformed features
# stay available after X is replaced by its transformed version.
X_raw, y = create_synthetic_binary_classification_dataset(n_samples=10000, dimensionality=dim)
X = apply_random_reversible_nonlinear_transformation(X_raw)

## Train a linear classifier

In [22]:
# Baseline: how well a linear classifier recovers y from X before any projection.
acc = train_linear_classifier_and_report_accuracy(X, y)
print("Accuracy of linear classifier on the original dataset:", acc)

Accuracy of linear classifier on the original dataset: 0.9763


## Learn a projection matrix that reduces the data's rank by 1

In [24]:

# Run RLACE.
# Both players of the adversarial game use plain SGD with identical settings.
optimizer_class = torch.optim.SGD
optimizer_params_P = dict(lr=0.0075, weight_decay=1e-4)
optimizer_params_predictor = dict(lr=0.0075, weight_decay=1e-4)
epsilon = 0.001
batch_size = 64

# NOTE(review): (X, y) is passed twice — presumably once as the training split
# and once as the evaluation split; confirm against rlace.solve_adv_game.
output = solve_adv_game(
    X, y, X, y,
    rank=1,
    device="cpu",
    out_iters=75000,
    optimizer_class=optimizer_class,
    optimizer_params_P=optimizer_params_P,
    optimizer_params_predictor=optimizer_params_predictor,
    epsilon=epsilon,
    batch_size=batch_size,
)

# The learned projection is stored under the "P" key of the result.
projection_matrix = output["P"]

5000/75000. Acc post-projection: 50.050%; best so-far: 50.050%; Maj: 50.040%; Ga


## Project the data and train a classifier on the projected data

In [25]:
# Apply the learned projection to every sample.
X_projected = X @ projection_matrix

# Re-fit a linear classifier on the projected features and compare its
# accuracy against always predicting the more frequent label.
acc = train_linear_classifier_and_report_accuracy(X_projected, y)
print("Accuracy of linear classifier on projected dataset:", acc)
print("Majority accuracy:", max(y.mean(), 1 - y.mean()))

Accuracy of linear classifier on projected dataset: 0.5134
Majority accuracy: 0.5004
