In [4]:
import numpy as np
import sklearn
import torch.optim
from sklearn.neural_network import MLPClassifier
from rlace import solve_adv_game, init_classifier

def create_synthetic_binary_classification_dataset(n_samples, dimensionality, noise=1.0, rng=None):
    """Generate a linearly separable synthetic binary-classification dataset.

    Features are i.i.d. standard-normal draws. A single random direction ``u``
    is sampled, and a sample is labelled 1 when its dot product with ``u`` is
    strictly positive and 0 otherwise.

    Args:
        n_samples: Number of samples (rows of ``X``).
        dimensionality: Number of features (columns of ``X``).
        noise: Currently unused; kept only so existing callers that pass it
            keep working. The labels are a noiseless function of ``X``.
        rng: Optional seed or ``numpy.random.Generator`` for reproducible
            draws. When ``None`` (the default) the global ``np.random`` state
            is used, exactly as before this parameter existed.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, dimensionality)``
        and ``y`` is an integer array of shape ``(n_samples,)`` with values
        in ``{0, 1}``.
    """
    if rng is None:
        # Legacy path: draw from NumPy's global RNG so that callers relying on
        # np.random.seed(...) get the same data as before.
        X = np.random.randn(n_samples, dimensionality)
        u = np.random.randn(dimensionality)
    else:
        # default_rng accepts an int seed or passes an existing Generator through.
        generator = np.random.default_rng(rng)
        X = generator.standard_normal((n_samples, dimensionality))
        u = generator.standard_normal(dimensionality)

    # Label by which side of the hyperplane orthogonal to u each sample falls on.
    y = (X @ u > 0).astype("int")

    return X, y


def train_linear_classifier_and_report_accuracy(X,y):
    """Fit a classifier on ``(X, y)`` and return its accuracy on that same data.

    The classifier comes from ``init_classifier()``; the note left in the
    original code suggests it stands in for a scikit-learn
    ``LogisticRegression(max_iter=5000)`` -- confirm against ``rlace``.

    Note that the returned value is *training* accuracy: the model is scored
    on the exact samples it was fitted to.

    Args:
        X: Feature matrix passed to ``fit`` and ``predict``.
        y: Labels compared element-wise against the predictions.

    Returns:
        The mean of ``predictions == y``.
    """
    model = init_classifier()
    model.fit(X, y)

    # Element-wise agreement between predictions and the true labels.
    is_correct = model.predict(X) == y
    return np.mean(is_correct)

## Generate a synthetic dataset

In [5]:
# 5000 samples with 16 standard-normal features each; labels come from a random hyperplane.
dim = 16
X, y = create_synthetic_binary_classification_dataset(
    dimensionality=dim,
    n_samples=5000,
)


In [6]:
# Fraction of positive labels (this is also the class balance used for the majority baseline).
np.mean(y)

0.521

## Train a linear classifier on the original data

In [7]:
# Baseline before any projection. The helper scores the classifier on the
# same data it was fitted on, so this is training accuracy.
acc = train_linear_classifier_and_report_accuracy(X, y)
print("Accuracy of linear classifier on the original dataset:", acc)

Accuracy of linear classifier on the original dataset: 0.9968


### Learn a projection matrix that reduces the data's rank by 1.

In [8]:

# Configure and run the R-LACE adversarial game.
optimizer_class = torch.optim.SGD
optimizer_params_P = dict(lr=0.0002, momentum=0.8)
optimizer_params_predictor = dict(lr=0.001, weight_decay=1e-5)
epsilon = 0.001
batch_size = 64

# NOTE(review): the same (X, y) pair is passed twice -- presumably as both the
# training and the evaluation split; confirm against rlace.solve_adv_game.
output = solve_adv_game(
    X, y, X, y,
    rank=1,
    device="cpu",
    out_iters=75000,
    optimizer_class=optimizer_class,
    optimizer_params_P=optimizer_params_P,
    optimizer_params_predictor=optimizer_params_predictor,
    epsilon=epsilon,
    batch_size=batch_size,
)

# The learned projection is stored under the "P" key of the result.
projection_matrix = output["P"]

13000/75000. Acc post-projection: 52.040%; best so-far: 52.040%; Maj: 52.100%; G


## Project the data and train a linear classifier on the projected data

In [9]:
# Apply the learned projection to every sample.
X_projected = X @ projection_matrix

# Re-fit the linear classifier on the projected features (training accuracy again).
acc = train_linear_classifier_and_report_accuracy(X_projected, y)
print("Accuracy of linear classifier on projected dataset:", acc)

# Reference point: accuracy of always predicting the more frequent class.
print("Majority accuracy:", max(np.mean(y), 1 - np.mean(y)))

Accuracy of linear classifier on projected dataset: 0.5204
Majority accuracy: 0.521


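## Nonlinear classification

The linear classifier above drops to majority accuracy after projection; here we check whether a nonlinear classifier (an MLP) can still recover the label from the projected data.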

In [11]:
# Fit a default MLP on the projected data and score it on the same data
# (training accuracy). No random_state is set, so the score varies between runs.
mlp = MLPClassifier().fit(X_projected, y)
mlp.score(X_projected, y)



0.733