In [44]:
import numpy as np
import sklearn
import torch.optim
from sklearn.neural_network import MLPClassifier
from rlace import solve_adv_game, init_classifier

def create_synthetic_binary_classification_dataset(n_samples, dimensionality, u, noise=1.0):

    X = np.random.randn(n_samples, dimensionality)
    y = (X@u > 0).astype("int")
    
    return X, y


def train_linear_classifier_and_report_accuracy(X_train, y_train, X_dev, y_dev):
    # train a linear classifier
    clf = init_classifier() #sklearn.linear_model.LogisticRegression(max_iter=5000)
    clf.fit(X_train,y_train)
    # report accuracy
    predicted_y = clf.predict(X_dev)
    accuracy = np.mean(predicted_y == y_dev)
    return accuracy

## generate synthetic dataset

In [45]:
dim = 16
n=10000
train_size = int(n * 0.8)
u = np.random.randn(dim) 
X,y = create_synthetic_binary_classification_dataset(n_samples=n, dimensionality=dim, u=u)

X_train, y_train, X_dev, y_dev = X[:train_size], y[:train_size], X[train_size:], y[train_size:]

In [46]:
y.mean()

0.5033

## train a linear classifier

In [48]:
acc = train_linear_classifier_and_report_accuracy(X_train,y_train, X_dev, y_dev)
print("Accuracy of linear classifier on the original dataset:",acc)

Accuracy of linear classifier on the original dataset: 0.997


### Learn a projection matrix that reduces the data's rank by 1.

In [54]:

# run rlace
optimizer_class = torch.optim.SGD
optimizer_params_P = {"lr": 0.0002, "momentum": 0.8}
optimizer_params_predictor = {"lr": 0.001, "weight_decay": 1e-5}
epsilon = 0.001
batch_size = 64
output = solve_adv_game(X,y, X,y, rank=1, device="cpu", out_iters=75000,
                        optimizer_class=optimizer_class, optimizer_params_P=optimizer_params_P,
                        optimizer_params_predictor=optimizer_params_predictor, epsilon=epsilon, batch_size=batch_size)

projection_matrix = output["P"]

13000/75000. Acc post-projection: 50.360%; best so-far: 50.360%; Maj: 50.330%; G


In [49]:
#u_unit = u / np.linalg.norm(u)
#projection_matrix = np.eye(dim) - np.outer(u_unit, u_unit)

## project & train a classifier on the projected data

In [55]:
# project the data
X_train_projected = X_train@projection_matrix
X_dev_projected = X_dev@projection_matrix

# train a linear classifier on the projected data
acc = train_linear_classifier_and_report_accuracy(X_train_projected,y_train, X_dev_projected, y_dev)
print("Accuracy of linear classifier on projected dataset:",acc)
print("Majority accuracy:", max(np.mean(y), 1-np.mean(y)))

Accuracy of linear classifier on projected dataset: 0.494
Majority accuracy: 0.5033


## nonlinear classification

In [57]:
mlp = MLPClassifier()
mlp.fit(X_train_projected, y_train)
mlp.score(X_train_projected, y_train), mlp.score(X_dev_projected, y_dev)



(0.71775, 0.512)