In [15]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from models import IrisClassifier
import torch

# One seed for both the train/test split and PyTorch's global RNG, so that
# "Restart Kernel -> Run All" repeats the same weight initialisations,
# DataLoader shuffles and label permutations in the cells below.
# (Bit-exact repeatability on GPU may still depend on the CUDA kernels used.)
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

iris = load_iris()
X, y = iris.data, iris.target

# Hold out 33% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=SEED, test_size=0.33
)
# Rebind the split arrays as tensors on `device`: float32 features and
# long (int64) class-index labels.
X_train, X_test, y_train, y_test = (
    torch.tensor(X_train, device=device, dtype=torch.float32),
    torch.tensor(X_test, device=device, dtype=torch.float32),
    torch.tensor(y_train, device=device, dtype=torch.long),
    torch.tensor(y_test, device=device, dtype=torch.long),
)

In [19]:
import torch.optim as optim
from sklearn.metrics import accuracy_score
from collections import namedtuple
from torch.utils.data import DataLoader
import torch.nn as nn

# Lightweight record for one trained model: its state_dict plus a single
# matrix built from the model's parameters (see train_model).
ModelInfo = namedtuple("ModelInfo", "state_dict matrix")

def train_model(
    train_loader: DataLoader, num_epochs: int = 200, lr: float = 0.005
) -> "ModelInfo":
    """Train a fresh ``IrisClassifier`` and snapshot its learned parameters.

    A new model is created on the module-level ``device`` and optimised with
    Adam against a cross-entropy loss.

    Args:
        train_loader: Iterable yielding ``(inputs, targets)`` batches. Batches
            are passed to the model unchanged (no device transfer happens
            here), so they must already live on ``device``.
        num_epochs: Number of full passes over ``train_loader``. Defaults to
            200, the value previously hard-coded.
        lr: Learning rate handed to Adam. Defaults to 0.005, the value
            previously hard-coded.

    Returns:
        A ``ModelInfo`` whose ``state_dict`` is the trained model's
        ``state_dict()`` and whose ``matrix`` is ``model.linear.weight`` with
        ``model.linear.bias`` appended as one extra trailing column.
    """
    model = IrisClassifier().to(device=device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for _ in range(num_epochs):
        for inputs, targets in train_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    # detach() is the supported way to read parameter values without autograd
    # tracking; the legacy `.data` attribute bypasses autograd's safety checks.
    W = model.linear.weight.detach()
    # Reshape the bias into a column so it can be concatenated next to W.
    b = model.linear.bias.detach().view(-1, 1)
    Wb_matrix = torch.cat((W, b), dim=1)
    return ModelInfo(state_dict=model.state_dict(), matrix=Wb_matrix)

In [26]:
from torch.utils.data import TensorDataset, DataLoader

# How many independent models to train per experiment.
NUM_OF_MODELS = 500

# Pair the training features with their true labels; the same shuffling
# loader (batches of 16) is reused for every model trained in this cell.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)

real_models = []
for i in range(NUM_OF_MODELS):
    model_info = train_model(train_loader)
    real_models += [model_info]
    # Report progress after every 50th completed model.
    trained_so_far = i + 1
    if trained_so_far % 50 == 0:
        print("trained {} models".format(trained_so_far))

trained 49 models
trained 99 models
trained 149 models
trained 199 models
trained 249 models
trained 299 models
trained 349 models
trained 399 models
trained 449 models
trained 499 models


In [27]:
scrambled_models = []
for i in range(NUM_OF_MODELS):
    # Draw a new random permutation for each model and reorder only the
    # labels with it; the features keep their original order.
    scramble_indices = torch.randperm(y_train.shape[0], device=device)
    y_train_scrambled = y_train[scramble_indices]

    train_dataset_scrambled = TensorDataset(X_train, y_train_scrambled)
    train_loader_scrambled = DataLoader(
        dataset=train_dataset_scrambled, shuffle=True, batch_size=16
    )
    model_info = train_model(train_loader_scrambled)
    scrambled_models += [model_info]
    # Report progress after every 50th completed model.
    trained_so_far = i + 1
    if trained_so_far % 50 == 0:
        print("trained {} models".format(trained_so_far))

trained 50 models
trained 100 models
trained 150 models
trained 200 models
trained 250 models
trained 300 models
trained 350 models
trained 400 models
trained 450 models
trained 500 models


In [33]:
import pickle
# Safety switch: set to True to (over)write the pickle files on disk with the
# matrices from this run. Left False so a plain re-run never clobbers them.
edit_pickle = False

# Keep only the combined matrix of each trained model.
real_matrix_list = [real.matrix for real in real_models]
scrambled_matrix_list = [s.matrix for s in scrambled_models]

if edit_pickle:
    # NOTE(review): the tensors are pickled on whatever device they were
    # created on; files written from a CUDA run presumably need CUDA to load.
    # Confirm how the consumer of these files reads them.
    with open('scrambled_matrix_list.pickle', 'wb') as f:
        pickle.dump(scrambled_matrix_list, f)
    # Report success only after the file has been closed.
    print("Saved scrambled models")

    with open('real_matrix_list.pickle', 'wb') as f:
        pickle.dump(real_matrix_list, f)
    print("Saved trained models")

Saved scrambled models
Saved trained models
