In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"


class VAE(nn.Module):
    def __init__(self, input_dim):
        super(VAE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
        )
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, input_dim),
            nn.Sigmoid(),
        )

        self.mu = nn.Linear(8, 8)
        self.logvar = nn.Linear(8, 8)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5*logvar)
        eps = torch.randn_like(std)
        return mu + eps*std

    def forward(self, x):
        x = self.encoder(x)
        mu = self.mu(x)
        logvar = self.logvar(x)
        z = self.reparameterize(mu, logvar)
        return self.decoder(z), mu, logvar

def loss_function(recon_x, x, mu, logvar, dim):
    MSE = nn.MSELoss(reduction="sum")
    reconstruction_loss = MSE(recon_x, x.view(-1, dim))
    KL_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return reconstruction_loss + KL_divergence

In [2]:
import pickle
import io
class CPU_Unpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == "torch.storage" and name == "_load_from_bytes":
            return lambda b: torch.load(io.BytesIO(b), map_location="cpu")
        else:
            return super().find_class(module, name)


with open('2_layer_real_models.pickle', 'rb') as f:
    real_models = CPU_Unpickler(f).load()

def state_dict_to_vec(state_dict):
    flat_params = []
    for param in state_dict.values():
        flat_params.append(param.view(-1))
    return torch.cat(flat_params)

tensor_list = [state_dict_to_vec(x) for x in real_models]

data = torch.stack(tensor_list).to(device)
dim = data[0].size(0)
# Normalize the data
data_min = data.min()
data_max = data.max()
data_normalized = (data - data_min) / (data_max - data_min)

In [3]:
vae = VAE(dim).to(device)
optimizer = optim.Adam(vae.parameters(), lr=1e-3)

epochs = 100
for epoch in range(epochs):
    vae.train()
    train_loss = 0
    for tensor in data_normalized:
        optimizer.zero_grad()
        recon_batch, mu, logvar = vae(tensor.view(-1, dim))
        loss = loss_function(recon_batch, tensor, mu, logvar, dim)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print("Epoch: {}, Loss: {:.4f}".format(epoch, train_loss / len(data_normalized)))

Epoch: 0, Loss: 0.5437
Epoch: 1, Loss: 0.5067
Epoch: 2, Loss: 0.5069
Epoch: 3, Loss: 0.5043
Epoch: 4, Loss: 0.5035
Epoch: 5, Loss: 0.5038
Epoch: 6, Loss: 0.5036
Epoch: 7, Loss: 0.5032
Epoch: 8, Loss: 0.5026
Epoch: 9, Loss: 0.5025
Epoch: 10, Loss: 0.5023
Epoch: 11, Loss: 0.5021
Epoch: 12, Loss: 0.5018
Epoch: 13, Loss: 0.5027
Epoch: 14, Loss: 0.5018
Epoch: 15, Loss: 0.5021
Epoch: 16, Loss: 0.5014
Epoch: 17, Loss: 0.5025
Epoch: 18, Loss: 0.5019
Epoch: 19, Loss: 0.5023
Epoch: 20, Loss: 0.5017
Epoch: 21, Loss: 0.5016
Epoch: 22, Loss: 0.5017
Epoch: 23, Loss: 0.5017
Epoch: 24, Loss: 0.5015
Epoch: 25, Loss: 0.5020
Epoch: 26, Loss: 0.5020
Epoch: 27, Loss: 0.5016
Epoch: 28, Loss: 0.5014
Epoch: 29, Loss: 0.5014
Epoch: 30, Loss: 0.5015
Epoch: 31, Loss: 0.5013
Epoch: 32, Loss: 0.5013
Epoch: 33, Loss: 0.5013
Epoch: 34, Loss: 0.5016
Epoch: 35, Loss: 0.5013
Epoch: 36, Loss: 0.5014
Epoch: 37, Loss: 0.5014
Epoch: 38, Loss: 0.5012
Epoch: 39, Loss: 0.5011
Epoch: 40, Loss: 0.5011
Epoch: 41, Loss: 0.5011
Ep

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
# device = "cpu"

iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.33)
X_train, X_test, y_train, y_test = (
    torch.tensor(X_train, device=device, dtype=torch.float32),
    torch.tensor(X_test, device=device, dtype=torch.float32),
    torch.tensor(y_train, device=device, dtype=torch.long),
    torch.tensor(y_test, device=device, dtype=torch.long),
)

In [5]:
from sklearn.metrics import accuracy_score
from model import Iris2LayerClassifier


NUM_OF_MODELS = 500


# Generate weight matrices from the VAE model
generated_tensors = []
with torch.no_grad():
    vae.eval()
    for _ in range(NUM_OF_MODELS):
        z = torch.randn(1, 8).to(device)  
        g = vae.decoder(z)
        generated_tensors.append(g.view(-1))


def vec_to_state_dict(model, flat_params):
    target_state_dict = model.state_dict()
    current_idx = 0

    new_state_dict = {}
    for name, param in target_state_dict.items():
        num_elements = param.numel()
        new_param = flat_params[current_idx : current_idx + num_elements].view(
            param.shape
        )
        new_state_dict[name] = new_param
        current_idx += num_elements

    return new_state_dict
temp_model = Iris2LayerClassifier()
generated_models = [vec_to_state_dict(temp_model, v) for v in generated_tensors]


for state_dict in generated_models:
    model = Iris2LayerClassifier().to(device)
    model.load_state_dict(state_dict)
    model.eval()
    with torch.inference_mode():
        y_pred = model(X_test)
        _, labels = torch.max(y_pred, 1)
        accuracy = accuracy_score(y_test.cpu().numpy(), labels.cpu().numpy())
        print(accuracy)


0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3
0.3


In [6]:
with open("2_layer_generated_models.pickle", "wb") as f:
    pickle.dump(generated_models, f)