In [1]:
from __future__ import annotations

import json

import numpy as np

# Module-level value pools that the instance-sampling functions in later
# cells draw from when they build each CSV row.
batch_size = [
    32,
    1024,
]
optimizer_types = [
    "AdamW",
    "Adam",
    "RMSprop",
    "SGD",
]
# 0 and the largest unsigned 32-bit integer.
seed = [0, (1 << 32) - 1]

In [11]:
# Ordered layer specification for the small MNIST network: each entry is a
# (layer name, keyword arguments) pair. How the names are turned into actual
# layers is decided by whatever consumes the JSON file — not visible here.
small_mnist_model = [
    ("CONV2D", dict(in_channels=1, out_channels=32, kernel_size=3)),
    ("RELU", dict()),
    ("CONV2D", dict(in_channels=32, out_channels=64, kernel_size=3)),
    ("RELU", dict()),
    ("POOLING", dict(kernel_size=2)),
    ("DROPOUT", dict(p=0.25)),
    ("FLATTEN", dict()),
    # 9216 == 64 * 12 * 12; presumably 64 channels of 12x12 feature maps for
    # 28x28 inputs with unpadded convolutions — TODO confirm with the builder.
    ("LINEAR", dict(in_features=64 * 12 * 12, out_features=128)),
    ("RELU", dict()),
    ("DROPOUT", dict(p=0.5)),
    ("LINEAR", dict(in_features=128, out_features=10)),
    ("LOGSOFTMAX", dict(dim=1)),
]
# Ordered layer specification for the small CIFAR network, in the same
# (layer name, keyword arguments) format as the MNIST specification.
small_cifar_model = [
    ("CONV2D", dict(in_channels=3, out_channels=6, kernel_size=5)),
    ("POOLING", dict(kernel_size=2)),
    ("CONV2D", dict(in_channels=6, out_channels=16, kernel_size=5)),
    ("POOLING", dict(kernel_size=2)),
    ("FLATTEN", dict()),
    # 16 * 5 * 5 == 400 flattened features; presumably 16 channels of 5x5
    # maps for 32x32 inputs with unpadded convolutions — TODO confirm.
    ("LINEAR", dict(in_features=16 * 5 * 5, out_features=120)),
    ("RELU", dict()),
    ("LINEAR", dict(in_features=120, out_features=84)),
    ("RELU", dict()),
    ("LINEAR", dict(in_features=84, out_features=10)),
    ("LOGSOFTMAX", dict(dim=1)),
]

# Serialise both layer specifications to JSON files in the current working
# directory, MNIST first and CIFAR second. Existing files are truncated.
for _target, _layers in (
    ("small_mnist_model.json", small_mnist_model),
    ("small_cifar_model.json", small_cifar_model),
):
    with open(_target, "w+") as f:
        json.dump(_layers, f)

In [2]:
# Names of the two JSON model specification files (MNIST first, CIFAR second).
model_files = [
    "small_mnist_model.json",
    "small_cifar_model.json",
]

In [14]:
def sample_cifar_instances(file_name, num_instances, rng=None):
    """Write a CSV of randomly sampled instances for the small CIFAR10 model.

    Every row uses dataset ``CIFAR10``, model type ``from_file`` and
    ``small_cifar_model.json`` as model kwargs. The optimizer, seed and batch
    size are sampled per row from the module-level ``optimizer_types``,
    ``seed`` and ``batch_size`` settings. The ``optimizer_params`` column is
    left empty.

    Args:
        file_name: Path of the CSV file to create. An existing file is
            overwritten.
        num_instances: Number of instance rows written after the header.
        rng: Optional ``np.random.Generator`` to sample with. Pass a seeded
            generator to make the generated file reproducible; when omitted a
            fresh, unseeded generator is created (the previous behaviour).
    """
    dataset = "CIFAR10"
    train_validation_ratio = 0.8
    fraction_of_dataset = 1
    if rng is None:
        rng = np.random.default_rng()

    # `seed` holds [low, high] bounds. Sampling it with `rng.choice` could only
    # ever produce one of the two endpoints, so draw from the inclusive range.
    seed_low, seed_high = seed

    header_string = "id,dataset,model_type,model_kwargs,optimizer,optimizer_params,seed,batch_size,train_validation_ratio,fraction_of_dataset\n"
    # The file is only written, never read back, so plain "w" is sufficient.
    with open(file_name, "w") as f:
        f.write(header_string)
        for i in range(num_instances):
            # Draw order (optimizer, seed, batch size) matches the column order.
            optimizer = rng.choice(optimizer_types)
            instance_seed = rng.integers(seed_low, seed_high, endpoint=True)
            # NOTE(review): batch size is still picked from the listed values
            # only; if `batch_size` is also meant as a range, sample it like
            # the seed instead — confirm the intent.
            instance_batch_size = rng.choice(batch_size)
            instance_string = f"{i},{dataset},from_file,small_cifar_model.json,{optimizer},,{instance_seed},{instance_batch_size},{train_validation_ratio},{fraction_of_dataset}\n"
            f.write(instance_string)

In [15]:
# Generate the CIFAR10 instance sets: 10 instances each for train and test.
for _split in ("train", "test"):
    sample_cifar_instances(f"sgd_cifar10_variations_{_split}.csv", 10)

In [5]:
def sample_resnet_instances(file_name, num_instances, rng=None):
    """Write a CSV of randomly sampled instances for the torchhub ResNet18 entry.

    Every row uses model type ``from_torchhub`` with the fixed ``model_kwargs``
    string below. The dataset is sampled from CIFAR10, MNIST and FashionMNIST,
    and the optimizer, seed and batch size are sampled per row from the
    module-level ``optimizer_types``, ``seed`` and ``batch_size`` settings.
    The ``optimizer_params`` column is left empty.

    Args:
        file_name: Path of the CSV file to create. An existing file is
            overwritten.
        num_instances: Number of instance rows written after the header.
        rng: Optional ``np.random.Generator`` to sample with. Pass a seeded
            generator to make the generated file reproducible; when omitted a
            fresh, unseeded generator is created (the previous behaviour).
    """
    # NOTE(review): the leading single quote is unbalanced and is written to
    # the CSV verbatim — confirm the consumer of this column expects it.
    model_kwargs = "'pytorch/vision:v0.10.0-resnet18-False"
    datasets = ["CIFAR10", "MNIST", "FashionMNIST"]
    train_validation_ratio = 0.8
    fraction_of_dataset = 1
    if rng is None:
        rng = np.random.default_rng()

    # `seed` holds [low, high] bounds. Sampling it with `rng.choice` could only
    # ever produce one of the two endpoints, so draw from the inclusive range.
    seed_low, seed_high = seed

    header_string = "id,dataset,model_type,model_kwargs,optimizer,optimizer_params,seed,batch_size,train_validation_ratio,fraction_of_dataset\n"
    # The file is only written, never read back, so plain "w" is sufficient.
    with open(file_name, "w") as f:
        f.write(header_string)
        for i in range(num_instances):
            # Draw order (dataset, optimizer, seed, batch size) matches the
            # column order of the row.
            dataset = rng.choice(datasets)
            optimizer = rng.choice(optimizer_types)
            instance_seed = rng.integers(seed_low, seed_high, endpoint=True)
            # NOTE(review): batch size is still picked from the listed values
            # only; if `batch_size` is also meant as a range, sample it like
            # the seed instead — confirm the intent.
            instance_batch_size = rng.choice(batch_size)
            instance_string = f"{i},{dataset},from_torchhub,{model_kwargs},{optimizer},,{instance_seed},{instance_batch_size},{train_validation_ratio},{fraction_of_dataset}\n"
            f.write(instance_string)

In [6]:
# Generate the ResNet18 instance sets: 10 instances each for train and test.
for _split in ("train", "test"):
    sample_resnet_instances(f"sgd_resnet18_variations_{_split}.csv", 10)