# Generate Experiments


In [1]:
import os, json, glob
import numpy as np

# Name of the results directory.
# NOTE(review): not referenced by any of the cells visible in this notebook - confirm it is still needed.
experiment_results_directory_name = 'results'

In [2]:
# Default experiment configuration. The generator functions below overwrite
# fields of this dict in place before each config is saved, so its contents
# after running a cell reflect the last experiment generated.
cfg = {
    "batch_size": 1024,
    "epochs": 10,  # appended to the experiment name as "_<epochs>e" for non-baseline runs
    "baseline": False,  # True adds "_baseline" to the experiment name
    "num_base_examples": 20,  # "<n>be" in the experiment name
    "target_set_size": 50000,  # shown in the name ("_<n>ts") only when not 45000 or 50000
    "num_inner_steps": 10,
    "inner_batch_size": 10,  # "<n>ib" in the experiment name
    "expname": "expname",  # placeholder; replaced by the get_experiment_name() result before saving
    "source": "emnist",  # must be a key of dataset_map in get_experiment_name()
    "target": "mnist",  # must be a key of dataset_map in get_experiment_name()
    "balanced_source": True,  # "_bl" in the name when True, "_ubl" when False
    "resnet": False,  # "_rn" in the name when True, "_cnn" when False
    "inner_lr": 0.01,
    "random_seed": 1234,  # "_<seed>s" in the experiment name
    "num_steps_analysis": False,  # True adds "_nsa" to the experiment name
    "test_various_models": False,  # True adds "_tvm" to the experiment name
    "label_smoothing": 0  # any truthy value adds "_ls" to the experiment name
}

In [3]:
def save_cfg(cfg):
    """Write ``cfg`` to ``../experiment_configs/<expname>.json`` and register its name.

    The config is serialised as JSON with sorted keys, and the experiment name
    is appended as a new line to ``../experiment_configs/configs.txt``.

    Args:
        cfg: JSON-serialisable dict; ``cfg['expname']`` supplies the file name.

    Raises:
        KeyError: if ``cfg`` has no ``'expname'`` entry.
    """
    experiment_name = cfg['expname']
    configs_dir = os.path.join('..', 'experiment_configs')

    # Create the directory on first use so a fresh checkout does not fail
    # with FileNotFoundError when the first config is written.
    os.makedirs(configs_dir, exist_ok=True)

    with open(os.path.join(configs_dir, experiment_name + '.json'), 'w') as f:
        json.dump(cfg, f, sort_keys=True)

    # Append (never truncate) so earlier experiment names stay registered.
    with open(os.path.join(configs_dir, 'configs.txt'), 'a') as f:
        f.write(experiment_name + '\n')

In [4]:
def get_experiment_name(cfg, method="svm", ld=True):
    """Build a descriptive, versioned experiment name from ``cfg``.

    The name is a ``_``-joined sequence of tags, in this order:
    ``ld``/``dd`` prefix with ``<num_base_examples>be`` and
    ``<inner_batch_size>ib``; ``<target_set_size>ts`` (only when the size is
    neither 45000 nor 50000); ``baseline`` (if set);
    ``<source>_to_<target>`` using short dataset codes; ``bl``/``ubl``;
    ``ls`` (if label smoothing is truthy); ``rn``/``cnn``; ``<epochs>e``
    (non-baseline only); ``nsa``; ``tvm``; ``method``; ``<random_seed>s``;
    and finally ``v<N>``.

    ``N`` is one more than the highest version among existing
    ``../experiment_configs/<name>_v<N>.json`` files, or 0 when there are
    none. Files that share the prefix but do not have exactly that form
    (backups, editor temp files, ...) are ignored.

    Args:
        cfg: dict with the keys read below.
        method: string tag appended to the name.
        ld: if True the name starts with ``ld``, otherwise with ``dd``.

    Returns:
        The experiment name, including its version suffix.

    Raises:
        KeyError: if ``cfg`` lacks a required key, or ``cfg['source']`` /
            ``cfg['target']`` is not a known dataset.
    """
    dataset_map = {"emnist": "emn", "mnist": "mn", "fake": "fake", "cub": "cub",
                   "svhn": "svhn", "cifar10": "c10", "kmnist": "km", "imagenet": "imn", "cifar100": "c100", "k49": "k49"}

    name_parts = ["{}_{}be_{}ib".format("ld" if ld else "dd",
                                        cfg['num_base_examples'],
                                        cfg['inner_batch_size'])]

    if cfg['target_set_size'] not in (45000, 50000):
        name_parts.append("{}ts".format(cfg['target_set_size']))

    if cfg['baseline']:
        name_parts.append("baseline")
    name_parts.append(dataset_map[cfg['source']] + "_to_" + dataset_map[cfg['target']])

    name_parts.append("bl" if cfg['balanced_source'] else "ubl")

    if cfg['label_smoothing']:
        name_parts.append("ls")

    name_parts.append("rn" if cfg['resnet'] else "cnn")

    # Baseline names carry no epoch count.
    if not cfg['baseline']:
        name_parts.append("{}e".format(cfg['epochs']))

    if cfg['num_steps_analysis']:
        name_parts.append("nsa")

    if cfg['test_various_models']:
        name_parts.append("tvm")

    name_parts.append(method)
    name_parts.append(str(cfg['random_seed']) + "s")

    experiment_name = "_".join(name_parts) + "_v"

    # Escape the name so glob metacharacters in it (e.g. from `method`) are
    # matched literally; only the trailing '*' acts as a wildcard.
    experiment_configs_pattern = os.path.join(
        '..', 'experiment_configs', glob.escape(experiment_name) + '*')

    used_versions = []
    for config_path in glob.glob(experiment_configs_pattern):
        suffix = os.path.basename(config_path)[len(experiment_name):]
        version_text, extension = os.path.splitext(suffix)
        # Accept only "<digits>.json"; anything else sharing the prefix is
        # not a versioned config and must not break name generation.
        if extension == '.json' and version_text.isdecimal():
            used_versions.append(int(version_text))

    version_num = max(used_versions) + 1 if used_versions else 0

    return experiment_name + str(version_num)

## Basic Experiments

In [50]:
def ld_create_experiments_2(cfg):
    """Generate the basic experiment configs via ``ld_create_experiments_1``.

    Covers, in order: LD on MNIST and CIFAR-10, the matching baselines
    without label smoothing, the matching baselines with label smoothing
    0.1, and four cross-dataset LD settings with an unbalanced source.

    ``cfg`` is modified in place; the flags toggled here (``baseline``,
    ``label_smoothing``, ``balanced_source``) are restored before returning,
    while the other fields keep the values of the last generated experiment.
    """
    shared_settings = (
        ('test_various_models', False),
        ('baseline', False),
        ('epochs', 400),
        ('balanced_source', True),
        ('resnet', False),
        ('inner_lr', 0.01),
        ('random_seed', 1234),
        ('num_steps_analysis', False),
        ('batch_size', 1024),
    )
    for key, value in shared_settings:
        cfg[key] = value

    same_dataset_pairs = (("mnist", "mnist"), ("cifar10", "cifar10"))
    cross_dataset_pairs = (
        ("emnist", "mnist"),    # LD E to M
        ("emnist", "kmnist"),   # LD E to K
        ("cub", "cifar10"),     # LD B to C
        ("emnist", "k49"),      # LD E to K49
    )

    def generate_for(dataset_pairs):
        # One sweep over base-example counts per (source, target) pair.
        for source_name, target_name in dataset_pairs:
            cfg['source'] = source_name
            cfg['target'] = target_name
            ld_create_experiments_1(cfg)

    # LD MNIST, LD CIFAR-10
    generate_for(same_dataset_pairs)

    # baselines without label smoothing
    cfg['baseline'] = True
    generate_for(same_dataset_pairs)

    # baselines with label smoothing (baseline flag stays on)
    cfg['label_smoothing'] = 0.1
    generate_for(same_dataset_pairs)

    cfg['baseline'] = False
    cfg['label_smoothing'] = 0

    # for cross-dataset cases, the source does not need to be balanced
    cfg['balanced_source'] = False
    generate_for(cross_dataset_pairs)
    cfg['balanced_source'] = True


def ld_create_experiments_1(cfg):
    """Save one config per base-example count for the current ``cfg`` setting.

    For each count in 10, 20, 50, 100, 200, 500 this sets
    ``num_base_examples``, the paired ``inner_batch_size`` and ``epochs``,
    derives ``expname`` with method ``"or2"``, prints it and saves the config.
    ``cfg`` is modified in place.
    """
    # Inner batch size per base-example count; larger counts use 50.
    inner_batch_for = {10: 10, 20: 10, 50: 25, 100: 50}

    for base_examples in (10, 20, 50, 100, 200, 500):
        cfg['num_base_examples'] = base_examples
        cfg['inner_batch_size'] = inner_batch_for.get(base_examples, 50)

        # More than 100 base examples doubles the epoch count; the k49
        # target uses a quarter of the epochs of the other targets.
        is_k49_target = cfg['target'] == "k49"
        more_than_100 = base_examples > 100
        if is_k49_target:
            cfg['epochs'] = 200 if more_than_100 else 100
        else:
            cfg['epochs'] = 800 if more_than_100 else 400

        generated_name = get_experiment_name(cfg, method="or2")  # choose or2 or rr
        cfg['expname'] = generated_name
        print(generated_name)
        save_cfg(cfg)

In [32]:
# Generate and save the basic experiment configs; each generated name is printed.
ld_create_experiments_2(cfg)

ld_10be_10ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
ld_20be_10ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
ld_50be_25ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
ld_100be_50ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
ld_200be_50ib_mn_to_mn_bl_cnn_800e_or2_1234s_v0
ld_500be_50ib_mn_to_mn_bl_cnn_800e_or2_1234s_v0
ld_10be_10ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
ld_20be_10ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
ld_50be_25ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
ld_100be_50ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
ld_200be_50ib_c10_to_c10_bl_cnn_800e_or2_1234s_v0
ld_500be_50ib_c10_to_c10_bl_cnn_800e_or2_1234s_v0
ld_10be_10ib_baseline_mn_to_mn_bl_cnn_or2_1234s_v0
ld_20be_10ib_baseline_mn_to_mn_bl_cnn_or2_1234s_v0
ld_50be_25ib_baseline_mn_to_mn_bl_cnn_or2_1234s_v0
ld_100be_50ib_baseline_mn_to_mn_bl_cnn_or2_1234s_v0
ld_200be_50ib_baseline_mn_to_mn_bl_cnn_or2_1234s_v0
ld_500be_50ib_baseline_mn_to_mn_bl_cnn_or2_1234s_v0
ld_10be_10ib_baseline_c10_to_c10_bl_cnn_or2_1234s_v0
ld_20be_10ib_baseline_c10_to_c10_bl_cnn_or2_1234s_v0
ld_

In [51]:
def c100_create_experiments_2(cfg):
    """Generate the CIFAR-100 configs via ``c100_create_experiments_1``.

    Three configs are produced in order: LD, baseline, and baseline with
    label smoothing 0.1. ``cfg`` is modified in place; ``baseline`` and
    ``label_smoothing`` are reset to False / 0 before returning.
    """
    shared_settings = (
        ('test_various_models', False),
        ('baseline', False),
        ('epochs', 800),
        ('balanced_source', True),
        ('resnet', False),
        ('inner_lr', 0.01),
        ('random_seed', 1234),
        ('num_steps_analysis', False),
        ('batch_size', 1024),
    )
    for key, value in shared_settings:
        cfg[key] = value

    # Each stage applies its overrides on top of the previous stage's state.
    stages = (
        (),                              # LD CIFAR-100
        (('baseline', True),),           # baseline
        (('label_smoothing', 0.1),),     # baseline with label smoothing
    )
    for overrides in stages:
        for key, value in overrides:
            cfg[key] = value
        cfg['source'] = "cifar100"
        cfg['target'] = "cifar100"
        c100_create_experiments_1(cfg)

    cfg['baseline'] = False
    cfg['label_smoothing'] = 0


def c100_create_experiments_1(cfg):
    """Save a single config with 100 base examples and inner batch size 100.

    Sets ``expname`` from ``get_experiment_name`` (method ``"or2"``), prints
    it and saves the config. ``cfg`` is modified in place.
    """
    for key, value in (('num_base_examples', 100), ('inner_batch_size', 100)):
        cfg[key] = value

    generated_name = get_experiment_name(cfg, method="or2")  # choose or2 or rr
    cfg['expname'] = generated_name
    print(generated_name)
    save_cfg(cfg)

In [34]:
# Generate and save the CIFAR-100 configs (LD, baseline, baseline with label smoothing).
c100_create_experiments_2(cfg)

ld_100be_100ib_c100_to_c100_bl_cnn_800e_or2_1234s_v0
ld_100be_100ib_baseline_c100_to_c100_bl_cnn_or2_1234s_v0
ld_100be_100ib_baseline_c100_to_c100_bl_ls_cnn_or2_1234s_v0


## Experiments for Analysis

In [57]:
def ld_analysis_create_experiments_2(cfg):
    """Generate the analysis configs via ``ld_analysis_create_experiment``.

    Three analyses are generated in order:
      1. five random seeds, each for MNIST and for EMNIST -> MNIST;
      2. six reduced target set sizes for EMNIST -> MNIST;
      3. the number-of-steps analysis for MNIST and for EMNIST -> MNIST.

    ``cfg`` is modified in place; ``random_seed``, ``target_set_size``,
    ``balanced_source`` and ``num_steps_analysis`` are restored to
    1234 / 50000 / True / False before returning.
    """
    shared_settings = (
        ('test_various_models', False),
        ('baseline', False),
        ('epochs', 400),
        ('balanced_source', True),
        ('resnet', False),
        ('inner_lr', 0.01),
        ('random_seed', 1234),
        ('num_steps_analysis', False),
        ('batch_size', 1024),
    )
    for key, value in shared_settings:
        cfg[key] = value

    def mnist_then_emnist_to_mnist():
        # LD MNIST with the current settings, then LD E to M with an
        # unbalanced source; the balanced flag is switched back afterwards.
        cfg['source'] = "mnist"
        cfg['target'] = "mnist"
        ld_analysis_create_experiment(cfg)

        cfg['balanced_source'] = False
        cfg['source'] = "emnist"
        cfg['target'] = "mnist"
        ld_analysis_create_experiment(cfg)
        cfg['balanced_source'] = True

    # Analysis of using various base examples
    # use 5 random seeds, do LD MNIST 100 base examples with this and also LD E to M.
    for seed in (0, 465, 1234, 5439, 89432):
        cfg['random_seed'] = seed
        mnist_then_emnist_to_mnist()

    cfg['random_seed'] = 1234

    # Analysis of variable number of examples from the target set
    # this makes sense only for the cross dataset version, so do it just with LD E to M 100 base examples
    # Try the following numbers: [100, 500, 1000, 5000, 10000, 20000, all]
    cfg['balanced_source'] = False
    for target_examples in (100, 500, 1000, 5000, 10000, 20000):
        cfg['target_set_size'] = target_examples
        cfg['source'] = "emnist"
        cfg['target'] = "mnist"
        ld_analysis_create_experiment(cfg)

    cfg['balanced_source'] = True
    cfg['target_set_size'] = 50000

    # Analysis of the impact of using different numbers of steps for training a test model
    # LD MNIST 100 base examples with this and also LD E to M.
    cfg['num_steps_analysis'] = True
    mnist_then_emnist_to_mnist()
    cfg['num_steps_analysis'] = False
    

def ld_analysis_create_experiment(cfg):
    """Save a single config with 100 base examples and inner batch size 50.

    Sets ``expname`` from ``get_experiment_name`` (method ``"or2"``), prints
    it and saves the config. ``cfg`` is modified in place.
    """
    for key, value in (('num_base_examples', 100), ('inner_batch_size', 50)):
        cfg[key] = value

    generated_name = get_experiment_name(cfg, method="or2")  # choose or2 or rr
    cfg['expname'] = generated_name
    print(generated_name)
    save_cfg(cfg)


def ld_arch_analysis_create_experiments_2(cfg):
    """Generate the ``test_various_models`` configs.

    With ``test_various_models`` switched on, generates (via
    ``ld_arch_analysis_create_experiment``) LD CIFAR-10, the CIFAR-10
    baseline, and LD CUB -> CIFAR-10 with an unbalanced source.

    ``cfg`` is modified in place; ``baseline``, ``balanced_source`` and
    ``test_various_models`` end up as False / True / False.
    """
    shared_settings = (
        ('test_various_models', False),
        ('baseline', False),
        ('epochs', 400),
        ('balanced_source', True),
        ('resnet', False),
        ('inner_lr', 0.01),
        ('random_seed', 1234),
        ('num_steps_analysis', False),
        ('batch_size', 1024),
    )
    for key, value in shared_settings:
        cfg[key] = value

    cfg['test_various_models'] = True

    # (baseline, balanced_source, source, target)
    # for cross-dataset cases, the source does not need to be balanced
    experiment_table = (
        (False, True, "cifar10", "cifar10"),   # LD CIFAR-10
        (True, True, "cifar10", "cifar10"),    # CIFAR-10 Baseline
        (False, False, "cub", "cifar10"),      # LD B to C
    )
    for is_baseline, is_balanced, source_name, target_name in experiment_table:
        cfg['baseline'] = is_baseline
        cfg['balanced_source'] = is_balanced
        cfg['source'] = source_name
        cfg['target'] = target_name
        ld_arch_analysis_create_experiment(cfg)

    cfg['balanced_source'] = True
    cfg['test_various_models'] = False


def ld_arch_analysis_create_experiment(cfg):
    """Save one config per base-example count for the current ``cfg`` setting.

    For each count in 10, 20, 50, 100, 200, 500 this sets
    ``num_base_examples`` and the paired ``inner_batch_size``, derives
    ``expname`` with method ``"or2"``, prints it and saves the config.
    ``epochs`` is left as the caller set it. ``cfg`` is modified in place.
    """
    # Inner batch size per base-example count; larger counts use 50.
    inner_batch_for = {10: 10, 20: 10, 50: 25, 100: 50}

    for base_examples in (10, 20, 50, 100, 200, 500):
        cfg['num_base_examples'] = base_examples
        cfg['inner_batch_size'] = inner_batch_for.get(base_examples, 50)

        generated_name = get_experiment_name(cfg, method="or2")  # choose or2 or rr
        cfg['expname'] = generated_name
        print(generated_name)
        save_cfg(cfg)


In [43]:
# Generate and save the analysis configs (random seeds, target set sizes, number of steps).
# NOTE(review): the recorded output below lists "_rr_" names, while
# ld_analysis_create_experiment currently passes method="or2" - the output
# looks stale; re-run this cell to refresh it.
ld_analysis_create_experiments_2(cfg)

ld_100be_50ib_mn_to_mn_bl_cnn_400e_rr_0s_v0
ld_100be_50ib_emn_to_mn_ubl_cnn_400e_rr_0s_v0
ld_100be_50ib_mn_to_mn_bl_cnn_400e_rr_465s_v0
ld_100be_50ib_emn_to_mn_ubl_cnn_400e_rr_465s_v0
ld_100be_50ib_mn_to_mn_bl_cnn_400e_rr_1234s_v1
ld_100be_50ib_emn_to_mn_ubl_cnn_400e_rr_1234s_v1
ld_100be_50ib_mn_to_mn_bl_cnn_400e_rr_5439s_v0
ld_100be_50ib_emn_to_mn_ubl_cnn_400e_rr_5439s_v0
ld_100be_50ib_mn_to_mn_bl_cnn_400e_rr_89432s_v0
ld_100be_50ib_emn_to_mn_ubl_cnn_400e_rr_89432s_v0
ld_100be_50ib_100ts_emn_to_mn_ubl_cnn_400e_rr_1234s_v0
ld_100be_50ib_500ts_emn_to_mn_ubl_cnn_400e_rr_1234s_v0
ld_100be_50ib_1000ts_emn_to_mn_ubl_cnn_400e_rr_1234s_v0
ld_100be_50ib_5000ts_emn_to_mn_ubl_cnn_400e_rr_1234s_v0
ld_100be_50ib_10000ts_emn_to_mn_ubl_cnn_400e_rr_1234s_v0
ld_100be_50ib_20000ts_emn_to_mn_ubl_cnn_400e_rr_1234s_v0
ld_100be_50ib_mn_to_mn_bl_cnn_400e_nsa_rr_1234s_v0
ld_100be_50ib_emn_to_mn_ubl_cnn_400e_nsa_rr_1234s_v0


In [58]:
# Generate and save the test_various_models ("_tvm") configs; each generated name is printed.
ld_arch_analysis_create_experiments_2(cfg)

ld_10be_10ib_c10_to_c10_bl_cnn_400e_tvm_or2_1234s_v0
ld_20be_10ib_c10_to_c10_bl_cnn_400e_tvm_or2_1234s_v0
ld_50be_25ib_c10_to_c10_bl_cnn_400e_tvm_or2_1234s_v0
ld_100be_50ib_c10_to_c10_bl_cnn_400e_tvm_or2_1234s_v0
ld_200be_50ib_c10_to_c10_bl_cnn_400e_tvm_or2_1234s_v0
ld_500be_50ib_c10_to_c10_bl_cnn_400e_tvm_or2_1234s_v0
ld_10be_10ib_baseline_c10_to_c10_bl_cnn_tvm_or2_1234s_v0
ld_20be_10ib_baseline_c10_to_c10_bl_cnn_tvm_or2_1234s_v0
ld_50be_25ib_baseline_c10_to_c10_bl_cnn_tvm_or2_1234s_v0
ld_100be_50ib_baseline_c10_to_c10_bl_cnn_tvm_or2_1234s_v0
ld_200be_50ib_baseline_c10_to_c10_bl_cnn_tvm_or2_1234s_v0
ld_500be_50ib_baseline_c10_to_c10_bl_cnn_tvm_or2_1234s_v0
ld_10be_10ib_cub_to_c10_ubl_cnn_400e_tvm_or2_1234s_v0
ld_20be_10ib_cub_to_c10_ubl_cnn_400e_tvm_or2_1234s_v0
ld_50be_25ib_cub_to_c10_ubl_cnn_400e_tvm_or2_1234s_v0
ld_100be_50ib_cub_to_c10_ubl_cnn_400e_tvm_or2_1234s_v0
ld_200be_50ib_cub_to_c10_ubl_cnn_400e_tvm_or2_1234s_v0
ld_500be_50ib_cub_to_c10_ubl_cnn_400e_tvm_or2_1234s_v0


# Dataset distillation
Change the name for this first

In [38]:
def dd_create_experiments_2(cfg):
    """Generate the dataset distillation (``dd``) configs for MNIST and CIFAR-10.

    Each dataset is swept over 10, 20, 50 and 100 base examples by
    ``dd_create_experiment``. ``cfg`` is modified in place.
    """
    shared_settings = (
        ('test_various_models', False),
        ('baseline', False),
        ('epochs', 400),
        ('balanced_source', True),
        ('resnet', False),
        ('inner_lr', 0.01),
        ('random_seed', 1234),
        ('num_steps_analysis', False),
        ('batch_size', 1024),
    )
    for key, value in shared_settings:
        cfg[key] = value

    # DD MNIST, then DD CIFAR-10 - 10, 20, 50 and 100 base examples each
    for dataset_name in ("mnist", "cifar10"):
        cfg['source'] = dataset_name
        cfg['target'] = dataset_name
        dd_create_experiment(cfg)

def dd_create_experiment(cfg):
    """Save one ``dd`` config per base-example count for the current ``cfg`` setting.

    For each count in 10, 20, 50, 100 this sets ``num_base_examples`` and the
    paired ``inner_batch_size``, derives ``expname`` with method ``"or2"``
    and ``ld=False``, prints it and saves the config. ``cfg`` is modified in
    place.
    """
    # Inner batch size per base-example count; any other count would use 50.
    inner_batch_for = {10: 10, 20: 10, 50: 25, 100: 50}

    for base_examples in (10, 20, 50, 100):
        cfg['num_base_examples'] = base_examples
        cfg['inner_batch_size'] = inner_batch_for.get(base_examples, 50)

        generated_name = get_experiment_name(cfg, method="or2", ld=False)  # choose or2 or rr
        cfg['expname'] = generated_name
        print(generated_name)
        save_cfg(cfg)

In [39]:
# Generate and save the dataset distillation ("dd_") configs; each generated name is printed.
dd_create_experiments_2(cfg)

dd_10be_10ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
dd_20be_10ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
dd_50be_25ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
dd_100be_50ib_mn_to_mn_bl_cnn_400e_or2_1234s_v0
dd_10be_10ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
dd_20be_10ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
dd_50be_25ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
dd_100be_50ib_c10_to_c10_bl_cnn_400e_or2_1234s_v0
