In [None]:
from vime_train import encodertrain, MLPperf, encoderperf, mixencodertrain, combinedencodertrain
from vime_utils import preloader, dia_preloader, aucplot, mask_generator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

In [None]:
# show tensorboard
# %reload_ext (rather than %load_ext) re-imports the extension, so this cell can be
# executed repeatedly in the same kernel.
# The dashboard reads event files from the relative directory MixEncoder_logs/
# NOTE(review): presumably written by mixencodertrain — confirm against vime_train.
%reload_ext tensorboard
%tensorboard --logdir=MixEncoder_logs/

In [None]:
# load dataset
# Both loaders come from vime_utils and read CSV files relative to the working directory.
# The label dtype is printed after each load as a quick sanity check.

# N0 dataset (not referenced by the other cells visible in this notebook)
N0data = preloader("N0_data.csv")
print(N0data.labels.dtype)

# diabetes dataset: the object passed to every training / evaluation call in this notebook
diadata = dia_preloader("diabetes_data.csv")
print(diadata.labels.dtype)

In [None]:
# Experiment grid shared by all evaluation cells.

# One training run is performed per seed; a single-element list is enough
# if only one run should be analysed.
seeds = [
    2127, 10291, 61691, 912811, 44444,
    7562, 5678910, 192927, 58517, 5607,
]

# Values handed to the `trainsize` argument of the evaluation functions.
# Current values are the diabetes ("dia") settings;
# N0 alternative: [0.25, 0.375, 0.5, 0.625, 0.75]
sizes = [0.1, 0.2625, 0.425, 0.5875, 0.75]

# Collector that the "append to results" cells fill, one entry per model.
results = list()

In [None]:
# train MLP
# Evaluates MLPperf once per (train size, seed) pair on `diadata`.
# Result layout: MLP_results[size_index][seed_index] holds the first value
# returned by MLPperf; the second return value is discarded.

# variable size, seed, and L2
MLP_steps = 4000
MLP_batch_size = 200
# One weight-decay value per entry of `sizes` (diabetes settings).
# N0 settings: [0.06, 0.058, 0.056, 0.054, 0.04]
MLP_weight_decay = [0.063, 0.062, 0.061, 0.03, 0]

# Validate before any training starts: with index-based access a list that is
# too short only fails after the earlier sizes have already been trained, and
# one that is too long is silently ignored.
if len(MLP_weight_decay) != len(sizes):
    raise ValueError(
        f"MLP_weight_decay has {len(MLP_weight_decay)} entries but sizes has {len(sizes)}"
    )

# Epoch count per train size, chosen so that epochs * (x * 800) stays close to
# MLP_steps * MLP_batch_size for every x.
# NOTE(review): 800 is presumably the number of rows in the diabetes dataset —
# confirm and update it when switching datasets.
MLP_epochs = [int((MLP_steps*MLP_batch_size)/(x*800)) for x in sizes]

MLP_results = []
for trainsize, n_epochs, l2 in zip(sizes, MLP_epochs, MLP_weight_decay):
    MLP_sizeresults = []
    for seed in seeds:
        MLP_seedresults, _ = MLPperf(
            diadata,
            kfold=False,
            epochs=n_epochs,
            lr=0.0001,
            seed=seed,
            batch_size=MLP_batch_size,
            trainsize=trainsize,
            weight_decay=l2,
        )
        MLP_sizeresults.append(MLP_seedresults)
    MLP_results.append(MLP_sizeresults)

In [None]:
# append to results
# Guarded by object identity so that executing this cell more than once does not
# add the same MLP_results list to `results` again.
# The guard does not cover a re-run of the training cell: that builds a new list,
# so reset `results` in the setup cell before collecting again.
if not any(entry is MLP_results for entry in results):
    results.append(MLP_results)

In [None]:
# Plot everything collected in `results` so far against `sizes`
# (covers all sizes and seeds used above); the figure is titled "MLP".
aucplot(
    results,
    sizes,
    title="MLP",
    save=True,
)

In [None]:
# Train the encoder on the diabetes data (single split, no k-fold).
encodertrain(
    diadata,
    kfold=False,
    epochs=7000,
    lr=0.0001,
    alpha=3.0,
    p_m=0.2,
    batch_size=200,
    trainsize=0.9,
    valsize=0.1,
)

In [None]:
# Train the mix encoder on the diabetes data (single split, no k-fold).
mixencodertrain(
    diadata,
    kfold=False,
    epochs=50000,
    lr=0.0001,
    folds=2,
    batch_size=200,
    trainsize=0.9,
    valsize=0.1,
)

In [None]:
# train VIME encoder-MLP
# Evaluates encoderperf (encoder_type="VIME") once per (train size, seed) pair.
# Result layout: VIME_results[size_index][seed_index] holds the first value
# returned by encoderperf; the second return value is discarded.

# Placeholder: replace with the checkpoint of the trained VIME encoder before running.
VIME_checkpoint = "add your VIME checkpoint here"

# variable size and seed
VIME_steps = 4000 # 12000
VIME_batch_size = 200
# One weight-decay value per entry of `sizes` (diabetes settings).
# N0 settings: [0.038, 0.028, 0.028, 0.026, 0.02]
VIME_weight_decay = [0.05, 0.03, 0.02, 0.005, 0.005]

# Validate before any training starts: with index-based access a list that is
# too short only fails after the earlier sizes have already been trained, and
# one that is too long is silently ignored.
if len(VIME_weight_decay) != len(sizes):
    raise ValueError(
        f"VIME_weight_decay has {len(VIME_weight_decay)} entries but sizes has {len(sizes)}"
    )

# Epoch count per train size, chosen so that epochs * (x * 800) stays close to
# VIME_steps * VIME_batch_size for every x.
# NOTE(review): 800 is presumably the number of rows in the diabetes dataset —
# confirm and update it when switching datasets.
VIME_epochs = [int((VIME_steps*VIME_batch_size)/(x*800)) for x in sizes]

VIME_results = []
for trainsize, n_epochs, l2 in zip(sizes, VIME_epochs, VIME_weight_decay):
    VIME_sizeresults = []
    for seed in seeds:
        VIME_seedresults, _ = encoderperf(
            diadata,
            kfold=False,
            checkpoint=VIME_checkpoint,
            epochs=n_epochs,
            lr=0.0001,
            seed=seed,
            batch_size=VIME_batch_size,
            trainsize=trainsize,
            weight_decay=l2,
            en_weight_decay=0.01,
            encoder_type="VIME",
        )
        VIME_sizeresults.append(VIME_seedresults)
    VIME_results.append(VIME_sizeresults)

In [None]:
# append to results
# Guarded by object identity so that executing this cell more than once does not
# add the same VIME_results list to `results` again.
# The guard does not cover a re-run of the training cell: that builds a new list,
# so reset `results` in the setup cell before collecting again.
if not any(entry is VIME_results for entry in results):
    results.append(VIME_results)

In [None]:
# Plot everything collected in `results` so far against `sizes`
# (covers all sizes and seeds used above); the figure is titled "VIME Encoder".
aucplot(
    results,
    sizes,
    title="VIME Encoder",
    save=True,
)

In [None]:
# train Mix encoder-MLP
# Evaluates encoderperf (encoder_type="mix") once per (train size, seed) pair.
# Result layout: MIX_results[size_index][seed_index] holds the first value
# returned by encoderperf; the second return value is discarded.

# Placeholder: replace with the checkpoint of the trained mix encoder before running.
MIX_checkpoint = "add your MIX checkpoint here"

# variable size and seed
MIX_steps = 1500 # 12000
MIX_batch_size = 200
# One weight-decay value per entry of `sizes`.
# Alternatives kept from the original cell:
#[0.08, 0.06, 0.05, 0.035, 0.02] #dia #[0.06, 0.059, 0.058, 0.057, 0.055] #N0
MIX_weight_decay = [0.08, 0.06, 0.01, 0.005, 0.005]

# Validate before any training starts: with index-based access a list that is
# too short only fails after the earlier sizes have already been trained, and
# one that is too long is silently ignored.
if len(MIX_weight_decay) != len(sizes):
    raise ValueError(
        f"MIX_weight_decay has {len(MIX_weight_decay)} entries but sizes has {len(sizes)}"
    )

# Epoch count per train size, chosen so that epochs * (x * 800) stays close to
# MIX_steps * MIX_batch_size for every x.
# NOTE(review): 800 is presumably the number of rows in the diabetes dataset —
# confirm and update it when switching datasets.
MIX_epochs = [int((MIX_steps*MIX_batch_size)/(x*800)) for x in sizes]

MIX_results = []
for trainsize, n_epochs, l2 in zip(sizes, MIX_epochs, MIX_weight_decay):
    MIX_sizeresults = []
    for seed in seeds:
        MIX_seedresults, _ = encoderperf(
            diadata,
            kfold=False,
            checkpoint=MIX_checkpoint,
            epochs=n_epochs,
            lr=0.0001,
            seed=seed,
            batch_size=MIX_batch_size,
            trainsize=trainsize,
            weight_decay=l2,
            en_weight_decay=0.01,
            encoder_type="mix",
        )
        MIX_sizeresults.append(MIX_seedresults)
    MIX_results.append(MIX_sizeresults)

In [None]:
# append to results
# Guarded by object identity so that executing this cell more than once does not
# add the same MIX_results list to `results` again.
# The guard does not cover a re-run of the training cell: that builds a new list,
# so reset `results` in the setup cell before collecting again.
if not any(entry is MIX_results for entry in results):
    results.append(MIX_results)

In [None]:
# Plot everything collected in `results` so far against `sizes`;
# the figure is titled "Mix Encoder".
aucplot(
    results,
    sizes,
    title="Mix Encoder",
    save=True,
)

In [None]:
# train combined encoder
# Evaluates combinedencodertrain once per (train size, seed) pair, using two
# pretrained checkpoints.
# Result layout: COM_results[size_index][seed_index] holds the first value
# returned by combinedencodertrain; the second return value is discarded.

# Placeholders: replace with the trained VIME / mix encoder checkpoints before running.
checkpoint1 = "add your VIME checkpoint here"
checkpoint2 = "add your MIX checkpoint here"


# variable size and seed
COM_steps = 4000 #3000 # 12000
COM_batch_size = 200
# One value per entry of `sizes` in each list (diabetes settings).
# N0 settings for COM_weight_decay: [0.05, 0.049, 0.048, 0.047, 0.045]
COM_weight_decay = [0.005, 0.005, 0.005, 0.005, 0]
# N0 settings for COM_encoder_decay: [0.05, 0.045, 0.035, 0.025, 0.015]
COM_encoder_decay = [0, 0, 0, 0, 0]

# Validate before any training starts: with index-based access a list that is
# too short only fails after the earlier sizes have already been trained, and
# one that is too long is silently ignored.
if not (len(COM_weight_decay) == len(COM_encoder_decay) == len(sizes)):
    raise ValueError(
        f"COM_weight_decay ({len(COM_weight_decay)}) and COM_encoder_decay "
        f"({len(COM_encoder_decay)}) must each have one entry per size ({len(sizes)})"
    )

# Epoch count per train size, chosen so that epochs * (x * 800) stays close to
# COM_steps * COM_batch_size for every x.
# NOTE(review): 800 is presumably the number of rows in the diabetes dataset —
# confirm and update it when switching datasets.
COM_epochs = [int((COM_steps*COM_batch_size)/(x*800)) for x in sizes]

COM_results = []
for trainsize, n_epochs, l2, encoder_l2 in zip(sizes, COM_epochs, COM_weight_decay, COM_encoder_decay):
    COM_sizeresults = []
    for seed in seeds:
        COM_seedresults, _ = combinedencodertrain(
            diadata,
            checkpoint=checkpoint1,
            checkpoint2=checkpoint2,
            batch_size=COM_batch_size,
            lr=0.0001,
            epochs=n_epochs,
            trainsize=trainsize,
            seed=seed,
            weight_decay=l2,
            en_weight_decay=encoder_l2,
        )
        COM_sizeresults.append(COM_seedresults)
    COM_results.append(COM_sizeresults)

In [None]:
# append to results
# Guarded by object identity so that executing this cell more than once does not
# add the same COM_results list to `results` again.
# The guard does not cover a re-run of the training cell: that builds a new list,
# so reset `results` in the setup cell before collecting again.
if not any(entry is COM_results for entry in results):
    results.append(COM_results)

In [None]:
# Plot everything collected in `results` so far against `sizes`;
# the figure is titled "Combined Encoder".
aucplot(
    results,
    sizes,
    title="Combined Encoder",
    save=True,
)