In [None]:
# automatically reload modules when their source changes
# %load_ext autoreload
# %autoreload 2

In [1]:
import numpy as np

In [2]:
from argparse import Namespace
import os, sys
import pandas as pd
#import pickle
from ray import tune
import ConfigSpace as CS
from ray.tune.suggest.bohb import TuneBOHB
from ray.tune.schedulers import HyperBandForBOHB
import torch
import argparse # for ray distributed training


from genome_embeddings import data_viz
from genome_embeddings import evaluate
from genome_embeddings import models
from genome_embeddings import train_test
from genome_embeddings import util
from genome_embeddings import trainable # import before ray (?)
import ray

running memcache server
memcache server started


In [None]:
# Send everything written to sys.stdout / sys.stderr from here on into two log
# files in the current working directory ('file_out' and 'file_err').
# After this cell runs, print output and tracebacks go to those files instead
# of the streams that were installed before.
for stale_log in ('file_out', 'file_err'):
    # Drop the log left by a previous run. os.remove avoids spawning a shell
    # (`rm` is not available on every platform) and a log that does not exist
    # yet is not an error.
    try:
        os.remove(stale_log)
    except FileNotFoundError:
        pass

# The handles are intentionally left open: they ARE the process streams now.
sys.stdout = open('file_out', 'w')
sys.stderr = open('file_err', 'w')

In [3]:
# Alternative settings kept for reference (different filesystem prefix, one
# CPU) -- presumably for a remote machine; confirm before re-enabling:
# settings = Namespace(
#     DATA_FP = '/home/ndudek/projects/def-dprecup/ndudek/',
#     SAVE_FP = '/home/ndudek/projects/def-dprecup/ndudek/',
#     num_epochs = 2,
#     num_cpus=1)

# Run-level settings currently in use.
settings = Namespace(**{
    # Prefix for data files. Paths are built by plain string concatenation
    # (e.g. settings.DATA_FP+'genome_to_tax.npy'), so the trailing slash matters.
    "DATA_FP": '/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/data/',
    # Prefix for saved results (the Tune results directory is SAVE_FP+"TUNE_RESULT_DIR").
    "SAVE_FP": '/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/',
    # Forwarded to the trainable through the Tune config as "num_epochs".
    "num_epochs": 2,
    # CPUs requested per Tune trial (resources_per_trial["cpu"]).
    "num_cpus": 5,
})

In [4]:
# Experiment flags. Written as a dict so every entry can carry its own comment,
# then exposed with attribute access (flags.<name>) through a Namespace.
flags = Namespace(**{
    "KEGG_FP": '/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/kegg_dataset/',
    # which dataset to load: 'get_homologues' or 'kegg'
    "data_source": 'kegg',
    # train-test split: n * 100 = % of data that goes into the test set (0.1 -> 10%)
    "n_test": 0.1,
    "batch_size": 128,
    "lr": 1e-3,
    # number of folds for cross-validation
    "kfolds": 10,
    # print loss every n batches during training
    "print_every": 50,
    # probability over which the binarizer converts a value to 1
    "replacement_threshold": 0.5,
    # number of corrupted versions of each genome to produce
    "num_corruptions": 100,
    # fraction of genes to retain during the corruption process
    "corruption_fraction": 0.5,
    # training with only bacteria vs. also eukaryotes/archaea
    "phy_mode": "bacteria_only",
    # curriculum learning based on gene count (the key keeps its original
    # spelling because other code may read flags.cirriculum)
    "cirriculum": False,
    # drop features that occur fewer than this many times in the training dataset
    "rare_threshold": 10,
    # L2 regularization
    "weight_decay": 0.1,
})

In [None]:
# Progress marker: written once the import and configuration cells have run
# (it ends up in 'file_out' when stdout has been redirected).
print('done loading modules and setting Namespace variables')

### Data exploration + preprocessing 

In [None]:
# # First create genome representations (very slow)
# # Each genome is a list of KO's and/or KEGG modules
# if os.path.isfile(settings.DATA_FP+'genome_to_mod.csv'):
#     print("Genome representations already exist")
# else:
#     genome_rep.genome_kos(flags.KEGG_FP)
#     print("Must generate genome representations from scratch. This will take several hours.")

In [None]:
# Load the dataset for the configured source. df.index is later iterated as
# genome identifiers; cluster_names is presumably the list of feature names
# (TODO confirm against util.load_data).
df, cluster_names = util.load_data(
    settings.DATA_FP,
    flags.data_source,
)

# Derive the per-genome taxonomy lookup from df and persist it next to the data.
genome_to_tax = util.genome_to_tax(df)
np.save(
    file=settings.DATA_FP+'genome_to_tax.npy',
    arr=genome_to_tax,
)

In [None]:
#data_viz.tax_distrib(df, genome_to_tax)

In [None]:
#data_viz.module_stats(df)

In [None]:
#data_viz.genes_per_genome(df)

In [None]:
# genome_to_tax = util.genome_to_tax(df)

In [None]:
# train_orig = pd.read_csv(settings.DATA_FP+"uncorrupted_train_balanced.csv", index_col=0)

In [None]:
# train_genomes = list(train_orig.index)

In [None]:
# Split train-test sets in a phylogenetically balanced manner.
# The split is cached as two CSV files under settings.DATA_FP. The cached split
# is reused only when BOTH files are present; if either one is missing the
# split is regenerated and both files are rewritten (previously only the train
# file was checked, so a missing test file crashed the "already exists" branch).
train_split_fp = settings.DATA_FP+'uncorrupted_train_balanced.csv'
test_split_fp = settings.DATA_FP+'uncorrupted_test_balanced.csv'

if os.path.isfile(train_split_fp) and os.path.isfile(test_split_fp):
    print("Train-test split already exists, loading from file")
    train_orig = pd.read_csv(train_split_fp, index_col=0)
    test_orig = pd.read_csv(test_split_fp, index_col=0)

else:
    # Map each genome (row label of df) to a unique numerical ID, and build the
    # inverse lookup that util.balanced_split takes.
    genome_to_num = {genome: i for i, genome in enumerate(df.index)}
    num_to_genome = {v: k for k, v in genome_to_num.items()}

    print("Generating train-test split")
    train_orig, test_orig = util.balanced_split(df, flags.n_test, genome_to_tax,
                                                num_to_genome, settings.DATA_FP)
    train_orig.to_csv(train_split_fp)
    test_orig.to_csv(test_split_fp)

In [None]:
#data_viz.hist_prob_ko(train_orig)

In [None]:
# Restrict both splits with util.bacteria_only when flags.phy_mode asks for it.
# NOTE(review): there is no else branch, so for any other phy_mode the names
# unf_train_data / unf_test_data (used by the next cell) are never assigned.
if flags.phy_mode == "bacteria_only":
    # Training split: genome labels are the row index of the split.
    train_genomes = train_orig.index.to_list()
    unf_train_data, train_tax_dict = util.bacteria_only(
        train_orig, train_genomes, genome_to_tax)

    # Test split, handled the same way.
    test_genomes = test_orig.index.to_list()
    unf_test_data, test_tax_dict = util.bacteria_only(
        test_orig, test_genomes, genome_to_tax)

In [None]:
# Remove rare features from the train + test datasets.
# "Rare" = fewer than n occurrences in the training dataset, where n is the
# last argument: 1% of the number of training rows (unf_train_data.shape[0]*0.01).
# An earlier note gave 3432 genomes -> n = 34, and said the cutoff was nudged to
# <1.1% so that the number of remaining features is even (an even feature count
# being needed for the autoencoder layers to work out).
# NOTE(review): the code uses 0.01, not 0.011 -- confirm which is intended.
# NOTE(review): flags.rare_threshold is not used here.
# NOTE(review): cluster_names is both an input and reassigned from the result,
# so re-running this cell passes the previous result back in.
rare_cutoff = unf_train_data.shape[0]*0.01
train_data, test_data, cluster_names = util.remove_rare(
    unf_train_data,
    unf_test_data,
    cluster_names,
    rare_cutoff,
)

In [None]:
# Produce corrupted genomes
# Could eventually do re-sampling / extra-corrupting to have more examples of "rare" genome types
#    e.g.: those from underrepresented groups M00003
#
# Four cached tensors are expected under settings.DATA_FP. They are loaded only
# when ALL four exist; otherwise the corrupted datasets are regenerated
# (previously only the train file was checked, so a partially present cache
# crashed inside torch.load).
# Either way, train_data / test_data are overwritten by this cell.
corrupted_train_fp = settings.DATA_FP+"corrupted_train_07-17-20.pt"
corrupted_test_fp = settings.DATA_FP+"corrupted_test_07-17-20.pt"
genome_idx_train_fp = settings.DATA_FP+"genome_idx_train_07-17-20.pt"
genome_idx_test_fp = settings.DATA_FP+"genome_idx_test_07-17-20.pt"

cached_corruption_fps = (corrupted_train_fp, corrupted_test_fp,
                         genome_idx_train_fp, genome_idx_test_fp)

if all(os.path.isfile(fp) for fp in cached_corruption_fps):
    print("Corrupted genomes already exist")
    train_data = torch.load(corrupted_train_fp)
    test_data = torch.load(corrupted_test_fp)
    genome_idx_train = torch.load(genome_idx_train_fp)
    genome_idx_test = torch.load(genome_idx_test_fp)
else:
    print("Generating corrupted dataset from scratch with",flags.num_corruptions,"corrupted versions of each genome")
    # NOTE(review): presumably util.corrupt also writes its result under
    # settings.DATA_FP -- confirm the file names match the ones checked above.
    train_data, genome_idx_train = util.corrupt(train_data, flags.num_corruptions, flags.corruption_fraction,
                                                    cluster_names, "train", settings.DATA_FP)
    print("Finished training data, starting test")
    test_data, genome_idx_test = util.corrupt(test_data, flags.num_corruptions, flags.corruption_fraction,
                                                  cluster_names, "test", settings.DATA_FP)

In [None]:
# Push any buffered text out of both process streams (these are the
# 'file_out' / 'file_err' logs when the redirect cell has been run).
for _stream in (sys.stdout, sys.stderr):
    _stream.flush()

In [None]:
#import numpy as np # this is slooooow
#np.savetxt(settings.DATA_FP+"corrupted_train_1407.txt", train_data.numpy())
#np.savetxt(settings.DATA_FP+"corrupted_test_1407.txt", test_data.numpy())

In [None]:
# print(("There are %s genomes and %s features in the training dataset") % 
#       (train_data.shape[0],int(train_data.shape[1]/2)))

# print(("There are %s genomes and %s features in the test dataset") % 
#       (test_data.shape[0],int(test_data.shape[1]/2)))

### Define and train network

In [None]:
# print(train_data.shape[1])

In [None]:
# num_features = int(train_data.shape[1]/2) # Number of features in the entire dataset (train + test)

In [None]:
# # define the network
# model = models.AutoEncoder(num_features, 2)
# print(model)

In [None]:
# Flush stdout then stderr so everything written so far is on disk before the
# hyperparameter search is set up.
for _stream in (sys.stdout, sys.stderr):
    _stream.flush()

In [5]:
# Hyperparameter search space handed to BOHB.
config_space = CS.ConfigurationSpace()

search_dimensions = (
    # currently limited to 1 or 2 (3 and 4 are disabled)
    CS.CategoricalHyperparameter(name='nn_layers', choices=[1, 2]),
    # currently fixed at 32 (64, 128 and 256 are disabled)
    CS.CategoricalHyperparameter(name='batch_size', choices=[32]),
    # Optimizer = Adam -- LR less important
    CS.UniformFloatHyperparameter('lr', lower=1e-4, upper=1e-1, log=True),
    CS.UniformFloatHyperparameter('weight_decay', lower=1e-5, upper=1e-2, log=True),
)
for dimension in search_dimensions:
    config_space.add_hyperparameter(dimension)

# The search algorithm and the scheduler must optimise the same objective, so
# it is spelled out once and shared by both.
_bohb_objective = dict(metric='test_f1', mode='max')

algo = TuneBOHB(config_space, max_concurrent=4, **_bohb_objective)

bohb = HyperBandForBOHB(
    time_attr='training_iteration',
    max_t=100,
    reduction_factor=3,
    **_bohb_objective)

In [6]:
# Memory limits passed to ray.init (presumably in bytes -- confirm against the
# Ray documentation for the installed version).
_MIB = 1024 * 1024
memory = 2000 * _MIB
object_store_memory = 200 * _MIB
driver_object_store_memory = 100 * _MIB

# Shut down first, then initialise again with the limits above.
# NOTE(review): Ray is started with num_cpus=1 while each Tune trial requests
# settings.num_cpus CPUs in the tune.run cell -- confirm this is intended.
ray.shutdown()
ray.init(
    num_cpus=1,
    local_mode=True,
    memory=memory,
    object_store_memory=object_store_memory,
    driver_object_store_memory=driver_object_store_memory,
)

# redis_password = sys.argv[1]
# num_cpus = int(sys.argv[2])

# ray.init(address=os.environ["ip_head"], redis_password=redis_password)

{}

In [7]:
# Fixed (non-searched) values passed to every trial through tune.run(config=...);
# they come from the settings / flags Namespaces defined earlier in the notebook.
config = dict(
    num_workers=1,
    num_epochs=settings.num_epochs,
    kfolds=flags.kfolds,
    replacement_threshold=flags.replacement_threshold,
)

In [8]:
# num_epochs = 10

In [9]:
# Launch the hyperparameter search over trainable.train_AE.
trial_resources = {
    "cpu": settings.num_cpus,
    "gpu": 0,
}
tune_output_dir = settings.SAVE_FP+"TUNE_RESULT_DIR"
stop_criterion = trainable.EarlyStopping("test_f1")  # if search results aren't improving anymore

analysis = tune.run(
    trainable.train_AE,
    name="exp_1",
    config=config,
    search_alg=algo,
    scheduler=bohb,
    stop=stop_criterion,
    num_samples=10,  #BUMP UP TO 1000
    resources_per_trial=trial_resources,
    local_dir=tune_output_dir,
    verbose=2,
    )

#print("Best config is:", analysis.get_best_config(metric="test_f1"))

Trial name,status,loc,batch_size,weight_decay,nn_layers,lr
train_AE_491eb01e,RUNNING,,,,,
train_AE_491ede7c,PENDING,,,,,
train_AE_491f0140,PENDING,,,,,
train_AE_491f2ab2,PENDING,,,,,


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr
train_AE_491eb01e,RUNNING,,,,,
train_AE_491ede7c,RUNNING,,,,,
train_AE_491f0140,PENDING,,,,,
train_AE_491f2ab2,PENDING,,,,,


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PAUSED,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,RUNNING,,,,,,,
train_AE_491f2ab2,RUNNING,,,,,,,
train_AE_491f500a,PENDING,,,,,,,
train_AE_6167d786,PENDING,,,,,,,


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PAUSED,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PAUSED,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,RUNNING,,,,,,,
train_AE_491f500a,RUNNING,,,,,,,
train_AE_6167d786,PENDING,,,,,,,
train_AE_624eec16,PENDING,,,,,,,


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PAUSED,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PAUSED,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,RUNNING,,,,,,,
train_AE_491f500a,PAUSED,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,RUNNING,,,,,,,
train_AE_624eec16,PENDING,,,,,,,
train_AE_659075e8,PENDING,,,,,,,


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PAUSED,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PAUSED,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PAUSED,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PAUSED,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,RUNNING,,,,,,,
train_AE_624eec16,RUNNING,,,,,,,
train_AE_659075e8,PENDING,,,,,,,
train_AE_7628b12c,PENDING,,,,,,,


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PAUSED,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PAUSED,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PAUSED,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PAUSED,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,RUNNING,,,,,,,
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,RUNNING,,,,,,,
train_AE_7628b12c,PENDING,,,,,,,
train_AE_8a292b0c,PENDING,,,,,,,

Trial name,# failures,error file
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PAUSED,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PAUSED,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PAUSED,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PAUSED,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PAUSED,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,RUNNING,,,,,,,
train_AE_7628b12c,RUNNING,,,,,,,
train_AE_8a292b0c,PENDING,,,,,,,

Trial name,# failures,error file
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PAUSED,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PAUSED,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PAUSED,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PAUSED,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PAUSED,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PAUSED,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,RUNNING,,,,,,,
train_AE_8a292b0c,RUNNING,,,,,,,

Trial name,# failures,error file
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,RUNNING,,32.0,0.000429243,1.0,0.00717318,3.84208,1.0
train_AE_491ede7c,PENDING,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PENDING,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PENDING,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PENDING,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PENDING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,RUNNING,192.168.0.107:72087,32.0,0.000429243,1.0,0.00717318,37.5264,1.0
train_AE_491ede7c,RUNNING,,32.0,7.6929e-05,1.0,0.00526103,36.712,1.0
train_AE_491f0140,PENDING,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PENDING,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PENDING,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PENDING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,RUNNING,192.168.0.107:72087,32.0,0.000429243,1.0,0.00717318,65.4835,2.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,PENDING,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PENDING,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PENDING,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PENDING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,RUNNING,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,PENDING,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PENDING,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PENDING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,RUNNING,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,PENDING,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PENDING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,RUNNING,,32.0,0.00566997,1.0,0.0731945,27.8273,1.0
train_AE_6167d786,PENDING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,RUNNING,192.168.0.107:72087,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,PENDING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,ERROR,,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,RUNNING,,32.0,0.00079759,2.0,0.000497296,33.4056,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_491f500a,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f500a_5_batch_size=32,kfolds=10,lr=0.073194,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-31-43515h60f8/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,ERROR,,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,RUNNING,192.168.0.107:72087,32.0,0.00079759,2.0,0.000497296,13.4093,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,PENDING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_491f500a,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f500a_5_batch_size=32,kfolds=10,lr=0.073194,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-31-43515h60f8/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,1.96999,1.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,ERROR,,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,ERROR,,32.0,0.00079759,2.0,0.000497296,13.4093,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,RUNNING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_491f500a,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f500a_5_batch_size=32,kfolds=10,lr=0.073194,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-31-43515h60f8/error.txt"
train_AE_6167d786,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_6167d786_6_batch_size=32,kfolds=10,lr=0.0004973,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-32-11onciscfi/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,63.4746,2.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,ERROR,,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,ERROR,,32.0,0.00079759,2.0,0.000497296,13.4093,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,RUNNING,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,PENDING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_491f500a,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f500a_5_batch_size=32,kfolds=10,lr=0.073194,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-31-43515h60f8/error.txt"
train_AE_6167d786,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_6167d786_6_batch_size=32,kfolds=10,lr=0.0004973,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-32-11onciscfi/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,63.4746,2.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,ERROR,,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,ERROR,,32.0,0.00079759,2.0,0.000497296,13.4093,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,ERROR,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,RUNNING,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,PENDING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_491f500a,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f500a_5_batch_size=32,kfolds=10,lr=0.073194,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-31-43515h60f8/error.txt"
train_AE_6167d786,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_6167d786_6_batch_size=32,kfolds=10,lr=0.0004973,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-32-11onciscfi/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"
train_AE_659075e8,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_659075e8_8_batch_size=32,kfolds=10,lr=0.0026055,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-33-20a_pi4azi/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,63.4746,2.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,ERROR,,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,ERROR,,32.0,0.00079759,2.0,0.000497296,13.4093,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,ERROR,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,ERROR,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,RUNNING,,32.0,0.000130895,1.0,0.0403537,37.4428,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_491f500a,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f500a_5_batch_size=32,kfolds=10,lr=0.073194,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-31-43515h60f8/error.txt"
train_AE_6167d786,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_6167d786_6_batch_size=32,kfolds=10,lr=0.0004973,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-32-11onciscfi/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"
train_AE_659075e8,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_659075e8_8_batch_size=32,kfolds=10,lr=0.0026055,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-33-20a_pi4azi/error.txt"
train_AE_7628b12c,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_7628b12c_9_batch_size=32,kfolds=10,lr=0.015703,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-33-2764hfsck3/error.txt"


Trial name,status,loc,batch_size,weight_decay,nn_layers,lr,total time (s),iter
train_AE_491eb01e,PAUSED,,32.0,0.000429243,1.0,0.00717318,70.1596,3.0
train_AE_491ede7c,RUNNING,192.168.0.107:72087,32.0,7.6929e-05,1.0,0.00526103,63.4746,2.0
train_AE_491f0140,ERROR,,32.0,0.000135728,2.0,0.00201257,1.49926,1.0
train_AE_491f2ab2,ERROR,,32.0,0.00247233,1.0,0.0189697,5.45071,1.0
train_AE_491f500a,ERROR,,32.0,0.00566997,1.0,0.0731945,10.8028,1.0
train_AE_6167d786,ERROR,,32.0,0.00079759,2.0,0.000497296,13.4093,1.0
train_AE_624eec16,ERROR,,,,,,,
train_AE_659075e8,ERROR,,32.0,4.12061e-05,1.0,0.00260545,6.72662,1.0
train_AE_7628b12c,ERROR,,32.0,9.4749e-05,2.0,0.0157029,35.1312,1.0
train_AE_8a292b0c,RUNNING,192.168.0.107:72087,32.0,0.000130895,1.0,0.0403537,5.4411,1.0

Trial name,# failures,error file
train_AE_491f0140,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f0140_3_batch_size=32,kfolds=10,lr=0.0020126,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-31-37e0kuzr_q/error.txt"
train_AE_491f2ab2,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f2ab2_4_batch_size=32,kfolds=10,lr=0.01897,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-31-38bjjwo34c/error.txt"
train_AE_491f500a,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_491f500a_5_batch_size=32,kfolds=10,lr=0.073194,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-31-43515h60f8/error.txt"
train_AE_6167d786,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_6167d786_6_batch_size=32,kfolds=10,lr=0.0004973,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-32-11onciscfi/error.txt"
train_AE_624eec16,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_624eec16_7_batch_size=32,kfolds=10,lr=0.01129,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_dec_2020-07-24_10-32-45kyhu8ujy/error.txt"
train_AE_659075e8,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_659075e8_8_batch_size=32,kfolds=10,lr=0.0026055,nn_layers=1,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_d_2020-07-24_10-33-20a_pi4azi/error.txt"
train_AE_7628b12c,1,"/Users/natasha/Desktop/mcgill_postdoc/ncbi_genomes/genome_embeddings/TUNE_RESULT_DIR/exp_1/train_AE_7628b12c_9_batch_size=32,kfolds=10,lr=0.015703,nn_layers=2,num_epochs=2,num_workers=1,replacement_threshold=0.5,weight_de_2020-07-24_10-33-2764hfsck3/error.txt"


KeyboardInterrupt: 

In [None]:
# Report the hyperparameter configuration of the trial that Tune ranks best on
# the "test_f1" metric. The mode is passed explicitly because F1 is a score to
# maximise and newer Ray Tune releases require the mode to be specified here;
# on older releases "max" is already the default, so the result is unchanged.
print("Best config is:", analysis.get_best_config(metric="test_f1", mode="max"))

In [None]:
# Earlier variant, kept for reference: it only reported the dict-valued
# entries of train_vars.
# for i in train_vars:
#     if isinstance(train_vars[i], dict):
#         print("Best "+i+":", analysis.get_best_config(metric="test_f1")[i])

# Print the best value found for every hyperparameter in the search space.
# The winning config is looked up once, outside the loop: it does not depend
# on the hyperparameter being printed, so repeating the lookup per iteration
# was redundant work. mode="max" is explicit because F1 is maximised.
best_config = analysis.get_best_config(metric="test_f1", mode="max")
for hp_name in config_space:
    print("Best " + hp_name + ":", best_config[hp_name])

In [None]:
# analysis = tune.run(
#     trainable.train_AE, 
#     name="exp_1",
#     config=train_vars, 
#     verbose=2, 
#     resources_per_trial={
#             "cpu": 2,
#             "gpu": 0
#     },
#     num_samples=2,
#     scheduler=ASHAScheduler(metric="test_f1", mode="max", grace_period=1, time_attr="n_batch"),
#     local_dir=settings.SAVE_FP+"TUNE_RESULT_DIR"
#     )

# print("Best config is:", analysis.get_best_config(metric="test_f1"))

In [None]:
# analysis.dataframe()["logdir"][0]

In [None]:
# train the model
# train_losses, test_losses, train_f1_scores, test_f1_scores = train_test.train_model(loaders, 
#         model, settings.num_epochs, flags.print_every,
#         settings.SAVE_FP, flags.replacement_threshold, cluster_names, flags.cirriculum, train_data[:,:len(cluster_names)],
#         search_space)
#train_losses, test_losses, train_f1_scores, test_f1_scores = train_test.train_model(train_vars, hyperparams)

### Evaluate model performance

In [None]:
# # evaluate model performance
# perf_lc = data_viz.learning_curve(train_f1_scores, test_f1_scores, "performance", flags.cirriculum)

In [None]:
# # evaluate model optimization (train vs. test loss learning curve)
# optim_lc = data_viz.learning_curve(train_losses, test_losses, "optimization", flags.cirriculum)

In [None]:
# # first convert test_data from subset -> tensor, split corrupt vs target sets
# tensor_test_data = torch.tensor([i.numpy() for i in test_data]).float()
# corrupt_test_data = tensor_test_data[:,:num_features]
# target = tensor_test_data[:,num_features:].detach().numpy()

In [None]:
# # Generate probabilities for ROC curve
# model.eval()
# with torch.no_grad():
#     y_probas = model(corrupt_test_data) # predicted probabilities generated by model

In [None]:
# roc = data_viz.my_roc_curve(target, y_probas.numpy())

In [None]:
# util.log_results(roc, optim_lc, perf_lc, flags, model)

In [None]:
# # create embeddings for test set
# #uncorrupt_test_data = tensor_test_data[:,len(cluster_names):]
# #tensor_test_data = torch.tensor([i.numpy() for i in test_data]).float()
# embeddings = train_test.generate_embeddings(model, corrupt_test_data)

In [None]:
#data_viz.plot_tSNE(embeddings, test_data, num_to_genome, genome_to_tax, test_tax_dict)

In [None]:
# # tSNE for corrupted genomes passed through untrained model
# untrained_model = models.AutoEncoder(len(cluster_names))
# untr_embeddings = train_test.generate_embeddings(untrained_model, corrupt_test_data)

In [None]:
#data_viz.plot_tSNE(untr_embeddings, test_data, num_to_genome, genome_to_tax, test_tax_dict)
# data_viz.plot_tSNE(untr_embeddings, test_data, num_to_genome, genome_to_tax, genome_idx_test)

In [None]:
# # Evaluate model and compare against baselines
# # Get corrupted input set, target set, and predictions set (binarized to 1's and 0's)
# #corrupt_test_data = tensor_test_data[:,:len(cluster_names)]

# model.eval()
# with torch.no_grad():
#     pred = model.forward(corrupt_test_data).detach().numpy()
# b_pred = train_test.binarize(pred, flags.replacement_threshold)

In [None]:
# # Generate confusion matrix
# cm = evaluate.dom_confusion_matrix(b_pred, target, num_to_genome, genome_to_tax, test_tax_dict, genome_idx_test)

In [None]:
# util.log_results(roc, optim_lc, perf_lc, flags, model, cm)

In [None]:
# # Baseline 1: untrained DAE
# # Generate predictions using an untrained DAE model
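# model.eval()  # NOTE(review): the predictions below come from untrained_model -- presumably untrained_model.eval() was intended; confirm before re-enabling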
# with torch.no_grad():
#     untr_pred = untrained_model.forward(corrupt_test_data).detach().numpy()
# untr_b_preds = train_test.binarize(untr_pred, flags.replacement_threshold)

In [None]:
# # if os.path.isfile(settings.DATA_FP+"rand_b_pred.pt"):
# #     print("Loading random predictions from file")
# #     rand_b_pred = torch.load(settings.DATA_FP+"rand_b_pred.pt")
# # else: 
# #     # This is slow
# #     print("Generating random predictions, this will take a while (~30 min)")
# #     rand_b_pred = evaluate.generate_baseline(num_features, train_data, 
# #                                              corrupt_test_data, "base_random", cluster_names)
# #     torch.save(rand_b_pred, settings.DATA_FP+"rand_b_pred.pt")

# rand_b_pred = evaluate.generate_baseline(num_features, train_data, 
#                                          corrupt_test_data, "base_random", cluster_names)

In [None]:
# torch.save(rand_b_pred, settings.DATA_FP+"rand_b_pred.pt")

In [None]:
# # if os.path.isfile(settings.DATA_FP+"smart_b_pred.pt"):
# #     print("Loading smart random predictions from file")
# #     smart_b_pred = torch.load(settings.DATA_FP+"smart_b_pred.pt")
# # else:
# #     print("Generating smart random predictions, this will take a while (~30 min)")
# #     smart_b_pred = evaluate.generate_baseline(num_features, train_data, 
# #                                           corrupt_test_data, "smart_random", cluster_names)
# #     torch.save(smart_b_pred, settings.DATA_FP+"smart_b_pred.pt")

# smart_b_pred = evaluate.generate_baseline(num_features, train_data, 
#                                       corrupt_test_data, "smart_random", cluster_names)

In [None]:
# torch.save(smart_b_pred, settings.DATA_FP+"smart_b_pred.pt")

In [None]:
# import numpy as np
# np.sum(smart_b_pred == rand_b_pred), np.sum(smart_b_pred != rand_b_pred)

In [None]:
# import pandas as pd
# hs = evaluate.hamming(target, b_pred)
# hs_stats = [round(sum(hs)/len(hs),2), round(min(hs),2), round(max(hs),2)]

# untr_hs = evaluate.hamming(target, untr_b_preds)
# untr_hs_stats = [round(sum(untr_hs)/len(untr_hs),2), round(min(untr_hs),2), round(max(untr_hs),2)]

# rand_hs = evaluate.hamming(target, rand_b_pred)
# rand_hs_stats = [round(sum(rand_hs)/len(rand_hs),2), round(min(rand_hs),2), round(max(rand_hs),2)]

# smart_hs = evaluate.hamming(target, smart_b_pred)
# smart_hs_stats = [round(sum(smart_hs)/len(smart_hs),2), round(min(smart_hs),2), round(max(smart_hs),2)]


# hamming_df = pd.DataFrame([hs_stats, untr_hs_stats, rand_hs_stats, smart_hs_stats], columns=['mean', 'min', 'max'], 
#                             index=["DAE trained", "DAE untrained", "Random chance", "Smart random chance"])
# hamming_df

In [None]:
# Flush both standard streams (stdout first, then stderr) so that any buffered
# output -- e.g. when they were redirected to files earlier in the notebook --
# is written out.
for stream in (sys.stdout, sys.stderr):
    stream.flush()