In [None]:
import os
import subprocess as sp
import json
import glob
from bayes_opt import BayesianOptimization

In [None]:
# Pipeline settings, read from the process environment as plain strings.
# Lookups happen in the order listed; a missing variable raises KeyError.
(
    EXPERIMENT,
    ARCH_NAME,
    SEQGEN_NAME,
    SPLITS_FILE_PATH,
    PEAKS,
    LEARNING_RATE,
    COUNTS_LOSS_WEIGHT,
    EPOCHS,
    REFERENCE_DIR,
    TUNING_DIR,
    CHROMS,
) = (
    os.environ[variable]
    for variable in (
        "EXPERIMENT",
        "ARCH_NAME",
        "SEQGEN_NAME",
        "SPLITS_FILE_PATH",
        "PEAKS",
        "LEARNING_RATE",
        "COUNTS_LOSS_WEIGHT",
        "EPOCHS",
        "REFERENCE_DIR",
        "TUNING_DIR",
        "CHROMS",
    )
)


In [None]:
def train_model(learning_rate,counts_loss_weight):
    """Run the external ``train`` command once for one hyper-parameter pair.

    Parameters
    ----------
    learning_rate, counts_loss_weight
        Forwarded (through ``str()``) to ``--learning-rate`` and
        ``--counts-loss-weight``. The same string forms are embedded in the
        ``--model-output-filename`` value so the run can be located later.

    Returns
    -------
    tuple
        The ``(stdout, stderr)`` pair from ``Popen.communicate()``. Only
        stderr is piped, so the first item is ``None`` and the second holds
        the captured stderr bytes. The exit status is not checked here.
    """
    import shlex  # local import: only needed to tokenize CHROMS below

    lr_text = str(learning_rate)
    cw_text = str(counts_loss_weight)
    model_name = EXPERIMENT + "_lr_" + lr_text + "_cw_" + cw_text

    argv = [
        "train",
        "--input-data", EXPERIMENT + ".json",
        "--stranded",
        "--output-dir", TUNING_DIR,
        "--reference-genome", REFERENCE_DIR + "/genome.fa",
        "--chrom-sizes", REFERENCE_DIR + "/chrom.sizes",
        # The previous shell invocation word-split CHROMS into separate
        # arguments; shlex.split keeps that expansion without a shell.
        "--chroms", *shlex.split(CHROMS),
        "--shuffle",
        # NOTE(review): fixed at 5 epochs; get_model_loss reads history key
        # "4", which looks like the matching zero-based index -- confirm
        # before wiring this to the EPOCHS setting.
        "--epochs", "5",
        "--splits", SPLITS_FILE_PATH,
        "--model-arch-name", ARCH_NAME,
        "--sequence-generator-name", SEQGEN_NAME,
        "--model-output-filename", model_name,
        "--input-seq-len", "2114",
        "--output-len", "1000",
        "--filters", "64",
        "--threads", "2",
        "--learning-rate", lr_text,
        "--counts-loss-weight", cw_text,
    ]

    # Pass the argument vector directly (no shell) so values containing
    # spaces or shell metacharacters reach ``train`` unmodified.
    proc = sp.Popen(argv, stderr=sp.PIPE)
    return proc.communicate()
    
def get_model_loss(history_file, epoch="4", counts_weight=100):
    """Return the negated combined validation loss stored in a history file.

    Parameters
    ----------
    history_file : str
        Path to a JSON file containing the mappings
        ``val_profile_predictions_loss`` and ``val_logcount_predictions_loss``,
        each keyed by epoch.
    epoch : str or int, optional
        Epoch key to read; converted with ``str()``. Defaults to ``"4"``.
    counts_weight : number, optional
        Multiplier applied to the log-count loss. Defaults to ``100``.

    Returns
    -------
    float
        ``-(profile_loss + counts_weight * logcount_loss)``. The sign is
        flipped so that a larger return value means a lower loss.

    Raises
    ------
    KeyError
        If either loss mapping or the requested epoch key is missing.
    """
    # Context manager closes the file even if JSON parsing fails.
    with open(history_file, 'r') as handle:
        history = json.load(handle)

    epoch_key = str(epoch)
    profile_loss = history['val_profile_predictions_loss'][epoch_key]
    logcount_loss = history['val_logcount_predictions_loss'][epoch_key]
    return -(profile_loss + (counts_weight * logcount_loss))

def train_model_and_return_model_loss(learning_rate,counts_loss_weight):
    """Train one model and return its score for the optimizer.

    Calls ``train_model`` for the given pair, finds the resulting
    ``*.history.json`` file under ``TUNING_DIR`` and scores it with
    ``get_model_loss``. The model name and the score are printed.

    Returns
    -------
    float
        The value returned by ``get_model_loss`` (a negated loss, so larger
        is better).

    Raises
    ------
    FileNotFoundError
        If no history file exists for this run. The message includes the
        stderr captured from the training command.
    """
    model_name = EXPERIMENT + f'_lr_{str(learning_rate)}_cw_{str(counts_loss_weight)}'
    train_result = train_model(learning_rate, counts_loss_weight)

    # Escape the literal prefix so glob metacharacters in TUNING_DIR or the
    # experiment name are not treated as wildcards.
    pattern = glob.escape(TUNING_DIR + "/" + model_name) + "*.history.json"
    matches = glob.glob(pattern)
    if not matches:
        # train_model returns (stdout, stderr); show stderr to explain why
        # the run produced no history file.
        captured = train_result[1] if train_result else None
        stderr_text = captured.decode(errors="replace") if captured else ""
        raise FileNotFoundError(
            f"No history file matches {pattern!r}; the training run probably "
            f"failed. Captured stderr:\n{stderr_text}"
        )

    loss = get_model_loss(matches[0])
    print(model_name)
    print(loss)
    return loss

In [None]:
# Bounded region of parameter space searched by the optimizer.
pbounds = {'learning_rate': (0.00001, 0.01), 'counts_loss_weight': (10, 10000)}

optimizer = BayesianOptimization(
    f=train_model_and_return_model_loss,
    pbounds=pbounds,
    random_state=1,  # fixed seed so the sequence of sampled points is repeatable
)

# Queue the configured starting point. LEARNING_RATE and COUNTS_LOSS_WEIGHT
# come from the environment as strings, so convert them to floats before
# handing them to the optimizer, which works on numeric parameters.
# Note: the model filename for this run uses str() of the float values.
optimizer.probe(
    params={
        "learning_rate": float(LEARNING_RATE),
        "counts_loss_weight": float(COUNTS_LOSS_WEIGHT),
    },
    lazy=True,
)

optimizer.maximize(
    init_points=5,
    n_iter=30,
)

# Best observation found; the counts loss weight is truncated to an integer
# before being written out.
params_dict = optimizer.max
params_dict['params']['counts_loss_weight'] = int(params_dict['params']['counts_loss_weight'])

# Persist the winning parameters as tab-indented JSON in the working directory.
with open("tuning_output.json", "w") as outfile:
    json.dump(params_dict['params'], outfile, indent='\t')



In [None]:
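# Legacy grid-search code, superseded by the Bayesian optimization cell above.
# It is kept commented out for reference only and is not executed.
# for learning_rate in LEARNING_RATE: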
#     for counts_loss_weight in COUNTS_LOSS_WEIGHT:
#         train_model(learning_rate,counts_loss_weight)

# models_losses = []
# for history_file in glob.glob("TUNING_DIR+*history*"):
#     data = json.load(open(history_file, 'r'))
#     loss=data['val_loss']['9']
#     lr = os.path.basename(history_file).split("lr_")[1].split("_cw_")[0]
#     cw = os.path.basename(history_file).split("lr_")[1].split("_cw_")[1].split(".")[0]
#     models_losses.append({'learning_rate':lr,'counts_loss_weight':cw,'loss':loss})

#params_dict = sorted(models_losses, key=lambda k: k['loss'])[0]

# params_dict = {'learning_rate': '0.0004', 'counts_loss_weight': '100', 'loss': 155.077495575}
# write python dictionary to json file



# loss = train_model_and_return_model_loss(LEARNING_RATE,COUNTS_LOSS_WEIGHT)

# params={"learning_rate": LEARNING_RATE, "counts_loss_weight": COUNTS_LOSS_WEIGHT, "loss":loss}

    
# with open("tuning_output.json", "w") as outfile:  
#     json.dump(params, outfile, indent='\t')

In [None]:
# comm = "paste -s -d ' ' /mnt/lab_data3/zahoor/pipeline_outputs/ENCSR000EBS_tuning/ENCSR000EBS_lr_0.00004_cw_1000/reference/chroms.txt | tr -d '\n'"

# proc = sp.Popen(comm,stderr=sp.PIPE,stdout=sp.PIPE,shell=True)
# proc.communicate()

