# Imports


In [1]:
import os

# Remember the directory the notebook was launched from.
cwd = os.getcwd()

import sys
import time
import multiprocessing as mp

# Import the code for codon opt.
# The project directory is put on sys.path so that `optimizer` and
# `general_functions` can be imported as top-level modules.
sys.path.insert(1, "/grid/home/nbourgeois/codonOpt")
# NOTE(review): star imports hide where names such as `readFasta` come from;
# presumably it lives in general_functions — confirm and import it explicitly.
from optimizer import *
from general_functions import *

# Dask is used for interfacing with SGE (jobs are sent to a Dask scheduler).
from dask.distributed import Client
import dask

import sys
# Raise the interpreter recursion limit well above the default.
# NOTE(review): presumably required by the optimizer code (dask_wrapper sets the
# same limit on the workers) — confirm.
sys.setrecursionlimit(50000)


In [2]:
# Connect to a Dask scheduler at the given address.
# NOTE(review): 'tcp://IPADDRESS' looks like a redacted placeholder — replace it
# with the real scheduler address (ideally read from configuration) before running.
client = Client('tcp://IPADDRESS')

# Variables

In [5]:
# Root working directory; optimized sequences are written underneath it.
wdir = '/dummy/path'

# Location of the amino-acid FASTA input, relative to the working directory.
amino_acid_seq_locs = os.path.join(wdir, "aa_seq")


In [6]:
# Show what is currently in the working directory (sanity check before running).
os.listdir(wdir)

['TNF-alpha.pep.fas',
 '.ipynb_checkpoints',
 'vectors',
 'final.fa',
 'metrics.csv',
 'selected_metrics.csv',
 'selected_final.fa']

# Data

In [None]:
# Load the FASTA input: readFasta yields the record keys and their sequences.
aa_keys, aa_seqs = readFasta(amino_acid_seq_locs)
# Convert every sequence to a plain string before it is sent to the workers.
aa_seqs = list(map(str, aa_seqs))
input_data = aa_keys, aa_seqs

# Tissue type the optimization is run for.
tissues = 'Liver'

In [8]:
def dask_wrapper(parameters):
    """Run one codon optimization (intended for a Dask worker) and save it as FASTA.

    Parameters
    ----------
    parameters : dict
        Description of a single job. Required keys:

        * ``'wdir'``     - output root; the result goes to ``<wdir>/vectors/``.
        * ``'seqid'``    - identifier used as the prefix of the output name.
        * ``'opt_type'``, ``'tissues'``, ``'aa_seq'`` - optimizer settings.

        Optional keys ``'target'``, ``'target_range'``, ``'depth'`` and
        ``'cpg_thresh'`` are encoded into the output name when present.
        Every key except ``'wdir'`` and ``'seqid'`` is forwarded to
        ``optimizer.Optimizer`` as a keyword argument.  The dict itself is
        left unmodified, so the same job can be safely re-submitted.

    Returns
    -------
    The sequence returned by ``Optimizer.optimize()``.

    Raises
    ------
    KeyError
        If one of the required keys is missing.
    """
    # Local imports keep the function self-contained when it is shipped to a worker.
    import os
    import sys

    sys.setrecursionlimit(50000)

    # Make the project importable on the worker, without growing sys.path
    # by one entry for every task the worker executes.
    codon_opt_dir = "/grid/home/nbourgeois/codonOpt"
    if codon_opt_dir not in sys.path:
        sys.path.insert(1, codon_opt_dir)
    import optimizer
    import metrics

    wdir = parameters['wdir']
    seqid = parameters['seqid']
    otype = parameters['opt_type']
    # These two are only consumed by the optimizer, but are required up front.
    for required in ('tissues', 'aa_seq'):
        if required not in parameters:
            raise KeyError(required)

    # Book-keeping keys are not optimizer arguments; build a filtered copy
    # instead of deleting them from the caller's dict.
    opt_kwargs = {
        key: value
        for key, value in parameters.items()
        if key not in ('wdir', 'seqid')
    }

    # Add optimization specific parameters to the output name.
    if 'target' in parameters:
        otype = otype + str(parameters['target'])
    if 'target_range' in parameters:
        otype = otype + str(parameters['target_range'])
    if 'depth' in parameters:
        otype = otype + '_d' + str(parameters['depth'])

    # Create the output directory before the (potentially long) optimization so
    # that a missing or unwritable folder is reported early.
    out_dir = f'{wdir}/vectors'
    os.makedirs(out_dir, exist_ok=True)

    optimizer_obj = optimizer.Optimizer(**opt_kwargs)

    if 'depth' in parameters:
        # Depth is also set explicitly on the instance, as an integer.
        optimizer_obj.depth = int(parameters['depth'])
    seq = optimizer_obj.optimize()
    cpg_perc = round(1 - metrics.get_cpg(seq), 2)

    if 'cpg_thresh' in parameters:
        otype = 'cpg' + str(parameters['cpg_thresh']) + '_' + str(cpg_perc) + '_' + otype

    # Write out the optimized sequence.
    name = f'{seqid}_{otype}_bai'
    out_path = f'{out_dir}/{name}.fa'
    print(out_path)
    with open(out_path, 'w') as fileo:
        fileo.write(f'>{name}\n{seq}\n')

    # Return the optimized sequence.
    return seq

## Optimize 



In [10]:
# Values swept for the 'target' optimization mode.
depth_grid = (5, 6, 7, 8, 9)
target_grid = (0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9)

all_parameters = []

for seq_id, aa_seq in zip(*input_data):
    # One 'max' optimization per input sequence.
    parameters = dict(
        seqid=seq_id,
        wdir=wdir,
        aa_seq=aa_seq,
        opt_type='max',
        cpg_thresh=0,
        tissues=tissues,
    )
    all_parameters.append(parameters)

    # One 'target' optimization for every depth/target combination.
    for depth in depth_grid:
        for target in target_grid:
            parameters = dict(
                seqid=seq_id,
                wdir=wdir,
                aa_seq=aa_seq,
                opt_type='target',
                depth=depth,
                tissues=tissues,
                cpg_thresh=0,
                target=target,
            )
            all_parameters.append(parameters)

# Run

In [None]:
# Submit all optimizations: one dask_wrapper call per parameter dict.
# client.map returns one future per job without waiting for it to finish.
jobs = client.map(dask_wrapper,all_parameters)
# Block until every job has completed and collect the returned sequences
# (same order as all_parameters).
results = client.gather(jobs)