In [None]:
import pandas as pd
import numpy as np
import scipy
import scipy.stats
from keras.models import Sequential, load_model, Model 
from keras.layers import Dense, Activation, Flatten, Dropout, Input, BatchNormalization, Activation, add, MaxPooling1D, Cropping1D 
from keras.layers.convolutional import Conv1D
from keras.optimizers import Adam
#from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
#import hyperopt.fmin as hypfmin
import keras
import tensorflow as tf
import random
import os
import pickle
import re
from math import ceil
from Bio import SeqIO
import warnings 
import datetime
import keras.backend as kb
from sklearn.metrics import average_precision_score, precision_recall_curve
import pybedtools as bt
import h5py
from keras.layers.merge import concatenate
from keras.layers.core import Lambda

In [1]:
#get transcript sequences into one-hot encoded form 
def getInputMtxs_DNA(genomeFasta,m6ASites,TrxtsGenomeFlank):
    """One-hot encode a fixed-width genomic window around every site.

    Parameters
    ----------
    genomeFasta : str
        Path to a FASTA file. Only records whose name starts with 'chr'
        are kept.
    m6ASites : str
        Path to a tab-separated table. The first line is skipped and '#'
        comment lines are ignored. Column 0 is the sequence name, column 1
        the strand ('+' or anything else), columns 2 and 3 are coordinates
        whose integer midpoint becomes the window centre.
    TrxtsGenomeFlank : int
        Number of bases taken on each side of the midpoint; every window
        is therefore 2*TrxtsGenomeFlank+1 wide.

    Returns
    -------
    list of numpy.matrix
        One 4 x (2*TrxtsGenomeFlank+1) matrix per table row, in table order.
        '+' rows are encoded with the row order T,G,C,A; all other rows are
        encoded with the row order A,C,G,T and then mirrored left-to-right.
        Both conventions put the strand-sense base in rows T,G,C,A.
        A column stays all-zero when the base is not one of the upper-case
        letters A/C/G/T or when the window runs past the end of the sequence.

    Raises
    ------
    KeyError
        If a table row names a sequence that was not loaded from the FASTA.
    """
    # Read every chromosome into memory, then release the file handle.
    seqs={}
    with open(genomeFasta) as fastaHandle:
        for seq in SeqIO.parse(fastaHandle,'fasta'):
            if seq.name[:3]=='chr':
                seqs[seq.name]=seq
    trxts=pd.read_csv(m6ASites,sep='\t',comment='#',header=None,skiprows=1)
    # Re-centre each interval on its midpoint and give it a fixed half-width.
    avg=((trxts[2]+trxts[3])/2).astype('int32')
    trxts[2]=(avg-TrxtsGenomeFlank)
    trxts[3]=(avg+TrxtsGenomeFlank)
    negBases=dict(zip(['A','C','G','T'],range(4)))
    posBases=dict(zip(['T','G','C','A'],range(4)))
    trnMtxs=[]
    for r in range(len(trxts)):
        row=trxts.iloc[r,:]
        trxtW=row[3]-row[2]+1
        # The slice [start-1:end] treats the coordinates as 1-based inclusive.
        # NOTE(review): a start below 1 yields a negative slice index, which
        # Python interprets relative to the sequence end - confirm inputs
        # never sit that close to a chromosome start.
        trxtSeq=seqs[row[0]].seq[row[2]-1:row[3]]
        onPlus = row[1] == '+'
        baseRows = posBases if onPlus else negBases
        trnMtx = np.matrix(np.zeros((4,trxtW)))
        for bi in range(trxtW):
            try:
                trnMtx[baseRows[trxtSeq[bi]],bi] = 1
            except (IndexError, KeyError):
                # IndexError: window extends beyond the sequence end.
                # KeyError: base outside A/C/G/T (e.g. 'N' or lower-case).
                # Either way the column is deliberately left as zeros.
                continue
        if not onPlus:
            trnMtx = np.fliplr(trnMtx)
        trnMtxs.append(trnMtx)
    return trnMtxs

In [None]:
def randSubBlocks(InpBlocks,TgtBlocks):
    """Shuffle inputs and targets together so that pairs stay aligned.

    A single permutation, drawn from NumPy's global random state and sized
    by the number of input blocks, is applied positionally to both
    sequences. Returns the two reordered sequences as plain lists
    ``(inputs, targets)``.
    """
    inpSeries = pd.Series(InpBlocks)
    tgtSeries = pd.Series(TgtBlocks)
    # Exactly one RNG draw; reusing it for both series keeps pairs matched.
    order = np.random.permutation(len(inpSeries))
    return list(inpSeries.iloc[order]), list(tgtSeries.iloc[order])

In [None]:
def Mtxs_toH5Blocks(InpMtxs,TgtMtxs,CHUNK_SIZE,train_or_test):
    """Write inputs and targets to two HDF5 files in consecutive chunks.

    Creates (overwriting if present) 'Inp_<train_or_test>.h5' and
    'Tgt_<train_or_test>.h5' in the current working directory. Chunk ``i``
    holds items ``CHUNK_SIZE*i`` to ``CHUNK_SIZE*(i+1)`` and is stored under
    the dataset name ``str(i)`` in both files, cast to int8.

    Parameters
    ----------
    InpMtxs : sequence
        Input matrices; its length determines the number of chunks.
        Assumed to be equally-shaped 2-D matrices (rows x width), in which
        case each stored chunk has shape (items, width, rows) - TODO confirm
        against the caller.
    TgtMtxs : sequence
        Targets aligned with ``InpMtxs``; each chunk gets two extra
        trailing axes of length 1 before being stored.
    CHUNK_SIZE : int
        Maximum number of items per dataset.
    train_or_test : str
        Tag inserted into both file names.

    Both files are closed (and therefore flushed) before returning, even if
    writing a chunk raises.
    """
    inpPath='Inp_'+train_or_test+'.h5'
    tgtPath='Tgt_'+train_or_test+'.h5'
    num_CHUNKS=ceil(len(InpMtxs)/CHUNK_SIZE)
    with h5py.File(inpPath, 'w') as h5f_Inp, h5py.File(tgtPath, 'w') as h5f_Tgt:
        for i in range(num_CHUNKS):
            Ib=InpMtxs[CHUNK_SIZE*i:CHUNK_SIZE*(i+1)]
            Tb=TgtMtxs[CHUNK_SIZE*i:CHUNK_SIZE*(i+1)]
            # Stack the chunk and move the item axis first, swapping the two
            # per-matrix axes.
            h5f_Inp[str(i)]=np.swapaxes(np.stack(np.array(Ib),axis=1).T,0,1).astype('int8')
            h5f_Tgt[str(i)]=np.expand_dims(np.expand_dims(np.array(Tb),1),2).astype('int8')
    print (inpPath+'and'+tgtPath+'created')

In [None]:
#create multiple resnet layers in model 
def resnet(x,fltrNumb,fltrW,dilr,numbRns):
    """Append ``numbRns`` residual units to tensor ``x`` and return the result.

    Each unit runs its input through two successive
    BatchNormalization -> ReLU -> Conv1D stages (``fltrNumb`` filters,
    kernel width ``fltrW``, dilation ``dilr``, 'same' padding) and adds the
    result back onto the unit's input. With ``numbRns == 0`` the input is
    returned unchanged.
    """
    def _bn_relu_conv(tensor):
        # One pre-activation stage: normalise, activate, then convolve.
        normed = BatchNormalization()(tensor)
        activated = Activation('relu')(normed)
        return Conv1D(fltrNumb,kernel_size=(fltrW,),dilation_rate=dilr,padding='same')(activated)

    skip = x
    for _ in range(numbRns):
        residual = _bn_relu_conv(_bn_relu_conv(skip))
        skip = add([skip, residual])
    return skip

In [None]:
def make_parallel(model, gpu_count):
    """Wrap ``model`` so each batch is split across ``gpu_count`` GPUs.

    A copy of the model call is placed on every '/gpu:<i>' device, each fed
    a contiguous slice of the batch axis of every model input. The per-GPU
    outputs are concatenated along axis 0 on the CPU.

    Parameters
    ----------
    model : keras Model
        Model to replicate; its ``inputs`` become the inputs of the wrapper.
    gpu_count : int
        Number of GPU towers to build (must be >= 1).

    Returns
    -------
    keras Model
        Model with the same inputs as ``model`` and one merged output per
        original output.

    Raises
    ------
    ValueError
        If ``gpu_count`` is smaller than 1.
    """
    if gpu_count < 1:
        raise ValueError('gpu_count must be a positive integer, got %r' % (gpu_count,))

    def get_slice(data, idx, parts):
        # Cut part ``idx`` of ``parts`` out of the leading (batch) axis.
        shape = tf.shape(data)
        batch_size = shape[:1]
        step = batch_size // parts
        if idx == parts - 1:
            # The last tower also takes the remainder, so no sample is
            # dropped when the batch size is not divisible by ``parts``.
            size = batch_size - step * idx
        else:
            size = step
        size = tf.concat([size, shape[1:]], 0)
        # Only the batch axis gets a non-zero offset.
        stride = tf.concat([step, shape[1:]*0], 0)
        start = stride * idx

        return tf.slice(data, start, size)

    outputs_all = []
    for i in range(len(model.outputs)):
        outputs_all.append([])

    # Place a copy of the model on each GPU, each getting a slice of the batch
    for i in range(gpu_count):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('tower_%d' % i):

                inputs = []
                # Slice each input into a piece for processing on this GPU
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_n = Lambda(get_slice, output_shape=input_shape,
                                  arguments={'idx': i, 'parts': gpu_count})(x)
                    inputs.append(slice_n)

                outputs = model(inputs)

                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save all the outputs for merging back together later
                for l in range(len(outputs)):
                    outputs_all[l].append(outputs[l])

    # Merge outputs on CPU
    with tf.device('/cpu:0'):

        merged = []
        for outputs in outputs_all:
            merged.append(concatenate(outputs, axis=0))

        return Model(inputs=model.inputs, outputs=merged)


In [None]:
def topK_auPRC(model,Inp,Tgt,numOut_Classes):
    """Compute top-k accuracy and average precision for up to two classes.

    For every evaluated class the targets and predictions are flattened.
    With k = number of target entries equal to 1, top-k accuracy is the
    fraction of the k highest-scoring positions that are true positives.
    Average precision comes from ``average_precision_score`` on the same
    flattened vectors.

    Parameters
    ----------
    model : object with ``predict``
        ``model.predict(Inp)`` must return an array indexable as
        ``[:, :, c]``.
    Inp : model input passed straight to ``model.predict``.
    Tgt : array indexable as ``[:, :, c]`` holding the targets.
    numOut_Classes : int
        Number of output channels. When > 1, channel 0 is skipped and
        channels 1..numOut_Classes-1 are evaluated; otherwise only channel 0.

    Returns
    -------
    tuple ``(topk_1, topk_2, auprc_1, auprc_2)``
        Metrics of the first two evaluated classes. When fewer than two
        classes are evaluated (``numOut_Classes < 3``) the second pair is
        the placeholder 0.0. A class without any positive target also
        reports 0.0 for both metrics instead of dividing by zero.
    """
    p=model.predict(Inp)
    if numOut_Classes > 1:
        cRange=range(1,numOut_Classes)
    else:
        cRange=[0]
    Tks=[]
    PRs=[]
    for c in cRange:
        Fullc=np.asarray(Tgt[:,:,c]).flatten()
        scores=np.asarray(p[:,:,c]).flatten()
        posIdx=np.flatnonzero(Fullc == 1)
        nPos=posIdx.size
        if nPos == 0:
            # No positives: top-k is undefined (and a [-0:] slice would
            # select everything), so fall back to the 0.0 placeholder.
            Tks.append(0.0)
            PRs.append(0.0)
            continue
        # Indices of the nPos highest scores; argsort is ascending.
        topIdx=np.argsort(scores)[-nPos:]
        nCrct=np.intersect1d(topIdx,posIdx).size
        Tks.append(nCrct/float(nPos))
        PRs.append(average_precision_score(Fullc,scores))
    if numOut_Classes < 3:
        Tks.append(0.0)
        PRs.append(0.0)
    return(Tks[0],Tks[1],PRs[0],PRs[1])