In [7]:
from __future__ import print_function
import keras
from keras.models import Sequential, Model, load_model
from keras import backend as K
import tensorflow as tf
import isolearn.keras as iso
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from keras.utils import plot_model
import time

from aparent.predictor import *
##################################################
#import bioPython for working with FASTA files
from Bio import SeqIO
##################################################


#loading model
aparent_model = load_model('./saved_models/aparent_large_lessdropout_all_libs_no_sampleweights.h5')
plot_model(aparent_model, show_shapes = True, to_file='APARENTmodel.png')
aparent_encoder = get_aparent_encoder(lib_bias=4)

#setting up files, prediction cor chr 21
fastaDestination = "./fastas/"
fastaNames = ["chrY"]
predDestination = "./PredictionBinaries/"
#strideSizes = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,20,25,30,35,40,45,50]
strideSizes = [10]
increaseSize = 100000
#running files
for name in fastaNames:
    contigSeq = SeqIO.read(fastaDestination + name + ".fasta", "fasta")
    seq = contigSeq.seq #actual genomic sequence from the file
    #split seq into 100K portions
    print ("PREDICTING ", contigSeq.id, " with length ", len(seq))
    for stride in strideSizes:
            print ("Stride length is: ", stride)
            start = 0
            end = increaseSize - 1
            for i in range(0,int(len(seq)/stride)):
                startTime = time.time()
                sliceSeq = seq[start:end + 1]
                x,y = find_polya_peaks_memoryFriendly(
                    aparent_model,
                    aparent_encoder,
                    sliceSeq,
                    sequence_stride=stride,
                    conv_smoothing=False,
                    peak_min_height=0.01,
                    peak_min_distance=50,
                    peak_prominence=(0.01, None),
                )
                repPeriod = name.replace(".", "_")
                np.save(predDestination + name + "Predictions/" +repPeriod + "_StrideLen" + str(stride) + "Start" + str(start+ 1) + "End" + str(end + 1), y )
                secondsDiffs = time.time()-startTime
                print ("Time for ",start, "to", end, ":", str(int(secondsDiffs/60)) + " mins " + str(secondsDiffs%60.0))
                start += increaseSize
                end += increaseSize
            restSeq = seq[end:]
            x,y = find_polya_peaks_memoryFriendly(
                aparent_model,
                aparent_encoder,
                restSeq,
                sequence_stride=stride,
                conv_smoothing=False,
                peak_min_height=0.01,
                peak_min_distance=50,
                peak_prominence=(0.01, None),
            )
            repPeriod = name.replace(".", "_")
            np.save(predDestination + name + "Predictions/" +repPeriod + "_StrideLen" + str(stride) + "Start" + str(end + 1) + "End" + str(len(seq)), y )
    print ("FINISHED")




PREDICTING  CM000686.2  with length  57227415
Stride length is:  10
Time for  0 to 99999 : 0 mins 32.43858289718628
Time for  100000 to 199999 : 0 mins 32.58081316947937
Time for  200000 to 299999 : 0 mins 32.528916358947754
Time for  300000 to 399999 : 0 mins 32.76347517967224
Time for  400000 to 499999 : 0 mins 32.60949373245239
Time for  500000 to 599999 : 0 mins 32.55373954772949
Time for  600000 to 699999 : 0 mins 33.49790811538696
Time for  700000 to 799999 : 0 mins 33.230608224868774
Time for  800000 to 899999 : 0 mins 33.22089958190918
Time for  900000 to 999999 : 0 mins 35.267927169799805
Time for  1000000 to 1099999 : 0 mins 34.548213958740234
Time for  1100000 to 1199999 : 0 mins 34.63970470428467
Time for  1200000 to 1299999 : 0 mins 34.64574718475342
Time for  1300000 to 1399999 : 0 mins 33.84622502326965
Time for  1400000 to 1499999 : 0 mins 34.78647565841675
Time for  1500000 to 1599999 : 0 mins 39.16990828514099
Time for  1600000 to 1699999 : 0 mins 34.669469594955444
T

Time for  14400000 to 14499999 : 0 mins 30.466818809509277
Time for  14500000 to 14599999 : 0 mins 30.567980527877808
Time for  14600000 to 14699999 : 0 mins 30.324480056762695
Time for  14700000 to 14799999 : 0 mins 30.797489881515503
Time for  14800000 to 14899999 : 0 mins 30.36286950111389
Time for  14900000 to 14999999 : 0 mins 30.50639319419861
Time for  15000000 to 15099999 : 0 mins 30.797946214675903
Time for  15100000 to 15199999 : 0 mins 30.68962264060974
Time for  15200000 to 15299999 : 0 mins 30.40671133995056
Time for  15300000 to 15399999 : 0 mins 30.626139879226685
Time for  15400000 to 15499999 : 0 mins 30.70873498916626
Time for  15500000 to 15599999 : 0 mins 30.659972667694092
Time for  15600000 to 15699999 : 0 mins 30.90343713760376
Time for  15700000 to 15799999 : 0 mins 30.702932834625244
Time for  15800000 to 15899999 : 0 mins 30.583043098449707
Time for  15900000 to 15999999 : 0 mins 30.63931131362915
Time for  16000000 to 16099999 : 0 mins 30.705435276031494
Time

Time for  28500000 to 28599999 : 0 mins 39.42057704925537
Time for  28600000 to 28699999 : 0 mins 41.18198275566101
Time for  28700000 to 28799999 : 0 mins 36.850160360336304
Time for  28800000 to 28899999 : 0 mins 36.371318101882935
Time for  28900000 to 28999999 : 0 mins 34.75727844238281
Time for  29000000 to 29099999 : 0 mins 35.800395250320435
Time for  29100000 to 29199999 : 0 mins 36.218103885650635
Time for  29200000 to 29299999 : 0 mins 35.3485209941864
Time for  29300000 to 29399999 : 0 mins 35.92535853385925
Time for  29400000 to 29499999 : 0 mins 41.648407220840454
Time for  29500000 to 29599999 : 0 mins 37.030999183654785
Time for  29600000 to 29699999 : 0 mins 37.85262870788574
Time for  29700000 to 29799999 : 0 mins 33.18486928939819
Time for  29800000 to 29899999 : 0 mins 34.21526622772217
Time for  29900000 to 29999999 : 0 mins 36.32715964317322
Time for  30000000 to 30099999 : 0 mins 32.8005895614624
Time for  30100000 to 30199999 : 0 mins 35.03676629066467
Time for  

Time for  42600000 to 42699999 : 0 mins 37.333515644073486
Time for  42700000 to 42799999 : 0 mins 36.536688804626465
Time for  42800000 to 42899999 : 0 mins 38.16234016418457
Time for  42900000 to 42999999 : 0 mins 36.45511269569397
Time for  43000000 to 43099999 : 0 mins 36.844547271728516
Time for  43100000 to 43199999 : 0 mins 37.31886315345764
Time for  43200000 to 43299999 : 0 mins 37.74912905693054
Time for  43300000 to 43399999 : 0 mins 38.79906177520752
Time for  43400000 to 43499999 : 0 mins 37.112931966781616
Time for  43500000 to 43599999 : 0 mins 38.78974938392639
Time for  43600000 to 43699999 : 0 mins 37.47381067276001
Time for  43700000 to 43799999 : 0 mins 39.38045573234558
Time for  43800000 to 43899999 : 0 mins 35.35890340805054
Time for  43900000 to 43999999 : 0 mins 33.14405846595764
Time for  44000000 to 44099999 : 0 mins 38.10106635093689
Time for  44100000 to 44199999 : 0 mins 35.717422008514404
Time for  44200000 to 44299999 : 0 mins 36.17889904975891
Time for 

Time for  56700000 to 56799999 : 0 mins 30.66660761833191
Time for  56800000 to 56899999 : 0 mins 30.627344131469727
Time for  56900000 to 56999999 : 0 mins 30.939997911453247
Time for  57000000 to 57099999 : 0 mins 30.4766948223114
Time for  57100000 to 57199999 : 0 mins 30.681934118270874
Time for  57200000 to 57299999 : 0 mins 7.411458492279053


ValueError: cannot reshape array of size 820 into shape (1,200,4,1)