In [1]:
from __future__ import print_function
import matplotlib.pyplot as plt
%matplotlib inline

import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import numpy as np
from models.resnet import ResNet
from models.unet import UNet
from models.skip import skip
import torch
import torch.optim

from utils.inpainting_utils import *


        

# Copied from common_utils so that it can be modified here
def optimize(optimizer_type, parameters, closure, LR, num_iter, inpaintinglog):
    """Run the optimization loop.

    Args:
        optimizer_type: name of the optimizer; only 'adam' is supported here.
        parameters: list of Tensors to optimize over.
        closure: callable invoked once per iteration as
            ``closure(j, inpaintinglog)`` where ``j`` is the zero-based
            iteration index. Gradients are zeroed before the call and an
            optimizer step is taken right after it, so the closure is
            expected to run ``backward()`` on its loss.
        LR: learning rate passed to the optimizer.
        num_iter: number of iterations to run.
        inpaintinglog: object forwarded unchanged to ``closure`` on every
            iteration (may be None).

    Raises:
        AssertionError: if ``optimizer_type`` is anything other than 'adam'.
    """
    if optimizer_type != 'adam':
        # Raise explicitly rather than `assert False`: assert statements are
        # stripped under `python -O`, which would make an unsupported
        # optimizer silently do nothing.
        raise AssertionError(
            "Unsupported optimizer_type: {!r} (only 'adam' is implemented)".format(optimizer_type)
        )

    print('Starting optimization with ADAM')
    optimizer = torch.optim.Adam(parameters, lr=LR)
    for j in range(num_iter):
        optimizer.zero_grad()
        closure(j, inpaintinglog)
        optimizer.step()
        



def inpainting(container, cuda = False, iterations = 100, inpaintinglog = None):
    """Fit a `skip` network to the unmasked part of a sequence and return its output.

    The network receives a fixed noise tensor (perturbed with fresh Gaussian
    noise on every iteration) and is trained with Adam to minimise the MSE
    between ``out * mask`` and ``seq * mask``.

    Args:
        container: object exposing ``seq_np`` and ``mask_np`` arrays.
            ``seq_np.shape[0]`` is used as the number of output channels and
            ``seq_np.shape[1]`` as the spatial size of the noise input.
            Presumably the mask is 1 at known positions and 0 at positions to
            inpaint -- TODO confirm against Container.generate_mask.
        cuda: if True use ``torch.cuda.FloatTensor`` and enable cuDNN,
            otherwise use CPU tensors and disable cuDNN. Note that this sets
            the global ``torch.backends.cudnn`` flags.
        iterations: number of optimization iterations.
        inpaintinglog: optional logging object. When given, the run
            parameters, the per-iteration loss and periodic network outputs
            are recorded on it; when None no logging is performed.

    Returns:
        The result of ``torch_to_np`` applied to the network output for the
        unperturbed noise input after optimization.
    """
    # Local import: the notebook only imports `time` in a later cell, so the
    # function should not depend on that cell having been executed.
    import time

    seq_np = container.seq_np
    mask_np = container.mask_np

    if cuda:
        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = True
        dtype = torch.cuda.FloatTensor
    else:
        torch.backends.cudnn.enabled = False
        torch.backends.cudnn.benchmark = False
        dtype = torch.FloatTensor

    # --- Run configuration (also recorded in the log below) ---
    # NOTE(review): the label says 'skip_depth6' but five levels are configured
    # below -- confirm which is intended.
    NET_TYPE = 'skip_depth6'
    pad = 'reflection' # 'zero'
    OPT_OVER = 'net'
    OPTIMIZER = 'adam'

    INPUT = 'noise'
    input_depth = 32
    LR = 0.01
    num_iter = iterations
    reg_noise_std = 0.03

    # Architecture. These variables are used both to build the network and to
    # fill the log, so the logged values always describe the real network.
    # (Previously the log recorded [128] * 3 while the network was built with
    # the widths below.)
    num_channels_down = [16, 32, 64, 64, 64]
    num_channels_up = [16, 32, 64, 64, 64]
    num_channels_skip = [16, 32, 64, 64, 64]
    filter_size_up = 3
    filter_size_down = 3
    upsample_mode = 'nearest'
    filter_skip_size = 1
    need_sigmoid = True
    need_bias = True
    act_fun = 'LeakyReLU'

    net = skip(input_depth, seq_np.shape[0], #change skip function in models/skip.py
               num_channels_down = num_channels_down,
               num_channels_up = num_channels_up,
               num_channels_skip = num_channels_skip,
               filter_size_up = filter_size_up, filter_size_down = filter_size_down,
               upsample_mode = upsample_mode, filter_skip_size = filter_skip_size,
               need_sigmoid = need_sigmoid, need_bias = need_bias,
               pad = pad, act_fun = act_fun).type(dtype)

    net_input = get_noise(input_depth, INPUT, seq_np.shape[1]).type(dtype) #tensor

    num_params = sum(p.numel() for p in net.parameters())
    print ('Number of params: %d' % num_params)

    if inpaintinglog is not None:
        inpaintinglog.add_net_parameters([NET_TYPE, pad, OPT_OVER, OPTIMIZER, INPUT,
                                          input_depth, LR, reg_noise_std, num_iter, cuda, num_params,
                                          num_channels_down,
                                          num_channels_up,
                                          num_channels_skip,
                                          filter_size_up, filter_size_down,
                                          upsample_mode, filter_skip_size,
                                          need_sigmoid, need_bias, pad, act_fun])
        inpaintinglog.init_log()

    # Loss
    mse = torch.nn.MSELoss().type(dtype)

    img_var = np_to_torch(seq_np).type(dtype)
    mask_var = np_to_torch(mask_np).type(dtype)

    # Fixed copy of the input plus a scratch buffer that is re-filled with
    # Gaussian noise in place on every iteration.
    net_input_saved = net_input.detach().clone()
    noise = net_input.detach().clone()

    def closure(i, inpaintinglog):
        """Compute the masked MSE for iteration ``i``, backpropagate and log it."""
        net_input = net_input_saved
        if reg_noise_std > 0:
            net_input = net_input_saved + (noise.normal_() * reg_noise_std)
        out = net(net_input)

        # Multiplying both sides by the mask zeroes their difference wherever
        # the mask is 0, so those positions do not contribute to the loss.
        total_loss = mse(out * mask_var, img_var * mask_var)
        total_loss.backward()
        print ('Iteration %05d    Loss %f' % (i, total_loss.item()), '\r', end='')

        if inpaintinglog is not None:
            # Store a detached value so the log does not keep every
            # iteration's autograd graph alive.
            inpaintinglog.loss.append(total_loss.detach())
            if i % inpaintinglog.out_nps_every == 0:
                out_np = torch_to_np(out)
                inpaintinglog.compare_log(i, out_np)

        return total_loss

    p = get_params(OPT_OVER, net, net_input) # list of tensors to optimize over !! in optimize

    start_time = time.time()
    optimize(OPTIMIZER, p, closure, LR, num_iter, inpaintinglog) # optimize is defined above in this notebook
    elapsed_time = time.time() - start_time
    print("\ntime: {}s".format(elapsed_time))

    # Final forward pass on the unperturbed input; no gradients are needed.
    with torch.no_grad():
        out_np = torch_to_np(net(net_input))

    # The log is optional (default None), so only finalise it when present.
    if inpaintinglog is not None:
        inpaintinglog.end_log()

    return out_np

In [2]:
# from PIL import Image
# import PIL
import numpy as np
import IPython.display
import random 

from Bio import SeqIO
import math
from collections import Counter
import datetime
import time
import matplotlib.pyplot as plt

# Input data locations used by the cells below.
# NOTE(review): several entries are absolute paths on one developer's machine
# and will not resolve elsewhere -- consider a configurable data directory.
fasta_file = "data/myco_genome.fasta"
local_genome = "/Users/pochtalionizm/Projects/neuro/data/vibrio.gbff"  # same path as `vibrio` below
remote_genome = "data/myco_genome.gbff"
myco = "/Users/pochtalionizm/Projects/neuro/data/myco.gbff"  # parsed as GenBank further down
vibrio = "/Users/pochtalionizm/Projects/neuro/data/vibrio.gbff"
homo = "data/homos_2.fasta"  # parsed as FASTA in the last cell


In [3]:
import sys
# Make the project's tools directory importable.
# NOTE(review): absolute, machine-specific path.
sys.path.append("/Users/pochtalionizm/Projects/neuro/tools/")
# Star import; presumably the source of Container and Inpaintinglog used in
# later cells -- TODO confirm and import the names explicitly.
from sequence_for_nn import *

Human part

In [4]:
from Bio import SeqIO
# Open the FASTA file lazily and keep only its first record.
iterator = SeqIO.parse("/Users/pochtalionizm/Projects/neuro/data/myco_non.fasta", "fasta")
record = next(iterator)  # raises StopIteration if the file contains no records


In [5]:
# Create an empty container; Container is not defined in this notebook
# (presumably provided by the sequence_for_nn star import -- TODO confirm).
container = Container()

...container created


In [6]:
# Attach the record loaded above directly, instead of reading it via container.read_seq().
container.record = record

In [7]:
# Prepare the sequence for the network; inpainting() later reads container.seq_np.
# NOTE(review): the exact behaviour of cut_seq/generate_seq is defined outside this notebook.
container.cut_seq()
container.generate_seq()

cuted seq for analysis, length = 464341, start = 0, part = 0.0-100.0
generated seq_np


In [8]:
# Build the mask; inpainting() later reads it as container.mask_np.
container.generate_mask()

generated mask with 44157 spots of 1 bp


In [9]:
# Create a log object for this run and start the optimization on CPU.
# NOTE(review): `every` presumably sets the logging interval read inside
# inpainting() as `out_nps_every` -- confirm in Inpaintinglog.
inpaintinglog = Inpaintinglog(container, every = 500)
# NOTE(review): `i` is not read by inpainting() as defined above; looks like a leftover.
i = 0
# The recorded output below shows this run was interrupted manually (KeyboardInterrupt).
inpainting(container, iterations = 4001, inpaintinglog = inpaintinglog, cuda = False)

Number of params: 189300
Starting optimization with ADAM




Iteration 00002    Loss 0.238666 

KeyboardInterrupt: 

Bacterial part

In [None]:
# Fresh container; this rebinds the `container` created in the earlier section.
container = Container()
# Load the sequence from the `myco` GenBank path defined in the paths cell.
container.read_seq(myco)

In [None]:
# Use one seed for both the sequence and the mask generation.
s = 9
container.generate_seq(length = 100000, seed = s)
container.generate_mask(seed = s)
print(container.title)

In [None]:
# Create a log object for this run and start a short optimization on CPU.
inpaintinglog = Inpaintinglog(container, every = 50)
i = 0
# Pass the log by keyword: the second positional parameter of inpainting() is
# `cuda`, so passing the log positionally together with `cuda=False` raises
# "TypeError: got multiple values for argument 'cuda'".
inpainting(container, inpaintinglog = inpaintinglog, cuda = False, iterations = 51)

In [None]:
# inpaintinglog.plot_loss(True)
# coding = [c['coding_part'] for c in inpaintinglog.counters]
# noncoding = [c['noncoding_part'] for c in inpaintinglog.counters]
# mask = [c['mask_part'] for c in inpaintinglog.counters]
# plot_part(coding, "coding")
# plot_part(noncoding, "noncoding")
# plot_part(mask, "mask")



In [None]:
# for inpaintinglog in read_log():
# #     print(inpaintinglog.seq_info)
#     coding = [c['coding_part'] for c in inpaintinglog.counters]
#     noncoding = [c['noncoding_part'] for c in inpaintinglog.counters]
#     mask = [c['mask_part'] for c in inpaintinglog.counters]
#     plot_part(coding, "coding")
#     plot_part(noncoding, "noncoding")
#     plot_part(mask, "mask")


In [None]:
# Pull one series per key out of the per-checkpoint counters stored on the log.
# Each element of inpaintinglog.counters is indexed by 'coding_part',
# 'noncoding_part' and 'mask_part'.
coding = [c['coding_part'] for c in inpaintinglog.counters]
noncoding = [c['noncoding_part'] for c in inpaintinglog.counters]
mask = [c['mask_part'] for c in inpaintinglog.counters]


In [None]:
def plot_part(coding, name, ylim=(0.65, 0.75)):
    """Plot one series of values, save the figure under ``pics/`` and show it.

    Reads the module-level ``inpaintinglog`` for the x-axis scale
    (``out_nps_every``), the figure title (``plot_title``) and the output
    file name (``file_title``).

    Args:
        coding: sequence of values to plot, one per logged checkpoint.
        name: label of the plotted part; used in the y-axis label and in the
            saved file name.
        ylim: ``(low, high)`` limits of the y-axis. Defaults to the range that
            was previously hard-coded.
    """
    import os

    plt.plot(coding)
    plt.ylim(*ylim)
    plt.ylabel('mistakes in {} part'.format(name))
    plt.xlabel('iteration/{}'.format(inpaintinglog.out_nps_every))
    plt.title(inpaintinglog.plot_title)
    plt.tight_layout()
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs("pics", exist_ok=True)
    plt.savefig("pics/{}_{}.png".format(inpaintinglog.file_title, name))
    plt.show()

In [None]:
# Plot and save the 'coding_part' series extracted from the log counters.
plot_part(coding, "coding")

In [None]:
# Plot and save the 'noncoding_part' series extracted from the log counters.
plot_part(noncoding, "noncoding")

In [None]:

# Plot and save the 'mask_part' series extracted from the log counters.
plot_part(mask, "mask")

In [None]:
from Bio import SeqIO

# Read the first GenBank record and measure the lengths of contiguous coding
# (CDS) and non-coding stretches along it.
# NOTE: this cell rebinds the notebook-level names `record` and `coding`.
iterator = SeqIO.parse(myco, "genbank")
record = next(iterator)
length = len(record.seq)

# Per-base indicator: 1 where the position is covered by at least one CDS
# feature, 0 elsewhere.
coding = np.zeros(length)
for f in record.features:
    if f.type == "CDS":
        f_start = f.location.start
        f_end = f.location.end
        coding[f_start: f_end] = 1

# Run-length encode the indicator. A run ends where the value changes or at
# the end of the sequence, and it is classified by ITS OWN value.
# (Previously each finished run was classified by the value of the run that
# followed it, which swapped the two lists, and a zero-length run was
# recorded at position 0.)
cds = []
non = []
run_start = 0
for i in range(1, length + 1):
    if i == length or coding[i] != coding[run_start]:
        run_length = i - run_start
        if coding[run_start] == 1:
            cds.append(run_length)
        else:
            non.append(run_length)
        run_start = i

# Sanity checks: sum(cds) should equal sum(coding), and the two lists together
# should cover the whole sequence.
print(sum(coding))
print(sum(cds) + sum (non))
print(len(record.seq))
print(np.mean(cds), len(cds), sum(cds))
print(np.mean(non), len(non), sum(non))

In [None]:
from Bio import SeqIO
# Read the first record of the `homo` FASTA file and print its summary.
# Note: this rebinds the notebook-level `record` used by earlier cells.
iterator = SeqIO.parse(homo, "fasta")
record = next(iterator)
print(record)

[16, 32, 64, 128, 128, 128]