### RawCTCNet Benchmark/Eval with trained model: CTCLoss of approx. 0.6 (Best: 0.5548)

In [1]:
# switch to toplevel dir:
%cd ~/Desktop/pytorch_models/wavenet-speech/
!pwd
%load_ext autoreload
%autoreload 2

/home/ptang/Desktop/pytorch_models/wavenet-speech
/home/ptang/Desktop/pytorch_models/wavenet-speech


In [2]:
# imports:
import torch
from torch.autograd import Variable
import numpy as np
from warpctc_pytorch import CTCLoss

In [3]:
# import gaussian model, RawCTCNet, sequential decoder:
from utils.gaussian_kmer_model import RawGaussianModelLoader
from modules.raw_ctcnet import RawCTCNet
from modules.sequence_decoders import argmax_decode, labels2strings, BeamSearchDecoder

### Construct data generator from gaussian model using the same parameters as we did during training:

In [4]:
# create artificial data model:
max_iterations = 1000000 # 1 million examples
num_epochs = 100
epoch_size = 10000
kmer_model_path = "utils/r9.4_450bps.5mer.template.npz"
batch_size = 6
upsample_rate = 6
min_sample_len = 80
max_sample_len = 90
dataset = RawGaussianModelLoader(max_iterations, num_epochs, epoch_size, kmer_model_path, batch_size=batch_size,
                                 upsampling=upsample_rate, random_upsample=True, lengths=(min_sample_len,max_sample_len))

In [5]:
# inspect dataset:
signals, bases, lengths = dataset.fetch()
signals[:,0:7] # ~ (batch x seq)

Variable containing:
  70.9675   67.6471   67.4835   69.5680   70.5203   83.8395   82.8195
 108.6278  102.6799  105.5920  106.7239   97.1245   95.3029  100.7416
  72.7675   74.2975   73.4715   75.8703   81.6017   80.8176   80.9267
  69.2394   70.4691   72.2388   69.6774   69.3489   70.6318   75.5842
  80.2959   83.7103   87.2674   83.1617   86.8340   83.4901   83.9313
 103.0298   99.4216   97.7675  100.8947  101.4734  100.2454  103.7194
[torch.FloatTensor of size 6x7]

### Construct model with same parameters as during training and load saved models:

In [6]:
# build model:
nfeats = 2048
nhid = 512
feature_kwidth = 3
num_labels = 5
num_dilation_blocks = 10
dilations = [1, 2, 4, 8, 16] * num_dilation_blocks
layers = [(nhid, nhid, 2, d) for d in dilations] + [(nhid, nhid, 3, d) for d in dilations]
out_dim = 512
is_causal = False
ctcnet = RawCTCNet(nfeats, feature_kwidth, num_labels, layers, out_dim, input_kernel_size=2, input_dilation=1,
                   softmax=False, causal=is_causal)
batch_norm = torch.nn.BatchNorm1d(1)

In [7]:
# load saved model parameters:
ctcnet_save_path = "./runs/gaussian-model/raw_ctc_net.model.adamax_lr2e_4.pth"
batchnorm_save_path = "./runs/gaussian-model/raw_ctc_net.batch_norm.adamax_lr2e_4.pth"
map_cpu = lambda storage, loc: storage # necessary to move weights from CUDA to CPU
ctcnet.load_state_dict(torch.load(ctcnet_save_path, map_location=map_cpu))
batch_norm.load_state_dict(torch.load(batchnorm_save_path, map_location=map_cpu))

In [8]:
# CTCLoss:
ctc_loss_fn = CTCLoss()

### Helper function to fetch & evaluate model on data:

In [9]:
def eval_model():
    # use volatile variables for better execution speed/memory usage:
    signals, sequences, lengths = dataset.fetch()
    signals_var = Variable(signals.data, volatile=True)
    sequences_var = Variable(sequences.data, volatile=True)
    lengths_var = Variable(lengths.data, volatile=True)
    # run networks:
    probas = ctcnet(batch_norm(signals_var.unsqueeze(1)))
    transcriptions = probas.permute(2,0,1) # need seq x batch x dim
    transcription_lengths = Variable(torch.IntTensor([transcriptions.size(0)] * batch_size))
    ctc_loss = ctc_loss_fn(transcriptions, sequences_var, transcription_lengths, lengths_var)
    avg_ctc_loss = (ctc_loss / transcriptions.size(0))
    return (transcriptions, ctc_loss, avg_ctc_loss, sequences.data, lengths.data)

In [10]:
def split_target_seqs(seqs, lengths):
    """Split a flattened array of target sequences into their constituents."""
    split_seqs = []
    labels_parsed = 0
    for ll in lengths:
        split_seqs.append( seqs[labels_parsed:labels_parsed+ll] )
        labels_parsed += ll
    return split_seqs

### evaluate results against true sequences with argmax and beam search (run these commands in sequence a few times):

In [11]:
scores, loss, avg_loss, true_seqs, true_seq_lengths = eval_model()
print("CTC Loss on whole sequence: {}".format(loss.data[0]))
print("CTC Loss, averaged per-logit: {}".format(avg_loss.data[0]))

CTC Loss on whole sequence: 288.2308044433594
CTC Loss, averaged per-logit: 0.6506338715553284


In [12]:
# print true sequences:
true_base_sequences = split_target_seqs(true_seqs, true_seq_lengths)
for k in range(len(true_base_sequences)):
    print(labels2strings(true_base_sequences[k].unsqueeze(0))[0])

TGCCATCCTCGGTAGCCCGACATTATACGCGCCATGCCTCCACATCTGTGTTCGTCGACGCTATCCAGTCCTGCTCCAGATGT
GACTTACTTCCTTATCGCCTCTAACTTAGGGGGGGGCGAAAACTAAGTGCATGTCCACTTGGTCACTTCTAGGGGCTTTTCGCG
AAGCATGAAGCGGGTTGTGCGGATCCCCTCGTATTACTTCGGCGGCGTCACTCGTCAAGTTCTTTGTCAACGATCTCATGGATCCTG
GGGTGGATGGGGCTCCCCGTGCTAGTATGCTCGGTTAAAGCCTCGACCCAATTTATTCAACGTCGAAAGGGAGTCGCGCCTTATACA
ACTGTACCGGAATATGGAGAGCCACCGTGAATCCTGCTATGGCAGATCAATTACGCCCCCACCGCCTTATCACCATATTG
TCGGAAAATGAATGACTGACCAGATGGGTCGCATCAGTTAATTACGATGATGCCTGTCCAGTTTTTCTCAGTCGACTTTCCTATAACT


In [13]:
# normalize probabilities with a softmax operation:
temperature = 1.0 # should set this between 0->infty
logits = scores / temperature
for k in range(len(logits)):
    logits[k,:,:] = torch.nn.functional.softmax(logits[k,:,:])

In [14]:
# argmax decoding: expects (batch, seq, dim) and returns (batch, seq)
argmax_decoded = argmax_decode(logits.permute(1,0,2).contiguous().data)
argmax_basecalls = labels2strings(argmax_decoded)
for k in range(len(argmax_decoded)):
    print(argmax_basecalls[k])

TTCCACTGCGTTAGGGGCCCGGCCCATTTATCCGCGCCATGGCCTCCCATCTGTTGTCGTTAGCCGCTCTCCAGTTACTGGCTCCAGCC
TTCGACTTTTACTTTTTATCGGACTCTAACTTTAGGGGGGGGGGGGGGGGGCGAAAACTAGGTTGGCCATGGTCCCCTTTTGGGTCCCTTTTCTAGGGGCTTTATCC
TTAAGGGAACGGCTGCGTCGTTGGATGGATCCCTCGTTTATTACTTTGCGAGGCGTTACCCTTACGTCCAGCTTTTTTTGCAAGGATCTCCTGGGGGCTCC
TAGGGGGATGGGGGCTCCCCGTTGGCTCGTTTATTGGGCTCTATGGAAGCCTAGACCCCATTTATACCGGCGACAGGGGAGGTTCGACGCCTTTTCCCC
TAAATTACCGTTATATGGGGAGGAGCCCAACGTTGGAAATACTGCCTTATGGGCAGGCTCCCCTAACGCCCCCCCCCGCCTTTCTCCACCCATCC
CTTAAAATGGGAATGGGACTGGGCCCCAGGATGGGGGTTGGGCCATCCAGTAATTTAACGATGGATGACTGGTCCAGGTTTTTCTCCCGTTCGGCCTTTTTTTAATCTCG


In [15]:
# beam search decoded: expects (batch, dim, seq)
beam_search_decoder = BeamSearchDecoder(batch_size=batch_size, num_labels=5, beam_width=7)
probas, hyp_seqs = beam_search_decoder.decode(logits.permute(1, 2, 0))

In [16]:
print("Normalized probabilities:")
for k in range(len(probas)):
    print(probas[k] / logits.size(0))

Normalized probabilities:
0.29908177222947235
0.2925582437967339
0.2959414238854402
0.2937506682178506
0.2954750491587895
0.2956686945585849


In [17]:
lookup_dict = {0: '', 1: 'A', 2: 'G', 3: 'C', 4: 'T', 5: '<SOS>', 6: '<EOS>'}
for ll in range(len(hyp_seqs)):
    print("".join([lookup_dict[lbl] for lbl in hyp_seqs[ll]]))

<SOS>TTCCACTGCGTTAGGGGCCCGGCCCATTTATCCGCGCCATGGCCTCCCATCTGTTGTCGTTAGCCGCTCTCCAGTTACTGGCTCCAGCC<EOS>
<SOS>TTCGACTTTTACTTTTTATCGGACTCTAACTTTAGGGGGGGGGGGGGGGGGCGAAAACTAGGTTGGCCATGGTCCCCTTTTGGGTCCCTTTTCTAGGGGCTTTATCC<EOS>
<SOS>TTAAGGGAACGGCTGCGTCGTTGGATGGATCCCTCGTTTATTACTTTGCGAGGCGTTACCCTTACGTCCAGCTTTTTTTGCAAGGATCTCCTGGGGGCTCC<EOS>
<SOS>TAGGGGGATGGGGGCTCCCCGTTGGCTCGTTTATTGGGCTCTATGGAAGCCTAGACCCCATTTATACCGGCGACAGGGGAGGTTCGACGCCTTTTCCCC<EOS>
<SOS>TAAATTACCGTTATATGGGGAGGAGCCCAACGTTGGAAATACTGCCTTATGGGCAGGCTCCCCTAACGCCCCCCCCCGCCTTTCTCCACCCATCC<EOS>
<SOS>CTTAAAATGGGAATGGGACTGGGCCCCAGGATGGGGGTTGGGCCATCCAGTAATTTAACGATGGATGACTGGTCCAGGTTTTTCTCCCGTTCGGCCTTTTTTTAATCTCG<EOS>


#### Examine the weights & values:

In [18]:
# check uniformity of logits:
for k in range(len(logits)):
    print(logits[k,:,:].data)


 1.9550e-06  1.8705e-01  2.8369e-01  1.3603e-01  3.9322e-01
 2.6202e-06  2.8942e-01  2.2078e-01  2.1835e-01  2.7145e-01
 6.2909e-05  2.3605e-01  2.5595e-01  2.6032e-01  2.4761e-01
 3.2773e-05  2.5476e-01  2.4001e-01  2.6153e-01  2.4366e-01
 1.7269e-06  2.7208e-01  2.2621e-01  2.2959e-01  2.7212e-01
 5.4279e-06  2.6783e-01  2.3155e-01  2.3417e-01  2.6645e-01
[torch.FloatTensor of size 6x5]


 0.6154  0.1026  0.0871  0.0860  0.1088
 0.8568  0.0349  0.0354  0.0341  0.0388
 0.7532  0.0502  0.0671  0.0722  0.0573
 0.9629  0.0080  0.0075  0.0102  0.0113
 0.7437  0.0597  0.0637  0.0643  0.0686
 0.7996  0.0463  0.0571  0.0509  0.0461
[torch.FloatTensor of size 6x5]


 1.0033e-05  2.5807e-01  1.9852e-01  2.2233e-01  3.2107e-01
 8.1800e-05  2.8993e-01  2.3175e-01  2.3274e-01  2.4550e-01
 3.1841e-04  2.5555e-01  2.4486e-01  2.6896e-01  2.3031e-01
 5.5402e-02  3.0194e-01  2.0385e-01  2.6196e-01  1.7685e-01
 8.3182e-06  2.4502e-01  2.4309e-01  2.3316e-01  2.7872e-01
 1.8683e-05  2.4225e-01  2.4921


 9.9994e-01  2.3730e-12  4.9463e-13  1.7382e-12  6.1551e-05
 9.9999e-01  1.6694e-06  6.1522e-06  5.0278e-08  2.1214e-08
 9.9858e-01  1.0469e-05  8.4803e-08  3.2888e-07  1.4061e-03
 9.9448e-01  2.9027e-07  5.1781e-03  3.0297e-08  3.4002e-04
 1.0000e+00  7.2130e-18  1.7452e-14  1.0850e-20  1.0607e-09
 6.8928e-06  1.3388e-10  8.2300e-01  5.2757e-09  1.7700e-01
[torch.FloatTensor of size 6x5]


 9.9857e-01  1.1031e-10  4.7393e-07  1.1965e-12  1.4266e-03
 9.9984e-01  2.1584e-06  1.5279e-04  1.1719e-06  5.4042e-08
 2.0063e-01  7.2871e-01  7.1448e-04  6.9536e-02  4.1363e-04
 9.9842e-01  9.6160e-07  1.5375e-03  4.2060e-08  4.1178e-05
 9.9968e-01  9.9397e-10  1.2354e-06  9.1583e-11  3.1632e-04
 9.9996e-01  1.4355e-07  3.2776e-07  2.4522e-08  4.1891e-05
[torch.FloatTensor of size 6x5]


 7.8628e-01  2.5762e-07  3.2161e-05  1.3298e-08  2.1369e-01
 7.7316e-01  1.0782e-02  2.1481e-01  1.2468e-03  1.5971e-06
 3.0905e-01  5.8795e-01  1.8572e-03  9.5561e-02  5.5737e-03
 9.9897e-01  1.6609e-07  1.0238

In [52]:
ex_signal, _, _ = dataset.fetch()
activations = ctcnet.feature_layer(ex_signal.unsqueeze(1))

In [58]:
activations[0,80:90,0]

Variable containing:
 1093.8724
   -8.1111
   -9.0903
  133.6261
 3143.1812
   -2.4575
   -5.3077
 1472.5194
 1051.2010
  -10.8844
[torch.FloatTensor of size 10]