Pirates vs bioinformaticians
tmassingham-ont authored and Semen Trygubenko committed Sep 1, 2017
1 parent 23dae14 commit 53606ee
Showing 17 changed files with 14,711 additions and 72,747 deletions.
CMakeLists.txt (3 changes: 2 additions & 1 deletion)
@@ -140,7 +140,8 @@ add_test(NAME unittest WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/src/test/ COMMAND
add_test(test_events_call scrappie events ${USE_THREADS} ${READSDIR})
add_test(test_raw_call scrappie raw --model raw_r94 ${USE_THREADS} ${READSDIR})
add_test(test_rawrgr_call scrappie raw --model rgr_r94 ${USE_THREADS} ${READSDIR})
add_test(test_rawrgrgr_call scrappie raw --model rgrgr_r95 ${USE_THREADS} ${READSDIR})
add_test(test_rawrgrgr_r94_call scrappie raw --model rgrgr_r94 ${USE_THREADS} ${READSDIR})
add_test(test_rawrgrgr_r95_call scrappie raw --model rgrgr_r95 ${USE_THREADS} ${READSDIR})
add_test(test_licence scrappie licence)
add_test(test_license scrappie license)
add_test(test_help scrappie help)
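The single rgrgr raw-calling test is split into per-chemistry variants so that both the rgrgr_r94 and rgrgr_r95 models are exercised. From a build directory the pair can be selected with CTest's standard name filter, for example ctest -R rawrgrgr; the filter flag is ordinary CTest usage, not something added by this commit.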
misc/parse_events.py (2 changes: 1 addition & 1 deletion)
@@ -57,7 +57,7 @@ def reshape_lstmV(mat):
sys.stdout.write("""#pragma once
#ifndef NANONET_EVENTS_MODEL_H
#define NANONET_EVENTS_MODEL_H
#include "util.h"
#include "../util.h"
""")

""" First LSTM layer
misc/parse_raw.py (2 changes: 1 addition & 1 deletion)
@@ -57,7 +57,7 @@ def cformatV(fh, name, X):
#ifndef NANONET_RAW_MODEL_H
#define NANONET_RAW_MODEL_H
#include <assert.h>
#include "util.h"
#include "../util.h"
""")

""" First LSTM layer
misc/parse_rgr.py (2 changes: 1 addition & 1 deletion)
@@ -59,7 +59,7 @@ def cformatV(fh, name, X):
#ifndef NANONET_RGR_MODEL_H
#define NANONET_RGR_MODEL_H
#include <assert.h>
#include "util.h"
#include "../util.h"
""")

""" Convolution layer
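The matching one-line edits in parse_events.py, parse_raw.py and parse_rgr.py (and likewise in the rewritten parse_rgrgr.py below) all touch the #include line that each script writes verbatim into the header it generates. As the rename of src/nanonet_events.h to src/models/nanonet_events.h further down shows, the generated model headers now live one directory below src/, so util.h is reached as "../util.h" rather than "util.h".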
misc/parse_rgrgr.py (150 changes: 79 additions & 71 deletions)
@@ -1,10 +1,15 @@
#!/usr/bin/env python3
import argparse
import pickle
import math
import numpy as np
import re
import sys

parser = argparse.ArgumentParser()
parser.add_argument('--id', default='' , help='Identifier for model names')
parser.add_argument('model', help='Pickle to read model from')


model_file = sys.argv[1]

@@ -50,74 +55,77 @@ def cformatV(fh, name, X):
fh.write('const scrappie_matrix {} = &{};\n\n'.format(name, '_' + name))



with open(model_file, 'rb') as fh:
network = pickle.load(fh, encoding='latin1')
assert network.version == 1, "Sloika model must be version 1. Perhaps you need to run Sloika's model_upgrade.py"

sys.stdout.write("""#pragma once
#ifndef NANONET_RGRGR_MODEL_H
#define NANONET_RGRGR_MODEL_H
#include <assert.h>
#include "util.h"
""")

""" Convolution layer
"""

filterW = network.sublayers[0].W.get_value()
nfilter, _ , winlen = filterW.shape
cformatM(sys.stdout, 'conv_rgrgr_W', filterW.reshape(-1, 1), nr = winlen * 4 - 3, nc=nfilter)
cformatV(sys.stdout, 'conv_rgrgr_b', network.sublayers[0].b.get_value().reshape(-1))
sys.stdout.write("const int conv_rgrgr_stride = {};\n".format(network.sublayers[0].stride))
sys.stdout.write("""const size_t _conv_rgrgr_nfilter = {};
const size_t _conv_rgrgr_winlen = {};
""".format(nfilter, winlen))

""" Backward GRU (first layer)
"""
gru1 = network.sublayers[1].sublayers[0]
cformatM(sys.stdout, 'gruB1_rgrgr_iW', gru1.iW.get_value())
cformatM(sys.stdout, 'gruB1_rgrgr_sW', gru1.sW.get_value())
cformatM(sys.stdout, 'gruB1_rgrgr_sW2', gru1.sW2.get_value())
cformatV(sys.stdout, 'gruB1_rgrgr_b', gru1.b.get_value().reshape(-1))

""" Forward GRU (second layer)
"""
gru2 = network.sublayers[2]
cformatM(sys.stdout, 'gruF2_rgrgr_iW', gru2.iW.get_value())
cformatM(sys.stdout, 'gruF2_rgrgr_sW', gru2.sW.get_value())
cformatM(sys.stdout, 'gruF2_rgrgr_sW2', gru2.sW2.get_value())
cformatV(sys.stdout, 'gruF2_rgrgr_b', gru2.b.get_value().reshape(-1))

""" backward GRU(third layer)
"""
gru3 = network.sublayers[3].sublayers[0]
cformatM(sys.stdout, 'gruB3_rgrgr_iW', gru3.iW.get_value())
cformatM(sys.stdout, 'gruB3_rgrgr_sW', gru3.sW.get_value())
cformatM(sys.stdout, 'gruB3_rgrgr_sW2', gru3.sW2.get_value())
cformatV(sys.stdout, 'gruB3_rgrgr_b', gru3.b.get_value().reshape(-1))

""" Forward GRU (fourth layer)
"""
gru4 = network.sublayers[4]
cformatM(sys.stdout, 'gruF4_rgrgr_iW', gru4.iW.get_value())
cformatM(sys.stdout, 'gruF4_rgrgr_sW', gru4.sW.get_value())
cformatM(sys.stdout, 'gruF4_rgrgr_sW2', gru4.sW2.get_value())
cformatV(sys.stdout, 'gruF4_rgrgr_b', gru4.b.get_value().reshape(-1))

""" backward GRU(fifth layer)
"""
gru5 = network.sublayers[5].sublayers[0]
cformatM(sys.stdout, 'gruB5_rgrgr_iW', gru5.iW.get_value())
cformatM(sys.stdout, 'gruB5_rgrgr_sW', gru5.sW.get_value())
cformatM(sys.stdout, 'gruB5_rgrgr_sW2', gru5.sW2.get_value())
cformatV(sys.stdout, 'gruB5_rgrgr_b', gru5.b.get_value().reshape(-1))
""" Softmax layer
"""
nstate = network.sublayers[6].W.get_value().shape[0]
shuffle = np.append(np.arange(nstate - 1) + 1, 0)
cformatM(sys.stdout, 'FF_rgrgr_W', network.sublayers[6].W.get_value()[shuffle])
cformatV(sys.stdout, 'FF_rgrgr_b', network.sublayers[6].b.get_value()[shuffle])

sys.stdout.write('#endif /* NANONET_RGRGR_MODEL_H */')
if __name__ == '__main__':
args = parser.parse_args()
modelid = args.id + '_'

with open(args.model, 'rb') as fh:
network = pickle.load(fh, encoding='latin1')
assert network.version == 1, "Sloika model must be version 1. Perhaps you need to run Sloika's model_upgrade.py"

sys.stdout.write("""#pragma once
#ifndef NANONET_RGRGR_{}MODEL_H
#define NANONET_RGRGR_{}MODEL_H
#include <assert.h>
#include "../util.h"
""".format(modelid.upper(), modelid.upper()))

""" Convolution layer
"""

filterW = network.sublayers[0].W.get_value()
nfilter, _ , winlen = filterW.shape
cformatM(sys.stdout, 'conv_rgrgr_{}W'.format(modelid), filterW.reshape(-1, 1), nr = winlen * 4 - 3, nc=nfilter)
cformatV(sys.stdout, 'conv_rgrgr_{}b'.format(modelid), network.sublayers[0].b.get_value().reshape(-1))
sys.stdout.write("const int conv_rgrgr_{}stride = {};\n".format(modelid, network.sublayers[0].stride))
sys.stdout.write("""const size_t _conv_rgrgr_{}nfilter = {};
const size_t _conv_rgrgr_{}winlen = {};
""".format(modelid, nfilter, modelid, winlen))

""" Backward GRU (first layer)
"""
gru1 = network.sublayers[1].sublayers[0]
cformatM(sys.stdout, 'gruB1_rgrgr_{}iW'.format(modelid), gru1.iW.get_value())
cformatM(sys.stdout, 'gruB1_rgrgr_{}sW'.format(modelid), gru1.sW.get_value())
cformatM(sys.stdout, 'gruB1_rgrgr_{}sW2'.format(modelid), gru1.sW2.get_value())
cformatV(sys.stdout, 'gruB1_rgrgr_{}b'.format(modelid), gru1.b.get_value().reshape(-1))

""" Forward GRU (second layer)
"""
gru2 = network.sublayers[2]
cformatM(sys.stdout, 'gruF2_rgrgr_{}iW'.format(modelid), gru2.iW.get_value())
cformatM(sys.stdout, 'gruF2_rgrgr_{}sW'.format(modelid), gru2.sW.get_value())
cformatM(sys.stdout, 'gruF2_rgrgr_{}sW2'.format(modelid), gru2.sW2.get_value())
cformatV(sys.stdout, 'gruF2_rgrgr_{}b'.format(modelid), gru2.b.get_value().reshape(-1))

""" backward GRU(third layer)
"""
gru3 = network.sublayers[3].sublayers[0]
cformatM(sys.stdout, 'gruB3_rgrgr_{}iW'.format(modelid), gru3.iW.get_value())
cformatM(sys.stdout, 'gruB3_rgrgr_{}sW'.format(modelid), gru3.sW.get_value())
cformatM(sys.stdout, 'gruB3_rgrgr_{}sW2'.format(modelid), gru3.sW2.get_value())
cformatV(sys.stdout, 'gruB3_rgrgr_{}b'.format(modelid), gru3.b.get_value().reshape(-1))

""" Forward GRU (fourth layer)
"""
gru4 = network.sublayers[4]
cformatM(sys.stdout, 'gruF4_rgrgr_{}iW'.format(modelid), gru4.iW.get_value())
cformatM(sys.stdout, 'gruF4_rgrgr_{}sW'.format(modelid), gru4.sW.get_value())
cformatM(sys.stdout, 'gruF4_rgrgr_{}sW2'.format(modelid), gru4.sW2.get_value())
cformatV(sys.stdout, 'gruF4_rgrgr_{}b'.format(modelid), gru4.b.get_value().reshape(-1))

""" backward GRU(fifth layer)
"""
gru5 = network.sublayers[5].sublayers[0]
cformatM(sys.stdout, 'gruB5_rgrgr_{}iW'.format(modelid), gru5.iW.get_value())
cformatM(sys.stdout, 'gruB5_rgrgr_{}sW'.format(modelid), gru5.sW.get_value())
cformatM(sys.stdout, 'gruB5_rgrgr_{}sW2'.format(modelid), gru5.sW2.get_value())
cformatV(sys.stdout, 'gruB5_rgrgr_{}b'.format(modelid), gru5.b.get_value().reshape(-1))
""" Softmax layer
"""
nstate = network.sublayers[6].W.get_value().shape[0]
shuffle = np.append(np.arange(nstate - 1) + 1, 0)
cformatM(sys.stdout, 'FF_rgrgr_{}W'.format(modelid), network.sublayers[6].W.get_value()[shuffle])
cformatV(sys.stdout, 'FF_rgrgr_{}b'.format(modelid), network.sublayers[6].b.get_value()[shuffle])

sys.stdout.write('#endif /* NANONET_RGRGR_{}_MODEL_H */'.format(modelid.upper()))
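parse_rgrgr.py now has a small command-line front end: the model pickle is a positional argument and --id injects an identifier into every emitted symbol and header guard, so headers generated for different chemistries (e.g. rgrgr_r94 and rgrgr_r95) can coexist without name clashes. A minimal sketch of the naming scheme follows; it is not part of the commit, and the identifier r94 is only an example.

# Sketch only (not in this commit): how --id is folded into the generated names.
# Running parse_rgrgr.py --id r94 model.pkl makes modelid 'r94_', and every
# symbol and header guard picks up that infix.
modelid = 'r94' + '_'

print('#ifndef NANONET_RGRGR_{}MODEL_H'.format(modelid.upper()))  # NANONET_RGRGR_R94_MODEL_H
print('conv_rgrgr_{}W'.format(modelid))    # conv_rgrgr_r94_W
print('gruB1_rgrgr_{}iW'.format(modelid))  # gruB1_rgrgr_r94_iW
print('FF_rgrgr_{}b'.format(modelid))      # FF_rgrgr_r94_b

The header itself goes to stdout, so a plausible invocation (assumed, not shown in this diff) is python3 misc/parse_rgrgr.py --id r94 model.pkl > src/models/nanonet_rgrgr_r94.h.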
src/nanonet_events.h → src/models/nanonet_events.h (2 changes: 1 addition & 1 deletion)
@@ -1,7 +1,7 @@
#pragma once
#ifndef NANONET_EVENTS_MODEL_H
# define NANONET_EVENTS_MODEL_H
# include "util.h"
# include "../util.h"
float __lstmF1_iW[] = {
0.793625, 0.022582, -0.0807776, 0.038936, -0.91329, 0.000772198, 0.00299331,
-0.0530244, -0.0643909, -0.0102255, 0.0238301, -0.0116242,
