# RCNN model
Scripts for setting up our RCNN model using tfomics (https://github.com/p-koo/tfomics)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, accuracy_score, roc_auc_score
import sys
import h5py
import conutils

from __future__ import print_function 
import os, sys
import numpy as np
from six.moves import cPickle
from collections import OrderedDict

import tensorflow as tf

sys.path.append('../Tensor/tfomics')
from tfomics import neuralnetwork as nn
from tfomics import utils, learn

# import models
from model_zoo import fourthplace_connectomics_model
from model_zoo import simple_connectomics_model, simple_connectomics_model2
from model_zoo import residual_connectomics_model, residual_connectomics_model2

%matplotlib inline
%load_ext autoreload
%autoreload

In [None]:
# Load data -- from https://www.kaggle.com/c/connectomics/data
#
filename = '../Tensor/kaggle_connect_data/normal_dataset.hdf5'
group_name = ['normal_data']
dataset = h5py.File(filename,'r')
%time F_1 = np.array(dataset['/'+group_name[0]+'/F_1'])
scores_1 = np.array(dataset['/'+group_name[0]+'/scores_1'])
F_2 = np.array(dataset['/'+group_name[0]+'/F_2'])
scores_2 = np.array(dataset['/'+group_name[0]+'/scores_2'])
F_3 = np.array(dataset['/'+group_name[0]+'/F_3'])
scores_3 = np.array(dataset['/'+group_name[0]+'/scores_3'])
F_4 = np.array(dataset['/'+group_name[0]+'/F_4'])
scores_4 = np.array(dataset['/'+group_name[0]+'/scores_4'])

In [None]:
# Load network positions for removing light scattering effects
#
# Each network has its own comma-separated positions file; the recording is
# transposed before being handed to conutils.unscatter with those positions.
pos_template = 'D:/Dropbox/Tensor/kaggle_connect_data/normal-{0}/networkPositions_normal-{0}.txt'

pos = pos_template.format(1)
pos_1 = np.loadtxt(pos, delimiter=',')
F_1ls = conutils.unscatter(F_1.T, pos_1)

pos = pos_template.format(2)
pos_2 = np.loadtxt(pos, delimiter=',')
F_2ls = conutils.unscatter(F_2.T, pos_2)

pos = pos_template.format(3)
pos_3 = np.loadtxt(pos, delimiter=',')
F_3ls = conutils.unscatter(F_3.T, pos_3)

pos = pos_template.format(4)
pos_4 = np.loadtxt(pos, delimiter=',')
F_4ls = conutils.unscatter(F_4.T, pos_4)

In [None]:
# Downsample signals
#
# conutils.roma_ds is applied to each unscattered recording in network order;
# the result for the fourth network is stored under the "valid" name.
ds_1, ds_2, ds_3, ds_valid = map(conutils.roma_ds, (F_1ls, F_2ls, F_3ls, F_4ls))

In [None]:
# Z-score signals
#
# conutils.standardize_rows is applied to each downsampled recording in
# network order; the fourth network again keeps the "valid" name.
vs_1, vs_2, vs_3, vs_valid = map(conutils.standardize_rows, (ds_1, ds_2, ds_3, ds_valid))

Now prepare the data as 330-sample chunks.

In [None]:
# Group the standardized signals of networks 1-3 with their scores arrays and
# pass both tuples to conutils.pairwise_prep_tuple, which returns the data
# (dtf) and the targets (ltf) used for training below.
train_signals = (vs_1, vs_2, vs_3)
train_scores = (scores_1, scores_2, scores_3)
dtf, ltf = conutils.pairwise_prep_tuple(train_signals, train_scores)

OK, now we can split the data into training and cross-validation sets and then set up our network layers using tfomics.

In [None]:
# Separate data into training and cross-validation sets
#
# Shuffle samples and labels with one shared random ordering so that each
# sample stays aligned with its label.
n_samples = dtf.shape[0]
inds = np.random.choice(n_samples, replace=False, size=n_samples)
dtf, ltf = dtf[inds,:,:,:], ltf[inds]

# The first quarter (integer division) of the shuffled data becomes the
# cross-validation set; the remainder is kept for training.
crossval = n_samples // 4
dtf_crossval, dtf = dtf[:crossval,:,:,:], dtf[crossval:,:,:,:]
ltf_crossval, ltf = ltf[:crossval,:], ltf[crossval:,:]

In [None]:
# Bind the split arrays to the names used by the model and training cells.
X_train, y_train = dtf, ltf
X_valid, y_valid = dtf_crossval, ltf_crossval

In [None]:
# Build the network, wrap it in the tfomics classes and configure where the
# trainer writes its output.

# get shapes
# X_train is unpacked as a 4-D array: (num_data, height, width, dim).
num_data, height, width, dim = X_train.shape
# The leading None leaves the first dimension unspecified
# (presumably the batch size -- confirm against tfomics).
input_shape=[None, height, width, dim]
# Number of target columns, taken from the second axis of y_train.
num_labels = y_train.shape[1]  

# load model
# The model module returns the network, its placeholders and the optimization
# settings, which are consumed by the two tfomics classes below.
net, placeholders, optimization = residual_connectomics_model2.model(input_shape, num_labels)

# build neural network class
nnmodel = nn.NeuralNet(net, placeholders)
nnmodel.inspect_layers()

# Output is written relative to the current working directory.
data_path = './'

# set output file paths
# NOTE(review): utils.make_directory presumably creates ./results and returns
# its path -- confirm against tfomics.utils.
results_path = utils.make_directory(data_path, 'results')
output_name = 'dataset1_residual2'
filepath = os.path.join(results_path, output_name)

# compile neural trainer
# NOTE(review): save='best' presumably keeps only the best-scoring parameters
# at `filepath` -- confirm against tfomics.
nntrainer = nn.NeuralTrainer(nnmodel, optimization, save='best', filepath=filepath)

In [None]:
# Train
#
# Feed dictionaries for the two phases: the training feed uses keep
# probabilities below 1 and is_training=True, while the validation feed uses
# keep probabilities of 1.0 and is_training=False.
train = {
    'inputs': X_train,
    'targets': y_train,
    'keep_prob_conv': 0.8,
    'keep_prob_dense': 0.5,
    'is_training': True,
}
valid = {
    'inputs': X_valid,
    'targets': y_valid,
    'keep_prob_conv': 1.0,
    'keep_prob_dense': 1.0,
    'is_training': False,
}
data = {'train': train, 'valid': valid}

learn.train_minibatch(
    nntrainer,
    data,
    batch_size=100,
    num_epochs=200,
    patience=20,
    verbose=2,
    shuffle=True,
)

In [None]:
# Hold-out evaluation uses the fourth network, which is not among the
# networks passed to pairwise_prep_tuple for training.
val_dat = vs_valid
val_lbl = scores_4
# Reshape the scores into a single column. The row count is inferred with -1
# because NumPy rejects a float dimension such as 1e6 with a TypeError.
true_lbl = np.reshape(scores_4, (-1, 1))

In [None]:
# Evaluate model on validation data
#
# Run the trained network over the held-out signals via the conutils helper.
pred_lbl = conutils.valid_eval_tfomics(nntrainer, val_dat)

In [None]:
# Get ROC-AUC metric
#
# Compute the ROC curve of the predictions against the true labels, then
# integrate it to obtain the area under the curve.
fpr, tpr, thresholds = roc_curve(y_true=true_lbl, y_score=pred_lbl)
wrk = auc(x=fpr, y=tpr)
print(wrk)