## ResNet Implementation

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math

# Load Data

In [None]:
# Locations of the three sample CSV inputs. Despite the `_fh` suffix these are
# path strings, not open file handles; populate_data() opens them itself.
drug_fingerprints_fh = 'sample/sample_fingerprints.csv'
drug_targets_fh      = 'sample/sample_targets.csv'
drug_weights_fh      = 'sample/sample_weights.csv'

### Data dimensions

In [None]:
# Number of rows pre-allocated for every data matrix.
sample_size = 10000

# A fingerprint is fed in flat and reshaped into a square, single-channel image,
# so the flat length is tied to the image side length.
num_channels = 1
fingerprint_width = 32
fingerprint_size = fingerprint_width * fingerprint_width  # 32 * 32 = 1024

# Number of value columns per row in the targets / weights files.
targets_num = 420
weights_num = 420

In [None]:
import re
def populate_data(file_handle, data_matrix, data_size):
    """Fill ``data_matrix`` in place with numeric columns parsed from a text file.

    Every non-blank line is split on a comma or tab (together with any
    whitespace following the delimiter). Field 0 is skipped; the next
    ``data_size`` fields are converted to ``np.float32`` and written to
    ``data_matrix[row][0:data_size]``, where ``row`` is the index of the data
    line within the file. The number of data rows read is printed at the end.

    Args:
        file_handle: Path of the file to read (it is passed to ``open``, so
            despite the name this is a path, not an open handle).
        data_matrix: Pre-allocated, mutable, row-indexable container with at
            least as many rows as the file has data lines and at least
            ``data_size`` columns per row.
        data_size: Number of value columns to copy from each line.

    Raises:
        IndexError: If a data line has fewer than ``data_size + 1`` fields, or
            the file has more data lines than ``data_matrix`` has rows.
        ValueError: If a field cannot be parsed as a float.
    """
    # Compiled once instead of once per line.
    delimiter = re.compile(r'[,\t]\s*')
    row = 0
    # The context manager closes the file; no explicit close() is needed.
    with open(file_handle) as fh:
        # Stream the file instead of materialising every line up front.
        for raw_line in fh:
            line = raw_line.strip()
            if not line:
                # Blank lines (e.g. trailing newlines) carry no data.
                continue
            fields = delimiter.split(line)
            for col in range(data_size):
                # +1 skips the leading field of each line.
                data_matrix[row][col] = np.float32(fields[col + 1])
            row += 1
    print(row)

In [None]:
# Pre-allocate one zero-filled row per sample for each data set. Every row is a
# distinct list object, so filling one row never affects another.
drug_fingerprints = [[0] * fingerprint_size for _ in range(sample_size)]
drug_targets = [[0] * targets_num for _ in range(sample_size)]
drug_weights = [[0] * weights_num for _ in range(sample_size)]

In [None]:
# Read each CSV into its pre-allocated matrix: (path, destination, column count).
for source_path, destination, column_count in (
        (drug_weights_fh, drug_weights, weights_num),
        (drug_targets_fh, drug_targets, targets_num),
        (drug_fingerprints_fh, drug_fingerprints, fingerprint_size),
):
    populate_data(source_path, destination, column_count)

In [None]:
# Convert the nested Python lists into NumPy arrays.
drug_fingerprints, drug_targets, drug_weights = (
    np.array(nested_rows)
    for nested_rows in (drug_fingerprints, drug_targets, drug_weights)
)

Reference implementation: https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py

A **TensorFlow** graph consists of the following parts:

* **Placeholder** variables used for inputting data to the graph.
* **Variables** that are going to be optimized so as to make the convolutional network perform better.
* The mathematical formulas for the convolutional network.
* A **cost function** that is used to guide the optimization of the variables.
* **Optimization** method which updates the variables.


# 1. Placeholders

In [None]:
# Flat fingerprint input: any batch size, `fingerprint_size` floats per row.
x = tf.placeholder(dtype=tf.float32,
                   shape=[None, fingerprint_size],
                   name="In_Flat_Drug_Fingerprint")

# The same data viewed as square images with `num_channels` channels; -1 lets
# TensorFlow infer the batch dimension.
image_shape = [-1, fingerprint_width, fingerprint_width, num_channels]
drug_image = tf.reshape(x, image_shape, name="Drug_Image_32x32")

# Ground-truth labels, one column per target.
y_true = tf.placeholder(dtype=tf.float32,
                        shape=[None, targets_num],
                        name='True_Labels')

# Weights fed alongside the labels, one column per weight.
cross_entropy_weights = tf.placeholder(dtype=tf.float32,
                                       shape=[None, weights_num],
                                       name="Cross_Entropy_Weights")

# 2. Variables

In [None]:
def new_weights(shape):
    """Return a new variable of `shape` initialised from a truncated normal (stddev 0.05)."""
    initial_values = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial_values, name="Weights")
def new_biases(length):
    """Return a new 1-D variable with `length` entries, each initialised to 0.05."""
    initial_values = tf.constant(0.05, shape=[length])
    return tf.Variable(initial_values, name="Biases")

# NETWORK ARCHITECTURE

![title](img/ResNetPic.jpg)

# Network Description
*  **conv1** (7x7 conv, 64,/2) -> ***filter_size***=7, ***out_channels***=64, ***stride***=2
*  **pooling layer** ***stride***=2
*  **block1** layers 6x[(3x3 conv,64,)] -> 6 conv layers with:    ***filter_size***=3, ***out_channels***=64, ***stride***=1
*  **block2** layers 8x[(3x3 conv,128,)] -> 8 conv layers with:   ***filter_size***=3, ***out_channels***=128, ***stride***=1
*  **block3** layers 12x[(3x3 conv,256,)] -> 12 conv layers with: ***filter_size***=3, ***out_channels***=256, ***stride***=1 
*  **block4** layers 6x[(3x3 conv,512,)] -> 6 conv layers with:   ***filter_size***=3, ***out_channels***=512, ***stride***=1
* **average pooling**
* **fully connected layer**

## Difficulty: Shortcut Connection (FIRST ORIGINAL IMPLEMENTATION)
The identity shortcuts can be directly used when the input and output are of the same dimensions (solid line shortcuts on the graph above). When the dimension increase, we have two options:
<br>
* (A) the shortcut still performs identity mapping, with extra zero entries padded for the increased dimensions,
* (B) a projection shortcut is used to match dimensions (done by a 1x1 convolution).
<br>
<br>
For both options, when the shortcuts go across feature maps of two sizes, they are performed with a stride of 2.

<img src="img/better_residue.png" alt="Drawing" style="width: 700px;"/>

<img src="img/two_res_unit.png" alt="Drawing2" style="width:400px;"/>

### Info about Convolution layer API

`tf.nn.conv2d` function from TensorFlow API
<br>
<br>
`tf.nn.conv2d`(
<br>
    input,
 <br>
    filter,
    <br>
    strides,
    <br>
    padding,
    <br>
    use_cudnn_on_gpu=True,
    <br>
    data_format='NHWC',
    <br>
    dilations=[1, 1, 1, 1],
    <br>
    name=None
)
<br>
<br>
Computes a 2-D convolution given 4-D input and filter tensors.
<br>
<br>
Given an input tensor of shape `[batch, in_height, in_width, in_channels]` and a filter / kernel tensor of shape `[filter_height, filter_width, in_channels, out_channels]`, this op performs the following:
<br>
<br>
* Flattens the filter to a 2-D matrix with shape `[filter_height * filter_width * in_channels, output_channels]`.
* Extracts image patches from the input tensor to form a virtual tensor of shape `[batch, out_height, out_width, filter_height * filter_width * in_channels]`.
* For each patch, right-multiplies the filter matrix and the image patch vector.

In [None]:
def new_conv_layer(input, filter_size, in_channels, out_channels, stride=1):
    """Build a 2-D convolution followed by a per-filter bias add.

    Args:
        input: Previous layer in the network (passed to `tf.nn.conv2d` as `input`).
        filter_size: Side length of the square filter (width == height).
        in_channels: Number of channels in the input layer.
        out_channels: Number of channels in the output layer, i.e. the number
            of filters.
        stride: Step used for both spatial dimensions; the op receives
            `strides=[1, stride, stride, 1]`.

    Returns:
        The convolution output with the biases added (no activation applied).
    """
    # Filter bank laid out as [height, width, in_channels, out_channels];
    # one bias per filter.
    kernel = new_weights(shape=[filter_size, filter_size, in_channels, out_channels])
    bias = new_biases(length=out_channels)

    # NOTE(review): strided convolutions use 'VALID' and no explicit padding is
    # applied here, so the output size is not simply input_size / stride.
    # Confirm this is intended.
    padding_mode = 'VALID' if stride != 1 else 'SAME'

    convolved = tf.nn.conv2d(input=input,
                             filter=kernel,
                             strides=[1, stride, stride, 1],
                             padding=padding_mode,
                             name="CONV")
    return convolved + bias

### Info about Max Pooling layer API

`tf.nn.max_pool`(
<br>
    value,
    <br>
    ksize,
    <br>
    strides,
    <br>
    padding,
    <br>
    data_format='NHWC',
    <br>
    name=None
)
<br>
* `value`: A 4-D Tensor of the format specified by data_format.
* `ksize`: A list or tuple of 4 ints. The size of the window for each dimension of the input tensor.
* `strides`: A list or tuple of 4 ints. The stride of the sliding window for each dimension of the input tensor.

In [None]:
def new_pooling_layer(input, dim_ksize, stride):
    """Apply 'SAME'-padded max pooling with a square window.

    Args:
        input: Tensor passed to `tf.nn.max_pool` as `value`.
        dim_ksize: Side length of the pooling window in both spatial dimensions.
        stride: Step of the window in both spatial dimensions.

    Returns:
        The pooled tensor.
    """
    window = [1, dim_ksize, dim_ksize, 1]
    step = [1, stride, stride, 1]
    return tf.nn.max_pool(value=input, ksize=window, strides=step,
                          padding='SAME', name="POOLING_LAYER")

In [None]:
# Batch-normalisation hyper-parameters: presumably the moving-average decay
# (momentum) and the small constant added to the variance for numerical stability.
BATCH_NORM_DECAY = 0.9
BATCH_NORM_EPSILON = 1e-5

In [None]:
def batch_norm_and_RELU(inputs, axis=3, relu=True, training=True):
    """Apply batch normalisation, optionally followed by ReLU.

    Args:
        inputs: Tensor to normalise.
        axis: Axis holding the channels. The default of 3 matches inputs laid
            out as [num_images, img_height, img_width, num_channels].
        relu: When True, apply `tf.nn.relu` to the normalised tensor.
        training: Forwarded to `tf.layers.batch_normalization`. True (the
            default, matching the previous hard-coded value) normalises with
            the statistics of the current batch; pass False, or a boolean
            tensor, to use the moving averages at inference time.

    Returns:
        The normalised (and optionally rectified) tensor.
    """
    # BATCH_NORM_DECAY / BATCH_NORM_EPSILON are the module-level settings for
    # this layer; without passing them explicitly TensorFlow silently falls
    # back to its own defaults (momentum=0.99, epsilon=0.001).
    # NOTE: in training mode the moving-average update ops are added to
    # tf.GraphKeys.UPDATE_OPS and have to be run together with the train op.
    layer = tf.layers.batch_normalization(inputs=inputs,
                                          axis=axis,
                                          momentum=BATCH_NORM_DECAY,
                                          epsilon=BATCH_NORM_EPSILON,
                                          center=True,
                                          scale=True,
                                          training=training,
                                          fused=True)
    if relu:
        layer = tf.nn.relu(layer)
    return layer

### ORIGINAL IMPLEMENTATION OF THE RESIDUAL UNIT

In [None]:
def residual_unit(inputs, filter_size, in_channels, out_channels, strides, use_projection=False):
    """Build one two-convolution residual unit: relu(F(inputs) + shortcut).

    The main path is conv -> batch norm -> ReLU -> conv -> batch norm. The
    shortcut is either the unchanged input or, with `use_projection`, a 1x1
    convolution plus batch norm that maps the input to `out_channels` channels.

    Args:
        inputs: Input tensor with `in_channels` channels.
        filter_size: Side length of the square filters on the main path.
        in_channels: Number of channels of `inputs`.
        out_channels: Number of channels produced by both convolutions (and by
            the projection shortcut).
        strides: Currently unused, see the NOTE below.
        use_projection: When True, project the shortcut with a 1x1 convolution.
            This is required whenever `in_channels != out_channels`, otherwise
            the final addition has mismatching shapes.

    Returns:
        The ReLU of the sum of the main path and the shortcut.
    """
    # NOTE(review): `strides` is accepted but every convolution below runs with
    # stride 1, so no spatial down-sampling happens inside a unit. Wiring it in
    # also needs explicit padding, because new_conv_layer switches to 'VALID'
    # for stride != 1 and the 1x1 shortcut and the 3x3 main path would then
    # produce different spatial sizes. Confirm the intended behaviour.
    shortcut = inputs

    if use_projection:
        # A 1x1 convolution changes only the channel count of the shortcut.
        shortcut = new_conv_layer(input=inputs, filter_size=1, in_channels=in_channels,
                                  out_channels=out_channels, stride=1)
        print(shortcut)
        shortcut = batch_norm_and_RELU(shortcut, relu=False)

    inputs = new_conv_layer(input=inputs, filter_size=filter_size, in_channels=in_channels,
                            out_channels=out_channels, stride=1)
    print(inputs)
    inputs = batch_norm_and_RELU(inputs, relu=True)

    # The second convolution consumes the output of the first one, which has
    # `out_channels` channels. Building its filters with `in_channels` fails
    # at graph construction whenever the two counts differ.
    inputs = new_conv_layer(input=inputs, filter_size=filter_size, in_channels=out_channels,
                            out_channels=out_channels, stride=1)
    inputs = batch_norm_and_RELU(inputs, relu=False)

    return tf.nn.relu(inputs + shortcut)

In [None]:
def block_group(inputs, blocks, filter_size, in_channels, out_channels, strides, name):
    """Stack `blocks` residual units and name the result.

    Args:
        inputs: Input tensor with `in_channels` channels.
        blocks: Total number of residual units in the group. The first unit is
            always built, so values below 1 still yield one unit.
        filter_size: Side length of the square filters inside each unit.
        in_channels: Number of channels entering the group.
        out_channels: Number of channels produced by every unit of the group.
        strides: Passed to the first unit only; the remaining units get 1.
        name: Name given to the output via `tf.identity`.

    Returns:
        The output of the last residual unit, wrapped in `tf.identity(..., name)`.
    """
    # Only the first unit of a group uses the projection shortcut and `strides`;
    # it is also the only unit whose input still has `in_channels` channels.
    inputs = residual_unit(inputs, filter_size, in_channels, out_channels, strides,
                           use_projection=True)

    # Every later unit receives the previous unit's output, which already has
    # `out_channels` channels, so that is its input channel count as well.
    for _ in range(1, blocks):
        inputs = residual_unit(inputs, filter_size, out_channels, out_channels, 1,
                               use_projection=False)

    return tf.identity(inputs, name)

# 3.Architecture of 34-layer residual neural network:
*  **conv1** (7x7 conv, 64,/2) -> ***filter_size***=7, ***out_channels***=64, ***stride***=2
*  **pooling layer** ***stride***=2
*  **block1** layers 6x[(3x3 conv,64,)] -> 6 conv layers with:    ***filter_size***=3, ***out_channels***=64, ***stride***=1
*  **block2** layers 8x[(3x3 conv,128,)] -> 8 conv layers with:   ***filter_size***=3, ***out_channels***=128, ***stride***=1
*  **block3** layers 12x[(3x3 conv,256,)] -> 12 conv layers with: ***filter_size***=3, ***out_channels***=256, ***stride***=1 
*  **block4** layers 6x[(3x3 conv,512,)] -> 6 conv layers with:   ***filter_size***=3, ***out_channels***=512, ***stride***=1
* **average pooling**
* **fully connected layer**

## INPUT
**Image** of shape `fingerprint_width` by `fingerprint_width` with `num_channels` channels

In [None]:
# First layer: 7x7 convolution, 64 filters, stride 2. The input channel count
# comes from `num_channels`, the same constant used to reshape the fingerprint
# into `drug_image`, so the two cannot drift apart.
conv1 = new_conv_layer(input=drug_image, filter_size=7, in_channels=num_channels,
                       out_channels=64, stride=2)
conv1

In [None]:
# Normalise the first convolution over the channel axis and apply ReLU.
conv1 = batch_norm_and_RELU(inputs=conv1, axis=3, relu=True)
conv1

In [None]:
# 3x3 max pooling with stride 2 on top of the first convolution.
pooling_layer = new_pooling_layer(
    input=conv1,
    dim_ksize=3,
    stride=2,
)
pooling_layer

### PARAMETERS OF THE NETWORK

In [None]:
# Filter side length shared by every residual-unit convolution.
filter_size_par = 3

# Per-stage settings. Each stage consumes what the previous stage produces, so
# its input channel count is taken from the previous stage's output.
block_1_in_channels = 64
block_1_out_channels = 64
num_blocks_1 = 3

block_2_in_channels = block_1_out_channels
block_2_out_channels = 128
num_blocks_2 = 4

block_3_in_channels = block_2_out_channels
block_3_out_channels = 256
num_blocks_3 = 6

block_4_in_channels = block_3_out_channels
block_4_out_channels = 512
num_blocks_4 = 3

### BLOCK 1
layers 6x[(3x3 conv,64,)] -> 6 conv layers with:    ***filter_size***=3, ***out_channels***=64, 
<br>
or 3 times residual unit

In [None]:
# Stage 1: built on the pooled output of the first convolution.
block1 = block_group(
    inputs=pooling_layer,
    blocks=num_blocks_1,
    filter_size=filter_size_par,
    in_channels=block_1_in_channels,
    out_channels=block_1_out_channels,
    strides=1,
    name="BLOCK1",
)

### BLOCK 2
layers 8x[(3x3 conv,128,)] -> 8 conv layers with:    ***filter_size***=3, ***out_channels***=128, 
<br>
or 4 times residual unit

In [None]:
# Stage 2: built on the output of stage 1.
block2 = block_group(
    inputs=block1,
    blocks=num_blocks_2,
    filter_size=filter_size_par,
    in_channels=block_2_in_channels,
    out_channels=block_2_out_channels,
    strides=2,
    name="BLOCK2",
)

### BLOCK 3
layers 12x[(3x3 conv,256,)] -> 12 conv layers with:    ***filter_size***=3, ***out_channels***=256, 
<br>
or 6 times residual unit

In [None]:
# Stage 3: built on the output of stage 2.
block3 = block_group(
    inputs=block2,
    blocks=num_blocks_3,
    filter_size=filter_size_par,
    in_channels=block_3_in_channels,
    out_channels=block_3_out_channels,
    strides=2,
    name="BLOCK3",
)

### BLOCK 4 
layers 6x[(3x3 conv,512,)] -> 6 conv layers with:    ***filter_size***=3, ***out_channels***=512, 
<br>
or 3 times residual unit

In [None]:
# Stage 4: built on the output of stage 3. The op is named "BLOCK4" so it does
# not reuse the name already given to the stage-3 output.
block4 = block_group(inputs=block3, blocks=num_blocks_4, filter_size=filter_size_par,
                     in_channels=block_4_in_channels, out_channels=block_4_out_channels,
                     strides=2, name="BLOCK4")

## AVERAGE POOLING

In [None]:
# NOTE(review): this binds the `tf.layers.average_pooling2d` function object
# itself to `inputs`; the function is not called, so this line adds no pooling
# op to the graph. The cell looks unfinished - confirm the intended arguments.
inputs = tf.layers.average_pooling2d