In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import numpy as np

#
# image dimensions
#

img_height = 32
img_width = 32
img_channels = 1


In [2]:
drug_fingerprints_fh = 'sample/sample_fingerprints.csv'
drug_targets_fh      = 'sample/sample_targets.csv'
drug_weights_fh      = 'sample/sample_weights.csv'

In [3]:
sample_size       = 10000
fingerprint_size  = 1024
fingerprint_width = 32
targets_num       = 420
weights_num       = 420
num_channels      = 1

In [4]:
import re
def populate_data(file_handle,data_matrix, data_size):
    with open(file_handle) as fh:
        j=0
        content = fh.readlines()
        content = [x.strip() for x in content]
        for line in content:
            result = re.split(r'[,\t]\s*',line)
            for i in range(1,data_size+1):
                data_matrix[j][i-1] = np.float32(result[i])
            j = j+1
    print(j)
    fh.close()

In [5]:
drug_fingerprints = []
drug_targets      = []
drug_weights      = []


for i in range(sample_size):
    fingerprint_holder = [0]* fingerprint_size
    drug_fingerprints.append(fingerprint_holder)
    
for i in range(sample_size):
    target_holder = [0]* targets_num
    drug_targets.append(target_holder)

for i in range(sample_size):
    weight_holder = [0]* weights_num
    drug_weights.append(weight_holder)

In [6]:
populate_data(drug_weights_fh, drug_weights, weights_num)
populate_data(drug_targets_fh, drug_targets, targets_num)
populate_data(drug_fingerprints_fh, drug_fingerprints, fingerprint_size)

10000
10000
10000


In [7]:
drug_fingerprints = np.array(drug_fingerprints)
drug_targets      = np.array(drug_targets)
drug_weights      = np.array(drug_weights)

## PLACEHOLDERS

In [9]:
x = tf.placeholder(tf.float32, [None, fingerprint_size],name = "In_Flat_Drug_Fingerprint")

drug_image = tf.reshape(x, [-1, fingerprint_width, fingerprint_width, num_channels], name="Drug_Image_32x32")

y_true = tf.placeholder(tf.float32, [None, targets_num],name='True_Labels')

cross_entropy_weights = tf.placeholder(tf.float32, [None, weights_num],name = "Cross_Entropy_Weights")

In [10]:
# Data is transformed so that number of channels is first, this boost performance on GPU.
# from shape = [number_img, hight, wigth, channels] to [number_img, channels ,height, wigth] 

In [55]:
inputs = tf.transpose(drug_image, [0, 3, 1, 2])

In [56]:
inputs

<tf.Tensor 'transpose_2:0' shape=(?, 1, 32, 32) dtype=float32>

## FUNCTIONS

In [65]:
def residual_block(inputs, filters, is_training, strides,
                   use_projection=False, data_format='channels_first'):
    shortcut = inputs
    print("Residue unit:")
    if use_projection:
        shortcut = conv2d_fixed_padding(
        inputs=inputs, filters=filters, kernel_size=1, strides=strides,
        data_format=data_format)
        print("FIRST IN THE BLOCK")
    shortcut = batch_norm_relu(shortcut, is_training, relu=False,
                               data_format=data_format)
    print("Shortcut:")
    print(shortcut)
    print("CONV 1")
    inputs = conv2d_fixed_padding(
      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
      data_format=data_format)

    print(inputs)
    print("\n")
    inputs = batch_norm_relu(inputs, is_training, data_format=data_format)
    print("CONV 2")
    
    inputs = conv2d_fixed_padding(
      inputs=inputs, filters=filters, kernel_size=3, strides=1,
      data_format=data_format)
    print(inputs)
    print("\n") 
    inputs = batch_norm_relu(inputs, is_training, relu=False, init_zero=True,
                           data_format=data_format)
    
    print("Added Shortcut: ")
    sum_with_shortcut = inputs + shortcut
    print(sum_with_shortcut)
    print("\n")

    return tf.nn.relu( sum_with_shortcut )


In [39]:

def block_group(inputs, filters, blocks, strides, is_training, name,
                data_format='channels_first'):
    inputs = residual_block(inputs, filters, is_training, strides,
                    use_projection=True, data_format=data_format)
    for _ in range(1, blocks):
        inputs = residual_block(inputs, filters, is_training, 1,
                      data_format=data_format)

    return tf.identity(inputs, name)

In [15]:
def fixed_padding(inputs, kernel_size, data_format):
  """Pads the input along the spatial dimensions independently of input size.
  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
                 Should be a positive integer.
    data_format: The input format ('channels_last' or 'channels_first').
  Returns:
    A tensor with the same format as the input with the data either intact
    (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  pad_total = kernel_size - 1
  pad_beg = pad_total // 2
  pad_end = pad_total - pad_beg

  if data_format == 'channels_first':
    padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
                                    [pad_beg, pad_end], [pad_beg, pad_end]])
  else:
    padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
                                    [pad_beg, pad_end], [0, 0]])
  return padded_inputs

In [105]:
def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
  """Strided 2-D convolution with explicit padding."""
  # The padding is consistent and is based only on `kernel_size`, not on the
  # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
#  if strides > 1:
#    inputs = fixed_padding(inputs, kernel_size, data_format)

  return tf.layers.conv2d(
      inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
      padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
      kernel_initializer=tf.variance_scaling_initializer(),
data_format=data_format)

In [66]:
BATCH_NORM_DECAY = 0.997
BATCH_NORM_EPSILON = 1e-5

In [20]:
def batch_norm_relu(inputs, is_training, relu=True, init_zero=False,
                    data_format='channels_first'):
  """Performs a batch normalization followed by a ReLU.
  Args:
    inputs: `Tensor` of shape `[batch, channels, ...]`.
    is_training: `bool` for whether the model is training.
    relu: `bool` if False, omits the ReLU operation.
    init_zero: `bool` if True, initializes scale parameter of batch
        normalization with 0 instead of 1 (default).
    data_format: `str` either "channels_first" for `[batch, channels, height,
        width]` or "channels_last for `[batch, height, width, channels]`.
  Returns:
    A normalized `Tensor` with the same `data_format`.
  """
  if init_zero:
    gamma_initializer = tf.zeros_initializer()
  else:
    gamma_initializer = tf.ones_initializer()

  if data_format == 'channels_first':
    axis = 1
  else:
    axis = 3

  inputs = tf.layers.batch_normalization(
      inputs=inputs,
      axis=axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      center=True,
      scale=True,
      training=is_training,
      fused=True,
      gamma_initializer=gamma_initializer)

  if relu:
    inputs = tf.nn.relu(inputs)
  return inputs


In [13]:
data_format = 'channels_first'

### First convolution

In [106]:
inputs = tf.transpose(drug_image, [0, 3, 1, 2])

In [107]:
inputs = conv2d_fixed_padding(
          inputs=inputs, filters= 64 , kernel_size= 7,
          strides= 2 , data_format=data_format)

In [108]:
inputs 

<tf.Tensor 'conv2d_78/Conv2D:0' shape=(?, 64, 16, 16) dtype=float32>

In [109]:
inputs = tf.identity(inputs, 'initial_conv')
inputs

<tf.Tensor 'initial_conv_6:0' shape=(?, 64, 16, 16) dtype=float32>

In [110]:
batch_norm_relu(inputs, is_training=True, data_format='channels_first')
inputs

<tf.Tensor 'initial_conv_6:0' shape=(?, 64, 16, 16) dtype=float32>

In [111]:
inputs = tf.layers.max_pooling2d(
            inputs=inputs, pool_size=3,
            strides=2, padding='SAME',
            data_format='channels_first')

In [112]:
inputs

<tf.Tensor 'max_pooling2d_6/MaxPool:0' shape=(?, 64, 8, 8) dtype=float32>

In [113]:
inputs = tf.identity(inputs, name='initial_max_pool')
inputs

<tf.Tensor 'initial_max_pool_5:0' shape=(?, 64, 8, 8) dtype=float32>

In [114]:
 inputs = block_group(
        inputs=inputs, filters=64, blocks=3,
        strides=1, is_training=True, name='block_group1',
data_format=data_format)

Residue unit:
FIRST IN THE BLOCK
Shortcut:
Tensor("batch_normalization_85/FusedBatchNorm:0", shape=(?, 64, 8, 8), dtype=float32)
CONV 1
Tensor("conv2d_80/Conv2D:0", shape=(?, 64, 8, 8), dtype=float32)


CONV 2
Tensor("conv2d_81/Conv2D:0", shape=(?, 64, 8, 8), dtype=float32)


Added Shortcut: 
Tensor("add_30:0", shape=(?, 64, 8, 8), dtype=float32)


Residue unit:
Shortcut:
Tensor("batch_normalization_88/FusedBatchNorm:0", shape=(?, 64, 8, 8), dtype=float32)
CONV 1
Tensor("conv2d_82/Conv2D:0", shape=(?, 64, 8, 8), dtype=float32)


CONV 2
Tensor("conv2d_83/Conv2D:0", shape=(?, 64, 8, 8), dtype=float32)


Added Shortcut: 
Tensor("add_31:0", shape=(?, 64, 8, 8), dtype=float32)


Residue unit:
Shortcut:
Tensor("batch_normalization_91/FusedBatchNorm:0", shape=(?, 64, 8, 8), dtype=float32)
CONV 1
Tensor("conv2d_84/Conv2D:0", shape=(?, 64, 8, 8), dtype=float32)


CONV 2
Tensor("conv2d_85/Conv2D:0", shape=(?, 64, 8, 8), dtype=float32)


Added Shortcut: 
Tensor("add_32:0", shape=(?, 64, 8, 8), dt

In [115]:
 inputs

<tf.Tensor 'block_group1_4:0' shape=(?, 64, 8, 8) dtype=float32>

In [116]:
shortcut = conv2d_fixed_padding(
        inputs=inputs, filters=128, kernel_size=1, strides=2,
        data_format=data_format)

In [117]:
shortcut

<tf.Tensor 'conv2d_86/Conv2D:0' shape=(?, 128, 4, 4) dtype=float32>

In [118]:
 inputs = block_group(
        inputs=inputs, filters=128, blocks=4,
        strides=2, is_training=True, name='block_group2',
data_format=data_format)

Residue unit:
FIRST IN THE BLOCK
Shortcut:
Tensor("batch_normalization_94/FusedBatchNorm:0", shape=(?, 128, 4, 4), dtype=float32)
CONV 1
Tensor("conv2d_88/Conv2D:0", shape=(?, 128, 4, 4), dtype=float32)


CONV 2
Tensor("conv2d_89/Conv2D:0", shape=(?, 128, 4, 4), dtype=float32)


Added Shortcut: 
Tensor("add_33:0", shape=(?, 128, 4, 4), dtype=float32)


Residue unit:
Shortcut:
Tensor("batch_normalization_97/FusedBatchNorm:0", shape=(?, 128, 4, 4), dtype=float32)
CONV 1
Tensor("conv2d_90/Conv2D:0", shape=(?, 128, 4, 4), dtype=float32)


CONV 2
Tensor("conv2d_91/Conv2D:0", shape=(?, 128, 4, 4), dtype=float32)


Added Shortcut: 
Tensor("add_34:0", shape=(?, 128, 4, 4), dtype=float32)


Residue unit:
Shortcut:
Tensor("batch_normalization_100/FusedBatchNorm:0", shape=(?, 128, 4, 4), dtype=float32)
CONV 1
Tensor("conv2d_92/Conv2D:0", shape=(?, 128, 4, 4), dtype=float32)


CONV 2
Tensor("conv2d_93/Conv2D:0", shape=(?, 128, 4, 4), dtype=float32)


Added Shortcut: 
Tensor("add_35:0", shape=(?, 1

In [119]:
is_training = True

In [120]:
inputs = block_group(
        inputs=inputs, filters=256, blocks=6,
        strides=2, is_training=is_training, name='block_group3',
        data_format=data_format)
inputs = block_group(
        inputs=inputs, filters=512, blocks=3,
        strides=2, is_training=is_training, name='block_group4',
data_format=data_format)

Residue unit:
FIRST IN THE BLOCK
Shortcut:
Tensor("batch_normalization_106/FusedBatchNorm:0", shape=(?, 256, 2, 2), dtype=float32)
CONV 1
Tensor("conv2d_97/Conv2D:0", shape=(?, 256, 2, 2), dtype=float32)


CONV 2
Tensor("conv2d_98/Conv2D:0", shape=(?, 256, 2, 2), dtype=float32)


Added Shortcut: 
Tensor("add_37:0", shape=(?, 256, 2, 2), dtype=float32)


Residue unit:
Shortcut:
Tensor("batch_normalization_109/FusedBatchNorm:0", shape=(?, 256, 2, 2), dtype=float32)
CONV 1
Tensor("conv2d_99/Conv2D:0", shape=(?, 256, 2, 2), dtype=float32)


CONV 2
Tensor("conv2d_100/Conv2D:0", shape=(?, 256, 2, 2), dtype=float32)


Added Shortcut: 
Tensor("add_38:0", shape=(?, 256, 2, 2), dtype=float32)


Residue unit:
Shortcut:
Tensor("batch_normalization_112/FusedBatchNorm:0", shape=(?, 256, 2, 2), dtype=float32)
CONV 1
Tensor("conv2d_101/Conv2D:0", shape=(?, 256, 2, 2), dtype=float32)


CONV 2
Tensor("conv2d_102/Conv2D:0", shape=(?, 256, 2, 2), dtype=float32)


Added Shortcut: 
Tensor("add_39:0", shape=

In [121]:
inputs

<tf.Tensor 'block_group4_1:0' shape=(?, 512, 1, 1) dtype=float32>

In [123]:
pool_size = (1, 1)
inputs = tf.layers.average_pooling2d(
    inputs=inputs, pool_size=pool_size, strides=1, padding='VALID',
    data_format=data_format)

In [124]:
inputs = tf.identity(inputs, 'final_avg_pool')
inputs = tf.reshape(inputs, [-1, 512])
inputs = tf.layers.dense(inputs=inputs,units=420,kernel_initializer=tf.random_normal_initializer(stddev=.01))
inputs = tf.identity(inputs, 'final_dense')
inputs

<tf.Tensor 'final_dense:0' shape=(?, 420) dtype=float32>

In [125]:
t = tf.constant([[1, 2, 3], [4, 5, 6]])
paddings = tf.constant([[1, 1,], [2, 2]])


In [126]:
t

<tf.Tensor 'Const:0' shape=(2, 3) dtype=int32>

In [127]:
paddings

<tf.Tensor 'Const_1:0' shape=(2, 2) dtype=int32>

In [128]:
tf.pad(t, paddings, "CONSTANT") 

<tf.Tensor 'Pad_15:0' shape=(4, 7) dtype=int32>