<a href="https://colab.research.google.com/github/philarnold4242/test_Python/blob/master/ConvNets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Chapter 4 - Convolutional Neural Nets<br>
https://www.tensorflow.org/tutorials/deep_cnn

## **Connect to Google Drive**<br>
More information can be found here:<br>
https://medium.com/deep-learning-turkey/google-colab-free-gpu-tutorial-e113627b9f5d

In [0]:
# Install google-drive-ocamlfuse (used below to mount Google Drive) and its prerequisites.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# NOTE(review): `2>&1 > /dev/null` discards stdout only. stderr is duplicated onto the
# original stdout before stdout is redirected, so error messages still reach the cell output.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate this Colab session and fetch the application-default credentials.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First pass: run ocamlfuse headless and show only the output line containing "URL"
# (presumably the authorisation link to open in a browser).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt for the verification code without echoing it into the notebook.
vcode = getpass.getpass()
# Second pass: pipe the code into ocamlfuse to complete the authorisation.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

In [0]:
## Mount Google Drive
# Create the mount point (no error if it already exists), then mount Drive onto it.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
# List the contents of the mount point to confirm that the mount succeeded.
!ls drive

## **Install modules**

In [0]:
# Pytorch: https://jovianlin.io/pytorch-with-gpu-in-google-colab/
!pip3 install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl 
!pip3 install torchvision

Collecting torch==0.3.0.post4 from http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
  Downloading http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl (592.3MB)
[K    93% |█████████████████████████████▉  | 552.4MB 25.4MB/s eta 0:00:02

[K    100% |████████████████████████████████| 592.3MB 66.4MB/s 
Installing collected packages: torch
Successfully installed torch-0.3.0.post4
Collecting torchvision
  Downloading torchvision-0.2.0-py2.py3-none-any.whl (48kB)
[K    100% |████████████████████████████████| 51kB 1.8MB/s 
Collecting pillow>=4.1.1 (from torchvision)
  Downloading Pillow-5.0.0-cp36-cp36m-manylinux1_x86_64.whl (5.9MB)
[K    100% |████████████████████████████████| 5.9MB 238kB/s 
Installing collected packages: pillow, torchvision
  Found existing installation: Pillow 4.0.0
    Uninstalling Pillow-4.0.0:
      Successfully uninstalled Pillow-4.0.0
Successfully installed pillow-5.0.0 torchvision-0.2.0


In [0]:
# Sanity check: confirm that PyTorch can see the GPU
import torch

# Each query is evaluated lazily, right before it is printed, so the
# output appears one line at a time in the order listed here.
gpu_queries = (
    lambda: torch.cuda.current_device(),     # index of the active device
    lambda: torch.cuda.device(0),            # device context object for GPU 0
    lambda: torch.cuda.device_count(),       # number of visible GPUs
    lambda: torch.cuda.get_device_name(0),   # model name of GPU 0
)
for query in gpu_queries:
    print(query())

0
<torch.cuda.device object at 0x7f7c420fa780>
1
Tesla K80


## Uncertainties<br>
Helper functions

In [0]:
# Pseudocount added to the sampled class probabilities before np.log is taken
# in pred_ent / mut_inf, so that log(0) is never evaluated.
# The value is ad hoc - needs thinking.
pc = 1e-10

def softmax(x, axis=1):
    """ Multinomial logistic function (softmax) along one axis.

    Args:
        x: array-like of scores (logits) with more than `axis` dimensions.
        axis: axis that holds the class scores. Defaults to 1, i.e. the
            (examples, classes) layout this function was written for.

    Returns:
        Array with the same shape as `x` whose entries are non-negative
        and sum to 1 along `axis`.
    """
    x = np.asarray(x)
    # Subtract the per-slice maximum first: the result is unchanged
    # mathematically but np.exp can no longer overflow.
    ex = np.exp(x - np.amax(x, axis=axis, keepdims=True))
    return ex / ex.sum(axis=axis, keepdims=True)
    

def var_ratio(x):
    """ Variation ratio: how spread are the sampled predictions around the mode?

    For every test example the class predicted by each stochastic sample is
    taken (arg-max over the class axis). The variation ratio is then
    1 - f_mode / T, where f_mode is the number of samples that predicted the
    most frequent class. 0 means all samples agree; larger values mean more
    disagreement.

    Args:
        x: array of shape (T, N, C) - T samples, N test examples, C classes.

    Returns:
        Array of shape (N,) with one variation ratio per test example.
    """
    x = np.asarray(x)
    n_samples = x.shape[0]

    # Predicted class of every sample for every test example, shape (T, N)
    votes = np.argmax(x, axis=2)

    ratios = np.zeros(shape=(x.shape[1]))

    # Loop over test examples
    for n in range(x.shape[1]):

        # Number of samples that voted for the most abundant class
        mode_count = np.bincount(votes[:, n]).max()

        # Fraction of samples that did NOT vote for the mode
        ratios[n] = 1.0 - mode_count / float(n_samples)

    return ratios


def pred_ent(x, eps=None):
    """ Average amount of information contained in posterior predictive distribution.

    The posterior predictive distribution of each test example is
    approximated by averaging the sampled class probabilities; its entropy
    (in nats) is returned.

    Args:
        x: array of shape (T, N, C) - T samples, N test examples, C classes.
        eps: pseudocount added to the probabilities so that log(0) is never
            taken. Defaults to the module-level `pc`.

    Returns:
        Array of shape (N,) with one predictive entropy per test example.
    """
    if eps is None:
        eps = pc

    # Approx posterior predictive distribution, shape (N, C).
    # mean(x + eps) == mean(x) + eps, which avoids copying the full sample array.
    ppd = np.mean(np.asarray(x), axis=0) + eps

    # Predictive entropy of every test example at once
    return -np.sum(ppd * np.log(ppd), axis=-1)


def mut_inf(x, eps=None):
    """ MI between prediction y_pred and posterior distribution for the model parameters.

    Computed per test example as

        H[ mean_t p_t ]  +  (1/T) * sum_t sum_c p_tc * log(p_tc)

    i.e. the predictive entropy minus the average entropy of the individual
    samples. It is 0 when all samples agree and grows as they disagree.

    Args:
        x: array of shape (T, N, C) - T samples, N test examples, C classes.
        eps: pseudocount added to the probabilities so that log(0) is never
            taken. Defaults to the module-level `pc`.

    Returns:
        Array of shape (N,) with one mutual-information value (in nats) per
        test example.
    """
    if eps is None:
        eps = pc

    p = np.asarray(x) + eps
    n_samples = p.shape[0]

    # Approx posterior predictive distribution, shape (N, C)
    ppd = p.mean(axis=0)

    # Predictive entropy, shape (N,)
    pe = -np.sum(ppd * np.log(ppd), axis=-1)

    # Negative conditional entropy: sum over samples and classes, averaged
    # over the T samples. The 1/T factor belongs to this term only, not to
    # the predictive entropy.
    ce = np.sum(p * np.log(p), axis=(0, 2)) / float(n_samples)

    return pe + ce

## **Let's get started**

### Set up modules

In [0]:
import os
import sys

import numpy as np
np.set_printoptions(precision=5)

import pandas as pd

%matplotlib notebook
import matplotlib.pyplot as plt

import pickle
import urllib
import tarfile

import tensorflow as tf
print(tf.__version__)

1.6.0


In [0]:
## Check GPU
# Multiply a 2x3 matrix by a 3x2 matrix with the ops pinned to the first GPU.
values = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

with tf.device('/gpu:0'):
    a = tf.constant(values, shape=[2, 3], name='a')
    b = tf.constant(values, shape=[3, 2], name='b')
    c = tf.matmul(a, b)

with tf.Session() as sess:
    product = sess.run(c)
    print(product)

[[22. 28.]
 [49. 64.]]


### Random Forest Baseline
Details can be found here:<br>
https://www.kaggle.com/atorin/mnist-digit-recognition-with-random-forests

### ConvNet - The hard way

In [0]:
## Load MNIST
from tensorflow.examples.tutorials.mnist import input_data
# Data is read from the local 'MNIST_data' directory; labels are requested as
# one-hot vectors (one_hot=True), matching the [None, 10] label placeholder used below.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

In [0]:
## Helper functions
def weight_variable(shape):
    """ Create a trainable weight tensor for a fully-connected or conv layer.
        Initial values are drawn from a truncated normal with stddev 0.1. """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """ Create a trainable bias tensor with every element initialised to 0.1. """
    return tf.Variable(tf.constant(0.1, shape=shape))


def conv2d(x, W):
    """ Convolve x with the filter bank W using stride 1 in every dimension
        and 'SAME' padding, so the output has the same height and width as x.
        Layout of x: [batch, height, width, channels] """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """ Max-pool x over 2x2 spatial windows with 'SAME' padding.
        ksize and strides are both 1-D lists of 4 ints, one entry per input
        dimension: the window covers 2x2 in height/width and moves by 2 in
        each, while the batch and channel dimensions are left untouched. """
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [0]:
## Architecture parameters
pic_res = 28    # images are pic_res x pic_res pixels
n_channel = 1   # number of colour channels used when reshaping the input

# Placeholders to feed stuff to network
# x: flattened images, one row of pic_res*pic_res values per example
x = tf.placeholder(tf.float32, shape=[None, pic_res*pic_res])
# y_true: labels, 10 values per example (one-hot, as loaded with one_hot=True)
y_true = tf.placeholder(tf.float32, shape=[None, 10])
# keep_prob: dropout keep probability, supplied through feed_dict on every run
keep_prob = tf.placeholder(tf.float32)

# Uncertainties
# Keep probability fed during training and during the stochastic forward passes;
# the accuracy reports feed 1.0 instead.
bayesian_keep_prob = 0.9

In [0]:
## The net
# Remember, the input image is a 1D tensor of length 784 (pic_res * pic_res).

# To apply the layer, we first reshape x to a 4d tensor, with the second and 
# third dimensions corresponding to image width and height, and the final dimension 
# corresponding to the number of color channels.
x_image = tf.reshape(x, [-1, pic_res, pic_res, n_channel])

# First conv layer
# The convolution will compute 32 features for each 3x3 patch. Its weight tensor will have a 
# shape of [3, 3, 1, 32]. The first two dimensions are the patch size, the next is the number 
# of input channels, and the last is the number of output channels. 
W_conv1 = weight_variable([3, 3, n_channel, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
d_conv1 = tf.nn.dropout(h_conv1, keep_prob=keep_prob)
h_pool1 = max_pool_2x2(d_conv1)

# Second conv layer
W_conv2 = weight_variable([3, 3, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
d_conv2 = tf.nn.dropout(h_conv2, keep_prob=keep_prob)
h_pool2 = max_pool_2x2(d_conv2)

# Dense layer 
# Each 'SAME'-padded, stride-2 pooling step rounds the resolution up, so after
# two of them the feature map side is ceil(pic_res / 4) (7 for pic_res = 28).
pooled_res = -(-pic_res // 4)
pic_res4_sq = pooled_res * pooled_res
W_fc1 = weight_variable([pic_res4_sq * 64, 1024])
b_fc1 = bias_variable([1024])

h_pool2_flat = tf.reshape(h_pool2, [-1, pic_res4_sq*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# Dropout
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob=keep_prob)

# Output
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# The logits are used as they are. Dropout must not be applied to the output
# layer: randomly zeroing and rescaling class scores distorts the
# cross-entropy loss during training and the class distributions obtained
# from the stochastic forward passes.
h_fc2 = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
y_conv = h_fc2

In [0]:
# Objective function
# NOTE: despite its name, y_pred holds the softmax cross-entropy loss between
# y_true and the logits y_conv, not predictions.
y_pred = tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_conv)
# Scalar training loss: mean of the loss values
cross_entropy = tf.reduce_mean(y_pred)

# Optimizer
# Adam with learning rate 1e-4, minimising the mean cross-entropy
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Evaluation
# A prediction counts as correct when the arg-max of the logits equals the
# arg-max of the label vector
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_true, 1))
# Accuracy = fraction of correct predictions among the examples fed
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [0]:
## Train - MNIST
# The session is deliberately left open: later cells reuse it for the
# stochastic forward passes.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

n_steps = int(1e4)
for i in range(n_steps + 1):
    batch = mnist.train.next_batch(64)
    images, labels = batch[0], batch[1]

    # Training: one optimiser step with dropout active
    train_feed = {x: images, y_true: labels,
                  keep_prob: float(bayesian_keep_prob)}
    _, loss = sess.run([train_step, cross_entropy], feed_dict=train_feed)

    # Status report every 1000 steps only
    if i % 1000 != 0:
        continue

    # Accuracy on the current mini-batch, dropout switched off
    batch_eval_feed = {x: images, y_true: labels, keep_prob: float(1.0)}
    train_acc = sess.run(accuracy, feed_dict=batch_eval_feed)

    # Accuracy on the full test set, dropout switched off
    test_eval_feed = {x: mnist.test.images, y_true: mnist.test.labels,
                      keep_prob: 1.0}
    test_acc = sess.run(accuracy, feed_dict=test_eval_feed)

    info = list(map(str, [i, loss, train_acc, test_acc]))
    print('\t'.join(info))

0	4.5360656	0.078125	0.0633
1000	0.30717698	0.984375	0.9688
2000	0.23724404	0.96875	0.9768
3000	0.15531254	0.96875	0.9846
4000	0.10624134	1.0	0.9861
5000	0.08902804	1.0	0.988
6000	0.15384579	0.984375	0.9883
7000	0.07657587	1.0	0.9907
8000	0.029367274	1.0	0.9908
9000	0.046900593	1.0	0.9909
10000	0.07492903	1.0	0.991


**Uncertainties from stochastic forward passes**

In [0]:
## Stochastic forward passes
# Run the test set through the network N times with dropout still active and
# store the class probabilities of every pass: samples[pass, example, class].
N = 100
n_test = mnist.test.images.shape[0]
samples = np.zeros(shape=(N, n_test, 10))

# The feed is identical for every pass; the passes differ only through the
# dropout masks drawn inside the graph.
mc_feed = {x: mnist.test.images,
           y_true: mnist.test.labels,
           keep_prob: bayesian_keep_prob}

for n in range(N):
    logits = sess.run(y_conv, feed_dict=mc_feed)
    samples[n] = softmax(logits)

In [0]:
# Variation ratio
# One value per test example, derived from the arg-max class of every sample.
vr = var_ratio(samples)

# Predictive entropy
# One value per test example: entropy of the sample-averaged class distribution.
pe = pred_ent(samples)

# Mutual information
# One value per test example, between the prediction and the model parameters.
mi = mut_inf(samples)

In [0]:
with open('cnn.res', 'w') as myf:

    # One tab-separated row per test example:
    # index, true class, predicted class, vr, pe, mi, 1 if correct else 0
    for n in range(mnist.test.labels.shape[0]):

        # Predicted class: arg-max of the distribution averaged over samples
        mean_probs = np.mean(samples[:, n, :], axis=0)
        ypred = np.argmax(mean_probs)
        ytrue = np.argmax(mnist.test.labels[n])

        fields = [n, ytrue, ypred, vr[n], pe[n], mi[n], int(ypred == ytrue)]
        res = list(map(str, fields))
        myf.write("\t".join(res) + "\n")

### ConvNet - with Keras<br>
You can find lots of Keras examples here:<br>
https://github.com/keras-team/keras/tree/master/examples

In [0]:
from keras import layers
from keras import models
from keras.utils import to_categorical

# NOTE: this rebinds the name `mnist`, which until now referred to the dataset
# object returned by input_data.read_data_sets(...). Cells above that use
# mnist.train / mnist.test will fail if they are re-run after this import
# without first re-running the TensorFlow "Load MNIST" cell.
from keras.datasets import mnist

Using TensorFlow backend.


In [0]:
## Load MNIST
# Unpacks (images, labels) pairs for the training and the test split.
# NOTE: the training cell further down calls mnist.load_data() again before
# preprocessing, so the arrays loaded here are replaced there.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [0]:
## Set up ConvNet
model = models.Sequential()

# No padding argument is given, so Conv2D uses its default 'valid' padding and
# every 3x3 convolution shrinks height/width by 2: 28 -> 26, pooled to 13.
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(layers.MaxPooling2D((2, 2)))

# 13 -> 11 after the convolution, pooled to 5
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

# 5 -> 3 after the convolution
model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# now: model.output_shape == (None, 3, 3, 64)
model.add(layers.Flatten())
# now: model.output_shape == (None, 3*3*64) == (None, 576)

# Classifier head: one hidden layer, then a softmax over the 10 digit classes
model.add(layers.Dense(units=64, activation='relu'))
model.add(layers.Dense(units=10, activation='softmax'))

In [0]:
## Train
# Reload the raw arrays so that this cell always starts from unprocessed data
# and can be re-run without reshaping or rescaling twice.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()


def _prepare_images(images):
    """ Add a trailing channel axis and rescale pixel values by 1/255.

    The number of images is inferred (-1) instead of being hard-coded, so
    the same code works for a subset of the data.
    """
    return images.reshape((-1, 28, 28, 1)).astype('float32') / 255


train_images = _prepare_images(train_images)
test_images = _prepare_images(test_images)

# One-hot encode the labels. The class count is given explicitly so both
# splits get 10 columns even if a class were missing from one of them.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', 
              metrics=['accuracy'])

model.fit(train_images, train_labels, epochs=5, batch_size=64)

Epoch 1/5
Epoch 2/5
Epoch 3/5

Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fa0c7ea35c0>

In [0]:
## Evaluation
# evaluate() is unpacked into the loss and the single metric requested in
# compile() (accuracy).
test_loss, test_acc = model.evaluate(test_images, test_labels)
# Bare expression: shown as the cell output
test_acc



0.9899