# Multi-View CNN

An implementation of the Multi-View CNN which learns to classify shapes according to their category.

**Multi-view Convolutional Neural Networks for 3D Shape Recognition**, Hang Su, Subhransu Maji, Evangelos Kalogerakis, Erik Learned-Miller, 2015.
[Paper](http://vis-www.cs.umass.edu/mvcnn/docs/su15mvcnn.pdf)

In [1]:
import sys
import math
import time
import re
import numpy as np
import tensorflow as tf
from datetime import datetime

git_path = '/Users/optas/Documents/Git_Repos/'
sys.path.insert(0, git_path)

from deep_tensor import autograph, print_status

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
%%javascript
// Set the notebook output area's auto-scroll threshold to 1000
// (presumably so that long training logs are not collapsed into a scroll box).
IPython.OutputArea.auto_scroll_threshold = 1000

<IPython.core.display.Javascript object>

In [3]:
# ---- Input pipeline --------------------------------------------------------
IMG_SIZE = (224, 224)   # Target size handed to the image stream's resize().
BATCH_SIZE = 1          # Batch size used by the batching stage.
PART_VIEWS = 80         # Number of views grouped together per shape.
CHANNELS = 1            # Channels requested from the image stream.
THREADS = 5             # Thread count handed to the batching stage.

# ---- Devices ---------------------------------------------------------------
GPUS = []  # Empty list implies CPU training.

# ---- Locations on disk -----------------------------------------------------
DATA_PATH = '/Users/optas/DATA/Shapes/Model_Net_10/Views/Phong/'
LOG_PATH = '/Users/optas/DATA/Neural_Nets/Train_Log/MVCNN/M10/Phong/'
MODEL_PATH = '/Users/optas/DATA/Neural_Nets/Models/MVCNN/M10/Phong/'

# ---- Optimisation schedule -------------------------------------------------
QUEUE_EPOCH_FRACTION = 0.1
NUM_EPOCHS_PER_DECAY = 8          # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.001     # Initial learning rate.

## Set Up the Model

In [4]:
def compute_inferenceA(in_image):
    """Build the convolutional stage (conv1 .. conv5) of the network.

    Args:
        in_image: Wrapped image tensor, as accepted by ``g.conv2d``.

    Returns:
        The wrapped output of the max-pool that follows ``conv5``.
    """
    def conv_relu(inputs, filters, field_size, stride, name):
        # One 'SAME'-padded convolution with zero-initialised bias, followed by a ReLU.
        conv = g.conv2d(inputs, filters=filters, field_size=field_size, stride=stride,
                        padding='SAME', name=name, init_bias=0.0)
        return conv.relu()

    def pool(inputs):
        # 3x3 max-pooling with stride 2 in both directions.
        return inputs.maxpool(kernel=(3, 3), stride=(2, 2))

    net = pool(conv_relu(in_image, 96, 7, 2, "conv1"))
    net = pool(conv_relu(net, 256, 5, 2, "conv2"))
    net = conv_relu(net, 512, 3, 1, "conv3")
    net = conv_relu(net, 512, 3, 1, "conv4")
    net = pool(conv_relu(net, 512, 3, 1, "conv5"))
    return net
    
def compute_inferenceB(in_signal, skip_softmax=False):
    """Build the fully connected stage (fc6, fc7 and optionally fc8).

    Args:
        in_signal: Wrapped tensor fed to the first fully connected layer.
        skip_softmax: When true, the final ``fc8`` layer (``num_classes``
            units) is omitted and the ``fc7`` output is returned instead.

    Returns:
        The wrapped output of ``fc8``, or of ``fc7`` (after ReLU and dropout)
        when ``skip_softmax`` is true.
    """
    hidden = in_signal
    for fc_name in ("fc6", "fc7"):
        activated = g.fully_connected(hidden, 4096, name=fc_name, init_bias=0.0).relu()
        # Dropout is only active while training; otherwise everything is kept.
        if g.is_training:
            keep_prob = 0.5
        else:
            keep_prob = 1.0
        hidden = activated.dropout(keep_prob)

    if skip_softmax:
        return hidden

    return g.fully_connected(hidden, num_classes, name="fc8", init_bias=0.0)

def compute_3d_inference(in_images, skip_softmax=False):
    """Run the multi-view network: per-view convolutions, view pooling, FC layers.

    Args:
        in_images: Wrapped tensor holding the views of every shape in the batch.
        skip_softmax: Forwarded to ``compute_inferenceB``.

    Returns:
        Whatever ``compute_inferenceB`` returns for the view-pooled features.
    """
    # Flatten the batch so that every view is handled as an independent image.
    view_shape = [-1, IMG_SIZE[1], IMG_SIZE[0], CHANNELS]
    all_views = tf.reshape(in_images.unwrap(), view_shape)

    # Convolutional stage, applied to each view separately.
    conv_features = compute_inferenceA(g.wrap(all_views)).unwrap()

    # Static shape of the convolutional output; entries 1..3 are kept as they are.
    dim1, dim2, dim3 = conv_features.get_shape().as_list()[1:4]

    # Regroup the features into buckets of PART_VIEWS views per shape.
    per_shape = tf.reshape(conv_features, [-1, PART_VIEWS, dim1, dim2, dim3])

    # Element-wise maximum across the views axis collapses it.
    pooled = tf.reduce_max(per_shape, reduction_indices=1)

    # Fully connected stage on the view-pooled features.
    return compute_inferenceB(g.wrap(pooled), skip_softmax)

In [5]:
def input_loader(source_path, path_regex=None):
    """Build the input pipeline that yields batches of (labels, shape images).

    Args:
        source_path: Root directory handed to ``g.filename_label_stream``.
        path_regex: List of regular expressions forwarded to
            ``g.filename_label_stream``. Defaults to
            ``['(.*)', '.*', r'\\.png$']``.

    Returns:
        A 4-tuple ``(input_data, label_count, sample_count, files_labels)``:
        the batched stream (labels at index 0, images at index 1), the value
        of ``files_labels.label_count()``, the value of
        ``files_labels.sample_count`` and the underlying filename/label stream.
    """
    if path_regex is None:
        # Build the default per call: a list literal in the signature would be a
        # single object shared (and possibly mutated) across all calls.
        path_regex = ['(.*)', '.*', r'\.png$']

    # Create a stream that walks the data files and builds a list of filenames & labels.
    files_labels = g.filename_label_stream(source_path, path_regex=path_regex, views=PART_VIEWS)

    # Apply a producer to the stream (producing another stream) for pushing data.
    producer = g.produce(files_labels, name='producer', shuffle=True, capacity=10000)

    # On the first stream dimension (filenames) apply an image stream to input images.
    shapes = g.image_stream(producer[0], name='shapes_images', channels=CHANNELS, ext='png') \
                .resize(IMG_SIZE[0], IMG_SIZE[1]) \
                .image_summary()

    # Get the second stream dimension as the labels.
    labels = producer[1].summary("labels")

    # Merge labels and shapes back into a single stream (labels first).
    input_data = g.merge([labels, shapes])

    # Batch the output (pull the data in batches from the streams above) to produce a multi-tensor.
    input_data = input_data.batch(name="batch", batch_size=BATCH_SIZE, queue_capacity=100,
                                  threads=THREADS, shuffle=True)

    return (input_data, files_labels.label_count(), files_labels.sample_count, files_labels)

## Train the Model

In [6]:
# Fresh graph builder for the training run.
g = autograph()

# The second pattern is 'train' here — presumably selecting the training split; the
# exact meaning of each pattern is defined by g.filename_label_stream (confirm).
train_regex = ['(.*)', 'train', '.*', '\.png$']
input_data, num_classes, sample_count, label_source = input_loader(DATA_PATH, path_regex=train_regex)

In [13]:
# Number of steps between learning-rate decays: NUM_EPOCHS_PER_DECAY epochs worth of batches.
decay_steps = (sample_count * NUM_EPOCHS_PER_DECAY)/BATCH_SIZE

# Momentum optimizer with an exponentially (staircase) decaying learning rate.
opt = g.momentum_optimizer("optimizer", momentum=0.9)
opt = opt.learning_rate(INITIAL_LEARNING_RATE)
opt = opt.exponential_decay(rate=LEARNING_RATE_DECAY_FACTOR, step_size=decay_steps, staircase=True)

loss = g.softmax_loss()

# Images (stream index 1) are the network input, labels (stream index 0) the targets.
trainer = g.trainer(input_data[1], input_data[0], compute_3d_inference, opt, loss, gpus=GPUS)
trainer = trainer.variable_summary()
trainer = trainer.gradient_summary()

In [None]:
# Run the training loop, attaching summaries, checkpointing and a status callback.
# The leading integer of summary/save/callback is presumably a step interval — confirm
# against the deep_tensor API.
total_steps = 60000
checkpoint_file = MODEL_PATH + "/m10_phong_in_cpu.ckpt"
with g.session() as s:
    run = s.train(trainer, total_steps, sample_count)
    run = run.summary(10, LOG_PATH, tracing=False)
    run = run.save(100, checkpoint_file)
    run = run.callback(10, print_status)
    run.run({})

step 270, epoch 0, loss = 2.49 (0.1 examples/sec); 8.296 sec/batch

In [None]:
# Restore only the weights (W) and biases (b) of conv1..conv5, fc6 and fc7 from the
# step-200 checkpoint; fc8 is not in the list.
restored_layers = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'fc6', 'fc7']
restored_variables = [_layer + _suffix for _layer in restored_layers for _suffix in ('/W', '/b')]
with g.session() as s:
    s.load(MODEL_PATH + "/m10_phong_in_cpu.ckpt", step=200, variables=restored_variables)


In [None]:
# Fetch one batch and show the first view of each shape in it, titled with its label.
s = g.session()
label_ids, imgs = s.run(input_data.unwrap(), {})
labels = [label_source.index_to_label(label_id) for label_id in label_ids]
gray_cmap = plt.get_cmap('gray')
for idx in range(BATCH_SIZE):
    plt.figure(num=None, figsize=(5, 5), dpi=80, facecolor='w', edgecolor='k')
    plt.title(labels[idx])
    # Indexing is (shape in batch, view, row, column, channel): take view 0, channel 0.
    first_view = imgs[idx, 0, :, :, 0]
    plt.imshow(first_view, cmap=gray_cmap, interpolation='nearest')
    plt.axis('off')

## Loading and Testing the Model

In [6]:
# Build a fresh graph over the 'test' files and restore the trained weights.
g = autograph()
input_data, num_classes, sample_count, label_source = input_loader(
    DATA_PATH, path_regex=['(.*)', 'test', '.*', r'\.png$'])

# To evaluate on a GPU instead, use: with tf.device('/gpu:' + str(GPUS[0])):
with tf.device('/cpu:0'):
    # Keep `inference` wrapped: the inspection cells call .relu()/.unwrap() on it.
    inference = compute_3d_inference(input_data[1])

    # Evaluation ops used by the accuracy loop: `accuracy` is the number of
    # correct predictions in one batch (sum of per-sample matches).
    logits = inference.unwrap()
    pred = tf.cast(tf.argmax(logits, dimension=1), tf.int32)
    equiv = tf.equal(pred, input_data[0].unwrap())
    accuracy = tf.reduce_sum(tf.cast(equiv, 'float'))

s = g.session().load(MODEL_PATH + "m10_phong_in_cpu.ckpt", step=100)

In [9]:
# Display the tensor obtained by applying a ReLU to the network output.
# NOTE(review): every evaluation of `inference.relu()` presumably adds another op to
# the graph (the recorded output is named 'fc8_2/Relu:0') — confirm, and consider
# building it once and reusing it.
inference.relu().unwrap()

<tf.Tensor 'fc8_2/Relu:0' shape=(1, 10) dtype=float32>

In [10]:
# Evaluate the ReLU-ed network output once with an empty feed dict; the inputs
# presumably come from the graph's own input pipeline.
testing = s.run(inference.relu().unwrap(), {})

In [11]:
# Show the values fetched by the previous cell.
testing

array([[ 0.        ,  0.11914377,  0.14545166,  0.        ,  0.        ,
         0.10920067,  0.        ,  0.22027047,  0.0235798 ,  0.        ]], dtype=float32)

In [11]:
# Evaluate classification accuracy over one pass of the loaded samples.
# `accuracy` is expected to be a graph op returning the number of correct
# predictions in one batch; it must be defined before this cell runs.
num_steps = sample_count // BATCH_SIZE  # Floor division: also valid under Python 3.
total_accuracy = 0.0
for step in range(num_steps):
    total_accuracy += s.run(accuracy, {})
    sys.stdout.write("\rstep = %d" % (step))
    sys.stdout.flush()

# Normalise by the samples actually evaluated: when sample_count is not a
# multiple of BATCH_SIZE the remainder is never run and must not be counted.
evaluated = num_steps * BATCH_SIZE
mean_accuracy = total_accuracy / evaluated if evaluated else float('nan')
print("Accuracy = " + str(mean_accuracy))

908
step = 907Accuracy = 0.120044052863


## Visualize the Model

In [5]:
# Build a fresh graph over the 'test' files that exposes, per batch, the labels,
# one thumbnail per shape and the features produced with skip_softmax=True.
g = autograph()

# `input_loader` is the loader function and returns four values; the stream is
# bound to `input_data` so that the builtin `input` is not shadowed.
input_data, num_classes, sample_count, label_source = input_loader(
    DATA_PATH, path_regex=['(.*)', 'test', '.*', r'\.png$'])

# Keep only the first view (size 1 along axis 1) of every shape as its thumbnail.
thumbnails = tf.slice(input_data[1].unwrap(), [0, 0, 0, 0, 0], [-1, 1, -1, -1, -1])
labels = input_data[0].unwrap()

# To run on a GPU instead, use: with tf.device('/gpu:' + str(GPUS[0])):
with tf.device('/cpu:0'):
    # skip_softmax=True drops fc8, so this is the fc7 output.
    inference = compute_3d_inference(input_data[1], skip_softmax=True).unwrap()
    pred = tf.cast(inference, tf.float64)

In [6]:
# Open a session and restore the checkpoint saved at step 30000.
# NOTE(review): the checkpoint name mentions "m40" while DATA_PATH points at
# Model_Net_10 — confirm this is the intended model.
checkpoint_file = MODEL_PATH + "/new_dg_m40_org.ckpt"
s = g.session().load(checkpoint_file, step=30000)

In [None]:
# Run the graph over the samples and collect, for every sample, its label
# (x_labels) and feature vector (y), plus one thumbnail per class (x_thumbnails).
x_labels = None
y = None
x_thumbnails = [None] * num_classes

# Gather per-batch arrays in lists and concatenate once at the end; concatenating
# inside the loop re-copies everything collected so far on every step.
label_chunks = []
feature_chunks = []

for step in range(sample_count // BATCH_SIZE):  # Floor division: also valid under Python 3.
    batch_labels, batch_features, batch_thumbnails = s.run([labels, pred, thumbnails], {})
    label_chunks.append(batch_labels)
    feature_chunks.append(batch_features)

    # Remember the first thumbnail encountered for each class label.
    for x_l, thumbnail in zip(batch_labels, batch_thumbnails):
        if x_thumbnails[x_l] is None:
            x_thumbnails[x_l] = thumbnail

    sys.stdout.write("\rstep = %d" % (step))
    sys.stdout.flush()

if label_chunks:
    x_labels = np.concatenate(label_chunks)
    y = np.concatenate(feature_chunks)

In [8]:
# One colour per class label (labels run from 0 to max(x_labels)), scaled to 0..255.
# The hsv colormap is cyclic: sampling both 0 and 1 would give the first and the
# last class almost the same red, so the endpoint is excluded.
num_colors = int(np.max(x_labels)) + 1
color_list = plt.cm.hsv(np.linspace(0, 1, num_colors, endpoint=False)) * 255

# CSS-style 'rgb(r,g,b)' strings built from the integer part of each channel.
colors = ['rgb({},{},{})'.format(int(c[0]), int(c[1]), int(c[2])) for c in color_list]

In [9]:
# Build the scatter-plot data from the collected features (y) and labels (x_labels).
# NOTE(review): `label_scatter` is not imported in any visible cell — confirm where
# it comes from and what `dim=2` does (presumably a 2-D projection).
plot_data = label_scatter(y, x_labels, dim=2, color_palette=colors)

In [None]:
# Render the scatter data in a 600x600 figure with a legend.
# NOTE(review): `Layout` and `iplot` are not imported in any visible cell
# (presumably they come from plotly) — confirm.
layout = Layout(showlegend=True, height=600, width=600)
fig = {'data': plot_data, 'layout': layout}
iplot(fig)

In [None]:
# Draw one thumbnail per class, stacked vertically and tinted with the class colour.
plt.figure(num=None, figsize=(100, 100), dpi=80, facecolor='w', edgecolor='k')

# x_thumbnails has one slot per class, so its length gives the number of rows
# (no `shapes` object exists at notebook scope).
num_rows = len(x_thumbnails)
for i, thumbnail in enumerate(x_thumbnails):
    if thumbnail is None:
        continue  # No sample of this class was seen.
    plt.subplot(num_rows, 1, i + 1)
    # Assumes pixel values are in 0..255 — TODO confirm against the image stream.
    gray = thumbnail.reshape([IMG_SIZE[1], IMG_SIZE[0], 1]) / 255.0
    # color_list is RGBA scaled to 0..255: keep RGB and bring it back to 0..1 so the
    # product broadcasts to an (H, W, 3) float image that imshow accepts.
    tint = color_list[i][:3] / 255.0
    img = np.clip(gray * tint, 0.0, 1.0)
    plt.imshow(img, interpolation='nearest')
    plt.axis('off')