# Deep Learning Image Compression Project MLP part

This code applies predictive coding algoritm with a basic MLP structure. Details of predictive coding algorithm can be found [here](https://web.stanford.edu/class/ee398a/handouts/lectures/06-Prediction.pdf)

The code has four parts

1. Huffman encoder (Coppied from [here](http://www.techrepublic.com/article/huffman-coding-in-python/))
2. Creation of prediction blocks and label for predictive coding
3. Linear regression algorithm for seeing the baseline
4. MLP algorithm (initial phase)

# Part-1: Huffman encoder


In [1]:
#Binary tree data structure
#http://www.techrepublic.com/article/huffman-coding-in-python/
class Node(object):
	left = None
	right = None
	item = None
	weight = 0

	def __init__(self, i, w):
		self.item = i
		self.weight = w

	def setChildren(self, ln, rn):
		self.left = ln
		self.right = rn

	def __repr__(self):
		return "%s - %s — %s _ %s" % (self.item, self.weight, self.left, self.right)

	def __cmp__(self, a):
		return cmp(self.weight, a.weight)

In [2]:
#Huffman Encoder
#http://www.techrepublic.com/article/huffman-coding-in-python/

from itertools import groupby
from heapq import *


#Huffman encoder  
def huffman(input):
    itemqueue =  [Node(a,len(list(b))) for a,b in groupby(sorted(input))]
    heapify(itemqueue)
    while len(itemqueue) > 1:
        l = heappop(itemqueue)
        r = heappop(itemqueue)
        n = Node(None, r.weight+l.weight)
        n.setChildren(l,r)
        heappush(itemqueue, n) 
        
    codes = {}
    def codeIt(s, node):
        if node.item:
            if not s:
                codes[node.item] = "0"
            else:
                codes[node.item] = s
        else:
            codeIt(s+"0", node.left)
            codeIt(s+"1", node.right)
    codeIt("",itemqueue[0])
    return codes, "".join([codes[a] for a in input])


In [3]:
#Test Huffman encoder with an image

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
img=mpimg.imread('lena512.bmp')
#print(img.shape)
#imgplot=plt.imshow(img,cmap='gray')

img_input=img.reshape([-1]).astype(str)
#print(img_input)
huffman_img = huffman(img_input)
#print(huffman_img[1])

#print('Huffman code for ' + str(img) + ' is ' + str(huffman_img))
#print('Original length is '+str(len(input) * 8)+', length of huffman coding is '+ str(len(huffman(input)[1])))
print('Bitrate of the original image')
print('Bits per pixel is ' + str(float(len(huffman_img[1])/float(len(img_input)))) + ' bpp')

Bitrate of the original image
Bits per pixel is 7.46820831299 bpp


# Part-2: Creation of prediction blocks and label for predictive coding


In [4]:
#Lossless image copmpression using predictive coding. For reference see below
#(https://web.stanford.edu/class/ee398a/handouts/lectures/06-Prediction.pdf)

from itertools import product


#Returns prediction blocks and the corresponding pixels in the image
#Very naive implementation, neglects boundaries, can be improved further
def pred_vectors(img,pred_size):
    (n,m)=img.shape #image size
    k,l=pred_size #Size of the predictive window
    
    fvec=np.zeros([(n-k-1)*(m-2*l),2*k*l+k+l])
    #print(fvec.shape)
    label = np.zeros([(n-k-1)*(m-2*l),1])
    for (i,j) in product(range(k,n-1), range(l,m-l)):
        #print(i,j)
        idx = (i-k)*(m-2*l)+j-l
        fvec_current =img[i-k:i,j-l:j+l+1].reshape([-1])
        fvec_current = np.append(fvec_current,img[i,j-l:j].reshape([-1]))
        fvec[idx,:]=fvec_current
        label[idx]=img[i,j]
        
    return fvec, label



fvec,label = pred_vectors(img,[3,7])

# Part-3: Linear regression algorithm for seeing the baseline


In [5]:
#First trial: Simple regression network. No relation to deep learning just to gain some intuition


from sklearn import datasets, linear_model


#Create the regression model using sklearn
regr = linear_model.LinearRegression()
regr.fit(fvec, label)

#Predict and quantize the labels
label_pred = np.round(regr.predict(fvec))

#Calculate the error
err=label_pred-label;

print('Results with linear regression')
#MSE
print('MSE is '  + str(np.mean(err**2)))

#Calculate Huffman coding of the error
huffman_err = huffman(err.reshape([-1]).astype(str))
print('Bits per pixel is ' + str(float(len(huffman_err[1])/float(len(err)))) + ' bpp')


Results with linear regression
MSE is 33.4797180849
Bits per pixel is 4.43469547481 bpp


# Part-4: MLP algorithm (initial phase)

In [None]:
#Second trial: MLP

import tensorflow as tf

def mlp(x, hidden_sizes, activation_fn=tf.nn.relu,dropout_rate=1.0,std_dev=1.0):
    if not isinstance(hidden_sizes, (list, tuple)):
        raise ValueError("hidden_sizes must be a list or a tuple")
    scope_args = {'initializer': tf.random_normal_initializer(stddev=std_dev)}
    for k in range(len(hidden_sizes)-1):
        layer_name="weights"+str(k)
        #FC layers
        with tf.variable_scope(layer_name, **scope_args):
            W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[k]])
            #b = tf.get_variable('b', shape=[hidden_sizes[k]])
            x = activation_fn(tf.matmul(x, W))# + b)
            #Dropout before the last layer
            x = tf.nn.dropout(x, keep_prob=dropout_rate)
    #Softmax layer
    with tf.variable_scope('outlayer', **scope_args):
        W = tf.get_variable('W', shape=[x.shape[-1], hidden_sizes[-1]])
        #b = tf.get_variable('b', shape=[hidden_sizes[-1]])
        return tf.matmul(x, W)# + b
    


In [None]:
#Normalize the vectors and labels
#Sometimes does not work beacuse of wron initialization

fvec_n=fvec/np.round(np.max(label))
label_n = label/np.round(np.max(label))
def test_classification(model_function, learning_rate=0.1):

    with tf.Graph().as_default() as g:
        # where are you going to allocate memory and perform computations
        with tf.device("/gpu:0"):
            
            # define model "input placeholders", i.e. variables that are
            # going to be substituted with input data on train/test time
            x_ = tf.placeholder(tf.float32, [None, fvec_n.shape[1]])
            y_ = tf.placeholder(tf.float32, [None, 1])
            y_logits = model_function(x_)
            
            # naive implementation of loss:
            # > losses = y_ * tf.log(tf.nn.softmax(y_logits))
            # > tf.reduce_mean(-tf.reduce_sum(losses, 1))
            # can be numerically unstable.
            #
            # so here we use tf.nn.softmax_cross_entropy_with_logits on the raw
            # outputs of 'y', and then average across the batch.
            
            #Basic MSE loss
            loss = tf.reduce_mean(tf.pow(tf.subtract(y_,y_logits), 2.0))
            #loss = tf.reduce_mean(tf.abs(tf.subtract(y_,y_logits)))
            #train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
            train_step = tf.train.AdamOptimizer(learning_rate=5e-3,beta1=0.3,beta2=0.999, 
                                                epsilon=1e-08,use_locking=False).minimize(loss)
           
            y_pred = y_logits
            correct_prediction = tf.equal(y_pred, y_)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with g.as_default(), tf.Session() as sess:
        # that is how we "execute" statements 
        # (return None, e.g. init() or train_op())
        # or compute parts of graph defined above (loss, output, etc.)
        # given certain input (x_, y_)
        sess.run(tf.initialize_all_variables())
        #sess.run(tf.global_variables_initializer())
        
        # train
        #print(label.shape[0])
        ids=[i for i in range(100)]
        for iter_i in range(50001):
            #print(label.shape[0])
            #print(2*my_range)
            batch_xs = fvec_n[ids,:] 
            batch_ys = label_n[ids]
            ids=[(ids[0]+100+i)%label.shape[0] for i in range(100)]
            sess.run(train_step, feed_dict={x_: batch_xs, y_: batch_ys})
            
            # test trained model
            if iter_i % 2000 == 0:
                tf_feed_dict = {x_: fvec_n, y_: label_n}
                acc_value = sess.run(loss, feed_dict=tf_feed_dict)
                y_pred_val = sess.run(y_pred, feed_dict=tf_feed_dict)
                err_value = np.round((sess.run(y_pred, feed_dict=tf_feed_dict)-label_n)*255)
                huffman_err = huffman(err_value.reshape([-1]).astype(str))
                huffman_bpp = float(len(huffman_err[1])/float(len(err_value)))
                print('iteration %d\t mse loss: %.5f\t Huffman bitrate is %.3f'%(iter_i, acc_value, huffman_bpp))
        err_value =  np.round((sess.run(y_pred, feed_dict=tf_feed_dict)-label_n)*255)
        print(err_value)
                
test_classification(lambda x: mlp(x, [32,16,8,4,2,1], activation_fn=tf.nn.relu,std_dev=1e-1), learning_rate=0.1)

Instructions for updating:
Use `tf.global_variables_initializer` instead.
iteration 0	 mse loss: 0.29553	 Huffman bitrate is 7.474
iteration 2000	 mse loss: 0.00106	 Huffman bitrate is 4.802
iteration 4000	 mse loss: 0.00101	 Huffman bitrate is 4.725
iteration 6000	 mse loss: 0.00085	 Huffman bitrate is 4.723
iteration 8000	 mse loss: 0.00109	 Huffman bitrate is 4.704
iteration 10000	 mse loss: 0.00072	 Huffman bitrate is 4.619
iteration 12000	 mse loss: 0.00069	 Huffman bitrate is 4.620
