# Extracting Layer Weights

This ipython notebook can be used to generate cluster of CNN layer weights from a set of images, and cluster those images based on the layer weights. Used to generate a sample image cluster for the image clustering UI. 

## Requirements
* Environment variables for CAFFE_HOME set to the location of the CAFFE installation for NVIDIA's fork of the caffe framework
* Environment variable for DIGITS_HOME set to the location of the NVIDIA Digits installation
* Existing model for doing the feature detection
* sklearn version > 0.18 

In [None]:
import os
# get the location of your caffe and digits
caffe_home = os.environ['CAFFE_HOME']
digits_home = os.environ['DIGITS_HOME']

# set the model directory and the mean file
model_dir = "/data/digits/jobs/20150910-211807-d169/"
mean_file = "/data/digits/jobs/20150910-150610-9373/mean.binaryproto"
test_image_file = r"/data/1020/data/military uniform/937.jpg"

# import some digits webserver code
import sys
sys.path.append(caffe_home +"/python/")
sys.path.append(digits_home)
sys.path.append(digits_home + 'digits')

import digits
from digits import config
config.load_config()
import utils
from job import Job


In [None]:
import caffe
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from caffe.proto import caffe_pb2
import sklearn 
import glob
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import shutil

In [2]:
#configure plotting
plt.rcParams['figure.figsize'] = (10,10)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

In [3]:
caffe.set_mode_cpu()

In [4]:
# Create the CNN object using the caffemodel file for model parameters 
# and the prototxt for defining the model architecture
netdeploymet_proto = model_dir + "deploy.prototxt"
latest_model_iteration = model_dir + "snapshot_iter_505860.caffemodel"

net = caffe.Net(netdeploymet_proto,
               latest_model_iteration,
               caffe.TEST)

In [5]:
#im = caffe.io.load_image("/home/sander/Documents/Pixelate/corgi_test.png")
im = caffe.io.load_image(test_image_file)


In [None]:
def getlayerweightsWeb(imfname, net=net,\
                  meanfname="/data_ebs/digits/jobs/20150910-150610-9373/mean.binaryproto",
                 layer='fc7'):
    
    

    network = caffe_pb2.NetParameter()
    deploy_file="/data_ebs/digits/jobs/20150910-211807-d169/deploy.prototxt"
    infile = open(deploy_file)
    text_format.Merge(infile.read(), network)

    imtest = imfname
    image = utils.image.load_image(imtest)
    # need programmatic access
    dataset = Job.load("20150910-150610-9373")
    # get transformer
    data_shape = None
    channel_swap = None
    mean_pixel = None
    data_shape = network.input_dim[:4]

    channel_swap = (2,1,0)
    infile = open(meanfname,'rb')
    blob = caffe_pb2.BlobProto()
    blob.MergeFromString(infile.read())
    mean_pixel = np.reshape(blob.data,\
                                (\
                                    dataset.image_dims[2],\
                                    dataset.image_dims[0],\
                                    dataset.image_dims[1],\
                                    )\
                                ).mean(1).mean(1)


    t = caffe.io.Transformer(
                    inputs = {'data':  data_shape}
                    )
    t.set_transpose('data', (2,0,1))
    t.set_channel_swap('data', channel_swap)
    t.set_mean('data', mean_pixel)
    image = np.array(image)
    preprocessed = None
    try:
        preprocessed = t.preprocess('data',image)
    except RuntimeError as e:
        print e
        return None
        
    test_shape = (1,) + preprocessed.shape
    if net.blobs['data'].data.shape != test_shape:
                net.blobs['data'].reshape(*test_shape)

    net.blobs['data'].data[...] = preprocessed
    output = net.forward(end=layer)
    
    return output[layer]

In [None]:
# get a list of files from
catlist = glob.glob("/data_ebs/1020/data/*")
imcatlist = []
for cat in catlist:
    imcatlist.extend(glob.glob(cat+"/*")[:100])

In [15]:
imfilelist = glob.glob("/data/twitterpicsdata/*jpg")


In [16]:
catlist = glob.glob("/data/1020/data/*")
imcatlist = []
for cat in catlist:
    imcatlist.extend(glob.glob(cat+"/*")[:10])

print imcatlist[:30]

['/data/1020/data/baseball/316.jpg', '/data/1020/data/baseball/525.jpg', '/data/1020/data/baseball/393.jpg', '/data/1020/data/baseball/618.jpg', '/data/1020/data/baseball/181.jpg', '/data/1020/data/baseball/783.jpg', '/data/1020/data/baseball/900.jpg', '/data/1020/data/baseball/169.jpg', '/data/1020/data/baseball/119.jpg', '/data/1020/data/baseball/107.jpg', '/data/1020/data/fire engine/316.jpg', '/data/1020/data/fire engine/525.jpg', '/data/1020/data/fire engine/393.jpg', '/data/1020/data/fire engine/1236.jpg', '/data/1020/data/fire engine/618.jpg', '/data/1020/data/fire engine/975.jpg', '/data/1020/data/fire engine/1030.jpg', '/data/1020/data/fire engine/682.jpg', '/data/1020/data/fire engine/783.jpg', '/data/1020/data/fire engine/900.jpg', '/data/1020/data/patas/316.jpg', '/data/1020/data/patas/525.jpg', '/data/1020/data/patas/393.jpg', '/data/1020/data/patas/618.jpg', '/data/1020/data/patas/181.jpg', '/data/1020/data/patas/682.jpg', '/data/1020/data/patas/783.jpg', '/data/1020/data

In [19]:
imfilelist.extend(imcatlist)

In [23]:
imweights = np.zeros([len(imfilelist),4096],dtype=float)


In [None]:
for i, im in enumerate(imfilelist[965:]): 
    imweights[i+965,:] = get7thweights(im)

In [None]:
for i, im in enumerate(imfilelist[:1000]): 
    imweights[i+965,:] = get7thweights(im)

In [None]:
imweights[503,:]

In [None]:
imweights[965,:]

In [None]:
np.save("imweights.npy",imweights)

In [6]:
imweights = np.load("imweights.npy")

In [None]:
model = TSNE(n_components=2, perplexity=5)

In [8]:
model2 = TSNE(n_components=3)

In [None]:
trweights3d = model2.fit_transform(imweights)

In [9]:
trweights3d = model2.fit_transform(imweights[:1000,:])

In [21]:
len(imfilelist)

10172

In [None]:
trweights = model.fit_transform(imweights)

In [None]:
f = open("/home/sander/twitterpicsdata.txt","w")

In [None]:
f2 = open("/home/sander/imdata2.txt","w")

In [None]:
f.write("filename\tx\ty\n")
for i in range(len(imfilelist[:54])): 
    f.write("%s\t%3.1f\t%3.1f\n" % (imfilelist[i].split("/")[-1], trweights[i,0], trweights[i,1] ))

In [None]:
f2.write("filename\tx\ty\n")
for i in range(len(imfilelist)): 
    f2.write("%s\t%3.1f\t%3.1f\n" % (imfilelist[i], trweights[i,0], trweights[i,1] ))

In [None]:
f.close()
f2.close

In [None]:
f2.close()

In [None]:
import shutil

In [None]:
namemap = {}

In [23]:
namemap = {j : i for i, j in enumerate(set([f.split("/")[-2] for f in imfilelist]))}

In [33]:
for f in imfilelist[:1000]:
    ext = f.split("/")[-1].split(".")[-1]
    fname = f.split("/")[-1].split(".")[0]
    subdir = f.split("/")[-2]
    shutil.copyfile(f, "/data/staging/forJason/%s_%i.%s" % (fname, namemap[subdir],ext))

In [None]:
outfile = open("/home/sander/imdata3.txt","w")
for i, f in enumerate(imfilelist):
    ext = f.split("/")[-1].split(".")[-1]
    fname = f.split("/")[-1].split(".")[0]
    subdir = f.split("/")[-2]
    newfile = "%s_%i.%s" % (fname, namemap[subdir],ext)
    outfile.write("%s\t%3.5f\t%3.5f\n" % (newfile, trweights[i,0], trweights[i,1] ))
outfile.close()
    

In [30]:
outfile = open("/home/sander/imdata3d.txt","w")
for i, f in enumerate(imfilelist[:1000]):
    ext = f.split("/")[-1].split(".")[-1]
    fname = f.split("/")[-1].split(".")[0]
    subdir = f.split("/")[-2]
    newfile = "%s_%i.%s" % (fname, namemap[subdir],ext)
    outfile.write("%s\t%3.5f\t%3.5f\t%3.5f\n" % (newfile, trweights3d[i,0], trweights3d[i,1], trweights3d[i,2] ))
outfile.close()
    

In [31]:
import shutil

In [32]:
for i, f in enumerate(imfilelist[:1000]):
    ext = f.split("/")[-1].split(".")[-1]
    fname = f.split("/")[-1].split(".")[0]
    subdir = f.split("/")[-2]
    newfile = "%s_%i.%s" % (fname, namemap[subdir],ext)
    shutil.copyfile("//data//staging//%s" % newfile, "//data//staging//forJason//%s"% newfile)


IOError: [Errno 2] No such file or directory: '//data//staging//393_362.jpg'