In [1]:
import numpy as np
import os
import sys


In [2]:
sys.path.append('/home/caffe-user/caffe/python/')


In [3]:
import caffe


In [4]:
caffe.set_mode_gpu()


In [5]:
import matplotlib.image as img
import matplotlib.pyplot as plt
%matplotlib inline


In [6]:
from scipy import signal

In [7]:
# Build the network from the bvlc_reference_caffenet deploy.prototxt definition and the
# matching bvlc_reference_caffenet.caffemodel weights file, passing caffe.TEST as the phase.
net = caffe.Net('/home/caffe-user/caffe/models/bvlc_reference_caffenet/deploy.prototxt', '/home/caffe-user/caffe/models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel', caffe.TEST)


In [8]:
# Create the input preprocessor, keyed on the 'data' blob and given that blob's current shape.
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})


In [9]:
# This is from our current code which uses ILSVRC_2012 image set for training
# The PlacesNet image set is different
trans_model = np.load('/home/caffe-user/caffe/python/caffe/imagenet/ilsvrc_2012_mean.npy')


In [10]:
# the mean of any dataset composed of natural images should be close to [104, 117, 123].
# My guess for greyscale is 115 ... 
trans_model.mean(1).mean(1)



array([ 104.00698793,  116.66876762,  122.67891434])

In [11]:
# Configure how raw images are converted before being written into the 'data' blob.
# Move the last image axis to the front (presumably H x W x C -> C x H x W; confirm).
transformer.set_transpose('data', (2, 0, 1))
# Per-channel mean: trans_model averaged over its last two axes.
# NOTE(review): an earlier comment here attributed these values to
# places205CNN_mean.binaryproto, but trans_model is loaded from ilsvrc_2012_mean.npy
# in a previous cell -- confirm which source is intended.
transformer.set_mean('data', trans_model.mean(1).mean(1))
# Raw-scale factor of 255 (presumably because loaded images are in [0, 1]; confirm).
transformer.set_raw_scale('data', 255)
# Reverse the channel order (presumably RGB -> BGR; confirm).
transformer.set_channel_swap('data', (2, 1, 0))


In [12]:
def myTestFiles(list_path='/downloads/kenglish/data/protest_concert/files.lst'):
    """Return the image paths listed in a text file, one path per line.

    Parameters
    ----------
    list_path : str, optional
        Location of the listing file. Defaults to the protest/concert listing
        this notebook uses, so a bare ``myTestFiles()`` call reads the same
        file as before.

    Returns
    -------
    list of str
        The paths in file order with trailing whitespace (including the
        newline) stripped. Lines that are empty after stripping are skipped,
        so a stray blank line does not become an empty path for the image
        loader.

    Raises
    ------
    IOError / OSError
        If ``list_path`` cannot be opened.
    """
    with open(list_path, 'r') as f:
        stripped = (line.rstrip() for line in f)
        # Drop blank lines; an empty string is never a usable image path.
        return [fpath for fpath in stripped if fpath]

In [13]:
# NOTE:  I had to delete corrupted files from the file list
files = myTestFiles()
# Resize the input blob so the whole file list goes through in one batch.
net.blobs['data'].reshape(len(files), 3, 227, 227)
# Preprocess each image and store it in the batch slot matching its position in `files`.
for i, fpath in enumerate(files):
    image = caffe.io.load_image(fpath)
    net.blobs['data'].data[i] = transformer.preprocess('data', image)


In [14]:
net.forward()

{'prob': array([[  2.96074109e-06,   1.05774234e-05,   1.10066867e-05, ...,
           1.86541479e-06,   9.31652976e-05,   1.10132733e-05],
        [  2.31958765e-07,   5.96108111e-06,   8.71833799e-06, ...,
           2.25066628e-06,   4.03781160e-05,   3.67437224e-06],
        [  1.48275910e-06,   1.07829874e-04,   4.29514557e-06, ...,
           1.02066115e-05,   3.94420727e-04,   5.48260141e-05],
        ..., 
        [  7.33461434e-08,   2.29140915e-06,   8.76468278e-07, ...,
           3.32213006e-07,   2.56489602e-05,   6.61659487e-06],
        [  9.34105415e-08,   5.09251151e-07,   6.98679372e-08, ...,
           4.79063829e-08,   5.71175178e-06,   7.25702932e-07],
        [  2.46839761e-07,   1.81030136e-05,   2.67839846e-06, ...,
           3.18771924e-07,   2.82288529e-04,   1.87977330e-05]], dtype=float32)}

In [15]:
net.blobs.keys()

['data',
 'conv1',
 'pool1',
 'norm1',
 'conv2',
 'pool2',
 'norm2',
 'conv3',
 'conv4',
 'conv5',
 'pool5',
 'fc6',
 'fc7',
 'fc8',
 'prob']

In [16]:
net.blobs['norm2'].data[0].shape

(256, 13, 13)

In [17]:
min(net.blobs['norm2'].data[0].flatten()) , max(net.blobs['norm2'].data[0].flatten())

(0.0, 137.5351)

In [18]:
np.dot(net.blobs['norm2'].data[0].flatten()/256.0,net.blobs['norm2'].data[1].flatten()/256.0)

303.16623

In [19]:
# Collect the 'conv3' blob entry for every image in the input batch.
# NOTE(review): `feat` is not referenced by any later cell visible here, and the
# analysis below works on 'norm2' rather than 'conv3' -- confirm this is still needed.
feat = [ net.blobs['conv3'].data[i] for i in range(len(net.blobs['data'].data))]

In [20]:
def dist(V, W, scale):
    """Scaled mean squared difference between two arrays.

    Both arrays are flattened, their element-wise difference is divided by
    ``scale``, and the sum of squares of that scaled difference is divided
    by the number of elements in ``V``.

    Parameters
    ----------
    V, W : numpy.ndarray
        Arrays to compare; they are flattened before subtraction.
    scale : number
        Divisor applied to the difference before squaring.

    Returns
    -------
    numpy scalar
        ``sum(((W - V) / scale) ** 2) / V.size``.
    """
    delta = W.flatten() - V.flatten()
    delta = delta / scale
    n_elements = np.prod(V.shape)
    return np.dot(delta, delta) / n_elements

In [21]:
dist(net.blobs['norm2'].data[0][0], net.blobs['norm2'].data[1][0], 255.0)

0.0083628970490404846

In [22]:
# Score every unordered image pair (i < ii) on each of the 256 'norm2' channels,
# then order the (distance, i, ii, channel) tuples from closest to farthest.
norm2 = net.blobs['norm2'].data
N_images = len(norm2)
all_distances = []
for i in range(N_images):
    first = norm2[i]
    for ii in range(i+1, N_images):
        second = norm2[ii]
        for j in range(256):
            all_distances.append((dist(first[j], second[j], 255.0), i, ii, j))
all_distances.sort()


In [23]:
len(all_distances)

4942336

In [24]:
# TOP of match list:
# There are a number of exact matches between a given feature identified in pairs of images
def exactMatchCount(distances=None, tol=0.000000001):
    """Count the leading entries of a sorted distance list that are effectively zero.

    Parameters
    ----------
    distances : sequence of tuples, optional
        Entries whose first element is the distance, sorted ascending.
        Defaults to the notebook-level ``all_distances`` list, so a bare
        ``exactMatchCount()`` call behaves as before.
    tol : float, optional
        An entry counts as an exact match when its distance is strictly
        below this value. Defaults to the original 1e-9 threshold.

    Returns
    -------
    int
        Number of entries at the front of ``distances`` whose distance is
        below ``tol``.
    """
    if distances is None:
        distances = all_distances
    n = 0
    for tup in distances:
        if tup[0] < tol:
            n += 1
        else:
            # The list is sorted ascending, so nothing after this can match.
            break
    return n
exactMatchCount()

19636

In [25]:
# MIDDLE of match list:
# Floor division keeps the index an int on both Python 2 and Python 3
# (a float index would make the slice below raise TypeError).
middle_index = len(all_distances) // 2
all_distances[middle_index-5:middle_index+5]

[(0.013693063216801931, 46, 164, 20),
 (0.013693064627562755, 96, 172, 36),
 (0.013693077324410162, 104, 191, 95),
 (0.013693080145931808, 22, 130, 198),
 (0.013693085788975101, 137, 183, 189),
 (0.013693098485822508, 53, 124, 27),
 (0.013693099896583332, 89, 121, 24),
 (0.013693099896583332, 89, 138, 24),
 (0.013693102718104977, 0, 89, 212),
 (0.013693104128865801, 34, 121, 148)]

In [26]:
# BOTTOM of match list:
# We see that the way we computed "distance" doesn't give the most intuitive numerical results ...
# The entries at the end of the sorted list are the feature/image-pair combinations
# that are as different as two features can get in this computation.
# NOTE(review): this note originally quoted "0.138" as the maximum, but the output
# captured below tops out at roughly 0.152 -- presumably the figure came from a different run.
all_distances[-10:]

[(0.14109437705497063, 103, 149, 255),
 (0.1411789662739229, 103, 156, 255),
 (0.14199636109481903, 103, 174, 255),
 (0.14308360624595506, 103, 152, 255),
 (0.14401124356061043, 47, 62, 90),
 (0.14488600555961653, 36, 47, 90),
 (0.14578444576827732, 48, 62, 90),
 (0.14594951607066498, 36, 48, 90),
 (0.15081880783893653, 25, 62, 90),
 (0.15156951317420372, 25, 36, 90)]

In [27]:
# pulls the top ==num== pairs from the sorted all_distances array.
def topPairs(num, distances=None):
    """Tally how often each image pair appears among the ``num`` closest entries.

    Parameters
    ----------
    num : int
        How many entries to take from the front of the sorted list. Values
        larger than the list length use the whole list instead of raising
        IndexError; values <= 0 yield an empty result. Floats are truncated
        with ``int`` so results of true division are accepted.
    distances : sequence of tuples, optional
        ``(distance, i, ii, channel)`` tuples sorted ascending by distance.
        Defaults to the notebook-level ``all_distances`` list, so existing
        ``topPairs(num)`` calls behave as before.

    Returns
    -------
    list of (int, (int, int))
        ``(count, (i, ii))`` tuples sorted in descending order (by count,
        then by pair).
    """
    if distances is None:
        distances = all_distances
    # Slicing clamps at the end of the list; max() keeps a negative num from
    # being read as "all but the last n".
    limit = max(int(num), 0)
    pair_dict = {}
    for entry in distances[:limit]:
        pair = (entry[1], entry[2])
        pair_dict[pair] = pair_dict.get(pair, 0) + 1
    pair_list = [(count, pair) for pair, count in pair_dict.items()]
    pair_list.sort(reverse=True)
    return pair_list

In [28]:
# Tally pairs over the closest eighth of all comparisons; floor division keeps
# the count an int on both Python 2 and Python 3.
topPairs(len(all_distances) // 8)

[(256, (145, 147)),
 (256, (126, 131)),
 (256, (122, 142)),
 (256, (121, 138)),
 (256, (116, 148)),
 (256, (113, 128)),
 (256, (106, 120)),
 (256, (102, 143)),
 (256, (101, 148)),
 (256, (101, 116)),
 (256, (99, 125)),
 (256, (31, 33)),
 (242, (125, 137)),
 (242, (99, 137)),
 (109, (7, 45)),
 (105, (7, 24)),
 (104, (7, 35)),
 (102, (7, 28)),
 (101, (125, 147)),
 (101, (125, 145)),
 (101, (114, 146)),
 (101, (99, 147)),
 (101, (99, 145)),
 (100, (35, 45)),
 (99, (24, 35)),
 (98, (137, 147)),
 (98, (137, 145)),
 (98, (125, 134)),
 (98, (108, 134)),
 (98, (99, 134)),
 (98, (7, 22)),
 (97, (28, 45)),
 (97, (28, 35)),
 (96, (24, 45)),
 (96, (7, 33)),
 (96, (7, 31)),
 (94, (134, 137)),
 (94, (45, 156)),
 (94, (22, 35)),
 (93, (108, 125)),
 (93, (99, 108)),
 (92, (137, 146)),
 (92, (35, 156)),
 (92, (22, 24)),
 (91, (22, 45)),
 (91, (7, 156)),
 (89, (125, 146)),
 (89, (108, 137)),
 (89, (99, 146)),
 (89, (24, 28)),
 (88, (146, 147)),
 (88, (145, 146)),
 (88, (114, 147)),
 (88, (114, 145)),
 (

In [29]:
def getTag(imageNum):
    """Map an image index to its category label.

    Indices below 51 are "day_concert", below 100 "day_protest", below 150
    "night_concert", and everything else "night_protest".
    """
    # (exclusive upper bound, label), checked in ascending order.
    boundaries = (
        (51, "day_concert"),
        (100, "day_protest"),
        (150, "night_concert"),
    )
    for upper, label in boundaries:
        if imageNum < upper:
            return label
    return "night_protest"


In [34]:
# Export the pair tallies for the closest eighth of all comparisons as a tab-separated
# file with columns: first image, second image, match count, first tag, second tag,
# and a 1/0 flag for whether the two tags agree.
with open('/downloads/kenglish/data/BLVC-Reference-closest_images_by_norm2_distance.tab', 'w') as f:
    # Floor division keeps the count an int on both Python 2 and Python 3.
    for count, (first, second) in topPairs(len(all_distances) // 8):
        t0 = getTag(first)
        t1 = getTag(second)
        same_tag = 1 if t0 == t1 else 0
        # f.write replaces the Python-2-only `print >> f` statement; the explicit
        # newline reproduces what print appended.
        f.write('\t'.join(map(str, [first, second, count, t0, t1, same_tag])) + '\n')

In [31]:
len(files)

197

In [32]:
197*196/2

19306

In [33]:
# find how many pairs 'match' for a given match count score
def pairCountsByCountBySortedFraction(fraction):
    """For each score threshold 256..1, count the pairs scoring at least that much.

    The leading ``fraction`` of ``all_distances`` is tallied with
    ``topPairs``, which returns ``(count, pair)`` tuples in descending
    order. For every threshold the leading entries whose count is at or
    above it are counted.

    Returns a list of ``(min_score, number_of_pairs)`` tuples, from
    threshold 256 down to 1.
    """
    ranked = topPairs(int(fraction * len(all_distances)))
    results = []
    for min_score in range(256, 0, -1):
        # ranked is in descending order, so stop at the first entry below the threshold.
        matched = 0
        while matched < len(ranked) and ranked[matched][0] >= min_score:
            matched += 1
        results.append((min_score, matched))
    return results

pairCountsByCountBySortedFraction(1.0/8.0)

[(256, 12),
 (255, 12),
 (254, 12),
 (253, 12),
 (252, 12),
 (251, 12),
 (250, 12),
 (249, 12),
 (248, 12),
 (247, 12),
 (246, 12),
 (245, 12),
 (244, 12),
 (243, 12),
 (242, 14),
 (241, 14),
 (240, 14),
 (239, 14),
 (238, 14),
 (237, 14),
 (236, 14),
 (235, 14),
 (234, 14),
 (233, 14),
 (232, 14),
 (231, 14),
 (230, 14),
 (229, 14),
 (228, 14),
 (227, 14),
 (226, 14),
 (225, 14),
 (224, 14),
 (223, 14),
 (222, 14),
 (221, 14),
 (220, 14),
 (219, 14),
 (218, 14),
 (217, 14),
 (216, 14),
 (215, 14),
 (214, 14),
 (213, 14),
 (212, 14),
 (211, 14),
 (210, 14),
 (209, 14),
 (208, 14),
 (207, 14),
 (206, 14),
 (205, 14),
 (204, 14),
 (203, 14),
 (202, 14),
 (201, 14),
 (200, 14),
 (199, 14),
 (198, 14),
 (197, 14),
 (196, 14),
 (195, 14),
 (194, 14),
 (193, 14),
 (192, 14),
 (191, 14),
 (190, 14),
 (189, 14),
 (188, 14),
 (187, 14),
 (186, 14),
 (185, 14),
 (184, 14),
 (183, 14),
 (182, 14),
 (181, 14),
 (180, 14),
 (179, 14),
 (178, 14),
 (177, 14),
 (176, 14),
 (175, 14),
 (174, 14),
 (17