In [1]:
import nbimporter
import locality_sensitive_hashing as lsh
import min_hashing as mh
import shingling as sh
import pre_process as pp
import euclidean_projection as ep
import cosine_projection as cp
import pickle
import numpy as np
import sys
import cv2 as cv
import os, os.path

Importing Jupyter notebook from locality_sensitive_hashing.ipynb
Importing Jupyter notebook from min_hashing.ipynb
Importing Jupyter notebook from prime.ipynb
Importing Jupyter notebook from shingling.ipynb


Using TensorFlow backend.


Importing Jupyter notebook from pre_process.ipynb
Importing Jupyter notebook from euclidean_projection.ipynb
Importing Jupyter notebook from cosine_projection.ipynb


In [2]:
############################################################################################################################
# 1. SELECT YOUR SHINGLE TYPE
############################################################################################################################

In [3]:
# BINARY SOBEL SHINGLES
b_sobel_shingles, bs_labels, bs_tot = pp.get_cached_shingles("Sobel", 3)
b_sobel_shingles.shape

Sobel\clock_sobel.pickle
Sobel\dog_sobel.pickle
Sobel\firetruck_sobel.pickle


(4124, 50176)

In [4]:
# BINARY CNN FEATURES (block5_conv3)
b_cnn_shingles, bc_labels, bc_tot = pp.get_cached_shingles("CNN", 3)
b_cnn_shingles.shape

CNN\dog_cnn.pickle
CNN\orange_cnn.pickle
CNN\snake_cnn.pickle


(4298, 25088)

In [5]:
# CNN FEATURES (prediction layer, real values)
cnn_shingles, c_labels, c_tot = pp.get_cached_shingles("CNN_2", 3)
cnn_shingles.shape

CNN_2\church_cnn_2.pickle
CNN_2\palmtree_cnn_2.pickle
CNN_2\spider_cnn_2.pickle


(4347, 1000)

In [6]:
############################################################################################################################
# 2. SELECT YOUR ALGORITHM (JACCARD-COSINE-EUCLIDEAN)
############################################################################################################################

In [7]:
############################################################################################################################
# 2.1. JACCARD LSH
############################################################################################################################

In [8]:
# Select one to use in algorithms 
shingles = b_cnn_shingles
labels = bc_labels
totals = bc_tot

In [9]:
# MIN-HASHING (input shingles as feature_count x image_count)
min_hash = mh.MinHashing(shingles.T, 250, 100)
signature = min_hash.generate_signature()
signature.shape

(120, 4298)

In [10]:
# LOCALITY SENSITIVE HASHING (input signature as image_count x signature_size)
lsh_util = lsh.LocalitySensitiveHashing(50, 5, np.transpose(signature))
candidate_pairs = lsh_util.generate_candidates()
print("Number of candidate pairs found: " + str(len(candidate_pairs)))

Number of candidate pairs found: 283583


In [11]:
# PERFORMANCE
print("JACCARD LSH PERFORMANCE")
tp = 0
fp = 0

total_correct_pairs = np.sum(totals)
total_pairs = np.shape(shingles)[0] * (np.shape(shingles)[0]-1) / 2

for i in candidate_pairs:
    if labels[i[0]] == labels[i[1]]:
        tp +=1
    else:
        fp +=1

fn = int(total_correct_pairs - tp)
tn = int(total_pairs - total_correct_pairs - fp)

print("TP: " + str(tp))
print("FP: " + str(fp))
print("TN: " + str(tn))
print("FN: " + str(fn))

print("Precision: " + str(tp/(tp+fp)))
print("Recall: " + str(tp/(tp+fn)))

print("NPV: " + str(tn / (tn + fn)))
print("FPR: " + str(fp / (fp + tn)))
print("FDR: " + str(fp / (tp + fp)))
print("F1: " + str((2 * tp / (tp + fp)) / (tp / (tp + fp) + tp / (tp + fn))))
print("F2: " + str((5 * tp / (tp + fp)) / (4 * tp / (tp + fp) + tp / (tp + fn))))
print("F3: " + str((10 * tp / (tp + fp)) / (9 * tp / (tp + fp) + tp / (tp + fn))))

JACCARD LSH PERFORMANCE
TP: 126065
FP: 157518
TN: 5930722
FN: 3019948
Precision: 0.4445435727811611
Recall: 0.040071353805594574
NPV: 0.6626009002678012
FPR: 0.025872501741061458
FDR: 0.5554564272188389
F1: 1.8346260025962242
F2: 1.2224519113263626
F3: 1.100093014473192


In [12]:
############################################################################################################################
# 2.2. COSINE LSH 
############################################################################################################################

In [13]:
# Select one to use in algorithms 
shingles = cnn_shingles
labels = c_labels
totals = c_tot

In [14]:
# COSINE PROJECTION
cosine_shingles = cp.cosineValues(shingles, 1000)
cosine_shingles.shape

(4347, 1000)

In [15]:
# MIN-HASHING (input shingles as feature_count x image_count)
min_hash = mh.MinHashing(cosine_shingles.T, 120, 100)
signature = min_hash.generate_signature()
signature.shape

(120, 4347)

In [16]:
# LOCALITY SENSITIVE HASHING (input signature as image_count x signature_size)
lsh_util = lsh.LocalitySensitiveHashing(12, 10, np.transpose(signature))
candidate_pairs = lsh_util.generate_candidates()
print("Number of candidate pairs found: " + str(len(candidate_pairs)))

Number of candidate pairs found: 859331


In [17]:
# PERFORMANCE
print("COSINE LSH PERFORMANCE")
tp = 0
fp = 0

total_correct_pairs = np.sum(totals)
total_pairs = np.shape(shingles)[0] * (np.shape(shingles)[0]-1) / 2

for i in candidate_pairs:
    if labels[i[0]] == labels[i[1]]:
        tp +=1
    else:
        fp +=1

fn = int(total_correct_pairs - tp)
tn = int(total_pairs - total_correct_pairs - fp)

print("TP: " + str(tp))
print("FP: " + str(fp))
print("TN: " + str(tn))
print("FN: " + str(fn))

print("Precision: " + str(tp/(tp+fp)))
print("Recall: " + str(tp/(tp+fn)))

print("NPV: " + str(tn / (tn + fn)))
print("FPR: " + str(fp / (fp + tn)))
print("FDR: " + str(fp / (tp + fp)))
print("F1: " + str((2 * tp / (tp + fp)) / (tp / (tp + fp) + tp / (tp + fn))))
print("F2: " + str((5 * tp / (tp + fp)) / (4 * tp / (tp + fp) + tp / (tp + fn))))
print("F3: " + str((10 * tp / (tp + fp)) / (9 * tp / (tp + fp) + tp / (tp + fn))))

COSINE LSH PERFORMANCE
TP: 741874
FP: 117457
TN: 6161330
FN: 2425370
Precision: 0.8633157654035523
Recall: 0.2342332955717968
NPV: 0.7175434101575693
FPR: 0.018706957251456373
FDR: 0.1366842345964477
F1: 1.5731702501505622
F2: 1.170598804418025
F3: 1.0785952724523709


In [None]:
############################################################################################################################
# 2.3. EUCLIDEAN LSH 
############################################################################################################################

In [None]:
# Select one to use in algorithms 
shingles = cnn_shingles
labels = c_labels
totals = c_tot

In [None]:
# EUCLIDEAN LOCALITY SENSITIVE HASHING
ht = ep.find_projection(shingles, signature_size=50, bucket_count=500)
candidate_pairs = ep.generate_candidates(ht)

In [None]:
# PERFORMANCE
tp = 0
fp = 0

total_correct_pairs = np.sum(totals)
total_pairs = np.shape(shingles)[0] * (np.shape(shingles)[0]-1) / 2

for i in candidate_pairs:
    if labels[i[0]] == labels[i[1]]:
        tp +=1
    else:
        fp +=1

fn = int(total_correct_pairs - tp)
tn = int(total_pairs - total_correct_pairs - fp)

print("TP: " + str(tp))
print("FP: " + str(fp))
print("TN: " + str(tn))
print("FN: " + str(fn))

print("Precision: " + str(tp/(tp+fp)))
print("Recall: " + str(tp/(tp+fn)))

print("NPV: " + str(tn / (tn + fn)))
print("FPR: " + str(fp / (fp + tn)))
print("FDR: " + str(fp / (tp + fp)))
print("F1: " + str((2 * tp / (tp + fp)) / (tp / (tp + fp) + tp / (tp + fn))))
print("F2: " + str((5 * tp / (tp + fp)) / (4 * tp / (tp + fp) + tp / (tp + fn))))
print("F3: " + str((10 * tp / (tp + fp)) / (9 * tp / (tp + fp) + tp / (tp + fn))))