# Features

## imports

In [None]:
%matplotlib inline

In [None]:
# ipython
import IPython
import IPython.display as ipydisp

# numpy
import numpy as np

# matplotlib
# The backend is already selected by the `%matplotlib inline` magic in the
# first cell, so no explicit matplotlib.use(...) call is made here: forcing
# the legacy 'gtkagg' backend either has no effect (backend already chosen)
# or fails on Matplotlib releases that no longer ship that backend.
import matplotlib
import matplotlib.pyplot as plt

# skimage
import skimage
import skimage.io
from skimage import data
from skimage import transform as tf
from skimage.feature import (match_descriptors, corner_harris, corner_peaks, ORB, plot_matches)
from skimage.color import rgb2gray

# scipy
# .spatial.distance.euclidean
import scipy

# seaborn
import seaborn as sns

# Global plot styling. Order matters: the matplotlib style sheet is applied
# first, then seaborn's axes style, then the font override.
# NOTE(review): newer Matplotlib releases appear to have renamed the bundled
# seaborn style sheets (e.g. 'seaborn-v0_8-notebook') -- confirm against the
# installed version.
plt.style.use('seaborn-notebook')
#sns.set_style("whitegrid")
#sns.set_style("ticks")
sns.set_style("dark")

# Default font family; presumably chosen so Korean file names render in
# figure titles -- the font must be installed on the machine (TODO confirm).
plt.rc('font', family='Noto Sans Korean')
#plt.rc("savefig", dpi=200)

import os

## ORB

### dimensionality reduction

In [None]:
import scipy.cluster.vq

def reduce_dimension(obs, k):
    """Summarise a set of observations by ``k`` k-means centroids.

    The observations are first whitened (each column scaled to unit
    variance) and then clustered with ``scipy.cluster.vq.kmeans2``.

    Parameters
    ----------
    obs : (M, P) array
        One observation per row.
    k : int
        Number of clusters (centroids) to compute.

    Returns
    -------
    reducedobs : (k, P) array
        Cluster centroids in the whitened feature space.
    label : (M,) array
        Index of the centroid each observation was assigned to.
    """
    newobs = scipy.cluster.vq.whiten(obs)

    # Use the caller-supplied cluster count; it used to be hard-coded to 10,
    # which silently ignored the ``k`` argument.
    reducedobs, label = scipy.cluster.vq.kmeans2(newobs, k)

    return reducedobs, label

### match function with distance

In [None]:
import numpy as np
from scipy.spatial.distance import cdist


def match_descriptors2(descriptors1, descriptors2, metric=None, p=2,
                      max_distance=np.inf, cross_check=True):
    """Brute-force matching of descriptors, also returning match distances.

    For each descriptor in the first set this matcher finds the closest
    descriptor in the second set (and vice-versa in the case of enabled
    cross-checking).

    Parameters
    ----------
    descriptors1 : (M, P) array
        Descriptors of size P about M keypoints in the first image.
    descriptors2 : (N, P) array
        Descriptors of size P about N keypoints in the second image.
    metric : {'euclidean', 'cityblock', 'minkowski', 'hamming', ...}
        The metric to compute the distance between two descriptors. See
        `scipy.spatial.distance.cdist` for all possible types. When left as
        None, the Hamming distance is used for boolean descriptors and the
        Euclidean distance for every other dtype.
    p : int
        The p-norm to apply for ``metric='minkowski'``. Ignored for every
        other metric.
    max_distance : float
        Maximum allowed distance between descriptors of two keypoints
        in separate images to be regarded as a match.
    cross_check : bool
        If True, the matched keypoints are returned after cross checking i.e. a
        matched pair (keypoint1, keypoint2) is returned if keypoint2 is the
        best match for keypoint1 in second image and keypoint1 is the best
        match for keypoint2 in first image.

    Returns
    -------
    matches : (Q, 2) array
        Indices of corresponding matches in first and second set of
        descriptors, where ``matches[:, 0]`` denote the indices in the first
        and ``matches[:, 1]`` the indices in the second set of descriptors.
    matched_distances : (Q,) array
        Distance between the two descriptors of each returned match, in the
        same order as the rows of ``matches``.

    Raises
    ------
    ValueError
        If the two descriptor sets do not have the same descriptor length.
    """

    if descriptors1.shape[1] != descriptors2.shape[1]:
        raise ValueError("Descriptor length must equal.")

    # Nothing can be matched against an empty set; argmin below would raise.
    if descriptors1.shape[0] == 0 or descriptors2.shape[0] == 0:
        return np.empty((0, 2), dtype=int), np.empty((0,), dtype=float)

    if metric is None:
        # np.bool_ is the NumPy boolean scalar type; the bare ``np.bool``
        # alias is not available on every NumPy release.
        if np.issubdtype(descriptors1.dtype, np.bool_):
            metric = 'hamming'
        else:
            metric = 'euclidean'

    # SciPy rejects ``p`` as an extra argument for metrics that do not use
    # it, so only forward it for the Minkowski distance.
    extra_args = {}
    if metric == 'minkowski':
        extra_args['p'] = p
    distances = cdist(descriptors1, descriptors2, metric=metric, **extra_args)

    indices1 = np.arange(descriptors1.shape[0])
    indices2 = np.argmin(distances, axis=1)

    if cross_check:
        # Keep a pair only when each descriptor is the other's best match.
        matches1 = np.argmin(distances, axis=0)
        mask = indices1 == matches1[indices2]
        indices1 = indices1[mask]
        indices2 = indices2[mask]

    matches = np.column_stack((indices1, indices2))

    if max_distance < np.inf:
        matches = matches[distances[indices1, indices2] < max_distance]

    matched_distances = distances[matches[:, 0], matches[:, 1]]

    return matches, matched_distances

### match images and show distance distribution

In [None]:
def match_image(img1, img2, filename1, filename2, show_result=True, n_keypoints=128):
    """Extract ORB features from two images and match them.

    Parameters
    ----------
    img1, img2 : array
        Images handed to ``ORB.detect_and_extract``.
    filename1, filename2 : str or None
        File names of the two images; only used for the figure title and
        for error reporting when ``show_result`` is True.
    show_result : bool
        If True, plot the matches and the histogram of match distances via
        ``show_matched_result``.
    n_keypoints : int
        Number of keypoints requested from the ORB extractor. Defaults to
        128, the value that used to be hard-coded here.

    Returns
    -------
    keypoints1, keypoints2 : array
        Keypoints detected in the first and second image.
    matches : (Q, 2) array
        Index pairs into ``keypoints1`` / ``keypoints2``.
    distances : (Q,) array
        Descriptor distance of every match.
    """
    descriptor_extractor = ORB(n_keypoints=n_keypoints)

    # The extractor is reused, so its results must be copied out before the
    # second detect_and_extract call replaces them.
    descriptor_extractor.detect_and_extract(img1)
    keypoints1 = descriptor_extractor.keypoints
    descriptors1 = descriptor_extractor.descriptors

    descriptor_extractor.detect_and_extract(img2)
    keypoints2 = descriptor_extractor.keypoints
    descriptors2 = descriptor_extractor.descriptors

    matches, distances = match_descriptors2(descriptors1, descriptors2, cross_check=True)

    if show_result:
        show_matched_result(keypoints1, keypoints2, matches, distances, img1, img2, filename1, filename2)

    return keypoints1, keypoints2, matches, distances
    
    
def show_matched_result(keypoints1, keypoints2, matches, distances, img1, img2, filename1, filename2):
    """Plot the matched keypoints next to a histogram of match distances.

    The left panel shows both images with the matches drawn by
    ``plot_matches``; the right panel shows the histogram of ``distances``
    with the x axis limited to [0, 0.5]. When both file names are given,
    their base names are used as the figure title.

    A ``RuntimeError`` raised while plotting is reported on stdout together
    with the two file names instead of being propagated.
    """

    def _as_text(name):
        # Byte strings (the Python 2 ``str`` type) are decoded as UTF-8 so
        # that non-ASCII file names render in the title; text is kept as is.
        if isinstance(name, bytes):
            return name.decode('utf-8')
        return name

    try:
        fig, ax = plt.subplots(nrows=1, ncols=2)

        plt.gray()
        plot_matches(ax[0], img1, img2, keypoints1, keypoints2, matches)

        # Draw on the right-hand axes explicitly rather than relying on it
        # being the current axes.
        ax[1].hist(distances)
        ax[1].set_xlim([0, 0.5])

        fig.set_size_inches(20, 8)

        if filename1 and filename2:
            fn1 = os.path.basename(filename1)
            fn2 = os.path.basename(filename2)
            s = _as_text(fn1) + ' ' + _as_text(fn2)
            st = plt.suptitle(s, fontsize="x-large")
            st.set_y(1.05)

        fig.subplots_adjust(top=0.95)
        fig.tight_layout()

        plt.show()
    except RuntimeError as e:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s %s" % (filename1, filename2))
        print(e)
        
def imread_convert(f, img_num=None):
    """Read image file ``f`` and convert it to grayscale.

    Used as ``load_func`` for ``skimage.io.ImageCollection``.

    Parameters
    ----------
    f : str
        Path of the image file to read.
    img_num : int, optional
        Accepted for compatibility with the loader call signature; unused.

    Returns
    -------
    image : array or None
        The grayscale image, or None when reading raised ``IOError`` (the
        path and the error are printed in that case).
    """
    try:
        return rgb2gray(skimage.io.imread(f))
    except IOError as e:
        # Print the exception itself: ``e.message`` is Python 2 only and is
        # empty for IOErrors built from (errno, strerror).
        print(f)
        print(e)
        return None
    
def match_files(filespec):
    """Match every unordered pair of images selected by ``filespec``.

    Parameters
    ----------
    filespec : str or list of str
        Passed to ``skimage.io.ImageCollection`` to select the images.

    Returns
    -------
    distances_list : list of array, or None
        One array of match distances per image pair ``(i, j)`` with
        ``i < j``. None is returned when building the collection raised
        ``IOError``.
    """
    try:
        collection = skimage.io.ImageCollection(filespec, load_func=imread_convert)
    except IOError as e:
        print(filespec)
        print(e)
        return None

    distances_list = []
    n_images = len(collection)
    for i in range(n_images):
        # Load the first image once per outer iteration instead of once per
        # pair.
        img_i = collection[i]
        if img_i is None:
            # imread_convert returns None for files it could not read.
            continue
        for j in range(i + 1, n_images):
            img_j = collection[j]
            if img_j is None:
                continue
            keypoints1, keypoints2, matches, distances = match_image(
                img_i, img_j, collection.files[i], collection.files[j], True)
            # Record the result of *every* pair. Appending after the inner
            # loop kept only the last pair of each row and re-appended a
            # stale result for the final image.
            distances_list.append(distances)

    return distances_list

### Let's go to the '05' set

In [None]:
import pickle

In [None]:
# Directory holding one class of similar images, and the glob selecting
# every file inside it.
rootdir = '../../../similarity/sim/05/'
filespec = '*'

# Match all image pairs of this class and keep the per-pair distances.
same_class_pattern = rootdir + filespec
distances_list_tr = match_files(same_class_pattern)

In [None]:
# Flatten the per-pair distance arrays of the same-class set into one sample
# and persist it next to the images.
darr = distances_list_tr
X = np.concatenate(darr)
# Pickle streams are binary, so the file is opened in 'wb' mode; the context
# manager closes it even if dumping fails.
with open(rootdir + 'distances.pickle', 'wb') as fd:
    pickle.dump(X, fd)
#plt.hist(X)
#plt.xlim(0, 1)

In [None]:
from scipy.stats import chi2

# Degrees of freedom used for the evaluation grid below (not for the fit).
df = 2

# Fit a chi-squared distribution to the same-class distances; the result is
# unpacked as shape, location and scale.
shape, loc, scale = chi2.fit(X)

# 100 evenly spaced points between the 1st and 99th percentile of a
# chi-squared distribution with `df` degrees of freedom.
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), num=100)

### Randomly choose two images

In [None]:
import imghdr

# Collect every file below the data directory that imghdr recognises as an
# image.
filelist = []
for root, dirs, filenames in os.walk('../../../similarity/sim/'):
    for filename in filenames:
        filepath = os.path.join(root, filename)
        if os.path.isdir(filepath):
            continue
        if imghdr.what(filepath):
            filelist.append(filepath)

n = len(filelist)
niter = 50

# Draw `niter` random image pairs and match those whose images come from
# different class directories.
distances_list = []
for i in range(niter):
    ix = np.random.randint(0, n, 2)

    # check if images belong to the same class
    fn1 = filelist[ix[0]]
    fn2 = filelist[ix[1]]
    if os.path.basename(os.path.dirname(fn1)) == os.path.basename(os.path.dirname(fn2)):
        continue

    dl = match_files([fn1, fn2])
    # match_files returns None when the collection could not be built;
    # extending the list with None would raise TypeError.
    if dl:
        distances_list += dl


In [None]:
import datetime
from scipy.stats import expon, norm, chi2

# Flatten the distances of the randomly drawn (different-class) pairs and
# store them under a time-stamped file name.
darr = distances_list
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
X1 = np.concatenate(darr)
# Save X1, the sample computed in this cell (X was already written to
# distances.pickle). Pickle streams are binary, hence mode 'wb'.
with open(rootdir + ('distance-test%s.pickle' % timestamp), 'wb') as fd:
    pickle.dump(X1, fd)

# Overlay the normalised histograms of both samples.
# NOTE(review): newer Matplotlib releases replaced `normed` with `density`;
# switch the keyword if this cell is run there.
plt.hist(X1, normed=True, alpha=0.5, bins=20)
plt.hist(X, normed=True, alpha=0.5, bins=20)

# Normal fit of the different-class distances.
mu, sig = norm.fit(X1)
x = sig*(np.linspace(norm.ppf(0.000001), norm.ppf(0.9999999), 100))+mu
plt.plot(x, norm.pdf(x, loc=mu, scale=sig), 'b-', lw=5, alpha=0.6, label='norm pdf')

# Exponential fit of the same-class distances.
loc, scale = expon.fit(X)
# Single-argument print(...) behaves the same on Python 2 and 3.
print("%s %s" % (loc, scale))
x = scale*(np.linspace(expon.ppf(0.000001), expon.ppf(0.9999999), 100))+loc
plt.plot(x, expon.pdf(x, loc=loc, scale=scale), 'g-', lw=5, alpha=0.6, label='exponential pdf')

plt.xlim(0, 0.5)

In [None]:
#img1 = rgb2gray(data.astronaut())
# Three sample files from the '01' directory: the first is the reference
# image, the other two are matched against it.
fn1 = '../../../similarity/sim/01/084_1.jpg'
fn2 = '../../../similarity/sim/01/20151106_4.jpg'
fn3 = '../../../similarity/sim/01/7d5ffda950a0693f9d1beddeb79affdcbe87d7d4.15.jpg'

# Load and convert to grayscale, in file order.
img1, img2, img3 = (rgb2gray(skimage.io.imread(path)) for path in (fn1, fn2, fn3))

# Alternative kept for reference: synthetic transforms of the first image.
#img2 = tf.rotate(img1, 180)
#tform = tf.AffineTransform(scale=(1.3, 1.1), rotation=0.5,
#                           translation=(0, -200))
#img3 = tf.warp(img1, tform)

# One extractor is reused for all three images, so keypoints and descriptors
# are copied out after each run.
descriptor_extractor = ORB(n_keypoints=100)

descriptor_extractor.detect_and_extract(img1)
keypoints1 = descriptor_extractor.keypoints
descriptors1 = descriptor_extractor.descriptors

# Cluster the descriptors of the first image into K centroids.
K = 10
descriptors1a, label = reduce_dimension(descriptors1, K)

descriptor_extractor.detect_and_extract(img2)
keypoints2 = descriptor_extractor.keypoints
descriptors2 = descriptor_extractor.descriptors

descriptor_extractor.detect_and_extract(img3)
keypoints3 = descriptor_extractor.keypoints
descriptors3 = descriptor_extractor.descriptors

# Cross-checked matches of image 1 against image 2 and image 3.
matches12, distances12 = match_descriptors2(descriptors1, descriptors2, cross_check=True)
matches13, distances13 = match_descriptors2(descriptors1, descriptors3, cross_check=True)

# One row per image pair, drawn without axes decorations.
fig, ax = plt.subplots(nrows=2, ncols=1)
plt.gray()

plot_matches(ax[0], img1, img2, keypoints1, keypoints2, matches12)
ax[0].axis('off')

plot_matches(ax[1], img1, img3, keypoints1, keypoints3, matches13)
ax[1].axis('off')

# The figure created above is still the current one; re-fetch it and enlarge.
fig = plt.gcf()
fig.set_size_inches(30, 30)

plt.show()

In [None]:
# Inspect the matches of image 1 vs. image 2, then image 1 vs. image 3.
# Single-argument print(...) produces the same output on Python 2 and 3.
print(distances12.shape)
print(matches12.shape)
print(distances12)
plt.hist(distances12)

print(distances13.shape)
print(matches13.shape)
print(distances13)
plt.hist(distances13)

In [None]:
# Ratio of every match distance to the one listed just before it; the first
# entry has no predecessor and is set to 1.
distances_ratio = [1]
distances_ratio.extend(
    current / previous
    for previous, current in zip(distances12[:-1], distances12[1:])
)

plt.plot(distances_ratio)

# Widen the current figure before showing it.
fig = plt.gcf()
fig.set_size_inches(15, 5)
plt.show()