In [152]:
%reload_ext autoreload
%autoreload 2

import os
import argparse
import sys
from time import time
import cv2

from joblib import Parallel, delayed

sys.path.append(os.path.join(os.environ['GORDON_REPO_DIR'], 'utilities'))
from utilities2015 import *

from matplotlib.path import Path
%matplotlib inline

import scipy.ndimage as nd
import scipy

from skimage.feature import hog
from skimage import data, color, exposure
from sklearn.cluster import KMeans

import random

In [None]:
vocabulary = np.load('/oasis/projects/nsf/csd395/ruogu/vocabulary/vocabulary.npy');
M = vocabulary.shape[0];
L = 3;
dataDir = '/oasis/projects/nsf/csd395/yuncong/CSHL_data_patches/patches/';
svmDataDir = '/oasis/projects/nsf/csd395/ruogu/svm/';
classes = ['12N', '5N', '7n', '7N', 'Gr', 'LVe', 'Pn', 'SuVe', 'VLL', 'Bg'];

In [None]:
kmeans = KMeans(init='random', n_clusters=M, n_init=10);
kmeans.fit(vocabulary);

In [176]:
# Return the histogram of an image by applying SIFT + K-means + SPM
# image: the input grayscaled image
# kmeans: python kmeans object used to predict
#             feature type of a new sift descriptor
# M: number of channels (feature typs)
# L: number of SPM levels.
def getHistogram(image, kmeans, M, L):
    x_size = image.shape[0];
    y_size = image.shape[1];
    # Get SIFT key points and descriptor.
    sift = cv2.SIFT();
    key_points, descriptor = sift.detectAndCompute(image, None);
    # Map key points with type.
    if descriptor is None:
        return None;
    cluster_labels = kmeans.predict(descriptor);
    cluster_labels = cluster_labels;
    # Generate position list of key_points
    positions = np.asarray([kp.pt for kp in key_points], np.int)
    # Level 0
    weight_0 = 1.0 / (2**L);
    histogram = np.bincount(cluster_labels, minlength = M);
    histogram = histogram * weight_0;
    # Other levels
    for l in range(1, L):
        weight = 1.0 / (2**(L - l + 1));
        grid_size_x = x_size / (2**l);
        grid_size_y = y_size / (2**l);
        grid_boundaries_x = range(0, x_size, grid_size_x);
        grid_boundaries_y = range(0, y_size, grid_size_y);
        number_of_grid = (2**l)**2;
    
        grid_crs = positions / [grid_size_x, grid_size_y];
        grid_cols = grid_crs[:,0];  # x
        grid_rows = grid_crs[:,1];  # y
    
        keypoint_grid_indices = grid_rows * (2**l) + grid_cols;
        hists = np.zeros(shape=(number_of_grid, M), dtype=np.int);
        for ki, label in zip(keypoint_grid_indices, cluster_labels):
            hists[ki][label] += 1;
        hists = hists * weight;
        histogram = np.hstack((histogram, hists.flatten()));
    return (histogram / len(key_points)); # Normalization

In [181]:
fileNames = os.listdir(dataDir);
X = [];
y = [];
for fileName in fileNames:
    images = np.load(dataDir + fileName);
    if images.shape[0] > 100:
        sample_images = random.sample(images, 100);
    else:
        sample_images = images;
    label = -1;
    # Get class label
    if fileName[7] == '_':
        className = fileName.partition('_')[-1].rpartition('_')[0];
        label = classes.index(className);
    else: # background
        label = 9;
    print "Processing file: \"{}\" with label: \"{}\"".format(fileName, label);
    for img in sample_images:
        img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY);
        h = getHistogram(img, kmeans, M, L);
        if h is None:
            continue;
        else:
            X.append(h);
            y.append(label);

Processing file: "patchesBg_Pn_115.npy" with label: "9"
Processing file: "patches_Pn_138.npy" with label: "6"
Processing file: "patchesBg_7N_99.npy" with label: "9"
Processing file: "patchesBg_LVe_79.npy" with label: "9"
Processing file: "patches_Pn_109.npy" with label: "6"
Processing file: "patches_7N_91.npy" with label: "3"
Processing file: "patches_Pn_128.npy" with label: "6"
Processing file: "patchesBg_7N_98.npy" with label: "9"
Processing file: "patches_Pn_89.npy" with label: "6"
Processing file: "patches_Pn_115.npy" with label: "6"
Processing file: "patches_7N_95.npy" with label: "3"
Processing file: "patches_Gr_108.npy" with label: "4"
Processing file: "patches_Pn_110.npy" with label: "6"
Processing file: "patches_5N_85.npy" with label: "1"
Processing file: "patchesBg_Pn_138.npy" with label: "9"
Processing file: "patchesBg_7n_92.npy" with label: "9"
Processing file: "patchesBg_VLL_86.npy" with label: "9"
Processing file: "patches_LVe_82.npy" with label: "5"
Processing file: "pat

In [182]:
np.save('/oasis/projects/nsf/csd395/ruogu/svm/X.npy', X);
np.save('/oasis/projects/nsf/csd395/ruogu/svm/y.npy', y);