In [1]:
import ImageLoader
import ImageCompare
import database
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from os import listdir
from os.path import isfile, join
from collections import Counter
%matplotlib notebook

Configuration saved: 
	port: 1, exposure 0.5 (sec)


In [16]:
def retrieve_filepaths(directory_name):          # WORKS
    """
    ACCEPTS: a single string that represents the directory which contains ALL of your songs.

    SIDE NOTE: The songs in this directory are in the following format:
        Name_Artist.mp3
    
    RETURNS: A list of strings that represent the file paths of each song
    """
    onlyfiles = [f for f in listdir(directory_name) if isfile(join(directory_name, f))]
    str_of_filepaths = []

    for i in range(len(onlyfiles)):
        str_of_filepaths.append(directory_name + '\\' +onlyfiles[i])

    return str_of_filepaths

In [17]:
def load_images(filenames):
    """
    Loads images from file and returns images for each identifiable person as well as corresponding descriptor vectors.
    
    Parameters
    ----------
    filenames: list of strings
        Paths to each file to be loaded.
    
    Returns
    -------
    images: list of numpy arrays
    descs: list of numpy arrays
        Descriptors of each image, corresponding to images array.
    """
    images = []
    descs = []
    for fn in filenames:
        print("Loading:", fn)
        img = ImageLoader.get_img_from_file(fn)
        det = ImageLoader.find_faces(img)
        des = ImageLoader.find_descriptors(img, det)
        for i in range(len(det)):
            print("Adding:", fn, i)
            images.append(img)
            descs.append(des[i])
    return images, descs

In [18]:
def cluster(images, descs, threshold=0.5, iters=300):
    """
    Clusters images based on who is in them, using a graph and whisper algorithm, then displays them.
    
    Parameters
    ----------
    images: list of numpy arrays
        List of images to be sorted.
    descs: list of numpy arrays, shape (128,)
        List of descriptor vectors.
    threshold: float
        The maximum L2 distance between two images to be classified as a match.
    iters: int
        The number of times the whisper algorithm should run.
        
    Returns
    -------
    True
    """
    d_descs = np.array(descs)
    distances = np.sqrt(np.sum(d_descs**2, axis=1, keepdims=True) 
                   + np.sum(d_descs**2, axis=1) 
                   - 2*np.dot(d_descs, d_descs.T)
                   )
    distances[np.arange(len(descs)), np.arange(len(descs))] = 0
    condition = distances > 0.5
    distances[condition] = 0
    where = np.where(distances > 0)
    distances[where[0], where[1]] = 1 / (distances[where[0], where[1]]**2)
    ids = np.arange(len(descs))
    # Graph computed
    
    # Whisper algorithm
    for i in range(500):
        center = np.random.randint(0, len(descs))
        adjs = distances[center]
        adj_node_counts = np.zeros(len(descs))
        for i in range(len(adjs)):
            adj_node_counts[ids[i]] += adjs[i]
        arg_max = np.argmax(adj_node_counts[1:]) + 1
        ids[center] = arg_max
    
    # Matching photographs
    d = {}
    for i in range(len(descs)):
        if ids[i] not in d:
            d[ids[i]] = [images[i]]
        else:
            d[ids[i]].append(images[i])
    
    for key, values in d.items():
        plt.figure(key)
        n = (len(values) - 1) // 4 + 1
        for ind, img in enumerate(values):
            plt_idx = ind + 1
            plt.subplot(n, 4, ind + 1)
            plt.imshow(img.astype('uint8'))
            plt.axis('off')
    
    plt.show()
    return True

In [14]:
images = load_images(retrieve_filepaths("test_images"))
cluster(*images)

ValueError: 'axis' entry is out of bounds