In [3]:
import cv2
import numpy as np
from pickle import dump, load
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt
from keras.applications.vgg16 import VGG16
from datetime import datetime # library to compute run of various algorithm

Using TensorFlow backend.


In [None]:
# function to read images
def read_images(path, filenames, extension = None, shape = None, labels = None, verbose = 0):
    '''
    Reads images from a directory with cv2.imread and returns them together 
    with the filenames that were read successfully and, if labels are provided, 
    the positions (indexes into filenames/labels) of the images that were kept.
    
    Files that cannot be read are reported and skipped, they never end up 
    in the returned collection.
    
    Parameters
    -----------
    path: string
          Path of the directory where the images are stored.
          Backslashes are converted to forward slashes.
    
    filenames: list of strings
               Names of images to be read
               
    extension: string
               If extension not present in filenames list, 
               provide the extension of images to be read.
               It is lower-cased and a leading '.' is added if missing.
    
    shape: tuple
           If specified, only those images with matching shape will be returned
           and the images are stacked into a single numpy array.
    
    labels: list
            List of labels associated with images. Must have the same 
            length as filenames. Only its length is used, the returned 
            list holds indexes of the kept images, not the label values.
    
    verbose: int
             If 1, print the progress after every 1000 files.
    
    Returns
    -----------
    (images, extracted_filenames) or (images, extracted_filenames, y) 
    when labels are provided. images is a list of arrays when shape is None, 
    otherwise a numpy array with one leading axis per kept image.
    
    Raises
    -----------
    ValueError
        If labels are provided and their count differs from filenames.
    '''
    path = path.replace('\\', '/')
    # An empty path means "relative to the current directory", 
    # so only add a separator when there is something to separate
    if path and not path.endswith('/'):
        path = path + '/'
    
    # Creating an empty list of indexes to return if labels are specified
    if labels is not None:
        if len(filenames) != len(labels):
            raise ValueError("Number of labels not equal to number of files to be read")
        y = []
            
    if not extension:
        extension = ''
    else:
        extension = extension.lower()
        if extension[0] != '.':
            extension = '.' + extension
    
    # img.shape is a tuple, normalise so that a list also matches
    if shape is not None:
        shape = tuple(shape)

    images = []
    extracted_filenames = []
    total_files = len(filenames)
    
    for idx, name in enumerate(filenames):
        file_label = str(name) + extension
        
        try:
            img = cv2.imread(path + file_label)
        except Exception as error:
            # Best effort: report the failure and carry on with the next file
            print('Skipping ' + file_label + ', could not be read: ' + str(error))
            continue
        
        if verbose == 1 and (idx + 1) % 1000 == 0: 
            print("Extracted", idx+1, "images out of", total_files)
        
        # cv2.imread does not raise for a missing/unreadable file, it returns None
        if img is None:
            print('Skipping ' + file_label + 
                                ', no such file in directory ' + path)
            continue
        
        # Silently drop images whose shape does not match the requested one
        if shape is not None and img.shape != shape:
            continue
        
        images.append(img)
        extracted_filenames.append(name)
        if labels is not None:
            y.append(idx)
    
    # Converting list of arrays into multi-dimensional array 
    # if all images have the same shape
    if shape is not None:
        if images:
            images = np.stack(images)
        else:
            # np.stack fails on an empty list; return an empty batch instead.
            # uint8 is what cv2.imread produces with its default flags.
            images = np.empty((0,) + shape, dtype = np.uint8)
    
    if labels is not None:
        return images, extracted_filenames, y
    else:
        return images, extracted_filenames

In [None]:
# function to save pickle
def save_pickle(file, variable):
    '''
    Serialises a variable to disk using pickle.
    
    The text after the last '.' in file is treated as the extension; 
    when it is not "pickle", ".pickle" is appended to the name.
    A confirmation message with the final file name is printed.
    
    Parameters
    -----------
    file: str
          File name/path in which the variable is to be stored
    
    variable: object
              The variable to be stored in a file
    '''
    suffix = file.rsplit('.', 1)[-1]
    target = file if suffix == "pickle" else file + ".pickle"
        
    with open(target, 'wb') as handle:
        dump(variable, handle)
        print("Variable successfully saved in " + target)

In [2]:
# function to open pickle
def open_pickle(file):
    '''
    Loads and returns the object stored in a pickle file.
    
    The text after the last '.' in file is treated as the extension; 
    when it is not "pickle", ".pickle" is appended before opening.
    
    Note: unpickling can execute arbitrary code, only open files 
    that come from a trusted source.
    
    Parameters
    -----------
    file: str
          File name/path from which variable is to be loaded
    '''
    suffix = file.rsplit('.', 1)[-1]
    source = file if suffix == "pickle" else file + ".pickle"
    
    with open(source, 'rb') as handle:
        restored = load(handle)
    
    return restored

In [11]:
# Function to resize colored image
def resize_image(image, dimension = (80,60,3)):
    '''
    Returns the image resized to the requested height and width.
    
    The image is returned untouched when its shape already equals dimension.
    
    Parameters
    -----------
    image: numpy array
           3-channel image of shape (height, width, 3)
    
    dimension: tuple
               Target shape as (height, width, channels). 
               Only height and width are used for resizing.
    
    Raises
    -----------
    AttributeError
        If the image needs resizing but is not a 3-channel image, 
        e.g. a 2-dimensional grayscale array.
    '''
    # image.shape is a tuple, normalise so that a list compares equal as well
    if image.shape == tuple(dimension):
        return image
    
    # Check the rank first: indexing shape[2] on a 2-d array would raise 
    # IndexError instead of the intended error below
    if len(image.shape) != 3 or image.shape[2] != 3:
        raise AttributeError("Image should be of 3-dimensions")
    
    # cv2.resize expects the target size as (width, height)
    return cv2.resize(image, (dimension[1], dimension[0]))

In [12]:
# function to get embeddings from pre-trained VGG16 model
def get_embeddings(image, dimension = (80, 60,3), model = None):
    '''
    Returns the output of a VGG16 model (without its top layers) 
    for the given input image.
    
    The image is first passed through resize_image, reshaped to a 
    batch holding a single image and fed to model.predict. 
    The prediction for that single image is returned.
    
    Parameters
    -----------
    image: numpy array
           The image for which embeddings are needed.
           Recommended shape (80, 60, 3)
           
    dimension: tuple
               Shape (height, width, channels) the image is brought to 
               before prediction, also used as input_shape when the 
               model is created here.
    
    model: object
           Model exposing predict(). If None, a VGG16 model with 
           'imagenet' weights is created on every call, so pass a 
           model when calling this function repeatedly.
    '''
        
    resized = resize_image(image=image, dimension=dimension)
    height, width, channels = dimension[0], dimension[1], dimension[2]
    batch = resized.reshape(1, height, width, channels)
        
    network = model
    if network is None:
        network = VGG16(weights = 'imagenet', input_shape = dimension, include_top = False)
        network.trainable = False
    
    predictions = network.predict(batch)
    return predictions[0]

In [1]:
# function to plot multiple images
def _flatten_axes(ax):
    '''Returns the axes contained in ax (single object, list or array) as a flat list.'''
    if isinstance(ax, np.ndarray):
        return list(ax.ravel())
    if isinstance(ax, (list, tuple)):
        flat = []
        for item in ax:
            flat.extend(_flatten_axes(item))
        return flat
    return [ax]


def plot_images(images, nrows = None, ncols = None, figsize = None, ax = None, 
                axis_style = 'on', bgr2rgb = True):
    '''
    Plots a given list of images and returns the axes they were drawn on
    
    Images are placed row by row on a grid of nrows x ncols axes. 
    A single image is handled exactly like several images: colour 
    conversion and axis_style are applied to it as well.
    
    Parameters
    -----------
    images: list
            A list (or numpy array) of images to plot
            
    nrows: int
           Number of rows to arrange images into. 
           If both nrows and ncols are None, images are stacked in one column.
    
    ncols: int
           Number of columns to arrange images into
    
    figsize: tuple
             Plot size (width, height) in inches
           
    ax: axes.Axes object
        The axis (or collection of axes) to plot the images on, 
        new axes will be created if None
        
    axis_style: str
                'off' if axis are not to be displayed
    
    bgr2rgb: boolean
             If True, every image is converted from BGR to RGB 
             with cv2.cvtColor before being shown
    
    Returns
    -----------
    The ax object that was passed in, or the one created by plt.subplots 
    (a single axes.Axes or an array of them).
    
    Raises
    -----------
    AttributeError
        If images is neither a list nor a numpy array.
    '''
    # Validate before calling len() so that wrong input gets the intended error
    if not isinstance(images, (list, np.ndarray)):
        raise AttributeError("The images parameter should be a list of images, "
                             "if you want to plot a single image, pass it as a "
                             "list of single image")
    N = len(images)

    # Setting nrows and ncols as per parameter input
    if nrows is None:
        if ncols is None:
            nrows, ncols = N, 1
        else:
            nrows = int(np.ceil(N / ncols))
    elif ncols is None:
        ncols = int(np.ceil(N / nrows))
    
    if ax is None:
        _, ax = plt.subplots(nrows, ncols, figsize = figsize)
    
    # plt.subplots returns a single object, a 1d array or a 2d array depending 
    # on the grid; a flat row-major view lets every layout share one code path
    flat_axes = _flatten_axes(ax)
    
    for slot in range(nrows * ncols):
        current = flat_axes[slot]
        
        # Grid cells beyond the last image stay empty but still get the axis style
        if slot < N:
            img = images[slot]
            if bgr2rgb:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            current.imshow(img)
        
        current.axis(axis_style)
    
    return ax
          

In [1]:
# returns time
def get_time():
    '''Returns the current date and time as given by datetime.now().'''
    current = datetime.now()
    return current