<h1> DS200A Computer Vision Assignment</h1>

<h2>  Part Two: Feature Selection </h2>	


<h4> In this section, we would like you to select between 15 and 20 features to focus your model on. This will require significant exploratory research. The first one is already implemented for you, and the next two are pre-specified.  </h4>

In [1]:
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage import filters
from skimage.color import rgb2gray
from skimage.feature import blob_dog, blob_log, blob_doh, peak_local_max

In [2]:
def get_data(isTest=False):
    """Load a cached, cleaned image frame from a pickle in the working directory.

    Args:
        isTest: when true, read 'cleaned_test.pkl'; otherwise read
            'cleaned_train.pkl'.

    Returns:
        Whatever object the pickle holds (presumably a DataFrame written by
        an earlier notebook -- confirm), or None after printing a notice
        when the cache file is not present.
    """
    cache_name = 'cleaned_test.pkl' if isTest else 'cleaned_train.pkl'

    # Guard first: a missing cache is reported, not raised.
    if not Path(cache_name).is_file():
        print("Datafile does not yet exist!")
        return None

    # NOTE: unpickling can execute arbitrary code; only load caches you created.
    return pd.read_pickle(cache_name)

# Load the cleaned training frame; get_data returns None (after printing a notice) if the cache file is missing.
data_from_nb1 = get_data(isTest=False)

In [3]:
# Preview the first two rows of the loaded frame.
data_from_nb1.head(2)

Unnamed: 0,Folder name,Image name,Image object,Encoding
0,airplanes,airplanes_0001.jpg,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0
1,airplanes,airplanes_0002.jpg,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0
2,airplanes,airplanes_0003.jpg,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0
3,airplanes,airplanes_0004.jpg,"[[[174, 190, 187], [173, 189, 186], [172, 188,...",0
4,airplanes,airplanes_0005.jpg,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0


In [69]:
# Returns the number of elements in the image array (for a colour image: rows * columns * channels)
def ft0(image):
    """Return the ``size`` attribute of the image.

    Assuming ``image`` is a NumPy array, this is the total element count,
    i.e. the product of all dimensions (channels included).
    """
    element_count = image.size
    return element_count

# Returns the average of the red-channel pictures for the images
def ft1(image):
    """Return the mean of the red channel (channel index 0).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with red first.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the mean of ``image[:, :, 0]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.mean(image[:, :, 0])
    
#Returns the aspect ratio of the image
def ft2(image):
    """Return the aspect ratio of the image: ``shape[1] / shape[0]``.

    For a (rows, columns, ...) array this is columns divided by rows.
    """
    columns = image.shape[1]
    rows = image.shape[0]
    return columns / rows

# Returns the average of the green-channel pictures for the images
def ft3(image):
    """Return the mean of the green channel (channel index 1).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with green second.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the mean of ``image[:, :, 1]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.mean(image[:, :, 1])

# Returns the average of the blue-channel pictures for the images
def ft4(image):
    """Return the mean of the blue channel (channel index 2).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with blue third.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the mean of ``image[:, :, 2]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.mean(image[:, :, 2])

# Returns the median of the red-channel pictures for the images
def ft5(image):
    """Return the median of the red channel (channel index 0).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with red first.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the median of ``image[:, :, 0]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.median(image[:, :, 0])

# Returns the median of the green-channel pictures for the images
def ft6(image):
    """Return the median of the green channel (channel index 1).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with green second.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the median of ``image[:, :, 1]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.median(image[:, :, 1])

# Returns the median of the blue-channel pictures for the images
def ft7(image):
    """Return the median of the blue channel (channel index 2).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with blue third.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the median of ``image[:, :, 2]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.median(image[:, :, 2])

# Returns the stdev of the red-channel pictures for the images
def ft8(image):
    """Return the standard deviation of the red channel (channel index 0).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with red first.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the population standard deviation (NumPy's default,
        ddof=0) of ``image[:, :, 0]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.std(image[:, :, 0])

# Returns the stdev of the green-channel pictures for the images
def ft9(image):
    """Return the standard deviation of the green channel (channel index 1).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with green second.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the population standard deviation (NumPy's default,
        ddof=0) of ``image[:, :, 1]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.std(image[:, :, 1])

# Returns the stdev of the blue-channel pictures for the images
def ft10(image):
    """Return the standard deviation of the blue channel (channel index 2).

    Args:
        image: NumPy image array; colour images are expected as
            (rows, columns, channels) with blue third.

    Returns:
        0 for arrays with fewer than three dimensions (e.g. grayscale),
        otherwise the population standard deviation (NumPy's default,
        ddof=0) of ``image[:, :, 2]``.
    """
    if image.ndim < 3:
        return 0
    # Slice the channel instead of unpacking every pixel in Python: this is
    # far faster and also accepts images with an extra (alpha) channel.
    return np.std(image[:, :, 2])

# Returns the mean luminance for the images
def ft11(image):
    """Return the mean of the image after conversion with ``rgb2gray``."""
    luminance = rgb2gray(image)
    return np.mean(luminance)

# Returns the median luminance for the images
def ft12(image):
    """Return the median of the image after conversion with ``rgb2gray``.

    The previous body called ``np.mean`` here, which made this feature an
    exact duplicate of ft11; the median is what the feature is meant to be.
    """
    luminance = rgb2gray(image)
    return np.median(luminance)

# Returns the stdev luminance for the images
def ft13(image):
    """Return the standard deviation of the image after conversion with ``rgb2gray``."""
    luminance = rgb2gray(image)
    return np.std(luminance)

# Returns number of key points using ORB
def ft14(image):
    """Return the number of keypoints ORB reports for the image.

    Keypoints are detected first and then passed through ``compute``; the
    count is taken from the keypoint list that ``compute`` hands back
    (which, per OpenCV's documentation, may omit keypoints it could not
    describe -- confirm for the installed version).
    """
    # Create an ORB detector with OpenCV's default settings
    detector = cv2.ORB_create()

    # Stage 1: locate candidate keypoints (no mask)
    candidates = detector.detect(image, None)

    # Stage 2: compute descriptors; only the returned keypoint list is used
    keypoints, _descriptors = detector.compute(image, candidates)
    return len(keypoints)

# Returns number of blobs using Laplacian of Gaussian
def ft15(image):
    """Return how many blobs ``blob_log`` detects in the grayscale image.

    Uses max_sigma=30, num_sigma=10 and threshold=0.1 for the detector.
    """
    detected = blob_log(
        rgb2gray(image),
        max_sigma=30,
        num_sigma=10,
        threshold=.1,
    )
    return len(detected)

# Returns number of peaks, which are the local max in a region
def ft16(image):
    """Return the number of entries ``peak_local_max`` yields for the image.

    The image is passed as-is (no grayscale conversion) and the detector
    runs with its default parameters.
    """
    peaks = peak_local_max(image)
    return len(peaks)

# Returns the average luminance of filtered image
# The purpose of this is to approximate the amount of edges in the image
def ft17(image):
    """Return the mean of the Sobel-filtered grayscale image.

    The Sobel response is larger near edges, so its mean serves as a rough
    proxy for how much edge content the image has.

    Requires ``from skimage import filters`` in the notebook's import cell;
    without it the call below raises NameError.
    """
    edge_response = filters.sobel(rgb2gray(image))
    return np.mean(edge_response)

In [17]:
# TESTS

def test_ft1(image):
    """Check ft1 against the mean of the fixture's red-channel values."""
    expected = np.mean([2,4,10,12,20,40,100,120])
    actual = ft1(image)
    assert actual == expected, \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft1 test")

def test_ft2(image):
    """Check that ft2 reports an aspect ratio of 2 for the fixture image."""
    actual = ft2(image)
    assert actual == 2, \
        f"Answer received: {actual}, Answer expected: 2"
    print("PASS ft2 test")
    
def test_ft3(image):
    """Check ft3 against the mean of the fixture's green-channel values."""
    expected = np.mean([0,0,1,3,4,5,5,6])
    actual = ft3(image)
    assert actual == expected, \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft3 test")
    
def test_ft4(image):
    """Check ft4 against the mean of the fixture's blue-channel values."""
    expected = np.mean([5,5,6,7,7,8,9,10])
    actual = ft4(image)
    assert actual == expected, \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft4 test")

def test_ft5(image):
    """Check ft5 against the median of the fixture's red-channel values.

    The failure message now reports the median (the value actually
    asserted) rather than the mean.
    """
    expected = np.median([2,4,10,12,20,40,100,120])
    actual = ft5(image)
    assert actual == expected, \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft5 test")

def test_ft6(image):
    """Check ft6 against the median of the fixture's green-channel values."""
    expected = np.median([0,0,1,3,4,5,5,6])
    actual = ft6(image)
    assert actual == expected, \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft6 test")

def test_ft7(image):
    """Check ft7 against the median of the fixture's blue-channel values."""
    expected = np.median([5,5,6,7,7,8,9,10])
    actual = ft7(image)
    assert actual == expected, \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft7 test")

def test_ft8(image):
    """Check ft8 against the standard deviation of the fixture's red-channel values."""
    expected = np.std([2,4,10,12,20,40,100,120])
    actual = ft8(image)
    # A standard deviation is a rounded float; compare with a tolerance so the
    # check does not depend on the exact summation order inside ft8.
    assert np.isclose(actual, expected), \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft8 test")

def test_ft9(image):
    """Check ft9 against the standard deviation of the fixture's green-channel values."""
    expected = np.std([0,0,1,3,4,5,5,6])
    actual = ft9(image)
    # A standard deviation is a rounded float; compare with a tolerance so the
    # check does not depend on the exact summation order inside ft9.
    assert np.isclose(actual, expected), \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft9 test")

def test_ft10(image):
    """Check ft10 against the standard deviation of the fixture's blue-channel values."""
    expected = np.std([5,5,6,7,7,8,9,10])
    actual = ft10(image)
    # A standard deviation is a rounded float; compare with a tolerance so the
    # check does not depend on the exact summation order inside ft10.
    assert np.isclose(actual, expected), \
        f"Answer received: {actual}, Answer expected:{expected}"
    print("PASS ft10 test")

In [18]:
def run_all_tests():
    """Run test_ft1 .. test_ft10, in order, on a small 2x4 RGB fixture.

    Each check prints its own PASS line; a summary line is printed once
    all of them have returned without an assertion error.
    """
    # Eight [r, g, b] pixels, listed row by row, then shaped to (2, 4, 3).
    pixels = [
        [2, 0, 5], [4, 0, 5], [10, 1, 6], [12, 3, 7],
        [20, 4, 7], [40, 5, 8], [100, 5, 9], [120, 6, 10],
    ]
    image = np.array(pixels).reshape(2, 4, 3)

    checks = (
        test_ft1, test_ft2, test_ft3, test_ft4, test_ft5,
        test_ft6, test_ft7, test_ft8, test_ft9, test_ft10,
    )
    for check in checks:
        check(image)
    print("----\nALL TESTS PASSED!")
    
# Execute the checks now so a failing assertion surfaces in this cell.
run_all_tests()

PASS ft1 test
PASS ft2 test
PASS ft3 test
PASS ft4 test
PASS ft5 test
PASS ft6 test
PASS ft7 test
PASS ft8 test
PASS ft9 test
PASS ft10 test
----
ALL TESTS PASSED!


Define more features above, performing any EDA research below. We expect all external sources cited, and a couple of significantly different graphs indicating some form of EDA. 

<h4> Graphs </h4>

In [None]:
# NEED 3+ graphs!

<h4> Sources </h4>

* https://scikit-image.org/
* https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_feature2d/py_orb/py_orb.html#orb

<h4> DataFrame Creation </h4>

In [70]:
# THIS IS REALLY, REALLY, SLOW!!!!

def feature_frame(df, feature_funcs=None):
    """Return a copy of ``df`` with one added column per image feature.

    Args:
        df: DataFrame with an 'Image object' column holding the images.
        feature_funcs: optional iterable of callables, each taking a single
            image and returning a value. When omitted, every module-level
            callable named ``ft<N>`` (N an integer) is used in ascending
            order of N, so ft0 and the most recently added feature are
            included instead of a hard-coded index range.

    Returns:
        A new DataFrame (``df`` is left untouched) in which each feature
        column is named after its function (``func.__name__``).
    """
    if feature_funcs is None:
        # Look the functions up directly rather than eval()-ing source text.
        module_scope = globals()
        names = sorted(
            (name for name, obj in module_scope.items()
             if name.startswith('ft') and name[2:].isdigit() and callable(obj)),
            key=lambda name: int(name[2:]),
        )
        feature_funcs = [module_scope[name] for name in names]

    df_new = df.copy()
    images = df_new['Image object']
    for func in feature_funcs:
        # apply() invokes the feature once per image in Python -- the slow part.
        df_new[func.__name__] = images.apply(func)

    return df_new

In [None]:
# Keep the result: the feature pass is slow, so it should not have to be
# recomputed just to use or inspect the frame again.
features_from_nb2 = feature_frame(data_from_nb1)
# Show a small preview instead of dumping the whole frame.
features_from_nb2.head()