In [1]:
from __future__ import division
from __future__ import print_function
from PIL import Image
import sys
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from io import BytesIO
from sklearn.metrics import accuracy_score
import urllib.request as urllib2
from urllib.parse import urlparse



In [34]:
clf = RandomForestClassifier()

def process_image_url(image_url):
    '''Given an image URL it returns its feature vector

    Args:
      image_url (str): url of the image to process.

    Returns:
      list of float: feature vector.

    Raises:
      Any exception raised by urllib2 requests.

      IOError: if the URL does not point to a valid file.
    '''
    parsed_url = urlparse(image_url)
    request = urllib2.Request(image_url)
    # set a User-Agent and Referer to work around servers that block a typical
    # user agents and hotlinking. Sorry, it's for science!
    request.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux ' \
            'x86_64; rv:31.0) Gecko/20100101 Firefox/31.0')
    request.add_header('Referrer', parsed_url.netloc)
    # Wrap network data in StringIO so that it looks like a file
    net_data = BytesIO(urllib2.build_opener().open(request).read())
    image = Image.open(net_data)
    return process_image(image)


def process_image(image, blocks=4):
    '''Given a PIL Image object it returns its feature vector.

    Args:
      image (PIL.Image): image to process.
      blocks (int, optional): number of block to subdivide the RGB space into.

    Returns:
      list of float: feature vector if successful. None if the image is not
      RGB.
    '''
    if not image.mode == 'RGB':
        return None
    feature = [0] * blocks * blocks * blocks
    pixel_count = 0
    for pixel in image.getdata():
        ridx = int(pixel[0]/(256/blocks))
        gidx = int(pixel[1]/(256/blocks))
        bidx = int(pixel[2]/(256/blocks))
        idx = ridx + gidx * blocks + bidx * blocks * blocks
        feature[idx] += 1
        pixel_count += 1
        
    return [x/pixel_count for x in feature]

def process_image_file(image_path):
    '''Given an image path it returns its feature vector.

    Args:
      image_path (str): path of the image file to process.

    Returns:
      list of float: feature vector on success, None otherwise.
    '''
    image_fp = BytesIO(open(image_path, 'rb').read())
    try:
        image = Image.open(image_fp)
        return process_image(image)
    except IOError:
        print("Exception" , IoError)
        return None
    
def process_directory(directory):
    '''Returns an array of feature vectors for all the image files in a
    directory (and all its subdirectories). Symbolic links are ignored.

    Args:
      directory (str): directory to process.

    Returns:
      list of list of float: a list of feature vectors.
    '''
    training = []
    for root, _, files in os.walk(directory):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            img_feature = process_image_file(file_path)
            print(file_path)
            if img_feature:
                training.append(img_feature)
    return training

In [35]:
def train(training_path_a,training_path_b):
    print("Hello")
    training_a = process_directory(training_path_a)
    training_b = process_directory(training_path_b)
    data = training_a + training_b
    # target is the list of target classes for each feature vector: a '1' for
    # class A and '0' for class B
    target = [1] * len(training_a) + [0] * len(training_b)
    x_train, x_test, y_train, y_test = train_test_split(data,
            target, test_size=0.20)
    clf.fit(x_train, y_train)
    preds = clf.predict(x_test)

    print("Accuracy:", accuracy_score(y_test,preds))
    
    
   
    

In [39]:
train("gear_images/helmets","gear_images/boots")   

Hello
gear_images/helmets/100536.jpeg
gear_images/helmets/10085954x1031041_zm.jpeg
gear_images/helmets/10085954x1114172_zm.jpeg
gear_images/helmets/10085954x1114180_zm.jpeg
gear_images/helmets/10085954x1114185_zm.jpeg
gear_images/helmets/10093454x1013457_zm.jpeg
gear_images/helmets/10093454x1042156_zm.jpeg
gear_images/helmets/100943.jpeg
gear_images/helmets/100973.jpeg
gear_images/helmets/10187948x1011898_zm.jpeg
gear_images/helmets/10187948x1012905_zm.jpeg
gear_images/helmets/10187948x1046104_zm.jpeg
gear_images/helmets/10187948x1078795_zm.jpeg
gear_images/helmets/10187949x1040578_zm.jpeg
gear_images/helmets/10187949x1121031_zm.jpeg
gear_images/helmets/10188393x1011898_zm.jpeg
gear_images/helmets/10188393x1043709_zm.jpeg
gear_images/helmets/10188393x1105548_zm.jpeg
gear_images/helmets/10188394x1011898_zm.jpeg
gear_images/helmets/10188395x1036824_zm.jpeg
gear_images/helmets/10190075x1102897_zm.jpeg
gear_images/helmets/10190075x1145821_zm.jpeg
gear_images/helmets/10190104x1010868_zm.jpe

gear_images/boots/51t-mAcoA0L._AC_US436_QL65_.jpg
gear_images/boots/51VGqNBvazL._AC_US436_QL65_.jpg
gear_images/boots/51WX8V6q94L._AC_US436_QL65_.jpg
gear_images/boots/51xk7ZHkCkL._AC_US436_QL65_.jpg
gear_images/boots/51XZd2kVbfL._AC_US436_QL65_.jpg
gear_images/boots/51y2T-wFPqL._AC_US436_QL65_.jpg
gear_images/boots/51yQeovtEdL._AC_US436_QL65_.jpg
gear_images/boots/51YZs+KEHML._AC_US436_QL65_.jpg
gear_images/boots/51ZAghEeYGL._AC_US436_QL65_.jpg
gear_images/boots/733952.jpeg
gear_images/boots/859910.jpeg
gear_images/boots/866158.jpeg
gear_images/boots/866908.jpeg
gear_images/boots/866950.jpeg
gear_images/boots/866951.jpeg
gear_images/boots/874196.jpeg
gear_images/boots/881270.jpeg
gear_images/boots/881431.jpeg
gear_images/boots/896259.jpeg
gear_images/boots/896260.jpeg
gear_images/boots/PAAAIAHENGMMFJBAx1002668_zm.jpeg
gear_images/boots/PAAAIALEPOLAOKBAx1039826_zm.jpeg
gear_images/boots/PMMPIHLHKKCNMANJx1014040_zm.jpeg
Accuracy: 0.8095238095238095


In [40]:
from sklearn.metrics import accuracy_score
preds = clf.predict(x_test)

print("Accuracy:", accuracy_score(y_test,preds))


NameError: name 'x_test' is not defined

In [41]:
#test my classifier
X = process_image_url("https://shop.epictv.com/sites/default/files/ae42ad29e70ba8ce6b67d3bdb6ab5c6e.jpeg")
X = X.reshape(X.shape[0]*X.shape[1]*X.shape[2],X.shape[3]).T
y = Y.reshape(y.shape[0],)
X, y = shuffle(X, y, random_state=42)
print(clf.predict(features))

AttributeError: 'list' object has no attribute 'reshape'