## Regression

In [2]:
import csv
import math

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from keras import applications
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model, load_model
from keras.layers import Dropout, Flatten, Dense, Input
from keras.initializers import glorot_uniform
from keras.applications.vgg16 import preprocess_input

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import keras.backend as K

from keras.models import Model
from keras.layers import Input, Flatten, Dropout, Concatenate, Activation
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
from keras.layers import GlobalMaxPooling2D, GlobalAveragePooling2D

from keras.applications.imagenet_utils import decode_predictions
from keras.applications.imagenet_utils import preprocess_input
from keras_applications.imagenet_utils import _obtain_input_shape
from keras.utils.data_utils import get_file

# read the CSV into memory
prices = []
image_paths = []

data_path = "../datasets/bikes_im/"
with open("../datasets/bikes_filtered.csv") as file:
    reader = csv.reader(file)
    i = -1
    for row in reader:
        i += 1
        index = row[0]
        name = row[1]
        msrp = row[2]
        
        image_path = data_path + index + '.jpg'
        image_paths.append(image_path)
        prices.append(int(msrp))

train_indices = np.load("bikes_train_indices.npy")
test_indices = np.load("bikes_test_indices.npy")
print(train_indices.shape)
print(test_indices.shape)

def image_generator(indices, batch_size):

    num_batches = int(len(indices) / batch_size)
    
    while True:
        for batch_i in range(num_batches):
            if batch_i == num_batches - 1:
                # special case: return as many as possible
                start_i = batch_i * batch_size
                batch_indices = indices[start_i:]
                
                X = np.zeros((len(batch_indices), 224, 224, 3))
                Y = np.zeros((len(batch_indices), 1))
            
            else:
                start_i = batch_i * batch_size
                end_i = start_i + batch_size

                batch_indices = indices[start_i:end_i]

                X = np.zeros((batch_size, 224, 224, 3))
                Y = np.zeros((batch_size, 1))
            
            for i, index in enumerate(batch_indices):
                img = image.load_img(image_paths[index], target_size=(224, 224))
                X[i, :, :, :] = image.img_to_array(img)                
                Y[i] = prices[index]
            
            # use vgg16 preprocessing
            X = preprocess_input(X)
            
            yield (X, Y)

Using TensorFlow backend.


(19658,)
(2185,)


In [2]:
test_steps = math.ceil(len(test_indices) / 64)
# get predictions on each batch yielded the validation generator.
def evaluate(new_model):
    validation_generator = image_generator(test_indices, 64)

    predicted = []
    actual = []

    for step in range(test_steps):
        X, Y = next(validation_generator)
        curr_pred = new_model.predict(X)
        for entry in curr_pred:
            predicted.append(entry)
        for entry in Y:
            actual.append(entry)

    predicted = np.array(predicted)
    actual = np.array(actual)

    MSE = mean_squared_error(predicted, actual)
    MAE = mean_absolute_error(predicted, actual)
    R2 = r2_score(actual, predicted)

    print((np.sqrt(MSE), MAE, R2))

In [4]:
# new_model = load_model('output/bikes-cnn-SqueezeNet_Vanilla-best.hdf5')
# print("SqueezeNet:")
# evaluate(new_model)

new_model = load_model('E:/output/bikes-cnn-PriceNet-Reg-Aug/final.hdf5')
print("PriceNet")
evaluate(new_model)

ValueError: Unknown layer:layers

## Classification

In [3]:
# read the CSV into memory
prices = []
image_paths = []

data_path = "../datasets/bikes_im/"
with open("../datasets/bikes_classified.csv") as file:
    reader = csv.reader(file)
    i = -1
    for row in reader:
        i += 1
        index = row[0]
        name = row[1]
        msrp = row[2]
        label = row[3]
        
        image_path = data_path + index + '.jpg'
        image_paths.append(image_path)
        prices.append(str(label))
        
train_indices = np.load("bikes_train_indices.npy")
test_indices = np.load("bikes_test_indices.npy")
print(train_indices.shape)
print(test_indices.shape)

def image_generator(indices, batch_size):

    num_batches = int(len(indices) / batch_size)
    
    while True:
        for batch_i in range(num_batches):
            if batch_i == num_batches - 1:
                # special case: return as many as possible
                start_i = batch_i * batch_size
                batch_indices = indices[start_i:]
                
                X = np.zeros((len(batch_indices), 224, 224, 3))
                Y = np.zeros((len(batch_indices), 4)) # Change to one-hot
            
            else:
                start_i = batch_i * batch_size
                end_i = start_i + batch_size

                batch_indices = indices[start_i:end_i]

                X = np.zeros((batch_size, 224, 224, 3))
                Y = np.zeros((batch_size, 4)) # Change to one-hot
            
            for i, index in enumerate(batch_indices):
                img = image.load_img(image_paths[index], target_size=(224, 224))
                X[i, :, :, :] = image.img_to_array(img)
                # Convert to 1 hot vector
                p = prices[index]
                if p == "25":
                    Y[i,:] = np.array([1,0,0,0])
                if p == "50":
                    Y[i,:] = np.array([0,1,0,0])
                if p == "75":
                    Y[i,:] = np.array([0,0,1,0])
                if p == "100":
                    Y[i,:] = np.array([0,0,0,1])
            
            # use vgg16 preprocessing
            X = preprocess_input(X)

            yield (X, Y)

(19658,)
(2185,)


In [8]:
from sklearn.metrics import confusion_matrix, classification_report

test_steps = math.ceil(len(test_indices) / 64)
# get predictions on each batch yielded the validation generator.
def evaluate(new_model):
    counter = 0
    true_label = []
    predicted_label = []
    for index in test_indices:
        msrp = prices[index]
        true_label.append(str(msrp))

        path = image_paths[index]
        img = image.load_img(path, target_size=(224, 224))
        data = np.expand_dims(image.img_to_array(img), axis=0)

        # Prediction outputs softmax vector
        prediction = new_model.predict(data)

        # Set most confident prediction as label, and convert it to our price scale
        label = np.argmax(prediction) * 25 + 25
        
        if str(msrp) != str(label):
            counter+=1
        predicted_label.append(str(label))
    print(counter)
    print("Classification report:\n%s\n"
      % (classification_report(true_label, predicted_label)))
    print("Confusion matrix:\n%s" % confusion_matrix(true_label, predicted_label))

In [9]:
new_model = load_model('E:/output/bikes-cnn-PriceNet-Class-Aug/00015.hdf5')
print("PriceNet")
evaluate(new_model)

PriceNet
1553
Classification report:
             precision    recall  f1-score   support

        100       0.69      0.05      0.10       496
         25       0.00      0.00      0.00       596
         50       0.27      0.98      0.43       563
         75       0.42      0.10      0.16       530

avg / total       0.33      0.29      0.17      2185


Confusion matrix:
[[ 27   2 408  59]
 [  0   0 591   5]
 [  2   0 554   7]
 [ 10   0 469  51]]
