In [112]:
import numpy as np
from collections import defaultdict

class Image_Processing(object):
    def __init__(self, file_class_mapping, other_class = "new_whale"):
        self.file_class_mapping= file_class_mapping
        self.class_to_list_files = defaultdict(list)
        self.list_other_class = []
        self.list_all_files = list(file_class_mapping.keys())
        self.range_all_files = list(range(len(self.list_all_files)))

        for file, class_ in file_class_mapping.items():
            if class_ == other_class:
                self.list_other_class.append(file)
            else:
                self.class_to_list_files[class_].append(file)

        self.list_classes = list(set(self.file_class_mapping.values()))
        self.range_list_classes= range(len(self.list_classes))
        self.class_weight = np.array([len(self.class_to_list_files[class_]) for class_ in self.list_classes])
        self.class_weight = self.class_weight/np.sum(self.class_weight)

        


In [113]:
from keras import backend as K
from keras.models import Model
from keras.layers import Embedding, Flatten, Input, merge, Conv2D, MaxPooling2D, Dense, \
GlobalMaxPooling2D, Convolution2D,Dropout, BatchNormalization, GlobalMaxPool2D, Concatenate, \
GlobalAveragePooling2D, Lambda
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras import optimizers, losses, activations, models
from keras.applications.resnet50 import ResNet50
from PIL import Image

batch_size = 8
input_shape = (256, 256)
base_path = "Udacity_Projects/Humpback_Whale_Identification/train/"

def resize_images(filepath):
    im = Image.open((filepath)).convert('RGB')
    im = im.resize(input_shape)
    im_array = np.array(im, dtype="uint8")[..., ::-1]
    return np.array(im_array / (np.max(im_array)+ 0.001), dtype="float32")

def identity_loss(y_true, y_pred):

    return K.mean(y_pred - 0 * y_true)


def bpr_loss(X):

    positive_item_latent, negative_item_latent, user_latent = X

    # BPR loss
    loss = 1.0 - K.sigmoid(
        K.sum(user_latent * positive_item_latent, axis=-1, keepdims=True) -
        K.sum(user_latent * negative_item_latent, axis=-1, keepdims=True))

    return loss

def create_base_model():
    latent_dim = 50
    base_model = ResNet50(include_top=False) # use weights='imagenet' locally

    # for layer in base_model.layers:
    #     layer.trainable = False

    x = base_model.output
    x = GlobalMaxPooling2D()(x)
    x = Dropout(0.5)(x)
    dense_1 = Dense(latent_dim)(x)
    normalized = Lambda(lambda  x: K.l2_normalize(x,axis=1))(dense_1)
    base_model = Model(base_model.input, normalized, name="base_model")
    return base_model

def create_model():
    base_model = get_base_model()

    positive_example_1 = Input(input_shape+(3,) , name='positive_example_1')
    negative_example = Input(input_shape+(3,), name='negative_example')
    positive_example_2 = Input(input_shape+(3,), name='positive_example_2')

    positive_example_1_out = base_model(positive_example_1)
    negative_example_out = base_model(negative_example)
    positive_example_2_out = base_model(positive_example_2)

    loss = merge(
        [positive_example_1_out, negative_example_out, positive_example_2_out],
        mode=bpr_loss,
        name='loss',
        output_shape=(1, ))

    model = Model(
        input=[positive_example_1, negative_example, positive_example_2],
        output=loss)
    model.compile(loss=identity_loss, optimizer=Adam(0.000001))

    print(model.summary())

    return model


model_name = "humpbackWhale_model"

file_path = model_name + "weights.best.hdf5"



def create_humpback_model(weight_path=file_path):
    base_model = create_base_model()

    positive_example_1 = Input(input_shape+(3,) , name='positive_example_1')
    negative_example = Input(input_shape+(3,), name='negative_example')
    positive_example_2 = Input(input_shape+(3,), name='positive_example_2')

    positive_example_1_out = base_model(positive_example_1)
    negative_example_out = base_model(negative_example)
    positive_example_2_out = base_model(positive_example_2)

    loss = merge(
        [positive_example_1_out, negative_example_out, positive_example_2_out],
        mode=bpr_loss,
        name='loss',
        output_shape=(1, ))

    model = Model(
        input=[positive_example_1, negative_example, positive_example_2],
        output=loss)
    model.compile(loss=identity_loss, optimizer=Adam(0.000001))

    model.load_weights(weight_path)

    humpback_model = Model(base_model.get_input_at(0), output=base_model.get_output_at(0))
    humpback_model.compile(loss="mse", optimizer=Adam(0.000001))
    print(humpback_model.summary())

    return humpback_model

In [114]:
import os

def CreateImageData(fpaths, batch=16):
    i = 0
    for path in fpaths:
        if i == 0:
            imgs = []
            fnames = []
        i += 1
        img = resize_images(path)
        imgs.append(img)
        fnames.append(os.path.basename(path))
        if i == batch:
            i = 0
            imgs = np.array(imgs)
            yield fnames, imgs
    if i < batch:
        imgs = np.array(imgs)
        yield fnames, imgs
    raise StopIteration()

In [115]:
import pandas as pd

data = pd.read_csv('Udacity_Projects/Humpback_Whale_Identification/train.csv')
data.head(5)

Unnamed: 0,Image,Id
0,00022e1a.jpg,w_e15442c
1,000466c4.jpg,w_1287fbc
2,00087b01.jpg,w_da2efe0
3,001296d5.jpg,w_19e5482
4,0014cfdf.jpg,w_f22f3e3


In [116]:
from sklearn.model_selection import train_test_split

train, test = train_test_split(data, test_size=0.3, shuffle=True, random_state=1337)

imageName_id_mapping_train = {i: j for i, j in zip(train.Image.values, train.Id.values)}
imageName_id_mapping_test = {i: j for i, j in zip(test.Image.values, test.Id.values)}

In [117]:
train_images = Image_Processing(imageName_id_mapping_train)
test_images = Image_Processing(imageName_id_mapping_test)

In [118]:
#model = build_model()



#model.load_weights(file_path)

#checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

#early = EarlyStopping(monitor="val_loss", mode="min", patience=2)

#callbacks_list = [checkpoint, early]  # early

#history = model.fit_generator(gen(train_images), validation_data=gen(test_images), epochs=3, verbose=2, workers=4, 
#   

In [119]:


humpback_model = create_humpback_model()



  name=name)


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           (None, None, None, 3 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_10[0][0]                   
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, None, None, 6 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation



In [120]:
import glob

data = pd.read_csv('Udacity_Projects/Humpback_Whale_Identification/train.csv')

images_id_mapping = {i: j for i, j in zip(data.Image.values, data.Id.values)}

train_files = glob.glob("Udacity_Projects/Humpback_Whale_Identification/train/*.jpg")
test_files = glob.glob("Udacity_Projects/Humpback_Whale_Identification/test/*.jpg")

trainImages_predictions = []
train_image_names = []
i = 1
for filenames, imgs in CreateImageData(train_files, batch=32):
    if i==1:#Ravi Delete it
        print(i*32/len(train_files)*100)
        i += 1
        predicts = humpback_model.predict(imgs)
        predicts = predicts.tolist()
        trainImages_predictions += predicts
        train_image_names += filenames
        break #Ravi Delete it
trainImages_predictions = np.array(trainImages_predictions)


testImages_predictions = []
test_image_names = []
i = 1
for fnames, imgs in CreateImageData(test_files, batch=32):
    if i==1: #Ravi Delete it
        print(i * 32 / len(test_files) * 100)
        i += 1
        predicts = humpback_model.predict(imgs)
        predicts = predicts.tolist()
        testImages_predictions += predicts
        test_image_names += fnames
        break #Ravi Delete it
testImages_predictions = np.array(testImages_predictions)

0.3248730964467005
0.20499679692504805


In [121]:
from sklearn.neighbors import NearestNeighbors

kNeighbors = NearestNeighbors(n_neighbors=5)
kNeighbors.fit(trainImages_predictions)

distances_testImages, neighbors_testImages = kNeighbors.kneighbors(testImages_predictions)

distances_testImages, neighbors_testImages = distances_testImages.tolist(), neighbors_testImages.tolist()

In [123]:
final_predictions = []

for filepath, distance, neighbour_ in zip(train_image_names, distances_testImages, neighbors_testImages):
    sample_result = []
    sample_classes = []
    for d, n in zip(distance, neighbour_):
        train_file = train_files[n].split(os.sep)[-1]
        class_train = images_id_mapping[train_file]
        sample_classes.append(class_train)
        sample_result.append((class_train, d))

    if "new_whale" not in sample_classes:
        sample_result.append(("new_whale", 0.1))
        sample_result.sort(key=lambda x: x[1])
        sample_result = sample_result[:5]
        final_predictions.append(" ".join([x[0] for x in sample_result]))

df = pd.DataFrame(final_predictions, columns=["Id"])
df['Image'] = [x.split(os.sep)[-1] for x in test_image_names]
df.to_csv("sub_%s.csv"%model_name, index=False)

ValueError: Length of values does not match length of index

In [124]:
model_name

'humpbackWhale_model'