In [1]:
import cv2
import pandas as pd
import numpy as np
import os
from tqdm import tqdm, tqdm_notebook

train_df = pd.read_csv('train.csv')
img_size = 256
batch_size = 16

In [2]:
pet_ids = train_df['PetID'].values
n_batches = len(pet_ids) // batch_size + 1

In [3]:
from tensorflow.keras.applications.densenet import preprocess_input, DenseNet121

In [34]:
def resize_to_square(im):
    old_size = im.shape[:2] # old_size is in (height, width) format
    ratio = float(img_size)/max(old_size)
    new_size = tuple([int(x*ratio) for x in old_size])
    # new_size should be in (width, height) format
    im = cv2.resize(im, (new_size[1], new_size[0]))
    delta_w = img_size - new_size[1]
    delta_h = img_size - new_size[0]
    top, bottom = delta_h//2, delta_h-(delta_h//2)
    left, right = delta_w//2, delta_w-(delta_w//2)
    color = [0, 0, 0]
    new_im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT,value=color)
    return new_im

def load_image(path, pet_id):
    image = cv2.imread(f'{path}{pet_id}-1.jpg')
    new_image = resize_to_square(image)
    new_image = preprocess_input(new_image)
    return new_image

def get_features(df, folder, batch_size = 16):
    
    pet_ids = df['PetID'].values
    n_batches = len(pet_ids) // batch_size + 1

    features = {}
    for b in tqdm_notebook(range(n_batches)):
        start = b*batch_size
        end = (b+1)*batch_size
        batch_pets = pet_ids[start:end]
        batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
        for i,pet_id in enumerate(batch_pets):
            try:
                batch_images[i] = load_image("{}/".format(folder), pet_id)
            except:
                pass
        batch_preds = m.predict(batch_images)
        for i,pet_id in enumerate(batch_pets):
            features[pet_id] = batch_preds[i]
    return features

In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Input, Lambda, AveragePooling1D
import tensorflow.keras.backend as K

inp = Input((256,256,3))
backbone = DenseNet121(input_tensor = inp, include_top = False)
x = backbone.output
x = GlobalAveragePooling2D()(x)
x = Lambda(lambda x: K.expand_dims(x,axis = -1))(x)
x = AveragePooling1D(4)(x)
out = Lambda(lambda x: x[:,:,0])(x)

m = Model(inp,out)

Instructions for updating:
Colocations handled automatically by placer.


In [39]:
train_features = get_features(train_df, "train_images")

HBox(children=(IntProgress(value=0, max=938), HTML(value='')))

In [6]:
# features = {}
# for b in tqdm_notebook(range(n_batches)):
#     start = b*batch_size
#     end = (b+1)*batch_size
#     batch_pets = pet_ids[start:end]
#     batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
#     for i,pet_id in enumerate(batch_pets):
#         try:
#             batch_images[i] = load_image("train_images/", pet_id)
#         except:
#             pass
#     batch_preds = m.predict(batch_images)
#     for i,pet_id in enumerate(batch_pets):
#         features[pet_id] = batch_preds[i]

HBox(children=(IntProgress(value=0, max=938), HTML(value='')))




In [40]:
train_feats = pd.DataFrame.from_dict(train_features, orient='index')

In [41]:
for cols in train_feats.columns.tolist():
    new_cols = 'img_feature_'+ str(cols)
    train_feats = train_feats.rename({cols: new_cols}, axis=1)

In [11]:
print(train_df.shape)
print(train_feats.shape)

(14993, 24)
(14993, 256)


In [42]:
train_feats = train_feats.reset_index().rename({'index':'PetID'}, axis = 1)
train_feats.to_csv('train_img_features.csv', index=False)
train_feats.head()

Unnamed: 0,PetID,img_feature_0,img_feature_1,img_feature_2,img_feature_3,img_feature_4,img_feature_5,img_feature_6,img_feature_7,img_feature_8,...,img_feature_246,img_feature_247,img_feature_248,img_feature_249,img_feature_250,img_feature_251,img_feature_252,img_feature_253,img_feature_254,img_feature_255
0,86e1089a3,0.002,0.1678,0.019715,0.015896,0.068162,0.002216,0.005042,0.004828,0.05076,...,0.787699,0.176626,0.575706,1.088629,0.439556,0.52046,1.547071,0.832572,0.599094,0.763348
1,6296e909a,0.002858,0.10745,0.019916,0.023482,0.174765,0.002297,0.005031,0.006338,0.083378,...,0.628259,0.686865,0.564,0.96819,1.070276,1.545741,0.89441,0.838596,0.468237,0.916672
2,3422e4906,0.002734,0.072015,0.024455,0.018021,0.154207,0.001946,0.004211,0.001576,0.100046,...,0.579115,0.557625,1.131405,0.720513,1.49667,0.870955,1.289682,1.184461,0.465113,0.892827
3,5842f1ff5,0.002106,0.274519,0.054815,0.013727,0.089969,0.00165,0.005506,0.004295,0.118727,...,1.295853,0.326144,0.291669,1.608087,1.119176,1.470888,0.591445,0.832754,0.483021,1.134127
4,850a43f90,0.002185,0.174022,0.044818,0.016244,0.169775,0.002075,0.004421,0.004157,0.099671,...,1.092663,0.669894,0.395784,0.886075,1.21973,1.033965,1.065686,0.304054,0.438069,0.676818


In [14]:
test_df = pd.read_csv('test/test.csv')

In [15]:
# pet_ids = test_df['PetID'].values
# n_batches = len(pet_ids) // batch_size + 1

In [35]:
test_features = get_features(test_df, "test_images")

HBox(children=(IntProgress(value=0, max=247), HTML(value='')))

In [16]:
# features = {}
# for b in tqdm_notebook(range(n_batches)):
#     start = b*batch_size
#     end = (b+1)*batch_size
#     batch_pets = pet_ids[start:end]
#     batch_images = np.zeros((len(batch_pets),img_size,img_size,3))
#     for i,pet_id in enumerate(batch_pets):
#         try:
#             batch_images[i] = load_image("test_images/", pet_id)
#         except:
#             pass
#     batch_preds = m.predict(batch_images)
#     for i,pet_id in enumerate(batch_pets):
#         features[pet_id] = batch_preds[i]

HBox(children=(IntProgress(value=0, max=247), HTML(value='')))




In [36]:
test_feats = pd.DataFrame.from_dict(test_features, orient='index')

In [37]:
for cols in test_feats.columns.tolist():
    new_cols = 'img_feature_'+ str(cols)
    test_feats = test_feats.rename({cols: new_cols}, axis=1)

In [38]:
test_feats = test_feats.reset_index().rename({'index':'PetID'}, axis = 1)
test_feats.to_csv('test_img_features.csv', index=False)
test_feats.head()

Unnamed: 0,PetID,img_feature_0,img_feature_1,img_feature_2,img_feature_3,img_feature_4,img_feature_5,img_feature_6,img_feature_7,img_feature_8,...,img_feature_246,img_feature_247,img_feature_248,img_feature_249,img_feature_250,img_feature_251,img_feature_252,img_feature_253,img_feature_254,img_feature_255
0,378fcc4fc,0.001806,0.117852,0.042729,0.01567,0.081283,0.002821,0.004229,0.005075,0.065772,...,1.029824,1.142765,0.314542,1.055707,0.500093,0.70422,0.98193,0.408823,0.19714,1.360667
1,73c10e136,0.002083,0.112052,0.04938,0.017084,0.066333,0.002041,0.00431,0.005714,0.06031,...,1.325531,0.269746,0.795547,1.333989,0.285445,0.615942,1.539221,0.348188,0.516552,0.450889
2,72000c4c5,0.002462,0.133409,0.054052,0.00819,0.078125,0.002364,0.005102,0.00739,0.043697,...,0.582577,0.207661,0.863859,2.020501,0.92331,1.194252,1.401715,0.4942,1.467094,1.246928
3,e147a4b9f,0.002349,0.133167,0.049585,0.020017,0.135033,0.001816,0.009131,0.004642,0.099958,...,0.975655,0.270012,0.318352,1.500878,1.101671,1.693731,1.46435,0.509028,1.214401,1.065798
4,43fbba852,0.002491,0.139229,0.03305,0.012187,0.096443,0.002061,0.004109,0.00265,0.075938,...,1.402405,0.596918,0.205048,1.24953,2.450217,2.003505,1.547028,0.72892,0.664124,0.500689


In [31]:
pet_id

'e5bbe3e54'