In [8]:
# resnet50, inceptionv3, xception, vgg19, densenet201 
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.vgg19 import VGG19
from keras.applications.densenet import DenseNet201
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
import pandas as pd
import os
import re
from tqdm import tqdm
from imageFeaturesExtractionMulti import check_imgpath, load_image
from myutils import timer

# Register tqdm's `progress_apply` on pandas objects so the long per-image apply
# further down can display a progress bar.
tqdm.pandas()

def predict_imagenet(img_path, models, topk=3):
    """Score one image with every model and keep the top-`topk` class scores.

    Parameters
    ----------
    img_path : str
        Path of the image file to classify.
    models : sequence
        Objects exposing ``predict``; each one receives the same preprocessed
        single-image batch.
    topk : int, default 3
        Number of decoded predictions to keep per model.

    Returns
    -------
    list of list
        One inner list per model (same order as `models`) holding the score
        (third field of each tuple returned by ``decode_predictions``) of the
        `topk` decoded classes. If the path is rejected by ``check_imgpath`` or
        the image cannot be loaded or converted, every inner list is
        ``[0] * topk`` so the result always has the same shape.
    """
    def _fallback():
        # Build a fresh list per model: callers may mutate one entry without
        # affecting the others, and the length follows `topk` instead of a
        # hard-coded 3.
        return [[0] * topk for _ in models]

    if not check_imgpath(img_path):
        return _fallback()

    try:
        img = image.load_img(img_path, target_size=(224, 224))
    except Exception:
        # Best effort: an unreadable file yields the zero-filled fallback.
        print('Cannot open image: ', img_path)
        return _fallback()

    try:
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict
        # NOTE(review): `preprocess_input` is the one imported from
        # keras.applications.resnet50 and the 224x224 target size is fixed, yet
        # the same tensor is fed to every entry of `models`. Other architectures
        # may expect a different preprocessing / input size -- confirm.
        x = preprocess_input(x)
    except Exception:
        print('Cannot resize: ', img_path)
        return _fallback()

    preds = []
    for model in models:
        decoded = decode_predictions(model.predict(x), top=topk)[0]
        preds.append([entry[2] for entry in decoded])
    return preds


In [2]:
# Instantiate every ImageNet-pretrained classifier once. The position in this
# list is the model index `i` used in the `imagenet_preds_top{t}_{i}` columns.
models = [
    build(weights='imagenet')
    for build in (ResNet50, InceptionV3, Xception, VGG19, DenseNet201)
]
len(models)

5

In [7]:
features_path = '../features/'
input_path = '../input/'

# Only the image id column is read from each CSV.
train = pd.read_csv('../input/train.csv', usecols=['image'])
test = pd.read_csv('../input/test.csv', usecols=['image'])

# Show how many rows lack an image id before the gaps are filled.
print(train.isnull().sum())
print(test.isnull().sum())

# Remember the train row count so the stacked frame can be split again later.
lentrain = len(train)

# Missing ids become '' and every id is turned into a relative .jpg path.
train['image'] = input_path + 'train_jpg/' + train['image'].fillna('').astype(str) + '.jpg'
test['image'] = input_path + 'test_jpg/' + test['image'].fillna('').astype(str) + '.jpg'



image    112588
dtype: int64
image    42609
dtype: int64


In [11]:
# Stack train and test into a single frame (train rows first).
# `ignore_index=True` gives the result a unique 0..n-1 index; without it the
# test rows repeat the train labels and label-based lookups on `features`
# become ambiguous.
# For a quick trial run, take a copy of a small slice instead, e.g.
# `train.iloc[:100].copy()`, so that adding columns later does not trigger
# SettingWithCopyWarning.
features = pd.concat([train, test], ignore_index=True)
features.shape

(2011862, 1)

In [12]:
def _predict_for_path(path):
    """Run every loaded model on the image stored at `path`."""
    return predict_imagenet(path, models)


# One call per image, and inside it one `predict` per model: this is the
# expensive step of the notebook, hence the timer and the progress bar.
with timer('predicting'):
    features['preds'] = features['image'].progress_apply(_predict_for_path)
features.head()

  0%|                                                                        | 888/2011862 [03:05<116:52:06,  4.78it/s]

KeyboardInterrupt: 

In [43]:
# Unpack the nested `preds` lists into one scalar column per (rank, model) pair.
# preds_rows[r][i][t] is the t-th kept score of model i for row r.
top_k = 3  # matches the default `topk` of predict_imagenet used to build `preds`

# Read the values once, by position. Looking rows up with `features['preds'][j]`
# goes through the index labels for every element, which is slow and gives the
# wrong object when labels are not a unique 0..n-1 range.
preds_rows = features['preds'].tolist()

for t in range(top_k):
    for i in range(len(models)):
        column = 'imagenet_preds_top'+str(t+1)+'_'+str(i)
        features[column] = [row[i][t] for row in preds_rows]

print(features.shape)
features.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,image,preds,imagenet_preds_top1_0,imagenet_preds_top1_1,imagenet_preds_top2_0,imagenet_preds_top2_1,imagenet_preds_top3_0,imagenet_preds_top3_1
0,../input/train_jpg/d10c7e016e03247a3bf2d13348f...,"[[0.28768688, 0.20811458, 0.19423328], [0.9999...",0.287687,0.999988,0.208115,1.237188e-05,0.194233,3.971218e-08
1,../input/train_jpg/79c9392cc51a9c81c6eb91eceb8...,"[[0.12275415, 0.10090396, 0.08167587], [1.0, 1...",0.122754,1.0,0.100904,1.287173e-09,0.081676,1.151123e-11
2,../input/train_jpg/b7f250ee3f39e1fedd77c141f27...,"[[0.57183695, 0.22945851, 0.16366412], [0.9994...",0.571837,0.999437,0.229459,0.0005130248,0.163664,4.96541e-05
3,../input/train_jpg/e6ef97e0725637ea84e3d203e82...,"[[0.27299944, 0.16049162, 0.107375585], [1.0, ...",0.272999,1.0,0.160492,2.365407e-08,0.107376,2.348315e-09
4,../input/train_jpg/54a687a3a0fc1d68aed99bdaaf5...,"[[0.5837688, 0.12003572, 0.082959704], [0.9614...",0.583769,0.961488,0.120036,0.03834981,0.08296,8.656106e-05


In [None]:
# Split the stacked feature frame back into train / test and persist both parts.
# `lentrain` is the train row count recorded when the CSVs were loaded; it is
# reused here instead of a hard-coded number so the split stays correct if the
# input files (or the rows selected for the run) change.

# Keep only the numeric feature columns.
features = features.drop(columns=['preds', 'image'])

# Train rows come first in `features`, so a positional split restores both parts.
# The index is reset because the test slice would otherwise start at `lentrain`.
train = features.iloc[:lentrain].reset_index(drop=True)
test = features.iloc[lentrain:].reset_index(drop=True)

# Guard against splitting a frame that does not contain the full train set
# (for example a small debugging subset).
assert len(train) == lentrain, 'features holds fewer rows than the train set'

print(train.shape)
print(test.shape)

train.to_feather('../features/train/imagenet_train.feather')
test.to_feather('../features/test/imagenet_test.feather')
