In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import gc

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

import cv2
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
# Empty accumulators; later cells fill x_train / y_train with per-image data.
x_train, x_test, y_train = [], [], []

# Training metadata table (later cells read its 'tags' column and iterate
# over its rows).
df_train = pd.read_csv('train.csv')

In [3]:
# Helper: collapse a list of lists into a single flat list.
flatten = lambda l: [item for sublist in l for item in sublist]

# Every distinct tag found in the space-separated 'tags' column.
# sorted() instead of list(): iteration order of a set of strings is arbitrary
# and can change between interpreter runs (string hash randomization), which
# would silently change which output column each label is assigned to.
# Sorting makes the label -> index mapping reproducible across kernel restarts.
labels = sorted(set(flatten([l.split(' ') for l in df_train['tags'].values])))

In [4]:
# Forward lookup: label string -> integer position in `labels`.
label_map = dict(zip(labels, range(len(labels))))
# Reverse lookup: integer position -> label string.
inv_label_map = dict(zip(label_map.values(), label_map.keys()))

In [6]:
# Load every training image, shrink it to 64x64 and build its multi-hot
# target vector from the space-separated tags.

# Start from empty lists so that re-running this cell neither appends a
# second copy of the data nor fails because x_train / y_train were already
# converted to arrays by a later cell.
x_train = []
y_train = []

for f, tags in tqdm(df_train.values, miniters=1000):
    # Build the path portably: the previous literal 'images\multi-label/...'
    # contained an invalid '\m' escape and a Windows-only separator.
    img_path = os.path.join('images', 'multi-label', '{}.jpg'.format(f))
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail here with the offending path instead of inside cv2.resize.
        raise FileNotFoundError('could not read image: {}'.format(img_path))

    # One slot per label index; 17 matches the width of the model's output layer.
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1

    x_train.append(cv2.resize(img, (64, 64)))
    y_train.append(targets)

100%|███████████████████████████████████████████████████████████████████████████| 40479/40479 [06:27<00:00, 104.47it/s]


In [7]:
# Stack the per-image target vectors into a single uint8 array.
y_train = np.array(y_train, dtype=np.uint8)
# Stack the images as float16 and divide by 255 (presumably mapping 8-bit
# pixel intensities into the [0, 1] range).
x_train = np.true_divide(np.array(x_train, dtype=np.float16), 255.)

In [8]:
# Sanity check: show the image array shape, then the target array shape.
print('{}\n{}'.format(x_train.shape, y_train.shape))

(40479, 64, 64, 3)
(40479, 17)


In [9]:
# Hold-out split by position: the first `split` samples stay in the training
# set, everything after that becomes the validation set.
split = 35000
x_train, x_valid = x_train[:split], x_train[split:]
y_train, y_valid = y_train[:split], y_train[split:]

In [32]:
# Build the whole network in a single expression. Previously the model was
# created in one cell and extended with model.add(...) in the next, so
# re-running the second cell stacked a duplicate set of layers onto the same
# model. Constructing it in one place makes this cell idempotent: every run
# yields a fresh model with exactly these layers.
model = Sequential([
    # Convolutional block 1: 'same' padding keeps the 64x64 spatial size
    # until the pooling layer.
    Conv2D(64, kernel_size=(3, 3),
           activation='relu',
           input_shape=(64, 64, 3),
           padding='same'),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.36),
    # Convolutional block 2: default padding, so each conv trims the borders.
    Conv2D(256, (3, 3), activation='relu'),
    Conv2D(512, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classifier head: 17 sigmoid units, one per column of the target vectors.
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(17, activation='sigmoid'),
])

In [34]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported as the monitoring metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [36]:
# Train for 4 epochs in mini-batches of 32, evaluating on the held-out
# validation arrays after each epoch and showing the progress bar.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_valid, y_valid),
    epochs=4,
    batch_size=32,
    verbose=1,
)

Train on 35000 samples, validate on 5479 samples
Epoch 1/4


 6720/35000 [====>.........................] - ETA: 129s - loss: 0.4156 - acc: 0.897 - ETA: 117s - loss: 0.5437 - acc: 0.914 - ETA: 113s - loss: 0.5164 - acc: 0.905 - ETA: 111s - loss: 0.4801 - acc: 0.895 - ETA: 109s - loss: 0.4657 - acc: 0.892 - ETA: 108s - loss: 0.4380 - acc: 0.893 - ETA: 108s - loss: 0.4253 - acc: 0.891 - ETA: 107s - loss: 0.4335 - acc: 0.887 - ETA: 106s - loss: 0.4263 - acc: 0.884 - ETA: 106s - loss: 0.4154 - acc: 0.885 - ETA: 106s - loss: 0.4073 - acc: 0.886 - ETA: 105s - loss: 0.4023 - acc: 0.885 - ETA: 105s - loss: 0.3948 - acc: 0.883 - ETA: 105s - loss: 0.3875 - acc: 0.884 - ETA: 105s - loss: 0.3845 - acc: 0.884 - ETA: 104s - loss: 0.3778 - acc: 0.885 - ETA: 104s - loss: 0.3741 - acc: 0.885 - ETA: 104s - loss: 0.3717 - acc: 0.884 - ETA: 104s - loss: 0.3681 - acc: 0.883 - ETA: 104s - loss: 0.3612 - acc: 0.885 - ETA: 104s - loss: 0.3586 - acc: 0.885 - ETA: 103s - loss: 0.3544 - acc: 0.887 - ETA: 103s - loss: 0.3522 - acc: 0.887 - ETA: 103s - loss: 0.3497 - acc: 0





KeyboardInterrupt: 

In [31]:
from sklearn.metrics import fbeta_score

# Predict on the validation images, then dump the ground-truth targets and
# the raw predictions for visual inspection.
p_valid = model.predict(x_valid, batch_size=128)
print(y_valid)
print(p_valid)

# Binarize the predictions at a 0.25 threshold and report the
# sample-averaged F-beta score with beta=2.
print(fbeta_score(
    y_true=y_valid,
    y_pred=np.array(p_valid) > 0.25,
    beta=2,
    average='samples',
))

[[0 0 0 ..., 0 1 0]
 [0 1 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 1]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 1 0 ..., 0 0 0]
 [0 0 0 ..., 1 0 0]]
[[  3.41795478e-03   3.49149406e-01   6.88070012e-03 ...,   4.70918044e-02
    1.05543230e-02   2.20186293e-01]
 [  3.62914783e-04   5.43621778e-01   2.14697188e-03 ...,   6.67340914e-03
    5.51735796e-03   2.05540493e-01]
 [  6.81488527e-05   5.49709022e-01   1.44418282e-03 ...,   1.35168675e-02
    3.66808847e-03   3.67286116e-01]
 ..., 
 [  8.01305578e-04   5.55778325e-01   8.93370307e-04 ...,   4.28759269e-02
    2.45495653e-03   2.20832571e-01]
 [  3.67341359e-04   5.96242368e-01   4.87625774e-04 ...,   5.86157180e-02
    1.51333096e-03   2.10207671e-01]
 [  7.93422514e-04   2.16321394e-01   1.50585244e-03 ...,   9.01548147e-01
    1.61375955e-03   2.62208879e-01]]
0.842427989782
