# Loading data

 <img src="index.png">

In [1]:
import os
import matplotlib.pyplot as plt
from PIL import Image
import sys
import shutil
from scipy.misc import imsave,imread
import numpy as np
import keras
from keras.utils import np_utils

Using Theano backend.
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: GeForce GTX 1050 (CNMeM is disabled, cuDNN not available)


In [3]:
def load_images(images_path, path_to_labels):
    """Load every image of a directory into one array, together with its label.

    Parameters
    ----------
    images_path : str
        Directory holding the image files. Entries whose name starts with
        '.' are skipped.
    path_to_labels : str
        Text file with one whitespace-separated "<file name> <label>" pair
        per line. Blank lines are ignored.

    Returns
    -------
    images : numpy.ndarray
        uint8 array of shape (n_files, 150, 150, 3).
    labels : numpy.ndarray
        uint8 array of shape (n_files,) holding the integer label of each file.
    files : list of str
        File names, in os.listdir order, aligned with the rows of `images`
        and `labels`.

    Raises
    ------
    KeyError
        If a file found in `images_path` has no entry in the labels file.
    ValueError
        If a label is not an integer literal, or an image cannot be stored
        in a (150, 150, 3) slot.
    """
    # Map file name -> label string; skip blank lines so a trailing newline
    # in the labels file does not break the dict construction.
    with open(path_to_labels, 'r') as f:
        dict_labels = dict(line.split() for line in f if line.strip())

    # Build a real list (not a lazy filter object) so that len() and the
    # returned value behave the same on Python 2 and Python 3.
    files = [name for name in os.listdir(images_path)
             if not name.startswith('.')]
    #files = files[0:1000]

    # Pre-allocate the containers for all images and labels.
    images = np.zeros((len(files), 150, 150, 3), dtype=np.uint8)
    # NOTE(review): uint8 only holds 0..255 -- confirm no label exceeds 255.
    labels = np.zeros(len(files), dtype=np.uint8)
    for fid, file in enumerate(files):
        # Lightweight progress indicator.
        if fid % 1000 == 0:
            print(fid)
        image = imread(os.path.join(images_path, file))
        if image.ndim == 2:
            # Grayscale image: report it, then replicate the single channel
            # three times so it fits the RGB slot instead of raising.
            print(file)
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
        images[fid] = image
        labels[fid] = int(dict_labels[file])
    return images, labels, files
    
def unison_shuffled_copies(a, b):
    """Return shuffled copies of `a` and `b` that share one random permutation.

    Both inputs must have the same length along their first axis and support
    NumPy integer-array indexing. Row i of the first result still corresponds
    to row i of the second result.
    """
    n_items = len(a)
    assert n_items == len(b)
    # One permutation applied to both arrays keeps the pairs aligned.
    order = np.random.permutation(n_items)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [4]:
# Load the three splits; each call returns (images, labels, file names).
# print() is called with a single argument so the output is the same on
# Python 2 and Python 3.
print("Loading Train data...")
X_train, y_train, train_files_name = load_images("./dataset_150/train_resized/",
                                                 "./dataset_150/train_labels.txt")
print('Loading Test data...')
X_test, y_test, test_files = load_images('./dataset_150/test_resized/',
                                         './dataset_150/test_labels.txt')

print('Loading Validation data...')
X_val, y_val, valid_files = load_images('./dataset_150/val_resized/',
                                        './dataset_150/validation_labels.txt')

# Shuffle training images and labels with one shared permutation.
# train_files_name is not permuted, so it no longer lines up with X_train.
X_train, y_train = unison_shuffled_copies(X_train, y_train)

# Shift train/validation labels down by one, in place (presumably the label
# files are 1-based -- TODO confirm). y_test is left as loaded.
y_train -= 1
y_val -= 1

Loading Train data...
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
Loading Test data...
0
1000
2000
3000
4000
5000
6000
7000
Loading Validation data...
0
1000
2000


In [5]:
# Sanity-check the array shapes after loading. Each print() gets a single
# pre-formatted string so the output is identical on Python 2 and Python 3.
print('Train data shape: ')
print('{} {}'.format(X_train.shape, y_train.shape))
print('Validation data shape:')
print('{} {}'.format(X_val.shape, y_val.shape))

Train data shape: 
(20324, 150, 150, 3) (20324,)
Validation data shape:
(2570, 150, 150, 3) (2570,)


In [6]:
# Fold the validation split into the training data by stacking along the
# first axis (the default axis of np.concatenate).
# NOTE: y_train is rebound to the merged labels, so running this cell a
# second time would append y_val again while `train` stays the same size.
train = np.concatenate([X_train, X_val])
y_train = np.concatenate([y_train, y_val])

In [7]:
# One-hot encode the integer labels into 256 columns.
# y_train already includes the validation labels appended by the previous
# cell; y_val is encoded separately.
y_train = np_utils.to_categorical(y_train, 256)
y_val = np_utils.to_categorical(y_val, 256)

In [8]:
# Shapes of the merged training data and of the test images. Each print()
# gets a single argument so the output is identical on Python 2 and Python 3.
print('Train data shape: ')
print('{} {}'.format(train.shape, y_train.shape))
print('Test data shape:')
print(X_test.shape)

Train data shape: 
(22894, 150, 150, 3) (22894, 256)
Test data shape:
(7680, 150, 150, 3)


# Classifier

 <img src="robo.jpg">

In [9]:
import keras
import numpy as np
from keras.applications.inception_v3 import InceptionV3, preprocess_input
import scipy

In [12]:
from keras.layers.normalization import BatchNormalization
from keras import backend as K
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint
from keras.applications import imagenet_utils
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
import h5py
from keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Model

In [10]:
# InceptionV3 with ImageNet weights and without its top classification
# layers, configured for 150x150 RGB inputs.
# NOTE(review): base_model is not referenced again in the visible notebook
# (the classifier below is built on base_vgg) -- confirm it is still needed.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
print('model loaded')

model loaded


In [11]:
# VGG19 with ImageNet weights and without its top classification layers,
# configured for 150x150 RGB inputs. This is the base the classifier is built on.
from keras.applications.vgg19 import VGG19
base_vgg = VGG19(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

In [13]:
# New classification head stacked on the VGG19 output:
# global average pooling -> Dense(1024) + ReLU -> Dropout(0.5) -> Dense(256) + softmax.
x = base_vgg.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024)(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)
# 256 output units, matching the 256 columns of the one-hot labels.
x = Dense(256)(x)
predictions = Activation('softmax')(x) 

# this is the model we will train
model = Model(inputs=base_vgg.input, outputs=predictions)

# Freeze every layer of the VGG19 base so that only the new head layers
# are updated during training.
for layer in base_vgg.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)


model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [14]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print would emit an extra "None" line.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
__________

In [15]:
# Augmentation settings: scale pixel values by 1/255, random rotations with
# rotation_range=35, random horizontal flips, and width/height shifts with a
# range of 0.1.
# NOTE(review): data_generator is not passed to the model.fit call in this
# notebook, so none of these transforms (including the rescale) are applied
# during training -- confirm whether that is intended.
data_generator = ImageDataGenerator(
    rescale = 1./255,
    rotation_range=35, 
    horizontal_flip=True, 
    width_shift_range=0.1,
    height_shift_range=0.1
)


In [24]:
# Train on the merged train+validation arrays for 4 epochs, batches of 200.
# NOTE(review): `train` is passed exactly as loaded (uint8 pixel values);
# neither data_generator nor preprocess_input is applied -- confirm intended.
# The commented-out arguments are disabled options (checkpoint callback,
# validation split / validation data).
model.fit(train, y_train,
          batch_size=200,
          epochs=4,
          verbose=1
          #callbacks=[model_checkpoint],
          #validation_split = 0.2
          #validation_data=(X_val, y_val)
         )
 

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f7840b14190>

In [25]:
# Softmax outputs for the test images: one row per image, one column per
# output unit of the model (256).
y_pred = model.predict(X_test)

In [18]:
# Save the whole trained model to an HDF5 file.
model.save('vgg19_trans_model_2.h5')


In [19]:
# Additionally save only the model weights to a separate HDF5 file.
model.save_weights('vgg19_trans_model_weights_22.h5')

In [26]:
# Index of the highest-scoring class for each test image; +1 reverses the
# "-= 1" shift applied to the training labels after loading.
pred_labels = y_pred.argmax(axis=-1)+1
# Single-argument print() gives the same output on Python 2 and Python 3.
print(pred_labels[:50])

[221  49  47  96  20  51 188 227  97 167 122  56  22  91  13 180  70  78
 250 137 100 140 243 146 167 183 111 199 170 159 148 122  29 112 105  13
 204  90 216 105  55  63   2 216  47 199 125  90 182  99]


In [27]:
# List the test files predicted as class 30 and count them.
# test_files and pred_labels are aligned (both follow the order of X_test),
# so they are walked in parallel instead of through an index.
n = 0
for file_name, label in zip(test_files, pred_labels):
    if label == 30:
        print(file_name)
        n += 1
print(n)

20083.jpg
27403.jpg
30898.jpg
10378.jpg
8308.jpg
11716.jpg
8639.jpg
23626.jpg
21478.jpg
29127.jpg
29810.jpg
15252.jpg
4756.jpg
13


In [28]:
import csv

# Write the predictions as a CSV with a header row followed by one
# "image,class" row per test file, in the same order as test_files.
fieldnames = ['image', 'class']
with open('results_transfer_vgg_extra.csv', 'w') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(
        {'image': file_name, 'class': label}
        for file_name, label in zip(test_files, pred_labels)
    )