In [1]:
import os
import sys

import numpy as np

import pandas as pd
import glob

In [2]:
# Output directory for model artifacts (presumably where trained models are
# saved; it is not referenced by the other visible cells -- confirm).
BASE_MODEL_PATH = "trained_model"
# Pure-Python equivalent of `mkdir -p`: works on every platform and does not
# depend on IPython's POSIX-only shell aliases; a no-op if the folder exists.
os.makedirs(BASE_MODEL_PATH, exist_ok=True)

## Data preparation

In [3]:
from models.modelutils import dir2filedict, split_fdict

Using TensorFlow backend.


In [4]:
# Index the "data" directory. Judging by how the result is used further down,
# this maps a category name to a list of image paths such as
# 'data/clouds/0678.jpeg' -- confirm against models.modelutils.dir2filedict.
fdict = dir2filedict("data")

In [5]:
# Sorted list of the keys of fdict (the category names).
categories = sorted(fdict.keys())

In [6]:
# First split: hold out the test set. random_state is fixed so the split is
# repeatable. NOTE(review): test_size=0.2 presumably means a 20% fraction
# (the image counts reported by the generators are consistent with that) --
# confirm in split_fdict.
trdict, testdict = split_fdict(fdict, test_size=0.2, random_state = 123)

In [7]:
# Second split: carve the validation set out of the remaining training data.
# NOTE: this rebinds `trdict`, so the cell is not idempotent -- running it
# again without first re-running the previous split shrinks the training set
# a second time.
trdict, valdict = split_fdict(trdict, test_size=0.2, random_state = 456)

In [8]:
# Sanity check: peek at the first five validation paths of one category.
valdict['clouds'][:5]

['data/clouds/0678.jpeg',
 'data/clouds/0701.jpeg',
 'data/clouds/0431.jpeg',
 'data/clouds/0033.jpeg',
 'data/clouds/0290.jpeg']

### Copy image files into temporary directories

In [9]:
import tempfile
import shutil

In [10]:
# One scratch directory per split (train / validation / test). The
# TemporaryDirectory objects themselves are kept, not just their paths,
# because the directories are removed when the objects are cleaned up.
tmp_train_dir, tmp_valid_dir, tmp_test_dir = (
    tempfile.TemporaryDirectory() for _ in range(3)
)

In [14]:
def copy_images(tmp_dir, data_dict):
    """Copy the images listed in ``data_dict`` into per-category subfolders.

    The result is laid out as ``<destination>/<category>/<file name>``, which
    is the one-folder-per-class structure that the generators created later
    with ``flow_from_directory`` read from.

    Parameters
    ----------
    tmp_dir : tempfile.TemporaryDirectory or str
        Destination root. Either an object exposing the directory path as
        ``.name`` (e.g. ``TemporaryDirectory``) or a plain path string.
    data_dict : dict
        Maps a category name to an iterable of source image paths.

    Notes
    -----
    Only the base name of each source path is kept, so two sources with the
    same file name inside one category overwrite each other. A category with
    no images still gets its (empty) folder.
    """
    # Accept a bare path as well as a TemporaryDirectory-like object.
    dest_root = tmp_dir if isinstance(tmp_dir, str) else tmp_dir.name
    for cat, img_paths in data_dict.items():
        cat_dir = os.path.join(dest_root, cat)
        os.makedirs(cat_dir, exist_ok=True)
        for img_path in img_paths:
            # os.path handles the platform's separator instead of assuming "/".
            img_name = os.path.basename(img_path)
            # copy2 also preserves file metadata such as timestamps.
            shutil.copy2(img_path, os.path.join(cat_dir, img_name))

In [13]:
%%time
# Write the training split into its temp directory (one subfolder per category).
# NOTE(review): in the saved notebook this cell's execution count (13) is lower
# than that of the cell defining copy_images (14), so the cells were run out of
# order -- verify with Restart & Run All.
copy_images(tmp_train_dir, trdict)

CPU times: user 2.21 s, sys: 5.55 s, total: 7.76 s
Wall time: 1min 19s


In [15]:
%%time
# Write the validation split into its temp directory (one subfolder per category).
copy_images(tmp_valid_dir, valdict)

CPU times: user 360 ms, sys: 1.46 s, total: 1.82 s
Wall time: 13.4 s


In [16]:
%%time
# Write the test split into its temp directory (one subfolder per category).
copy_images(tmp_test_dir, testdict)

CPU times: user 512 ms, sys: 1.79 s, total: 2.3 s
Wall time: 23.5 s


### Set up data generator

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [20]:
# Side length in pixels: the generators resize every image to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 256
# Batch size of the training and validation generators; also used to derive
# the number of steps per epoch when fitting.
BATCH_SIZE = 32

In [22]:
# Training pipeline: pixel values are multiplied by 1/255; no augmentation
# is configured.
TRAIN_DATAGEN = ImageDataGenerator(rescale=1.0 / 255)

# Stream batches from the per-category folders of the training temp directory.
# class_mode='sparse' makes the generator emit integer class labels.
TRAIN_GENERATOR = TRAIN_DATAGEN.flow_from_directory(
    directory=tmp_train_dir.name,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
)

Found 7489 images belonging to 16 classes.


In [23]:
# Validation pipeline: same 1/255 pixel scaling as used for training.
VALID_DATAGEN = ImageDataGenerator(rescale=1.0 / 255)

# Stream batches from the per-category folders of the validation temp
# directory, with integer class labels (class_mode='sparse').
VALID_GENERATOR = VALID_DATAGEN.flow_from_directory(
    directory=tmp_valid_dir.name,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
)

Found 1880 images belonging to 16 classes.


In [39]:
# Test pipeline: same 1/255 pixel scaling as used for training.
TEST_DATAGEN = ImageDataGenerator(rescale=1.0 / 255)

# batch_size=1 yields one image per step, so stepping TEST_GENERATOR.n times
# covers the test set exactly once.
TEST_GENERATOR = TEST_DATAGEN.flow_from_directory(
    directory=tmp_test_dir.name,
    batch_size=1,
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
)

Found 2352 images belonging to 16 classes.


## Model training and evaluation

In [25]:
from keras.applications.inception_v3 import InceptionV3
from keras.models import Model, model_from_json
from keras.layers import Dense, GlobalAveragePooling2D
from keras import optimizers

In [28]:
# Backbone: InceptionV3 with ImageNet weights and without its classifier top.
# NOTE(review): keras.applications.inception_v3.preprocess_input scales pixels
# to [-1, 1], whereas the data generators in this notebook use rescale=1/255
# ([0, 1]) -- confirm that this mismatch with the pretrained weights is intended.
base_model = InceptionV3(include_top=False, weights='imagenet')

# New classification head: global average pooling -> 1024-unit ReLU layer ->
# softmax with one output per class found by the training generator.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(TRAIN_GENERATOR.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [29]:
# Freeze the backbone and train only the new head. This relies on
# model.layers listing the base model's layers first, followed by the head.
n_base = len(base_model.layers)
for idx, layer in enumerate(model.layers):
    layer.trainable = idx >= n_base

In [30]:
# Adam with an initial learning rate of 1e-3.
# NOTE(review): `decay` is presumably the per-update, time-based learning-rate
# decay of the legacy Keras optimizers -- confirm for the installed version.
optimizer = optimizers.Adam(lr=0.001, decay=0.01)

In [31]:
# The sparse cross-entropy loss matches the integer labels produced by the
# generators (class_mode='sparse'); accuracy is tracked as an extra metric.
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=["accuracy"])

### Training

In [32]:
# Number of batches needed for ONE full pass over each split. Ceiling division
# (written as -(-a // b)) keeps the final, smaller batch: with floor division
# the leftover images were skipped on every pass and the endlessly looping
# validation generator drifted out of step with the epoch boundaries.
train_steps = -(-TRAIN_GENERATOR.n // BATCH_SIZE)
valid_steps = -(-VALID_GENERATOR.n // BATCH_SIZE)

# Train the new head for 5 epochs, validating after each one. The call is the
# last expression of the cell, so the returned History object is displayed.
model.fit_generator(
    generator=TRAIN_GENERATOR,
    steps_per_epoch=train_steps,
    epochs=5,
    verbose=1,
    validation_data=VALID_GENERATOR,
    validation_steps=valid_steps,
)

Epoch 1/5





  'to RGBA images')








  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping 

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f1460aeb9b0>

### Evaluation

In [41]:
%%time

model.evaluate_generator(
    TEST_GENERATOR
    , steps=TEST_GENERATOR.n
)

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))


CPU times: user 6min 43s, sys: 24.2 s, total: 7min 7s
Wall time: 3min 40s


[0.75435584731138916, 0.73001700680272108]

The first value above is the test loss and the second is the test accuracy.