In [1]:
%matplotlib inline

In [2]:
from PIL import Image as pil_image
from matplotlib.pyplot import imshow
import numpy as np
import pandas
import progressbar
import requests
import os
import shutil
import random
from sklearn.model_selection import train_test_split

In [3]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD

Using TensorFlow backend.


In [4]:
# Cycle number: passed to get_csv_files and used in the CSV, cache and model file names.
cycle = 72
# Target size in pixels that every photo is resized to; also the model's input shape.
image_width = 128
image_height = 96

In [5]:
# Get the CSVs of sensors and photos and correlate watermark readings to photos.
# get_csv_files comes from the project-local bootstrap_cress module.
from bootstrap_cress import get_csv_files
get_csv_files(cycle)

In [6]:
def get_photo(url, timeout=30):
    """Download the photo at ``url`` into the local photo store, once.

    The last two path components of the URL become the local path
    ``data/photo/<cycle>/<file name>``. If that file already exists it is
    reused and no request is made.

    Args:
        url: URL of the photo; must end in ``<cycle>/<file name>``.
        timeout: Seconds handed to ``requests.get`` as its timeout, so a
            stalled server cannot hang the notebook forever.

    Returns:
        The local path of the photo file.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    parts = url.split('/')
    fn = parts[-1]
    # Local name chosen so the notebook-level ``cycle`` is not shadowed.
    cycle_dir = parts[-2]
    dirname = os.path.join('data/photo', cycle_dir)
    os.makedirs(dirname, exist_ok=True)
    filename = os.path.join(dirname, fn)
    if os.path.exists(filename):
        return filename

    # Download into a sibling ".part" file and rename it only on success, so
    # an interrupted or failed download is never mistaken for a cached photo.
    partial = filename + '.part'
    response = requests.get(url, stream=True, timeout=timeout)
    try:
        # Fail loudly instead of saving an HTML error page as an image.
        response.raise_for_status()
        # Have urllib3 undo any gzip/deflate transfer encoding on the raw stream.
        response.raw.decode_content = True
        with open(partial, 'wb') as fp:
            shutil.copyfileobj(response.raw, fp)
        os.replace(partial, filename)
    except BaseException:
        if os.path.exists(partial):
            os.remove(partial)
        raise
    finally:
        response.close()
    return filename

In [7]:
def load_image(fn):
    """Load an image file as an RGB pixel array at the notebook's target size.

    Args:
        fn: Path of the image file to read.

    Returns:
        ``numpy.ndarray`` of shape ``(image_height, image_width, 3)`` holding
        the resized pixels.
    """
    # The context manager closes the underlying file once the pixels are read.
    with pil_image.open(fn) as img:
        # Force three channels so grayscale / RGBA / palette files still yield
        # arrays that match the model's (height, width, 3) input shape.
        rgb = img.convert('RGB')
        resized = rgb.resize((image_width, image_height))
    return np.asarray(resized)

In [8]:
def img_cache(url, cycle):
    """Return the scaled pixel array for the photo at ``url``, cached on disk.

    The array is stored as ``data/cache/<cycle>/<photo stem>-<image_width>.npz``
    under the key ``x``. On a cache miss the photo is fetched with
    ``get_photo``, loaded with ``load_image``, converted to float32, divided
    by 255 and written to the cache before being returned.

    Args:
        url: URL of the photo; its file name (up to the first dot) names the
            cache entry.
        cycle: Cycle identifier used as the cache sub-directory.

    Returns:
        float32 ``numpy.ndarray`` with the pixel values divided by 255.
    """
    stem = url.split('/')[-1].split('.')[0]
    # NOTE: only the width is part of the cache key, so changing image_height
    # alone will keep serving arrays cached at the old height.
    fn = stem + '-' + str(image_width) + '.npz'
    dirname = os.path.join('data/cache', str(cycle))
    os.makedirs(dirname, exist_ok=True)
    filename = os.path.join(dirname, fn)
    if not os.path.exists(filename):
        # Use the ``url`` argument, not a notebook-global loop variable, so the
        # function caches the photo it was actually asked for.
        img_a = load_image(get_photo(url))
        x = img_a.astype('float32') / 255
        np.savez(filename, x=x)
        return x
    # Close the archive's file handle once the array has been read.
    with np.load(filename) as npzfile:
        return npzfile['x']

In [9]:
# Read the enriched photo table for this cycle, then build the sample list `x`
# (cached pixel arrays) and the label list `y` (0/1) row by row.
df_photos = pandas.read_csv("data/photo_cycle_{}_enriched.csv".format(cycle))
bar = progressbar.ProgressBar(max_value=len(df_photos))
x, y = [], []
for idx, photo_ds in bar(df_photos.iterrows()):
    url = photo_ds['photo']
    # Binary label: 1 when the watermark reading is above 8000, otherwise 0.
    cls = 1 if photo_ds['watermark'] > 8000 else 0
    x.append(img_cache(url, cycle))
    y.append(cls)

100% (2008 of 2008) |#####################| Elapsed Time: 0:00:04 Time: 0:00:04


In [10]:
# Stack the per-photo arrays into a single array with a leading sample axis.
X = np.array(x)

In [11]:
# One-hot encode the 0/1 class labels.
Y = np_utils.to_categorical(y)

In [12]:
# Hold out 20% of the samples; the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [13]:
# Small CNN: two 3x3 convolutions with ReLU, max-pooling and dropout, followed
# by a 64-unit dense layer and a 2-way softmax output.
model = Sequential([
    # Feature extractor.
    Conv2D(32, (3, 3), input_shape=(image_height, image_width, 3)),
    Activation('relu'),
    Conv2D(32, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classifier head.
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])

In [14]:
# The targets are one-hot vectors and the output layer is a softmax, so the
# matching loss is categorical cross-entropy; it also makes the 'accuracy'
# metric resolve to categorical accuracy rather than per-output binary accuracy.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [15]:
# Fit the model on the training split: 10 epochs, mini-batches of 32, one log
# line per epoch (verbose=2).
# NOTE(review): the held-out test split is also used as validation data here,
# so the val_* figures are not an independent estimate if they guide tuning.
model.fit(X_train, Y_train, epochs=10, verbose=2, batch_size=32, 
          validation_data=(X_test, Y_test))

Train on 1606 samples, validate on 402 samples
Epoch 1/10
72s - loss: 0.4392 - acc: 0.8543 - val_loss: 0.2534 - val_acc: 0.8781
Epoch 2/10
71s - loss: 0.2015 - acc: 0.9072 - val_loss: 0.1676 - val_acc: 0.9328
Epoch 3/10
71s - loss: 0.1617 - acc: 0.9197 - val_loss: 0.1170 - val_acc: 0.9552
Epoch 4/10
75s - loss: 0.1322 - acc: 0.9421 - val_loss: 0.1224 - val_acc: 0.9453
Epoch 5/10
70s - loss: 0.1084 - acc: 0.9545 - val_loss: 0.0958 - val_acc: 0.9751
Epoch 6/10
72s - loss: 0.0975 - acc: 0.9539 - val_loss: 0.1008 - val_acc: 0.9701
Epoch 7/10
93s - loss: 0.0877 - acc: 0.9645 - val_loss: 0.0871 - val_acc: 0.9751
Epoch 8/10
77s - loss: 0.0789 - acc: 0.9676 - val_loss: 0.0920 - val_acc: 0.9602
Epoch 9/10
71s - loss: 0.0681 - acc: 0.9720 - val_loss: 0.0863 - val_acc: 0.9701
Epoch 10/10
70s - loss: 0.0600 - acc: 0.9720 - val_loss: 0.0945 - val_acc: 0.9677


<keras.callbacks.History at 0x7f5c4e2a3978>

In [16]:
# Spot check: softmax outputs for one test sample (the 42:43 slice keeps the batch axis).
model.predict(X_test[42:43])

array([[ 0.45113814,  0.54886186]], dtype=float32)

In [17]:
# One-hot ground-truth label of the same test sample, for comparison with the prediction.
Y_test[42]

array([ 0.,  1.])

In [18]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 94, 126, 32)       896       
_________________________________________________________________
activation_1 (Activation)    (None, 94, 126, 32)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 92, 124, 32)       9248      
_________________________________________________________________
activation_2 (Activation)    (None, 92, 124, 32)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 46, 62, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 46, 62, 32)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 91264)             0         
__________

In [19]:
# Save the trained model to an .h5 file; the file name encodes the cycle and image width.
model.save('data/model_v1_{}_{}_dense64_conv32.h5'.format(cycle, image_width))