In [1]:
import numpy as np
import pandas as pd
import keras
from keras import backend as K
from keras.preprocessing import image
from keras.applications.vgg19 import decode_predictions
from keras.models import Sequential, Model
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.optimizers import Adam, SGD
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
from glob import glob
import os.path
import PIL
import cv2
import re
from collections import defaultdict, namedtuple

Using TensorFlow backend.


In [2]:
pd.set_option('display.max_columns', None)

In [3]:
def get_classes(path):
    directories = glob(os.path.expanduser(path) + '/*')
    return [dir.split('/')[-1] for dir in directories]
n_classes =  len(get_classes('~/p5/data/interim/train'))
n_classes

50

In [4]:
def to_gray_scale(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_image = cv2.cvtColor(image,cv2.COLOR_GRAY2BGR)
    return np.array(gray_image)

In [5]:
 vgg19 = keras.applications.vgg19.VGG19(weights='imagenet', include_top=False, input_shape=(224,224, 3))













In [6]:
# note we exclude the final dense layers and add one back below, we would retrain it ourselves
base_model = vgg19
 
# Freeze convolutional layers
for layer in base_model.layers:
    layer.trainable = False 
    
x = base_model.output
x = Flatten()(x) # flatten from convolution tensor output 
predictions = Dense(n_classes, activation='softmax')(x) # should match # of classes predicted

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

In [7]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [8]:
train_path = os.path.expanduser('~/p5/data/interim/train')
test_path = os.path.expanduser('~/p5/data/interim/test')
valid_path = os.path.expanduser('~/p5/data/interim/valid')

In [27]:
train_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(train_path,
                                                         target_size=(224,224), 
                                                         batch_size=6)
valid_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(valid_path, 
                                                         target_size=(224,224), 
                                                         batch_size=5)
test_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(test_path, 
                                                        target_size=(224,224),
                                                        shuffle=False,
                                                        batch_size=1)


Found 2766 images belonging to 50 classes.
Found 605 images belonging to 50 classes.
Found 543 images belonging to 50 classes.


In [10]:
# sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
# model.compile(Adam(lr=0.00002), loss='categorical_crossentropy', metrics=['accuracy'])
model.compile(Adam(lr=0.00002), loss='categorical_crossentropy', metrics=['accuracy'])





In [11]:
# patient early stopping
# es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
# mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)
hist = model.fit_generator(train_batches, 
                    steps_per_epoch=4, 
                    validation_data=valid_batches, 
                    validation_steps=4,
                    epochs=500,
                    verbose=2)#, 
#                     callbacks=[es, mc])

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/500
 - 19s - loss: 12.8778 - acc: 0.0000e+00 - val_loss: 11.7921 - val_acc: 0.0000e+00
Epoch 2/500
 - 18s - loss: 12.9201 - acc: 0.0417 - val_loss: 12.7275 - val_acc: 0.0500
Epoch 3/500
 - 18s - loss: 12.6012 - acc: 0.0417 - val_loss: 10.7446 - val_acc: 0.1500
Epoch 4/500
 - 18s - loss: 13.2309 - acc: 0.0000e+00 - val_loss: 12.5498 - val_acc: 0.0500
Epoch 5/500
 - 18s - loss: 12.0086 - acc: 0.0000e+00 - val_loss: 12.3799 - val_acc: 0.0500
Epoch 6/500
 - 18s - loss: 10.3375 - acc: 0.0417 - val_loss: 11.8255 - val_acc: 0.0500
Epoch 7/500
 - 18s - loss: 11.5092 - acc: 0.0417 - val_loss: 12.2321 - val_acc: 0.1000
Epoch 8/500
 - 18s - loss: 12.4648 - acc: 0.0000e+00 - val_loss: 11.2697 - val_acc: 0.0500
Epoch 9/500
 - 18s - loss: 10.3295 - acc: 0.1250 - val_loss: 11.0981 - val_acc: 0.1500
Epoch 10/500
 - 18s - loss: 10.0026 - acc: 0.0833 - val_loss: 10.9572 - val_acc: 0.2000
Epoch 11/500


Epoch 88/500
 - 17s - loss: 2.6992 - acc: 0.6667 - val_loss: 3.8241 - val_acc: 0.5500
Epoch 89/500
 - 18s - loss: 4.0413 - acc: 0.5417 - val_loss: 2.0936 - val_acc: 0.7500
Epoch 90/500
 - 20s - loss: 2.3687 - acc: 0.6667 - val_loss: 4.7270 - val_acc: 0.5000
Epoch 91/500
 - 20s - loss: 5.9859 - acc: 0.5833 - val_loss: 5.9159 - val_acc: 0.6000
Epoch 92/500
 - 19s - loss: 4.3802 - acc: 0.5000 - val_loss: 5.4504 - val_acc: 0.5000
Epoch 93/500
 - 18s - loss: 2.5092 - acc: 0.5417 - val_loss: 2.8073 - val_acc: 0.6000
Epoch 94/500
 - 19s - loss: 4.2076 - acc: 0.5417 - val_loss: 2.9402 - val_acc: 0.5000
Epoch 95/500
 - 19s - loss: 2.5293 - acc: 0.5417 - val_loss: 4.6479 - val_acc: 0.6000
Epoch 96/500
 - 18s - loss: 4.0886 - acc: 0.6250 - val_loss: 2.3608 - val_acc: 0.7000
Epoch 97/500
 - 18s - loss: 1.5744 - acc: 0.7917 - val_loss: 3.6743 - val_acc: 0.7000
Epoch 98/500
 - 18s - loss: 3.1915 - acc: 0.6667 - val_loss: 3.5027 - val_acc: 0.7000
Epoch 99/500
 - 18s - loss: 1.1536 - acc: 0.8333 - val

Epoch 183/500
 - 18s - loss: 1.7051 - acc: 0.8333 - val_loss: 0.4292 - val_acc: 0.8500
Epoch 184/500
 - 18s - loss: 1.4265 - acc: 0.8750 - val_loss: 3.8071 - val_acc: 0.7000
Epoch 185/500
 - 18s - loss: 2.7628 - acc: 0.7500 - val_loss: 2.0971 - val_acc: 0.8000
Epoch 186/500
 - 18s - loss: 4.3285 - acc: 0.6250 - val_loss: 2.2209 - val_acc: 0.7500
Epoch 187/500
 - 18s - loss: 1.5080 - acc: 0.8333 - val_loss: 3.7588 - val_acc: 0.7000
Epoch 188/500
 - 18s - loss: 3.3339 - acc: 0.6250 - val_loss: 2.1386 - val_acc: 0.7000
Epoch 189/500
 - 18s - loss: 0.2255 - acc: 0.9583 - val_loss: 3.8557 - val_acc: 0.7000
Epoch 190/500
 - 18s - loss: 2.7253 - acc: 0.8333 - val_loss: 4.7685 - val_acc: 0.6000
Epoch 191/500
 - 18s - loss: 3.5888 - acc: 0.6250 - val_loss: 2.9369 - val_acc: 0.7000
Epoch 192/500
 - 18s - loss: 1.2618 - acc: 0.7917 - val_loss: 2.2108 - val_acc: 0.7500
Epoch 193/500
 - 17s - loss: 3.5291 - acc: 0.6250 - val_loss: 1.0316 - val_acc: 0.9000
Epoch 194/500
 - 18s - loss: 3.7449 - acc: 

Epoch 278/500
 - 18s - loss: 0.8196 - acc: 0.9167 - val_loss: 1.9200 - val_acc: 0.8500
Epoch 279/500
 - 18s - loss: 0.6812 - acc: 0.9583 - val_loss: 2.8115 - val_acc: 0.7000
Epoch 280/500
 - 18s - loss: 1.1243 - acc: 0.8750 - val_loss: 0.5872 - val_acc: 0.9000
Epoch 281/500
 - 17s - loss: 2.1421 - acc: 0.6667 - val_loss: 2.1262 - val_acc: 0.8000
Epoch 282/500
 - 18s - loss: 1.5855 - acc: 0.7917 - val_loss: 1.2793 - val_acc: 0.8000
Epoch 283/500
 - 18s - loss: 0.0315 - acc: 1.0000 - val_loss: 4.5852 - val_acc: 0.7000
Epoch 284/500
 - 18s - loss: 0.7899 - acc: 0.9167 - val_loss: 4.6096 - val_acc: 0.5500
Epoch 285/500
 - 18s - loss: 1.3749 - acc: 0.9167 - val_loss: 2.9113 - val_acc: 0.7500
Epoch 286/500
 - 18s - loss: 0.2707 - acc: 0.8750 - val_loss: 1.8241 - val_acc: 0.8500
Epoch 287/500
 - 18s - loss: 1.5013 - acc: 0.8333 - val_loss: 0.5375 - val_acc: 0.9000
Epoch 288/500
 - 17s - loss: 2.0515 - acc: 0.7917 - val_loss: 0.8385 - val_acc: 0.9500
Epoch 289/500
 - 18s - loss: 0.9516 - acc: 

Epoch 373/500
 - 18s - loss: 0.0981 - acc: 0.9583 - val_loss: 1.8729 - val_acc: 0.8500
Epoch 374/500
 - 18s - loss: 1.4213 - acc: 0.8750 - val_loss: 3.6721 - val_acc: 0.7000
Epoch 375/500
 - 18s - loss: 1.7451 - acc: 0.8333 - val_loss: 2.6840 - val_acc: 0.7500
Epoch 376/500
 - 18s - loss: 0.7493 - acc: 0.9583 - val_loss: 0.6724 - val_acc: 0.8500
Epoch 377/500
 - 18s - loss: 1.4132 - acc: 0.8750 - val_loss: 2.2861 - val_acc: 0.8000
Epoch 378/500
 - 18s - loss: 0.9370 - acc: 0.9167 - val_loss: 0.8079 - val_acc: 0.9500
Epoch 379/500
 - 18s - loss: 2.0336 - acc: 0.8750 - val_loss: 0.2769 - val_acc: 0.9000
Epoch 380/500
 - 18s - loss: 0.0062 - acc: 1.0000 - val_loss: 0.2508 - val_acc: 0.9000
Epoch 381/500
 - 18s - loss: 1.3477 - acc: 0.9167 - val_loss: 1.1324 - val_acc: 0.8500
Epoch 382/500
 - 18s - loss: 0.6783 - acc: 0.9583 - val_loss: 2.8849 - val_acc: 0.7500
Epoch 383/500
 - 18s - loss: 1.1005 - acc: 0.8750 - val_loss: 1.5917 - val_acc: 0.7500
Epoch 384/500
 - 18s - loss: 1.9324 - acc: 

Epoch 468/500
 - 18s - loss: 0.8961 - acc: 0.9167 - val_loss: 0.8130 - val_acc: 0.9500
Epoch 469/500
 - 18s - loss: 0.5882 - acc: 0.9583 - val_loss: 0.8656 - val_acc: 0.9500
Epoch 470/500
 - 17s - loss: 1.4326 - acc: 0.8750 - val_loss: 1.9800 - val_acc: 0.8000
Epoch 471/500
 - 18s - loss: 2.7418 - acc: 0.8333 - val_loss: 1.6136 - val_acc: 0.9000
Epoch 472/500
 - 18s - loss: 0.0193 - acc: 1.0000 - val_loss: 0.0059 - val_acc: 1.0000
Epoch 473/500
 - 18s - loss: 2.7039 - acc: 0.8333 - val_loss: 2.1692 - val_acc: 0.8500
Epoch 474/500
 - 18s - loss: 1.3478 - acc: 0.9167 - val_loss: 0.3235 - val_acc: 0.8500
Epoch 475/500
 - 18s - loss: 0.0248 - acc: 1.0000 - val_loss: 2.9854 - val_acc: 0.7500
Epoch 476/500
 - 18s - loss: 0.1580 - acc: 0.9583 - val_loss: 0.8452 - val_acc: 0.9500
Epoch 477/500
 - 18s - loss: 1.3454 - acc: 0.9167 - val_loss: 2.7648 - val_acc: 0.8000
Epoch 478/500
 - 17s - loss: 2.0414 - acc: 0.8750 - val_loss: 0.5252 - val_acc: 0.9500
Epoch 479/500
 - 17s - loss: 0.6875 - acc: 

In [12]:
for key in hist.history:
    print(key)

val_loss
val_acc
loss
acc


In [None]:
asdf =

In [None]:
!ls

In [13]:
model.save('hieroglyphs_vgg19_grayscale.h5')

In [None]:
# predictions = model.predict_generator(test_batches, steps=1, verbose=1)

In [28]:
STEP_SIZE_TEST=test_batches.n//test_batches.batch_size
test_batches.reset()
pred=model.predict_generator(test_batches,
steps=STEP_SIZE_TEST,
verbose=1)



In [29]:
predicted_class_indices=np.argmax(pred,axis=1)

In [30]:
labels = (train_batches.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [31]:
predictions[:11]

['Q3', 'S29', 'D21', 'D21', 'D21', 'D21', 'D21', 'D21', 'D21', 'D21', 'D21']

In [32]:
pred_df = pd.DataFrame(predictions, columns=['predicted'])
pred_df = pred_df.merge(pd.DataFrame(test_batches.filenames, columns=['filename']), left_index=True, right_index=True)
pred_df['glyph'] = pred_df.apply(lambda row: re.match(r'^([A-Z]{1}\d+)/', row['filename']).group(1), axis=1)
pred_df['match'] = pred_df['predicted'] == pred_df['glyph']
pred_df

Unnamed: 0,predicted,filename,glyph,match
0,Q3,D2/070242_D2.png,D2,False
1,S29,D2/220196_D2.png,D2,False
2,D21,D21/030269_D21.png,D21,True
3,D21,D21/030273_D21.png,D21,True
4,D21,D21/050286_D21.png,D21,True
...,...,...,...,...
538,Z1,Z1/050083_Z1.png,Z1,True
539,Z1,Z1/390122_Z1.png,Z1,True
540,Z1,Z1/390351_Z1.png,Z1,True
541,Z1,Z1/410102_Z1.png,Z1,True


In [33]:
acc = pred_df['match'].sum() / len(pred_df)
acc

0.9023941068139963

In [None]:
model = load_model(os.path.expanduser('~/p5/models/hieroglyphs_vgg19.h5'))

In [20]:
test_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(os.path.expanduser('~/p5/data/single'), 
                                                        target_size=(224,224),
                                                        shuffle=False,
                                                        batch_size=1)

Found 35 images belonging to 1 classes.


### PLAY

In [None]:
temp = np.random.randn(50,100)
# temp

In [None]:
plt.imshow(temp,cmap = 'jet')
plt.colorbar()

In [None]:
import cv2 


In [None]:
!ls

In [None]:
img = cv2.imread('SarcEastGH.jpg')  


In [None]:
img.shape

In [None]:
plt.figure(figsize=[10,10])
plt.imshow(img)

In [None]:
temp = np.random.randn(1060,1720,3)


In [None]:
plt.figure(figsize=[10,10])
plt.imshow(img+temp)