In [1]:
import numpy as np
import pandas as pd
import keras
from keras import backend as K
from keras.preprocessing import image
from keras.applications.vgg19 import decode_predictions
from keras.models import Sequential, Model
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.optimizers import Adam, SGD
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
from glob import glob
import os.path
import PIL

import re
from collections import defaultdict, namedtuple

Using TensorFlow backend.


In [2]:
# Show every column when a DataFrame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

In [3]:
def get_classes(path):
    """Return the names of the class sub-directories directly under ``path``.

    Parameters
    ----------
    path : str
        Directory that holds one sub-directory per class. A leading ``~`` is
        expanded to the current user's home directory.

    Returns
    -------
    list of str
        Sub-directory names in sorted order. Plain files sitting next to the
        class folders (e.g. a stray CSV or README) are not counted as classes.
        An empty list is returned when ``path`` does not exist or is empty.
    """
    root = os.path.expanduser(path)
    entries = glob(os.path.join(root, '*'))
    # basename() copes with any path separator, unlike a manual split('/');
    # sorting makes the result independent of the file system's listing order.
    return sorted(os.path.basename(entry) for entry in entries if os.path.isdir(entry))
# Count the class folders of the interim training set.
train_classes = get_classes('~/p5/data/interim/train')
n_classes = len(train_classes)
n_classes

50

In [4]:
 # ImageNet-pretrained VGG19 without its dense top layers, for 224x224 RGB input.
 vgg19 = keras.applications.vgg19.VGG19(
     weights='imagenet',
     include_top=False,
     input_shape=(224, 224, 3),
 )













In [5]:
# The network is used as a fixed feature extractor: every one of its layers is
# frozen and a new softmax classifier is attached on top of its output.
base_model = vgg19

# Freeze all layers of the base network so their weights are not updated.
for layer in base_model.layers:
    layer.trainable = False

# Flatten the convolutional output tensor into a vector for the dense layer.
x = Flatten()(base_model.output)
# Number of units must equal the number of classes being predicted.
predictions = Dense(2, activation='softmax')(x)

# The model that gets trained: base network input -> new softmax output.
model = Model(inputs=base_model.input, outputs=predictions)

In [6]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [7]:
# Root of the binary glyph dataset. '~' is expanded at run time so the notebook
# is not tied to one user's home directory (expanduser does nothing when it is
# handed an absolute path).
binary_glyphs_dir = os.path.expanduser('~/p5/binary_glyphs')
train_path = os.path.join(binary_glyphs_dir, 'train')
test_path = os.path.join(binary_glyphs_dir, 'test')
valid_path = os.path.join(binary_glyphs_dir, 'valid')

In [39]:
# Every split is resized to the 224x224 input size the network was built with.
image_size = (224, 224)

train_batches = ImageDataGenerator().flow_from_directory(
    train_path, target_size=image_size, batch_size=6)
valid_batches = ImageDataGenerator().flow_from_directory(
    valid_path, target_size=image_size, batch_size=5)
# The test split is read unshuffled, one image per batch.
test_batches = ImageDataGenerator().flow_from_directory(
    test_path, target_size=image_size, shuffle=False, batch_size=1)


Found 2611 images belonging to 2 classes.
Found 331 images belonging to 2 classes.
Found 328 images belonging to 2 classes.


In [9]:
# Alternative optimizer kept for reference:
# sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(
    optimizer=Adam(lr=0.00002),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)





In [63]:
# Callbacks: stop after 100 epochs without a val_loss improvement, and write the
# model with the best val_acc to best_model.h5. They are only constructed here;
# they have no effect until passed to fit_generator via `callbacks`.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

hist = model.fit_generator(
    train_batches,
    steps_per_epoch=4,
    validation_data=valid_batches,
    validation_steps=4,
    epochs=1,
    verbose=2,
    # callbacks=[es, mc],  # uncomment to enable early stopping and checkpointing
)

Epoch 1/1
 - 18s - loss: 0.2087 - acc: 0.9583 - val_loss: 0.3068 - val_acc: 0.9500


In [51]:
# List the names of the metrics recorded in the training history.
for metric_name in hist.history.keys():
    print(metric_name)

val_loss
val_acc
loss
acc


In [None]:
# TODO: unfinished assignment. The bare `model = ` statement is a SyntaxError and
# stops "Run All", so it is parked as a comment until the intended right-hand
# side is filled in (or the cell is deleted).
# model =

In [52]:
# Shell escape: list the files in the current working directory.
!ls

Hieroglyph_Classifier.ipynb           SarcSouth.jpg
Hieroglyph_Classifier_backup.ipynb    best_model.h5
Hieroglyph_Classifier_binary.ipynb    hHieroglyph_Classifier_binary.h5
Hieroglyph_Classifier_grayscale.ipynb hieroglyphs_vgg19.h5
Opencv_test.ipynb                     hieroglyphs_vgg19_grayscale.h5
SarcEastGH.jpg                        hieroglyphs_weights_095_095.sav


In [70]:
# Write the current model to an .h5 file in the working directory.
# NOTE(review): the leading "hH" in the file name looks like a typo — confirm before renaming.
model.save('hHieroglyph_Classifier_binary.h5')

In [54]:
# predictions = model.predict_generator(test_batches, steps=1, verbose=1)

In [64]:
# Number of batches needed to cover every test image once. Ceiling division keeps
# the final partial batch when the image count is not a multiple of the batch
# size; floor division would drop those images and leave `pred` shorter than
# `test_batches.filenames`. With batch_size=1 both forms give the same value.
STEP_SIZE_TEST = -(-test_batches.n // test_batches.batch_size)
# Rewind the iterator so prediction starts from the first file.
test_batches.reset()
pred = model.predict_generator(test_batches, steps=STEP_SIZE_TEST, verbose=1)



In [65]:
# For each sample, take the index of the highest-scoring output unit.
predicted_class_indices = pred.argmax(axis=1)

In [66]:
# Invert the generator's {class name: index} mapping so that predicted indices
# can be translated back into class names.
labels = {index: name for name, index in train_batches.class_indices.items()}
predictions = [labels[index] for index in predicted_class_indices]

In [67]:
# Peek at the first 11 predicted class names.
predictions[:11]

['unknown',
 'unknown',
 'unknown',
 'known',
 'known',
 'known',
 'known',
 'unknown',
 'unknown',
 'known',
 'known']

In [68]:
# Pair every prediction with the file it was made for. A length mismatch means
# the prediction run did not cover all files; fail loudly instead of letting an
# index-based join silently drop the unmatched rows.
test_filenames = list(test_batches.filenames)
if len(predictions) != len(test_filenames):
    raise ValueError(
        'got %d predictions for %d test files' % (len(predictions), len(test_filenames)))

pred_df = pd.DataFrame({'predicted': predictions, 'filename': test_filenames},
                       columns=['predicted', 'filename'])
# The true label is the class folder, i.e. the first component of the relative
# file path. normpath + os.sep works with either path separator, whereas a
# literal split('/') does not.
pred_df['glyph'] = [os.path.normpath(name).split(os.sep)[0] for name in test_filenames]
pred_df['match'] = pred_df['predicted'] == pred_df['glyph']
pred_df

Unnamed: 0,predicted,filename,glyph,match
0,unknown,known/030021_X1.png,known,False
1,unknown,known/030064_X1.png,known,False
2,unknown,known/030097_S29.png,known,False
3,known,known/030124_N35.png,known,True
4,known,known/030132_N35.png,known,True
...,...,...,...,...
323,unknown,unknown/410379_UNKNOWN.png,unknown,True
324,known,unknown/410386_UNKNOWN.png,unknown,False
325,unknown,unknown/410396_UNKNOWN.png,unknown,True
326,unknown,unknown/410397_UNKNOWN.png,unknown,True


In [69]:
# Accuracy on the test set: share of rows whose predicted label equals the true one.
n_correct = pred_df['match'].sum()
acc = n_correct / len(pred_df)
acc

0.8414634146341463

In [None]:
# Rebind `model` to the network stored at ~/p5/models/hieroglyphs_vgg19.h5;
# the model built and trained in the cells above is no longer referenced by this name.
model = load_model(os.path.expanduser('~/p5/models/hieroglyphs_vgg19.h5'))

In [30]:
# Rebind `test_batches` to the images under ~/p5/data/single, read unshuffled
# and one image per batch.
single_path = os.path.expanduser('~/p5/data/single')
test_batches = ImageDataGenerator().flow_from_directory(
    single_path, target_size=(224, 224), shuffle=False, batch_size=1)

Found 35 images belonging to 1 classes.


### Playground: scratch experiments

In [None]:
# 50x100 array of standard-normal samples, used as dummy data for the plot below.
temp = np.random.standard_normal((50, 100))

In [None]:
# Render the array as a heat map with the 'jet' colour map and attach a colour bar.
heatmap = plt.imshow(temp, cmap='jet')
plt.colorbar(heatmap)

In [None]:
import cv2 


In [None]:
# Shell escape: list the files in the current working directory.
!ls

In [None]:
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Check for that here so the failure is reported at the read, not later
# as an AttributeError on `img.shape`.
image_file = 'SarcEastGH.jpg'
img = cv2.imread(image_file)
if img is None:
    raise FileNotFoundError('could not read image: %s' % image_file)


In [None]:
# Dimensions of the loaded image array.
img.shape

In [None]:
# OpenCV returns colour images in BGR channel order while matplotlib interprets
# the last axis as RGB, so reverse it before display; otherwise red and blue
# are swapped.
plt.figure(figsize=[10, 10])
plt.imshow(img[:, :, ::-1])

In [None]:
# Standard-normal noise with exactly the image's shape, taken from the array
# itself rather than hard-coded, so the two can always be added element-wise.
temp = np.random.randn(*img.shape)


In [None]:
# Adding float noise to the uint8 image yields a float array in the 0-255 range.
# imshow expects float RGB data in [0, 1] and clips anything above, which would
# wash the picture out, so clip to the valid byte range and cast back to uint8.
noisy = np.clip(img + temp, 0, 255).astype(np.uint8)
plt.figure(figsize=[10, 10])
# Reverse the channel axis: the OpenCV array is BGR, matplotlib expects RGB.
plt.imshow(noisy[:, :, ::-1])