In [4]:
import numpy as np
import pandas as pd
import keras
from keras import backend as K
from keras.preprocessing import image
from keras.applications.vgg19 import decode_predictions
from keras.models import Sequential, Model
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.optimizers import Adam, SGD
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
from glob import glob
import os.path
import PIL
import cv2
import re
from collections import defaultdict, namedtuple

Using TensorFlow backend.


In [5]:
# Show every column when a DataFrame is displayed (None lifts the column limit).
pd.set_option('display.max_columns', None)

In [6]:
def get_classes(path):
    """Return the class names found under a dataset split directory.

    Every immediate sub-directory of ``path`` counts as one class and its
    folder name is the class name. Plain files sitting next to the class
    folders are ignored so they cannot inflate the class count.

    Args:
        path: Directory to inspect; a leading ``~`` is expanded.

    Returns:
        Alphabetically sorted list of sub-directory names. Empty when ``path``
        does not exist or contains no sub-directories.
    """
    root = os.path.expanduser(path)
    # basename() instead of split('/') so the result does not depend on the
    # platform's path separator; sorted() because glob order is arbitrary.
    return sorted(
        os.path.basename(entry)
        for entry in glob(os.path.join(root, '*'))
        if os.path.isdir(entry)
    )
# Number of class folders under the training split; used below as the width of the
# softmax output layer.
n_classes =  len(get_classes('~/p5/data/M17_only/train'))
n_classes

2

In [7]:
def to_gray_scale(image):
    """Collapse an image to a single gray channel, then expand it back to three.

    The image is converted to gray scale and immediately converted back to a
    three-channel layout, so the returned array keeps a trailing channel
    dimension of 3 while carrying only intensity information.

    NOTE(review): COLOR_BGR2GRAY assumes BGR channel order. This function is
    used as the ImageDataGenerator preprocessing_function; if those images
    arrive in RGB order the red/blue luminance weights are swapped — confirm.

    Args:
        image: Three-channel image array accepted by cv2.cvtColor.

    Returns:
        A new numpy array holding the three-channel gray image.
    """
    single_channel = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    three_channel = cv2.cvtColor(single_channel, cv2.COLOR_GRAY2BGR)
    return np.array(three_channel)

In [8]:
 # VGG19 with ImageNet weights for 224x224 three-channel input; include_top=False
 # leaves off the network's top layers so a new classifier head can be attached.
 vgg19 = keras.applications.vgg19.VGG19(weights='imagenet', include_top=False, input_shape=(224,224, 3))













In [9]:
# Transfer-learning head: reuse the pretrained VGG19 base (created with include_top=False,
# i.e. without its final dense layers) and train only a new classifier on top of it.
base_model = vgg19
 
# Freeze every layer of the base so its weights are not updated during training.
for layer in base_model.layers:
    layer.trainable = False 
    
x = base_model.output
x = Flatten()(x) # flatten the convolutional output tensor before the dense layer
predictions = Dense(n_classes, activation='softmax')(x) # one softmax output per class (n_classes)

# The model that is trained: base input -> frozen base -> Flatten -> Dense softmax.
model = Model(inputs=base_model.input, outputs=predictions)

In [10]:
# Print the layer-by-layer structure, output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [11]:
# Locations of the three dataset splits, with the leading '~' expanded.
train_path, test_path, valid_path = map(
    os.path.expanduser,
    (
        '~/p5/data/M17_only/train',
        '~/p5/data/M17_only/test',
        '~/p5/data/M17_only/valid',
    ),
)

In [12]:
def _gray_flow(directory, batch_size, **flow_kwargs):
    """Stream 224x224 batches from `directory`, passing each image through to_gray_scale."""
    generator = ImageDataGenerator(preprocessing_function=to_gray_scale)
    return generator.flow_from_directory(directory,
                                         target_size=(224,224),
                                         batch_size=batch_size,
                                         **flow_kwargs)


train_batches = _gray_flow(train_path, batch_size=6)
valid_batches = _gray_flow(valid_path, batch_size=5)
# The test stream is created with shuffle=False and yields one image per batch.
test_batches = _gray_flow(test_path, batch_size=1, shuffle=False)


Found 3506 images belonging to 2 classes.
Found 550 images belonging to 2 classes.
Found 434 images belonging to 2 classes.


In [13]:
# Compile with Adam at a small learning rate (2e-5), categorical cross-entropy loss,
# and accuracy reported as a metric.
# Alternative optimizer kept for reference (not used):
# sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(Adam(lr=0.00002), loss='categorical_crossentropy', metrics=['accuracy'])





In [25]:
# Train the new head and keep the returned history object in `hist`.
# Optional callbacks, currently disabled (see the commented `callbacks=` argument below):
# es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
# mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)
# NOTE(review): steps_per_epoch=4 with the training batch size of 6 draws only 24 images,
# and epochs=1 — this looks like a smoke-test setting rather than a full training run;
# confirm before relying on the reported loss/accuracy.
hist = model.fit_generator(train_batches, 
                    steps_per_epoch=4, 
                    validation_data=valid_batches, 
                    validation_steps=4,
                    epochs=1,
                    verbose=2)#, 
#                     callbacks=[es, mc])

Epoch 1/1
 - 16s - loss: 0.0094 - acc: 1.0000 - val_loss: 2.0662e-04 - val_acc: 1.0000


In [14]:
# Print the names of the metrics recorded in the training history.
# NOTE(review): the saved output is a NameError and this cell's execution count (14) is
# lower than the fit cell's (25) — it appears to have been run before `hist` existed.
for key in hist.history:
    print(key)

NameError: name 'hist' is not defined

In [None]:
# NOTE(review): incomplete assignment — this cell is a SyntaxError as written. It may be a
# deliberate "stop here" marker for Run All; confirm, then delete or replace it.
asdf =

In [None]:
# Shell escape: list the files in the notebook's working directory.
!ls

In [26]:
# Save the model to an HDF5 file in the working directory; a later cell reloads it
# from the same file name with load_model.
model.save('hieroglyphs_vgg19_M17_only.h5')

In [None]:
# predictions = model.predict_generator(test_batches, steps=1, verbose=1)

In [17]:
# Number of batches needed to cover every test image once. Ceiling division so a
# final partial batch is not dropped when batch_size does not divide the sample
# count (for batch_size=1 this equals the previous floor division).
STEP_SIZE_TEST = -(-test_batches.n // test_batches.batch_size)
# Reset the generator before predicting; later cells pair the rows of `pred`
# with test_batches.filenames by position.
test_batches.reset()
pred = model.predict_generator(test_batches,
                               steps=STEP_SIZE_TEST,
                               verbose=1)
print(pred)

[[2.0770143e-09 1.0000000e+00]
 [3.1581248e-08 1.0000000e+00]
 [7.9749575e-13 1.0000000e+00]
 ...
 [2.4417470e-06 9.9999762e-01]
 [1.2520649e-01 8.7479347e-01]
 [8.9195587e-02 9.1080445e-01]]


In [2]:
# Column index of the largest value in each row of `pred` (one index per prediction row).
predicted_class_indices=np.argmax(pred,axis=1)
predicted_class_indices

NameError: name 'np' is not defined

In [19]:
# class name -> index, as assigned by the training generator
labels = train_batches.class_indices
print(labels)
# Invert the mapping (index -> class name) so predicted indices can be decoded.
labels = {index: name for name, index in labels.items()}
print(labels)
predictions = [labels[index] for index in predicted_class_indices]

{'M17': 0, 'unknown': 1}
{0: 'M17', 1: 'unknown'}


In [20]:
# Peek at the first 11 decoded class names.
predictions[:11]

['unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown',
 'unknown']

In [21]:
def get_glyph_name(series):
    """Derive the glyph label from a row's file path.

    The label is the leading folder of ``series['filename']`` when that folder
    is a single capital letter followed by digits (for example ``M17/``).

    Args:
        series: Mapping-like row exposing a ``'filename'`` entry.

    Returns:
        The matched folder name, or ``'unknown'`` when the path does not start
        with such a folder.
    """
    found = re.match(r'^([A-Z]{1}\d+)/', series['filename'])
    return found.group(1) if found else 'unknown'

In [1]:
# Pair each decoded prediction with the file it was made for. The two frames are
# joined on their positional index, so only rows present in both are kept.
pred_df = pd.DataFrame(predictions, columns=['predicted']).merge(
    pd.DataFrame(test_batches.filenames, columns=['filename']),
    left_index=True, right_index=True)
# Label parsed from the leading folder of each path ('unknown' when it is not a glyph folder).
pred_df['glyph'] = pred_df.apply(get_glyph_name, axis=1)
pred_df['match'] = pred_df['predicted'] == pred_df['glyph']
# Written under the home directory, like the dataset paths used elsewhere in this
# notebook, instead of a path hard-coded to one user's machine.
# NOTE(review): the file is named N35 although this notebook's model is M17-only —
# confirm the intended name.
pred_df.to_csv(os.path.expanduser('~/p5/N35_predictions.csv'), index=False)

NameError: name 'pd' is not defined

In [24]:
# Share of rows whose predicted label equals the label parsed from the file path.
# NOTE(review): for paths that do not start with a glyph folder (e.g. thumbs/...),
# get_glyph_name yields 'unknown', so on such data this is the share predicted 'unknown'
# rather than a true accuracy — confirm what is being measured.
acc = pred_df['match'].sum() / len(pred_df)
acc

0.9864558111380145

In [25]:
# Distinct labels that appear among the predictions.
pred_df['predicted'].unique()

array(['unknown', 'M17'], dtype=object)

In [26]:
# Keep only the rows whose predicted label is 'M17'.
m17 = pred_df[pred_df['predicted'] == 'M17']

In [27]:
# Preview the first rows predicted as M17.
m17.head()

Unnamed: 0,predicted,filename,glyph,match
111,M17,thumbs/img_100_x_520_y_20_.png,unknown,False
123,M17,thumbs/img_10110_x_170_y_170_.png,unknown,False
222,M17,thumbs/img_101_x_525_y_20_.png,unknown,False
240,M17,thumbs/img_10216_x_700_y_170_.png,unknown,False
355,M17,thumbs/img_1031_x_135_y_35_.png,unknown,False


In [28]:
# First five file paths among the rows predicted as M17.
m17.filename[:5]

111       thumbs/img_100_x_520_y_20_.png
123    thumbs/img_10110_x_170_y_170_.png
222       thumbs/img_101_x_525_y_20_.png
240    thumbs/img_10216_x_700_y_170_.png
355      thumbs/img_1031_x_135_y_35_.png
Name: filename, dtype: object

In [35]:
import shutil

# Copy a random sample of the images predicted as M17 into a review folder.
positives_dir = '/Users/scott/p5/data/interim/m17/positives'
# shutil.copy2 only copies *into* dst when dst is an existing directory; otherwise
# every image would be written over a single file named `positives`. Create it first.
os.makedirs(positives_dir, exist_ok=True)
# Series.sample raises when asked for more rows than exist; cap at what is available.
sample_size = min(100, len(m17))
for image_file in ['/Users/scott/p5/data/interim/m17/' + name for name in m17.filename.sample(sample_size)]:
    print(image_file)
    shutil.copy2(image_file, positives_dir)
    


/Users/scott/p5/data/interim/m17/thumbs/img_17709_x_1205_y_280_.png
/Users/scott/p5/data/interim/m17/thumbs/img_18904_x_460_y_300_.png
/Users/scott/p5/data/interim/m17/thumbs/img_37663_x_175_y_580_.png
/Users/scott/p5/data/interim/m17/thumbs/img_49644_x_1280_y_755_.png
/Users/scott/p5/data/interim/m17/thumbs/img_53490_x_350_y_815_.png
/Users/scott/p5/data/interim/m17/thumbs/img_23199_x_95_y_365_.png
/Users/scott/p5/data/interim/m17/thumbs/img_19520_x_180_y_310_.png
/Users/scott/p5/data/interim/m17/thumbs/img_44_x_240_y_20_.png
/Users/scott/p5/data/interim/m17/thumbs/img_471_x_695_y_25_.png
/Users/scott/p5/data/interim/m17/thumbs/img_68249_x_225_y_1035_.png
/Users/scott/p5/data/interim/m17/thumbs/img_67085_x_1125_y_1015_.png
/Users/scott/p5/data/interim/m17/thumbs/img_36691_x_355_y_565_.png
/Users/scott/p5/data/interim/m17/thumbs/img_8121_x_305_y_140_.png
/Users/scott/p5/data/interim/m17/thumbs/img_2252_x_1200_y_50_.png
/Users/scott/p5/data/interim/m17/thumbs/img_28715_x_795_y_445_.png


In [23]:
 # /Users/scott/p5/data/interim/m17

In [15]:
# Reload the saved model from the working directory, replacing the in-memory `model`.
model = load_model('hieroglyphs_vgg19_M17_only.h5')

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




In [16]:
# Re-point test_batches at the interim thumbnail directory; this replaces the generator
# created from test_path earlier. Same gray-scale preprocessing and 224x224 target size,
# shuffle disabled, one image per batch.
# NOTE(review): the output reports 2 classes for this directory — presumably the
# `thumbs/` folder and the `positives/` folder written by the copy cell; confirm that
# the copied positives are not being predicted on a second time.
test_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(os.path.expanduser('/Users/scott/p5/data/interim/m17'), 
                                                        target_size=(224,224),
                                                        shuffle=False,
                                                        batch_size=1)

Found 79296 images belonging to 2 classes.


In [41]:
# model.predict()
# cv.im

### PLAY

In [None]:
# Scratch data: a 50x100 array of standard-normal samples.
temp = np.random.randn(50,100)
# temp

In [None]:
# Show the scratch array as an image with the 'jet' colour map, plus a colour bar.
plt.imshow(temp,cmap = 'jet')
plt.colorbar()

In [None]:
import cv2 


In [None]:
# Shell escape: list the working directory (e.g. to check the image file read below is present).
!ls

In [None]:
# cv2.imread does not raise when the file is missing or unreadable — it returns
# None — so fail here with a clear message instead of at the first use of `img`.
img = cv2.imread('SarcEastGH.jpg')
if img is None:
    raise FileNotFoundError("could not read image 'SarcEastGH.jpg'")


In [None]:
# Dimensions of the loaded image array.
img.shape

In [None]:
# `img` comes from cv2.imread, which stores channels in BGR order, while imshow
# interprets a three-channel array as RGB; convert first so colours are not swapped.
plt.figure(figsize=[10,10])
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [None]:
# Standard-normal noise with exactly the image's shape, so `img + temp` in the next
# cell is always well-defined instead of relying on a hard-coded (1060, 1720, 3).
temp = np.random.randn(*img.shape)


In [None]:
# Adding float noise to the image yields a float array on the 0-255 scale; imshow
# expects float RGB data in [0, 1] and would clip nearly everything to white.
# Round, clip to the valid byte range and cast back to uint8, then convert
# OpenCV's BGR channel order to RGB for display.
noisy_img = np.clip(np.rint(img + temp), 0, 255).astype(np.uint8)
plt.figure(figsize=[10,10])
plt.imshow(cv2.cvtColor(noisy_img, cv2.COLOR_BGR2RGB))