In [1]:
# Data directory /Users/scott/p5/data/interim/n35_train_valid_test

# python src/click_select2.py -i ~/hieroglyphs/Texts/AnteSouthWall.jpg -p data/water600/ -q 4


In [48]:
import numpy as np
import pandas as pd
import keras
from keras import backend as K
from keras.preprocessing import image
from keras.applications.vgg19 import decode_predictions
from keras.models import Sequential, Model
from keras.layers import Activation
from keras.layers.core import Dense, Flatten
from keras.optimizers import Adam, SGD
from keras.metrics import categorical_crossentropy
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import *
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import itertools
from glob import glob
import os.path
import PIL
import cv2
import re
from collections import defaultdict, namedtuple
import pickle

from sklearn.metrics import f1_score, recall_score

from PIL import Image
from io import BytesIO
from IPython.display import HTML

In [2]:
# Lift the per-cell width limit, presumably so long HTML strings in DataFrame cells are not truncated.
# NOTE(review): pandas >= 1.0 deprecates -1 for this option in favour of None — confirm the target pandas version.
pd.set_option('display.max_colwidth', -1)

def get_thumbnail(path):
    """Open the image at `path` and reduce it to a thumbnail bounded by 150x150."""
    max_size = (150, 150)
    thumb = Image.open(path)
    # Image.thumbnail resizes the object in place and returns None.
    thumb.thumbnail(max_size, Image.LANCZOS)
    return thumb

def image_base64(im):
    """Return `im` encoded as base64 text of its JPEG bytes.

    Parameters
    ----------
    im : str or image object
        Either a file path (a thumbnail is loaded with `get_thumbnail`) or an
        already-opened image exposing `mode`, `convert` and `save`.

    Returns
    -------
    str
        Base64 text of the JPEG-encoded image.
    """
    # Imported here because the notebook's import cell never brings in base64.
    import base64

    if isinstance(im, str):
        im = get_thumbnail(im)
    # JPEG cannot store alpha or palette data; Pillow raises when asked to
    # save e.g. an RGBA or P image, so convert anything JPEG cannot hold.
    if im.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
        im = im.convert('RGB')
    with BytesIO() as buffer:
        im.save(buffer, 'jpeg')
        return base64.b64encode(buffer.getvalue()).decode()

def image_formatter(im):
    """Build an HTML <img> tag whose source is `im` as an inline base64 JPEG data URI."""
    encoded = image_base64(im)
    return '<img src="data:image/jpeg;base64,' + encoded + '">'

In [3]:
# Do not limit the number of columns shown when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [4]:
def get_classes(path):
    """Return the class names under `path`, one per immediate sub-directory.

    Parameters
    ----------
    path : str
        Root of a class-per-folder image tree; a leading '~' is expanded.

    Returns
    -------
    list of str
        Sub-directory names in sorted order. Plain files are ignored so the
        count is not inflated by stray non-class entries.
    """
    root = os.path.expanduser(path)
    entries = glob(os.path.join(root, '*'))
    # basename instead of split('/') so the result does not depend on the
    # platform's path separator.
    return sorted(os.path.basename(entry) for entry in entries if os.path.isdir(entry))
# Number of classes = number of entries get_classes reports under the training directory.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
n_classes =  len(get_classes('/home/scott/p5/data/modeling_all/train'))
n_classes

11

In [5]:
def to_gray_scale(image):
    """Collapse `image` to grayscale, then expand it back to three channels.

    NOTE(review): the conversion assumes OpenCV's BGR channel order; confirm
    that every caller supplies BGR (other loaders may supply RGB).
    """
    single_channel = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    three_channel = cv2.cvtColor(single_channel, cv2.COLOR_GRAY2BGR)
    return np.array(three_channel)

In [6]:
def gray_and_scale(image):
    """Apply `to_gray_scale` to `image` and resize the result to 224x224."""
    target_size = (224, 224)
    gray = to_gray_scale(image)
    return cv2.resize(gray, target_size)

In [7]:
def get_heat_value(img_segment, model, labels):
    """Run `model` on a single image window and return its raw prediction array.

    Parameters
    ----------
    img_segment : array
        Image window; it is grayscaled and resized to 224x224 by
        `gray_and_scale` (assumed BGR as loaded by OpenCV — TODO confirm).
    model :
        Object exposing `predict`; it is called with a batch of one image.
    labels :
        Currently unused; kept so existing call sites keep working.

    Returns
    -------
    Whatever `model.predict` returns for the one-image batch.
    """
    # Reuse the shared helper instead of repeating the grayscale + resize
    # steps, so the preprocessing is defined in one place.
    batch = np.expand_dims(gray_and_scale(img_segment), axis=0)
    return model.predict(batch)
#     if labels[np.argmax(pred_array)] == 'N35':
#         return 1
#     elif labels[np.argmax(pred_array)] == 'N35_part':
#         return 0.5
#     else:
#         return 0
    
# get_heat_value(img_part, model, labels)

In [8]:
 # VGG19 with ImageNet weights, built without its top layers, for 224x224x3 inputs.
 # NOTE(review): this cell starts with a stray leading space; IPython tolerates it, a plain .py export would not.
 vgg19 = keras.applications.vgg19.VGG19(weights='imagenet', include_top=False, input_shape=(224,224, 3))













In [9]:
# The VGG19 base was built without its top layers; a new softmax layer is added
# below and is the only part left trainable.
base_model = vgg19
 
# Freeze every layer of the base model so its weights are not updated during training.
for layer in base_model.layers:
    layer.trainable = False 
    
x = base_model.output
x = Flatten()(x) # flatten the base model's output tensor
predictions = Dense(n_classes, activation='softmax')(x) # one output unit per class

# This is the model that gets compiled and trained in the following cells.
# NOTE(review): the name `predictions` is reused later for a list of decoded labels.
model = Model(inputs=base_model.input, outputs=predictions)

In [10]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [11]:
# Root directories of the train / test / validation image splits.
# NOTE(review): these are absolute paths (no '~'), so expanduser leaves them unchanged;
# they are also machine-specific.
train_path = os.path.expanduser('/home/scott/p5/data/modeling_all/train/')
test_path = os.path.expanduser('/home/scott/p5/data/modeling_all/test/')
valid_path = os.path.expanduser('/home/scott/p5/data/modeling_all/valid/')

In [12]:
# Directory iterators for the three splits. Each uses to_gray_scale as its
# preprocessing function and a 224x224 target size.
train_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(train_path,
                                                         target_size=(224,224), 
                                                         batch_size=6)
valid_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(valid_path, 
                                                         target_size=(224,224), 
                                                         batch_size=5)
# The test iterator is unshuffled with batch_size=1; later cells pair its
# predictions with test_batches.filenames by position.
test_batches = ImageDataGenerator(preprocessing_function=to_gray_scale).flow_from_directory(test_path, 
                                                        target_size=(224,224),
                                                        shuffle=False,
                                                        batch_size=1)


Found 3612 images belonging to 11 classes.
Found 451 images belonging to 11 classes.
Found 456 images belonging to 11 classes.


In [13]:
# Earlier optimiser experiments, kept for reference:
# sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
# model.compile(Adam(lr=0.00002), loss='categorical_crossentropy', metrics=['accuracy'])
# Current setting: Adam with lr=1e-5, categorical cross-entropy loss, accuracy metric.
model.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])





In [53]:
# patient early stopping
# es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=300)
# mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)
# One epoch of 4 training steps and 4 validation steps; the early-stopping and
# checkpoint callbacks above are currently disabled.
# NOTE(review): with steps_per_epoch=4 only a few batches are seen per call —
# confirm this is intended rather than a leftover from quick experimentation.
hist = model.fit_generator(train_batches, 
                    steps_per_epoch=4, 
                    validation_data=valid_batches, 
                    validation_steps=4,
                    epochs=1,
                    verbose=2)#, 
#                     callbacks=[es, mc])

Epoch 1/1
 - 6s - loss: 0.5699 - acc: 0.9583 - val_loss: 0.5830 - val_acc: 0.8500


In [31]:
# Save the model to a file in the current working directory (relative path).
model.save('hieroglyphs_vgg19_N35_only_custom_images_multiclass.h5')

In [64]:
# Replace the in-memory model with the one saved on disk.
# The path is relative (no '~'), so expanduser leaves it unchanged.
model = load_model(os.path.expanduser('hieroglyphs_vgg19_N35_only_custom_images_multiclass.h5'))

In [65]:
# predictions = model.predict_generator(test_batches, steps=1, verbose=1)

In [66]:
# Number of whole batches in the test iterator.
STEP_SIZE_TEST=test_batches.n//test_batches.batch_size
# Reset the iterator before predicting so results start from the first test batch.
test_batches.reset()
pred=model.predict_generator(test_batches,
steps=STEP_SIZE_TEST,
verbose=1)
# print(pred)



In [67]:
# For each row of `pred`, take the column index holding the largest value.
predicted_class_indices=np.argmax(pred,axis=1)
predicted_class_indices

array([ 1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  9,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  7,
        0,  0,  0,  0,  0,  9,  0,  0,  9,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  4,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  4,
        0,  0,  0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  8,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  7,  0,  1,  0,  0,  0,  0,  0,  1,
        1,  0,  9,  8,  9,  1,  1,  4,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  0,  1,  7,  1,  7,  5,  1,  1,  5,  1,  1,  1,  1,  0,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  9,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  7,  2,  2,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  6,  3,  3,  3,  3,
        6,  6,  3,  3,  3,  3,  0,  3,  3,  3,  6,  3,  6,  3,  3,  3,  3,
        3,  3,  3,  3,  3

In [68]:
# Mapping of class name -> class index as reported by the training iterator.
labels = train_batches.class_indices
print(labels)
# Invert it to class index -> class name so predicted indices can be decoded.
labels = {class_index: class_name for class_name, class_index in labels.items()}
print(labels)
predictions = [labels[class_index] for class_index in predicted_class_indices]

{'N35': 0, 'basket': 1, 'bird': 2, 'cloth': 3, 'cobra': 4, 'mouth': 5, 'reed': 6, 'sun': 7, 'tether': 8, 'viper': 9, 'wick': 10}
{0: 'N35', 1: 'basket', 2: 'bird', 3: 'cloth', 4: 'cobra', 5: 'mouth', 6: 'reed', 7: 'sun', 8: 'tether', 9: 'viper', 10: 'wick'}


In [69]:
# Peek at the first 11 decoded predictions.
predictions[:11]

['basket',
 'N35',
 'N35',
 'N35',
 'N35',
 'N35',
 'N35',
 'N35',
 'N35',
 'N35',
 'N35']

In [70]:
# def get_glyph_name(series):
#     file = series['filename']
#     m = re.search(r'non_glyphs', file)
#     if m:
#         return 'non_glyphs'
#     else:
#         return 'glyphs'

# def get_glyph_name(series):
#     file = series['filename']
#     m = re.match(r'^([A-Z]{1}\d+)/', file)
#     if m:
#         return m.group(1)
#     else:
#         return 'unknown'


def get_glyph_name(series):
    """Return the class name encoded in a row's 'filename'.

    The class is the first path component of the filename
    (e.g. 'N35/img_x_1_y_2.png' -> 'N35'). Any directory name is accepted,
    so names such as 'N35_part' are returned as-is rather than 'unknown'.

    Parameters
    ----------
    series : mapping
        Anything indexable by 'filename' (a DataFrame row or a dict).

    Returns
    -------
    str
        The leading directory name, or 'unknown' when the filename has no
        leading directory component.
    """
    file = series['filename']
    # Normalise Windows separators so the split works on either platform.
    head, separator, _ = file.replace('\\', '/').partition('/')
    if separator and head:
        return head
    return 'unknown'

In [71]:
# Pair each decoded prediction with its test filename by row position.
pred_df = pd.DataFrame(predictions, columns=['predicted']).merge(
    pd.DataFrame(test_batches.filenames, columns=['filename']),
    left_index=True,
    right_index=True,
)
# The true class is derived from each filename.
pred_df['glyph'] = pred_df.apply(get_glyph_name, axis=1)
pred_df['match'] = pred_df['predicted'].eq(pred_df['glyph'])
pred_df.head()
# pred_df.to_csv('/Users/scott/p5/N35_predictions_custom_images.csv', index=False)

Unnamed: 0,predicted,filename,glyph,match
0,basket,N35/img_x_101_y_264.png,N35,False
1,N35,N35/img_x_106_y_15.png,N35,True
2,N35,N35/img_x_113_y_127.png,N35,True
3,N35,N35/img_x_121_y_102.png,N35,True
4,N35,N35/img_x_127_y_205.png,N35,True


In [72]:
# Accuracy: the fraction of rows whose prediction equals the true class.
acc = pred_df['match'].mean()
acc

0.7916666666666666

In [73]:
def get_indices(series, idx_dict):
    """Map a Series of class names to their class indices.

    Parameters
    ----------
    series : pandas.Series
        Class names to translate.
    idx_dict : dict
        Mapping of class name -> class index.

    Returns
    -------
    pandas.Series
        Indices aligned with `series.index`.

    Raises
    ------
    KeyError
        If `series` contains a name that is not a key of `idx_dict`.
    """
    # Fail loudly on unknown names, as a plain dict lookup would; Series.map
    # alone would silently produce NaN for them.
    missing = set(series.unique()) - set(idx_dict)
    if missing:
        raise KeyError('Unknown class name(s): {}'.format(sorted(map(str, missing))))
    return series.map(idx_dict)

In [74]:
# Translate predicted and true class names back to integer indices, then
# compute the F1 score averaged over classes with average='weighted'.
y_pred = get_indices(pred_df.predicted, train_batches.class_indices)
y_true = get_indices(pred_df.glyph, train_batches.class_indices)
f1_score(y_true, y_pred, average='weighted')

0.7895892511834345

In [75]:
# Support-weighted recall. For single-label multiclass data this equals
# plain accuracy, which is why it matches `acc`.
recall_score(y_true, y_pred, average='weighted')

0.7916666666666666

In [27]:
# Spot-check five randomly chosen rows.
pred_df.sample(5)

Unnamed: 0,predicted,filename,glyph,match
293,mouth,mouth/img_x_813_y_423.png,mouth,True
380,sun,sun/img_x_58_y_51.png,sun,True
207,cloth,cloth/img_x_398_y_367.png,cloth,True
138,basket,basket/img_x_501_y_182.png,basket,True
60,N35,N35/img_x_411_y_200.png,unknown,False


In [None]:
# Orig Image Shape (1507, 1720, 3)
# img = cv2.imread('/home/scott/sync/Antewest.jpg')[250:1000, 1220:]
img_path = os.path.expanduser('~/hieroglyphs/Texts/AnteSouthWall.jpg')
img = cv2.imread(img_path)
# cv2.imread reports failure by returning None rather than raising, which
# would otherwise surface as an opaque AttributeError on img.shape below.
if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(img_path))

# *************************************************************
height, width, depth = img.shape
print((height, width, depth))
half_width = width // 2
half_height = height // 2
# Keep the bottom-right quadrant of the full image.
img = img[half_height:, half_width:].copy()

# Within that quadrant keep the first 300 rows and drop the first 60 columns.
img = img[:300, 60:]

print(img.shape)
plt.figure(figsize=(12,12))
# OpenCV loads images as BGR while matplotlib expects RGB; convert for display
# only, so `img` itself stays BGR for the OpenCV-based cells that follow.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))


In [None]:
heat_map = []
window_height = 40
window_width = 40
step_size = 20          # pixels between successive window origins
heat_threshold = 0.9    # minimum class-0 score for a window to count as positive
pos_dir = '/home/scott/p5/data/water1k/pos'
neg_dir = '/home/scott/p5/data/water1k/neg'
# cv2.imwrite returns False instead of raising when the target directory does
# not exist, so make sure both output directories are present up front.
os.makedirs(pos_dir, exist_ok=True)
os.makedirs(neg_dir, exist_ok=True)

for y_step in range(0, img.shape[0], step_size):
    horizontal = []
    # Stop once less than half a window of rows remains below y_step.
    if y_step + (window_height / 2) > img.shape[0]:
        break
    for x_step in range(0, img.shape[1], step_size):
        # Likewise stop the row once less than half a window of columns remains.
        if x_step + (window_width / 2) > img.shape[1]:
            break

        # Get the window_width x window_height image segment
        crop_img = img[y_step:y_step+window_height, x_step:x_step+window_width].copy()
        # Get the heat value of the image segment; only the class-0 score is used below
        heat = get_heat_value(crop_img, model, labels)
        score = heat[0][0]
        if score >= heat_threshold:
            # Save positive results to file
            cv2.imwrite(os.path.join(pos_dir, 'pos_{}_{}.png'.format(x_step, y_step)), crop_img)
            # Append the heat value to the heat map
            horizontal.append(score)
        else:
            # Append 0 and keep the window as a negative example
            horizontal.append(0)
            cv2.imwrite(os.path.join(neg_dir, 'neg_{}_{}.png'.format(x_step, y_step)), crop_img)
        print((x_step, y_step, score))
    heat_map.append(np.array(horizontal))

In [None]:
# Persist the raw heat map to disk.
# NOTE(review): pickle is already imported in the notebook's main import cell; this import is redundant.
import pickle
with open(os.path.expanduser('~/p5/data/heatmap_large_test.pkl'), 'wb') as f:
    pickle.dump(heat_map, f)

In [None]:
# Shallow copy of the heat_map list (the row arrays themselves are shared).
hm2 = heat_map.copy()

In [None]:
# Stack the per-row arrays into one array and show its shape.
scott = np.vstack(tuple(heat_map))
scott.shape

In [None]:
# Repeat every heat-map cell 20 times along both axes; 20 is also the step
# used by the sliding-window loop that built heat_map.
np_map = np.repeat(np.repeat(np.array(heat_map), 20, axis=0), 20, axis=1)
np_map.shape

In [None]:
# with open(os.path.expanduser('~/p5/data/heatmap_large_test.pkl'), 'rb') as f:
#     heat_map = pickle.load(f)

In [None]:
# heat_map 172

In [None]:
# np_map = np.array(heat_map)

In [None]:
# Shape of the upsampled heat map.
np_map.shape

In [None]:
# np_tmp = np_map[:, 0]
# np_tmp = np_tmp[:, 0] 
# np_tmp

In [None]:
# [f(x) if condition else g(x) for x in sequence]
# NOTE: filter_value is only referenced by the commented-out expression on the
# next line; np_tmp2 is currently just another name for np_map (no copy, no filtering).
filter_value = 0.9
np_tmp2 = np_map #np.array([val if val > filter_value else 0 for val in np_map])

In [None]:
# Shape check before the reshape in the next cell.
np_tmp2.shape

In [None]:
# NOTE(review): the hard-coded (60, 160) must match np_tmp2's element count or
# reshape raises ValueError; np_tmp3 is only referenced from commented-out code
# in the visible notebook — confirm whether this cell is still needed.
np_tmp3 = np_tmp2.reshape((60, 160))

In [None]:
# np_tmp4 = np.repeat(np.repeat(np_tmp3, 5, axis=0), 5, axis=1)

In [None]:
# Quick look at the heat map rendered as an image.
plt.imshow(np_tmp2)

In [None]:
# The same map rendered with seaborn.
# NOTE(review): this import sits here rather than in the notebook's main import cell.
import seaborn as sns
sns.heatmap(np_tmp2, cmap='Blues')

In [None]:
# print(np_tmp4.shape)
# print(img.shape)

In [None]:
# np_tmp4

In [None]:
import heatmap
from scipy import ndimage
from skimage import io
import os

In [None]:
# Write the cropped image to disk and read it back with skimage for the overlay.
# Expand '~' once and use the same path for both calls: the original read used
# the literal '~/...' string, which is not the path that was written.
# NOTE(review): `image` rebinds the name imported from keras.preprocessing.
image_filename = os.path.expanduser('~/sync/Antewest_cropped.png')
cv2.imwrite(image_filename, img)
image = io.imread(image_filename)
plt.imshow(image)

In [None]:
# my_heat_map = ndimage.filters.gaussian_filter(np_tmp4, sigma=16)
# np_tmp5 = np.hstack((np.zeros((np_tmp4.shape[0], 25)), np_tmp4[:,:-25]))
# Smooth the heat map before overlaying it. gaussian_filter is called from the
# scipy.ndimage top level because the ndimage.filters namespace is deprecated
# in newer SciPy releases.
my_heat_map = ndimage.gaussian_filter(np_tmp2, sigma=16)
# my_heat_map = ndimage.filters.gaussian_filter(np_tmp4, sigma=16)
plt.figure(figsize=(15,15))
heatmap.add(image, my_heat_map, alpha=0.7,cmap='jet')#, save='../data/face_heat_map.png')
# plt.colorbar()

In [None]:
# np_tmp4 is only ever assigned in commented-out code, so referencing it raises
# NameError; np_tmp2 is the heat map the notebook currently filters and plots.
# NOTE(review): confirm np_tmp2 is the intended source for the shifted copy.
right_shifted = np_tmp2.copy()