In [None]:
import sqlite3
import random
import itertools
import subprocess
import os
import shlex
import time
import pickle

In [None]:
import bz2

In [None]:
from PIL import Image, ImageDraw, ImageFont

# from pillow import Image

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as cm
from matplotlib.colors import Normalize

In [None]:
import keras
from keras.preprocessing import image
from keras.applications.imagenet_utils import decode_predictions, preprocess_input
from keras.models import Model

In [None]:
# Create a TensorFlow session with the `allow_growth` GPU option switched on.
# According to the original author this seems to help with some GPU memory issues.

import tensorflow as tf

config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)

In [None]:
# Instantiate VGG16 with the 'imagenet' weights and its top layers included,
# then print the layer summary.
model = keras.applications.VGG16(include_top=True, weights="imagenet")
print("model loaded")
model.summary()

In [None]:
def load_image(path, target_size=None):
    """Load an image file and prepare it as a single-sample model input.

    Parameters
    ----------
    path : str
        Location of the image file to load.
    target_size : tuple of int, optional
        Size passed to ``image.load_img``. When omitted, the spatial input
        size of the global ``model`` (``model.input_shape[1:3]``) is used,
        which is what this function always did before.

    Returns
    -------
    img
        The object returned by ``image.load_img`` (the resized image).
    x
        The image as an array with a leading batch axis of length 1, after
        ``preprocess_input`` has been applied.
    """
    if target_size is None:
        # Default to the size the classifier expects.
        target_size = model.input_shape[1:3]
    img = image.load_img(path, target_size=target_size)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict()
    x = preprocess_input(x)
    return img, x

In [None]:
# Feature extractor: shares the input of `model` but takes its output from the
# "fc2" layer. Only needed when doing something with the second-to-last layer.

feat_extractor = Model(
    outputs=model.get_layer("fc2").output,
    inputs=model.input,
)
print("feature extractor setup")
feat_extractor.summary()

### Analyse samples and save csv

In [None]:
# Collect the image files to analyze by walking `current_path` recursively.
current_path = "/home/rte/data/images/random/100k/classification/"
# current_path = convert_path

image_extensions = ['.jpg', '.png', '.jpeg']   # matched against the lower-cased file extension
max_num_images = 100000

images = [
    os.path.join(dp, f)
    for dp, dn, filenames in os.walk(current_path)
    for f in filenames
    if os.path.splitext(f)[1].lower() in image_extensions
]
# Enforce the cap. `max_num_images` used to be defined but never applied, so
# the "keeping" message below reported every image that was found.
images = images[:max_num_images]
num_x = len(images)
print("keeping %d images to analyze" % num_x)

In [None]:
# Classify every path in `images` with `model` and append one comma-separated
# record per image to `writefile`:
#     <file name>,<class>,<probability>,<class>,<probability>,...
start = time.time()

all_predictions = []

# Stays empty unless the feature-extraction lines inside the loop are enabled.
features = []

writefile = "predictions_100k.txt"

# The context manager closes the file even if loading or prediction raises.
with open(writefile, "a+") as f:
    for i, image_path in enumerate(images):
        if i % 500 == 0:
            # Report the time elapsed since the previous report, then restart the timer.
            elap = time.time() - start
            print("analyzing image %d / %d. Time: %4.4f seconds." % (i, len(images), elap))
            start = time.time()
        img, x = load_image(image_path)

        # Optional: collect the fc2 activations as well.
        # feat = feat_extractor.predict(x)[0]
        # features.append(feat)

        predictions = model.predict(x)

        # Assemble the whole record before writing, so a failure while loading
        # or predicting never leaves a half-written line in the output file.
        fields = [os.path.basename(image_path)]
        for _, pred, prob in decode_predictions(predictions)[0]:
            fields.extend([str(pred), str(prob)])
        f.write(",".join(fields) + "\n")

print('finished predicting class for %d images' % len(images))

In [None]:
# Write the image paths and the `features` list to a pickle file.
# NOTE(review): the class predictions themselves are not stored here, and
# `features` only has content if the commented-out feature extraction in the
# prediction loop was enabled -- confirm this is the intended payload.

f = "classification_vgg_subset" + ".pickle"

print(f)

# WRITE -- the `with` block closes the file on exit, so no explicit close() is needed.
with open(f, "wb") as write_file:
    pickle.dump([images, features], write_file)


### Decode predictions from pickle

In [None]:
# Load a bz2-compressed pickle and print its first five entries.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
p = "/home/rte/re-imaging/visualisation/features/features_0_50000_vgg.pkl.pbz2"
with bz2.open(p, "r") as read_file:
    predictions = pickle.load(read_file)
# Use a distinct loop name so the path stored in `p` is not overwritten.
for entry in predictions[:5]:
    print(entry)

In [None]:
# Decode stored predictions into class names and append them to a text file.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
p = "/home/rte/re-imaging/visualisation/features/v1/features_0_50000_vgg.pkl.pbz2"
with bz2.open(p, "r") as read_file:
    predictions = pickle.load(read_file)

writefile = "predictions_1m.txt"

# Only the first stored entry is processed for now.
to_decode = predictions[:1]

# The context manager closes the output file even if decoding raises.
with open(writefile, "a+") as f:
    for raw in to_decode:
        # NOTE(review): assumes each stored entry has the single-sample shape
        # that `model.predict` returns for one image -- TODO confirm against
        # the contents of the pickle.
        # decode_predictions returns one list per sample; [0] selects that
        # list, matching how the other cells in this notebook use it.
        for _, pred, prob in decode_predictions(raw)[0]:
            print("predicted %s with probability %0.3f" % (pred, prob))
            f.write("," + str(pred) + "," + str(prob))
        # Terminate the record once, after all classes of this entry are written.
        # No file name is available here, so each line starts with a comma.
        f.write("\n")

# Report the number of entries actually decoded in this cell.
print('finished predicting class for %d images' % len(to_decode))

In [None]:
# Show the length of the first entry in `predictions`.
len(predictions[0])

In [None]:
# Run `model` on the first path in `filepaths` and print the raw output.
# NOTE(review): `filepaths` is only assigned in a later cell of this notebook,
# so this cell fails on a fresh kernel unless that cell has been run first.
# It also rebinds `predictions`, which the following inspection cells read.
image_folder = "/mnt/hd2/images/all/"
for i, image_path in enumerate(filepaths[:1]):
    img, x = load_image(image_folder + image_path)

    predictions = model.predict(x)
    print(predictions)

In [None]:
# Show the length of the first entry in `predictions`.
len(predictions[0])

In [None]:
# Print `predictions` as plain text.
print(predictions)

In [None]:
# Display `predictions` as the cell's output value.
predictions

In [None]:
# Print the class name and probability of each decoded entry for the first
# (index 0) sample; the first element of each tuple is ignored.
for _, pred, prob in decode_predictions(predictions)[0]:
        print("predicted %s with probability %0.3f" % (pred, prob))

In [None]:
# Print the complete structure returned by decode_predictions.
print(decode_predictions(predictions))

### Save prediction images

In [None]:
# Read the list of image paths (one per line) into `filepaths`.
with open("/home/rte/data/paths/all_converted_jpg_images_shuf.txt", "r") as f:
    lines = f.readlines()

print("length:", len(lines))
if lines:
    # Preview the first raw line; guarded so an empty file does not raise IndexError.
    print(lines[0])

# Strip surrounding whitespace (including the trailing newline) from every line.
filepaths = [line.strip() for line in lines]

In [None]:
# For the first nine paths in `filepaths`, draw a two-panel figure: the image
# on the left and a horizontal bar chart of the decoded VGG16 classes on the right.
image_folder = "/mnt/hd2/images/all/"

plt.close('all')

for i, image_path in enumerate(filepaths[0:9]):
    fig, ax = plt.subplots(1, 2)
    fig.set_size_inches(10, 5)
    fig.set_dpi(150)

    img, x = load_image(image_folder + image_path)
    im = mpimg.imread(image_folder + image_path)
    # Take the size from the loaded array. The PIL handle `pim` that was read
    # here before is never created in this cell (its Image.open line is
    # commented out), so the cell raised NameError on a fresh kernel.
    height, width = im.shape[:2]
    asp = width/height

    prediction = model.predict(x)

    # Split the decoded tuples into parallel lists of class names and
    # probabilities. No index is used here, so the outer `i` is not shadowed.
    classes = []
    probabilities = []
    for _, pred, prob in decode_predictions(prediction)[0]:
        classes.append(pred)
        probabilities.append(prob)

    # Left panel: the image itself, titled with the file name up to the first dot.
    imgplot = ax[0].imshow(im, aspect=asp)
    ax[0].axis('off')
    ax[0].title.set_text("Image ID: " + image_path.split(".")[0])

    # Right panel: one horizontal bar per decoded class, first class at the top.
    y_pos = np.arange(len(classes))
    probabilities = np.array(probabilities)

    ax[1].set_xlim(0, 1)
    ax[1].barh(y_pos, probabilities, align='center', color='0.75')
    ax[1].set_yticks(y_pos)
    ax[1].set_yticklabels(classes)
    ax[1].invert_yaxis()
    ax[1].title.set_text('VGG16 Prediction')
    # NOTE(review): the axis runs from 0 to 1, so the values are fractions
    # rather than percentages -- confirm the intended label.
    ax[1].set_xlabel('Percentage')

    plt.tight_layout(pad=5.0)

    # Saving is currently disabled; enable the line below to write the figure.
    save_path = image_path
#     plt.savefig(save_path, dpi=150)

In [None]:
import sys
image_folder = "/mnt/hd2/images/all/"

# Work-in-progress cell: open the first image with PIL, run the classifier and
# compute the text rows that would label the image with its decoded classes.
for i, image_path in enumerate(filepaths[0:1]):
    img, x = load_image(image_folder + image_path)
    im = mpimg.imread(image_folder + image_path)

    # Open the PIL image before reading its size; previously `pim.size` was
    # read a few lines before `pim` was assigned.
    pim = Image.open(image_folder + image_path)
    width, height = pim.size
    print(width, height)
    asp = width/height

    d = ImageDraw.Draw(pim)

    # NOTE(review): PNG data is binary while sys.stdout is normally a text
    # stream, so this call may raise TypeError -- confirm what was intended
    # (e.g. displaying the image or saving it to a file).
    pim.save(sys.stdout, "PNG")

    prediction = model.predict(x)

    classes = []
    probabilities = []

    # Split the decoded tuples into parallel lists of class names and probabilities.
    for _, pred, prob in decode_predictions(prediction)[0]:
        classes.append(pred)
        probabilities.append(prob)

    # Compute a vertical offset and a label prefix for each class; the actual
    # drawing calls are still commented out below.
    for i, (c, p) in enumerate(zip(classes, probabilities)):
        y = i * 30 + 50
        s = str(c) + " : "
#         plt.text(width + 100, y, s, horizontalalignment='right', fontsize=18)
#         plt.text(width + 170, y, "{:.2f} %".format(p*100), 
#                  horizontalalignment='right', fontsize=18)