# Save VGG predictions to SQLite database

Use the pretrained Keras VGG16 model (trained on ImageNet) to classify each image, and save the predictions to a SQLite database.

In [None]:
import sqlite3
import os
import time

from PIL import Image, ImageDraw, ImageFont

# from pillow import Image

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as cm
from matplotlib.colors import Normalize


In [None]:
import keras
from keras.preprocessing import image
from keras.applications.imagenet_utils import decode_predictions, preprocess_input
from keras.models import Model, load_model

In [None]:
# Create the TensorFlow session with allow_growth enabled on the GPU
# options; this seems to help with some GPU memory issues.

import tensorflow as tf

config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
session = tf.Session(config=config)

In [None]:
def load_image(path):
    """Load the image at *path* and prepare it as input for ``model``.

    The image is resized to the spatial size read from the global
    ``model.input_shape[1:3]``.

    Returns:
        A tuple ``(img, x)``: the loaded image object, and the array made
        from it with a leading axis of length 1 added and
        ``preprocess_input`` applied.
    """
    size = model.input_shape[1:3]
    img = image.load_img(path, target_size=size)
    # Add a leading axis of length 1 in front of the image array.
    batch = image.img_to_array(img)[np.newaxis, ...]
    return img, preprocess_input(batch)

In [None]:
# VGG16 with ImageNet weights, including the top layers (include_top=True).
model = keras.applications.VGG16(weights='imagenet', include_top=True)
# Second model that shares the same input but outputs the layer named "fc2".
# NOTE(review): feat_extractor is not used by the cells visible here — confirm
# it is still needed before keeping it loaded.
feat_extractor = Model(inputs=model.input, outputs=model.get_layer("fc2").output)
print("feature extractor setup")
feat_extractor.summary()

In [None]:
# Upper bound on how many lines of the image list are used.
NUM_INDEXES = 600000

image_list = "/home/rte/data/paths/all_images_shuf.txt"

with open(image_list, "r") as f:
    lines = f.readlines()
    print("length of image text file:", len(lines))

# For each of the first NUM_INDEXES lines, keep the text that precedes the
# first ".jpg".
filepaths = [entry.split(".jpg")[0] for entry in lines[:NUM_INDEXES]]

In [None]:
# Open the SQLite database file and create the cursor used for reading.
db_path = "/home/rte/data/db/arxiv_db_images_600k.sqlite3"
db = sqlite3.connect(db_path)
c = db.cursor()

In [None]:
# Read every id from the images table into a flat list.
get_sql = """
            SELECT id from images
"""
rows = c.execute(get_sql).fetchall()
# Each row holds only the selected id column.
indexes = [row_id for (row_id,) in rows]
print(len(indexes))

In [None]:
# Run the classifier on every image id and store the decoded predictions
# in the "vggpred" column of the matching row of the images table.
write_db = sqlite3.connect(db_path)
write_cursor = write_db.cursor()

image_folder = "/mnt/hd2/images/all/"

# Number of rows written between commits.
COMMIT_EVERY = 1000

set_sql = """
            UPDATE images
            SET "vggpred" = ?
            WHERE id = ?
        """


def _format_predictions(decoded):
    """Serialise decoded predictions as ``"label 0.1234,label 0.0567,..."``.

    Args:
        decoded: iterable of 3-item tuples; the second item is used as the
            label and the third as the probability.

    Returns:
        One comma-separated string with an entry per tuple and no trailing
        comma, whatever the number of tuples.
    """
    return ",".join(
        str(label) + " " + "{:06.4f}".format(prob) for _, label, prob in decoded
    )


write_cursor.execute("BEGIN TRANSACTION;")

for i, f in enumerate(indexes):
    # Images are stored as <id>.jpg inside image_folder.
    img, x = load_image(os.path.join(image_folder, str(f) + ".jpg"))
    predictions = model.predict(x)
    print("-----", f)
    # Only one image is predicted at a time, so take the first result list.
    pred_text = _format_predictions(decode_predictions(predictions)[0])
    write_cursor.execute(set_sql, (pred_text, f))

    # Commit periodically; skip i == 0 so the first batch is not a
    # single-row commit.
    if i > 0 and i % COMMIT_EVERY == 0:
        print("committing changes:", i)
        write_db.commit()
        write_cursor.execute("BEGIN TRANSACTION;")

# Flush whatever is left in the final, partial batch.
write_db.commit()
