# 1 - Image Embeddings

In [1]:
import pandas as pd
import requests
import re
import os
import glob
import numpy as np

from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

### Create VGG16 model

In [2]:
base_model = VGG16(weights='imagenet')
model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc2').output)
image_size = (224, 224)

### Generate image embeddings

In [3]:
filenames = glob.glob('../data/images/*jpg')

features = []
item_id = []

for i, filename in enumerate(filenames):
    try:
        img = image.load_img(filename, target_size=image_size)
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # extract features  
        feature = model.predict(x)
        flat = feature.flatten()
        features.append(flat)
        item_id.append(filename.split('/')[-1].split('.')[0])
    except:
        continue
    if i % 100 == 0:
        print ("Processed image: " + str(i))

Processed image: 0
Processed image: 100
Processed image: 200
Processed image: 300
Processed image: 400
Processed image: 500
Processed image: 600
Processed image: 700
Processed image: 800
Processed image: 900
Processed image: 1000
Processed image: 1100
Processed image: 1200


In [4]:
items = pd.DataFrame({'item_id': item_id, 'image_features': features})
items

Unnamed: 0,image_features,item_id
0,"[0.0, 0.0, 0.0, 1.210973, 0.0, 0.0, 2.1445055,...",433
1,"[0.0, 0.88430643, 0.51349455, 0.0, 0.0, 7.3472...",175
2,"[0.0, 0.0, 1.5304266, 0.29221326, 0.0, 0.0, 0....",510
3,"[0.14858103, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0...",27
4,"[0.0, 0.0, 1.5763377, 0.0, 0.0, 5.456106, 0.0,...",793
...,...,...
1231,"[1.8058506, 4.474393, 0.0, 0.0, 0.18054026, 0....",252
1232,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.34754306, 0.0, 0.0...",727
1233,"[3.2949443, 0.9603162, 0.0, 0.0, 0.0, 0.0, 0.0...",365
1234,"[0.0, 0.0, 0.16038784, 0.0, 0.0, 3.285229, 0.0...",611


### Serialize VGG16 embeddings

In [5]:
out_path = '../data/image_vecs_vgg16.pkl'

items.to_pickle(out_path)