In [29]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymysql
import io
import sys
import tensorflow_recommenders as tfrs

from detection import generate_download_signed_url_v4
from detection import get_similar_products_uri
from detection import query_product

from tensorflow.keras import backend
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from google.cloud import storage
from google.cloud import vision
from urllib.parse import urlparse
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from PIL import Image

In [6]:
#Setup Connection to mysql database
conn = pymysql.connect(
    host='35.221.181.94',
    port=int(3306),
    user="mkhoa",
    passwd='NTMK261194@dng',
    db="project",
    charset='utf8mb4')

cur = conn.cursor()

def query_item2room():
    '''
    Make dataframe for ImageDataGenerator
    
    '''
    query = f'''
    SELECT a.room, a.room_path, a.item, b.bucket, b.bucket_path
    FROM project.item2room a
    LEFT JOIN project.Files b ON a.item = b.id
    WHERE b.bucket_path like '%Products%'
    '''
    try:
        cur.execute(query)
    except Exception as err:
        print('ERROR BY SELECT:', err)
    result = cur.fetchall()
    result = pd.DataFrame(result, columns=['RoomID', 'RoomPath', 'ItemID', 'Bucket', 'BucketPath'])
    return result

def query_classes():
    '''
    Query for list of distinct room
    
    '''
    query = f'''
    SELECT distinct room
    FROM project.item2room
    '''
    try:
        cur.execute(query)
    except Exception as err:
        print('ERROR BY SELECT:', err)
    result = cur.fetchall()
    result = pd.DataFrame(result, columns=['RoomID'])
    return result

In [7]:
item2room = query_item2room()

In [8]:
classes = query_classes()

In [9]:
df = item2room[['RoomID', 'BucketPath']]
df_train, df_test = train_test_split(df, test_size=0.1)

In [10]:
df_train

Unnamed: 0,RoomID,BucketPath
130,30573,Images/Products/203793-0-903.347.30.jpg
157,30709,Images/Products/204202-0-391.505.88.jpg
158,30664,Images/Products/204221-0-702.858.58.jpg
12,30598,Images/Products/202816-0-502.954.72.jpg
8,30482,Images/Products/202549-0-490.473.41.jpg
...,...,...
90,30665,Images/Products/203457-0-802.340.19.jpg
195,30717,Images/Products/204384-0-902.874.13.jpg
32,30604,Images/Products/203063-0-201.150.38.jpg
147,30708,Images/Products/204114-0-802.396.82.jpg


In [11]:
project_id = 'abstract-veld-289612'
bucket_name = 'ftmle'
storage_client = storage.Client.from_service_account_json("./Credentials/abstract-veld-289612-327ddac80eba.json")

In [12]:
# Fit multi-label binarizer on the training set
lb = LabelBinarizer()
lb.fit(df_train['RoomID'])

# Print out label map 
#print('Labels: ')
label_map = {}
for i, label in enumerate(lb.classes_):
    label_map[i] = label
    #print('{}. {}'.format(i, label))

# Define num. of labels 
N_LABELS = len(lb.classes_)

# Fit multi-label binarizer on the training set
y_train = lb.transform(df_train['RoomID'])
y_val = lb.transform(df_test['RoomID'])

In [13]:
X_train = df_train['BucketPath'].values
X_test = df_test['BucketPath'].values

In [14]:
Y_train =  y_train
Y_test =  y_val

In [15]:
def load_bucket_image(path):
    '''
    Load GCS iamge from bucket
    
    '''
    path = str(path.numpy().decode("utf-8"))
    blob = storage_client.bucket(bucket_name).get_blob(path)
    img = blob.download_as_string()

    return img

def preprocess_image(bucket_path):
    '''
    Preprocess image from bucket path
    
    '''
    img = tf.py_function(load_bucket_image, [bucket_path], tf.string)
    img = tf.image.decode_image(img, channels=3, expand_animations = False)
    img = tf.image.resize(img, (244, 244))
    img = img/255
    
    return img

# The tuples are unpacked into the positional arguments of the mapped function
def load_and_preprocess_from_path_label(path, label):
  return preprocess_image(path), label

In [16]:
ds_train = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
ds_train_map = ds_train.map(load_and_preprocess_from_path_label)

In [17]:
ds_test = tf.data.Dataset.from_tensor_slices((X_test, Y_test))
ds_test_map = ds_test.map(load_and_preprocess_from_path_label)

In [18]:
#Parameter
BATCH_SIZE = 32
AUTOTUNE = tf.data.experimental.AUTOTUNE
final_train_dataset = ds_train_map.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
final_test_dataset = ds_test_map.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [19]:
final_train_dataset

<PrefetchDataset shapes: ((None, 244, 244, None), (None, 125)), types: (tf.float32, tf.int64)>

In [20]:
# define cnn model
def create_model(in_shape=(244, 244, 3), out_shape=N_LABELS):
    # load model
    model = VGG16(include_top=False, input_shape=in_shape)
    # mark loaded layers as not trainable
    for layer in model.layers:
        layer.trainable = False
    # allow last vgg block to be trainable
    model.get_layer('block5_conv1').trainable = True
    model.get_layer('block5_conv2').trainable = True
    model.get_layer('block5_conv3').trainable = True
    model.get_layer('block5_pool').trainable = True
    # add new classifier layers
    flat1 = Flatten()(model.layers[-1].output)
    class1 = Dense(244, activation='relu', kernel_initializer='he_uniform')(flat1)
    output = Dense(out_shape, activation='sigmoid')(class1)
    # define new model
    model = Model(inputs=model.inputs, outputs=output)
    # compile model
    opt = SGD(lr=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics='acc')
    return model

In [21]:
model = create_model()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [22]:
history = model.fit_generator(final_train_dataset, 
    steps_per_epoch=len(final_train_dataset),
    validation_data=final_test_dataset, 
    validation_steps=len(final_test_dataset), 
    epochs=10, 
    verbose=1)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [23]:
model.save('model.h5')