In [None]:
import numpy as np
import pandas as pd
from keras.applications.vgg19 import VGG19
from keras.layers import Input, Dense, Dropout, Flatten
from keras.models import Model, model_from_json
from sklearn.preprocessing import MultiLabelBinarizer
from keras.preprocessing.image import image
from tqdm import tqdm
from keras.optimizers import SGD
from keras.regularizers import l2
import h5py
from model_functions import f1_m, bp_mll_loss
from generators import batch_generator_model3
import config
from helper_functions import get_filenames

In [None]:
# Directory holding the training photos, taken from the shared project config.
TRAIN_DIR = config.TRAIN_DIR
# Test-photo directory is disabled here; kept only for reference.
#TEST_DIR = "D:/test_photos/"

# Photo-id -> business-id mapping for the training photos
# (presumably loaded from a CSV inside `config` -- confirm there).
# The test-set counterpart is disabled; kept only for reference.
#test_photo_to_biz = pd.read_csv("D:/test_photo_to_biz.csv")
train_photo_to_biz = config.train_photo_to_biz

# Labels associated with each business (also provided by `config`).
labels = config.labels

In [None]:
# Initial learning rate for the first training stage.
lr = 0.001

# Pretrained VGG19 convolutional base (ImageNet weights, no classifier top)
# used as the backbone.
VGG_model = VGG19(weights='imagenet', include_top = False)

# L2 weight penalty shared by every dense layer of the head.
regularizer = l2(5e-4)

# Classification head stacked on top of the backbone.
input_layer = Input(shape=(224, 224, 3), name='image_input')
conv_out = VGG_model(input_layer)
flattened = Flatten()(conv_out)
dense1 = Dense(4096, activation="relu", kernel_regularizer=regularizer)(flattened)
dropout1 = Dropout(0.2)(dense1)
dense2 = Dense(4096, activation="relu", kernel_regularizer=regularizer)(dropout1)
dropout2 = Dropout(0.5)(dense2)

# Nine label scores. The targets are multi-label (the notebook builds a
# MultiLabelBinarizer over labels 0..8), so each output must be an independent
# probability: sigmoid instead of softmax. Softmax forces the nine scores to
# sum to 1, which prevents several labels from being confidently predicted at
# the same time.
out = Dense(9, activation="sigmoid", kernel_regularizer=regularizer)(dropout2)

model = Model(inputs=input_layer, outputs=out)

sgd = SGD(lr=lr, momentum=0.5, decay=.98)

model.compile(optimizer=sgd, loss=bp_mll_loss, metrics=[f1_m])


# Binarizer fitted on the nine label ids 0..8.
MB = MultiLabelBinarizer()
MB.fit(np.array([list(range(9))]))

# List passed to the test generator and later written to the output file
# (presumably the generator appends business ids to it -- confirm in generators).
businesses = []

# Split the photo list: first 1000 files for validation, the next 200000 for
# training, and everything after index 201000 for the held-out test set.
files = get_filenames(TRAIN_DIR)
validation_set, test_set, files = files[:1000], files[201000:], files[1000:201000]

# One batch generator per split.
gen = batch_generator_model3(files, [], train_photo_to_biz, labels)
validation_gen = batch_generator_model3(validation_set, [], train_photo_to_biz, labels)
test_gen = batch_generator_model3(test_set, businesses, train_photo_to_biz, labels)

In [None]:
# Stage 1: train only the final output layer.

# Freeze every layer inside the VGG19 backbone.
for backbone_layer in VGG_model.layers:
    backbone_layer.trainable = False

# Freeze every top-level layer of the full model...
for top_layer in model.layers:
    top_layer.trainable = False

# ...and re-enable just the last (output) layer.
model.layers[-1].trainable = True

# Recompile so the new trainable flags take effect.
model.compile(optimizer=sgd, loss=bp_mll_loss, metrics=[f1_m])

In [None]:
best_loss = 1000

# Lowest validation loss reached by any `training` call so far. The
# `best_loss` argument is passed by value, so rebinding it inside the function
# never reaches the caller; this module-level record is what lets repeated
# calls compare against the real best value instead of the initial 1000.
_best_loss_seen = [best_loss]

def training(model, lr, best_loss, train_steps=11693, val_steps=50):
    """Run one training pass and one validation pass, then checkpoint the model.

    Pulls `train_steps` batches from the notebook-level `gen` generator and
    trains on each non-empty one, then averages loss and F1 over `val_steps`
    batches from `validation_gen`. The current model is always written to
    ``model3.json`` / ``weights3.h5``; it is additionally written to
    ``best_model3.json`` / ``best_weights3.h5`` when its validation loss beats
    the best loss seen so far.

    Parameters
    ----------
    model : compiled Keras model, trained in place.
    lr : accepted for backward compatibility and not used. The optimizer is
        whatever the caller compiled the model with.
    best_loss : best validation loss known to the caller. The effective
        threshold is the minimum of this value and the best loss recorded by
        earlier calls.
    train_steps : number of batches drawn from `gen` (default 11693).
    val_steps : number of batches drawn from `validation_gen` (default 50).

    Returns
    -------
    The best validation loss after this pass.

    Raises
    ------
    RuntimeError
        If no non-empty validation batch was produced.

    Notes
    -----
    An earlier version reloaded the model from JSON and recompiled it with
    ``lr / 10`` at the end of each call, but only into local names, so it had
    no effect on the caller's model. That dead code was removed. To change the
    learning rate between stages, recompile the model in the calling cell.
    """
    print("")

    for _ in tqdm(range(train_steps)):
        batch = next(gen)
        # A (0, 1)-shaped input is skipped (presumably how the generator
        # marks an empty batch -- confirm in generators).
        if batch[0].shape != (0, 1):
            model.train_on_batch(batch[0], batch[1])

    evl = []
    for _ in range(val_steps):
        batch = next(validation_gen)
        # Apply the same empty-batch guard as the training loop.
        if batch[0].shape != (0, 1):
            evl.append(model.evaluate(batch[0], batch[1]))

    if not evl:
        raise RuntimeError("validation produced no non-empty batches; cannot compute loss")

    # model.evaluate returns [loss, f1] because the model is compiled with
    # a single metric (f1_m).
    Loss = sum(result[0] for result in evl) / len(evl)
    F1_score = sum(result[1] for result in evl) / len(evl)

    print("Loss: "+str(Loss))
    print("F1_score: "+str(F1_score))

    # Always checkpoint the latest state.
    model_json = model.to_json()
    with open("model3.json", "w+") as f:
        f.write(model_json)
    model.save_weights("weights3.h5")

    # Checkpoint as "best" only when this pass really improves on every
    # previous pass, not just on the caller's (possibly stale) argument.
    best_so_far = min(best_loss, _best_loss_seen[0])
    if Loss < best_so_far:
        with open("best_model3.json", "w+") as f:
            f.write(model_json)
        model.save_weights("best_weights3.h5")
        best_so_far = Loss

    _best_loss_seen[0] = best_so_far
    return best_so_far

In [None]:
# Stage 1 schedule: three training/validation passes.
for epoch in range(3):
    training(model, lr, best_loss)

In [None]:
# Stage 2: unfreeze the last five backbone layers and the head, but keep the
# output layer fixed.

for backbone_layer in VGG_model.layers[-5:]:
    backbone_layer.trainable = True

# NOTE(review): model.layers includes the VGG_model wrapper itself. Depending
# on the Keras version, setting `trainable` on it may propagate to every
# backbone layer and unfreeze more than the last five -- confirm.
for top_layer in model.layers:
    top_layer.trainable = True

model.layers[-1].trainable = False

# Lower learning rate for fine-tuning; recompile to apply the new flags.
lr = 0.0001
sgd = SGD(lr=lr, momentum=0.5, decay=.98)
model.compile(optimizer=sgd, loss=bp_mll_loss, metrics=[f1_m])

In [None]:
# Stage 2 schedule: eight training/validation passes.
for epoch in range(8):
    training(model, lr, best_loss)

In [None]:
# Stage 3: fine-tune the earlier VGG19 layers while the classifier head, the
# last five backbone layers and the first backbone layer stay frozen.

# Freeze every top-level layer of the full model first. This also freezes the
# VGG_model wrapper, which is one of model.layers.
for top_layer in model.layers:
    top_layer.trainable = False

# Re-enable the backbone wrapper AFTER the blanket freeze. Doing the blanket
# freeze last (as before) left the wrapper non-trainable, which hides all of
# its weights from the optimizer, so this stage trained nothing.
VGG_model.trainable = True

# Set the per-layer flags explicitly so the result does not depend on whether
# the wrapper's flag propagates to its sublayers.
for backbone_layer in VGG_model.layers:
    backbone_layer.trainable = True

for backbone_layer in VGG_model.layers[-5:]:
    backbone_layer.trainable = False

VGG_model.layers[0].trainable = False

lr = 0.0001

sgd = SGD(lr=lr, momentum=0.5, decay=.98)

# Recompile so the new trainable flags take effect.
model.compile(optimizer=sgd, loss=bp_mll_loss, metrics=[f1_m])

In [None]:
# Stage 3 schedule: twelve training/validation passes.
for epoch in range(12):
    training(model, lr, best_loss)

In [None]:
# Restore the best checkpoint written by `training`. The context manager
# closes the file even if reading fails.
with open('best_model3.json', 'r') as json_file:
    loaded_model_json = json_file.read()

model = model_from_json(loaded_model_json)

model.load_weights("best_weights3.h5")

sgd = SGD(lr=lr, momentum=0.5, decay=.98)

model.compile(optimizer=sgd, loss=bp_mll_loss, metrics=[f1_m])

# Per-batch predictions and the matching ground-truth arrays for the test split.
preds = []
true = []

for _ in tqdm(range(1650)):

    batch = next(test_gen)

    preds.append(model.predict(batch[0]))

    true.append(batch[1])

In [None]:
# Write predictions, ground truth and business ids to one HDF5 file.
# The context manager closes (and flushes) the file even if a dataset write
# raises, and avoids shadowing the `file` builtin name.
with h5py.File("model3_output.h5", "w") as out_file:

    # One dataset per test batch: preds0, preds1, ...
    for idx, batch_preds in enumerate(preds):
        out_file.create_dataset(f"preds{idx}", data=np.array(batch_preds))

    # Matching ground-truth datasets: true0, true1, ...
    for idx, batch_true in enumerate(true):
        out_file.create_dataset(f"true{idx}", data=np.array(batch_true))

    # The list that was handed to the test generator.
    out_file.create_dataset("businesses", data=np.array(businesses))