In [None]:
import numpy as np
import pandas as pd
from keras.applications.vgg16 import VGG16
from keras.layers import Input, Dense, Dropout, Flatten, Lambda
from keras.models import Model, model_from_json
from sklearn.preprocessing import MultiLabelBinarizer
from keras.preprocessing.image import image
from tqdm import tqdm
import h5py
import sys
import tensorflow as tf
sys.setrecursionlimit(4000)
from sklearn.metrics import f1_score
from model_functions import groupwise_average, bp_mll_loss, f1_m
from generators import batch_generator
from helper_functions import get_batches
from images_functions import get_indxes
import config

In [None]:
# Directory holding the training photos; the path is defined in the local `config` module.
TRAIN_DIR = config.TRAIN_DIR
#TEST_DIR = "D:/test_photos/"

# Photo-id -> business-id mapping for the training photos, taken ready-made from `config`.
# (The disabled line below shows the test mapping being read from a CSV file instead.)
#test_photo_to_biz = pd.read_csv("D:/test_photo_to_biz.csv")
train_photo_to_biz = config.train_photo_to_biz

# Labels attached to each business, also supplied by `config`.
# NOTE(review): the name `labels` is rebound further down by the get_batches(...) cell.
labels = config.labels

In [None]:
# Binarizer used later to turn the per-business label collections into 0/1 indicator rows.
MB = MultiLabelBinarizer()

# Pretrained VGG16 convolutional base (ImageNet weights, classifier head removed).
VGG_model = VGG16(weights='imagenet', include_top = False)

# Image input: 224x224 RGB photos.
input1 = Input(shape=(224, 224, 3), name='image_input')

conv_out = VGG_model(input1)

flattened = Flatten()(conv_out)

# Second input: one scalar per photo, passed to groupwise_average together with the features.
# NOTE(review): presumably a group/business index used for the averaging -- confirm in model_functions.
input2 = Input(shape=(1,))

# 25088 = 7 * 7 * 512, the flattened VGG16 feature map for a 224x224 input.
mean = Lambda(groupwise_average, output_shape=(25088,))([flattened, input2])

dense1 = Dense(4096, activation="relu")(mean)

dropout = Dropout(.25)(dense1)

dense2 = Dense(256, activation="relu")(dropout)

# Multi-label head: each of the 9 outputs is an independent score in [0, 1].
# A softmax here would force the 9 scores to sum to 1, so at most one label could
# ever exceed 0.5 -- wrong for a multi-label target trained with the BP-MLL ranking loss.
out = Dense(9, activation="sigmoid")(dense2)


# End-to-end model: (photos, per-photo scalar) -> 9 label scores.
model = Model(inputs=[input1, input2], outputs=out)
model.compile(optimizer="adam", loss=bp_mll_loss, metrics=[f1_m])

In [None]:
# Build the batches and their matching labels from the photo -> business mapping.
# The raw labels are read from `config` directly rather than from the `labels` name
# that this very cell rebinds, so re-running the cell does not feed its own output
# back into get_batches.
batches, labels = get_batches(train_photo_to_biz, config.labels)

In [None]:
# Binary indicator matrix of the labels returned by get_batches.
Y_train = MB.fit_transform(labels)

# Load the list of sorted lists with indexes of photos according to their distances to others.
# Dataset names carry a 6-character prefix followed by an integer, so they are ordered
# numerically (plain name order would put e.g. "...10" before "...2").
# The context manager guarantees the file is closed even if a read fails.
with h5py.File("sorted_indxes.h5", "r") as indexes_file:
    datasets = list(indexes_file)
    ordered_names = sorted(datasets, key=lambda name: int(name[6:]))
    sorted_indxes = [list(indexes_file[name][:]) for name in ordered_names]

# sorted_indxes = get_indxes(batches, TRAIN_DIR)

# Split boundaries, expressed in batch positions:
# [0, 20) -> validation, [20, 1900) -> training, [1900, end) -> test.
# NOTE(review): `batches` and `sorted_indxes` are sliced with the same bounds, which
# assumes they are aligned position by position -- confirm they have equal length.
VALIDATION_END = 20
TRAIN_END = 1900

train_batches = batches[VALIDATION_END:TRAIN_END]
train_indxes = sorted_indxes[VALIDATION_END:TRAIN_END]

validation_batches = batches[:VALIDATION_END]
validation_indxes = sorted_indxes[:VALIDATION_END]

test_batches = batches[TRAIN_END:]
test_indxes = sorted_indxes[TRAIN_END:]



In [None]:
#Training process
# Generators that feed training() with training and validation batches.
gen = batch_generator(train_batches, train_indxes, TRAIN_DIR, Y_train, [])
validation_gen = batch_generator(validation_batches, validation_indxes, TRAIN_DIR, Y_train, [])
# Lowest validation loss seen so far. Starting at +inf (instead of an arbitrary 1000)
# guarantees that the first round always writes the "best" checkpoint files, which the
# testing cell later loads.
best_loss = float("inf")
def training(model, train_steps=6000, validation_steps=50):
    """Run one training round, validate it, and checkpoint the model.

    Pulls `train_steps` batches from the module-level `gen` and trains on each,
    then averages loss and F1 over `validation_steps` batches from the
    module-level `validation_gen`. The architecture and weights are always
    written to model2.json / weights2.h5; when the averaged validation loss
    beats the module-level `best_loss`, they are also written to
    best_model2.json / best_weights2.h5 and `best_loss` is updated.

    Args:
        model: compiled Keras model taking [batch[0], batch[1]] as inputs and
            batch[2] as targets, with exactly one metric (read as the F1 score).
        train_steps: number of training batches per round.
        validation_steps: number of validation batches per round; must be >= 1.

    Raises:
        ValueError: if `validation_steps` is smaller than 1.
    """
    global best_loss

    if validation_steps < 1:
        raise ValueError("validation_steps must be at least 1")

    print("")

    #Training
    for _ in tqdm(range(train_steps)):
        batch = next(gen)
        model.train_on_batch([batch[0], batch[1]], batch[2])

    #Validating
    # NOTE(review): evaluate() splits its input into mini-batches (default batch_size
    # is 32) and prints a progress bar per call; test_on_batch() may be the closer
    # counterpart of train_on_batch() -- confirm against groupwise_average before switching.
    evl = []
    for _ in range(validation_steps):
        batch = next(validation_gen)
        evl.append(model.evaluate([batch[0], batch[1]], batch[2]))

    # Each evaluate() result is [loss, f1]; average both over the validation batches.
    Loss = sum(result[0] for result in evl) / len(evl)
    F1_score = sum(result[1] for result in evl) / len(evl)

    print("Loss: "+str(Loss))
    print("F1_score: "+str(F1_score))

    # Checkpoint of the most recent round.
    model_json = model.to_json()
    with open("model2.json", "w+") as f:
        f.write(model_json)
    model.save_weights("weights2.h5")

    #Saving best model
    if Loss < best_loss:
        print(1)
        with open("best_model2.json", "w+") as f:
            f.write(model_json)
        model.save_weights("best_weights2.h5")
        best_loss = Loss

    # The model is deliberately not rebuilt from the checkpoint here: the caller keeps
    # training the same in-memory `model`, so a copy reloaded into a local name would be
    # thrown away on return and only cost time and memory every round.

In [None]:
# Twelve consecutive training rounds on the same in-memory model; every round
# validates and checkpoints inside training().
round_count = 12
for round_index in range(round_count):
    training(model)

In [None]:
#Testing process

# Restore the weights of the round with the lowest validation loss, then recompile.
model.load_weights("best_weights2.h5")
model.compile(optimizer="adam", loss=bp_mll_loss, metrics=[f1_m])

# Containers filled during testing: `preds` receives one prediction array per batch
# from testing(); `businesses` is handed to the generator.
# NOTE(review): presumably batch_generator appends the business ids it yields to
# `businesses` -- confirm in generators.py.
preds = list()
businesses = list()

# NOTE(review): this rebinds `gen`, the same global name training() reads from.
gen = batch_generator(test_batches, test_indxes, TRAIN_DIR, Y_train, businesses, 2, 1)

def testing(model, steps=300):
    """Predict on batches from the module-level `gen` and collect the results.

    Draws `steps` batches from `gen`; for every non-empty batch the model's
    prediction on [batch[0], batch[1]] is appended to the module-level `preds`.

    Args:
        model: Keras model taking [batch[0], batch[1]] as inputs.
        steps: number of batches to draw from the generator.
    """
    for _ in tqdm(range(steps)):

        batch = next(gen)
        # Check the batch just drawn. The previous check looked at the global `batches`
        # list (every batch of the dataset), which is never empty, so nothing was skipped.
        if len(batch) != 0:
            preds.append(model.predict([batch[0], batch[1]]))

        # Release the batch before the next one is generated.
        del batch

In [None]:
# Run inference on the test generator; predictions accumulate in the global `preds` list.
testing(model)

In [None]:
# Persist the test results: one "preds<i>" dataset per predicted batch plus a single
# "businesses" dataset. The context manager closes the file even if a write fails,
# so a partial run does not leave the HDF5 file open.
# NOTE(review): np.array(businesses) must have a dtype h5py can store -- confirm what
# the generator puts into `businesses`.
with h5py.File("model2_result.h5", "w") as results_file:

    for i, batch_preds in enumerate(preds):
        results_file.create_dataset(f"preds{i}", data=np.array(batch_preds))

    results_file.create_dataset("businesses", data=np.array(businesses))