# Import packages

In [None]:
import tensorflow as tf
import pandas as pd

In [None]:
from src.similarity_search.utils import *
from src.siamese.siamese_image import *
from src.similarity_search.similarity_search_image import *

# Train embedding model for Siamese CNN model

The embedding models will be trained with triplet loss.  
Prerequisite: generate_training_data.ipynb has been run and the training datasets have been generated.

In [None]:
# Locations used by the training cells below (relative to the working directory).
image_path = "Images/"
# Folder holding the single-category training datasets.
input_path = "TrainingData/SingleCategory/"
# Folder holding the datasets built from all categories.
ditto_input_path = "TrainingData/AllCategories/"

In [None]:
import matplotlib.pyplot as plt
def plot(losses, val_losses):
    """Draw the training and validation loss curves in one figure and show it.

    Args:
        losses: sequence of training-loss values, plotted with label 'Loss'.
        val_losses: sequence of validation-loss values, plotted with label 'val_loss'.
    """
    labelled_curves = ((losses, 'Loss'), (val_losses, 'val_loss'))
    for values, curve_label in labelled_curves:
        plt.plot(values, label=curve_label)
    plt.legend(loc='best')
    plt.grid(True)
    plt.show()

In [None]:
# Imported here explicitly so the cell does not rely on `os` leaking in through a star import.
import os

# You can replace these paths with other datasets in the SingleCategory or AllCategories folders.
# In generate_training_data.ipynb the generated triplets were split into training, test and validation datasets.
# Since the triplets are only needed for training the embedding models, all three splits are merged for training.
data_paths = [
    input_path + f"art_{split}_img_triplets.jsonl"
    for split in ("train", "test", "valid")
]
anchors_train, pos_train, neg_train, Y_train = load_triplett_training_data(data_paths, image_path)

# Specify which backbone network is trained: "VGG", "ResNet" or the simple self-defined network "CNN".
siamese_net = SiamesImage("CNN")  # VGG ResNet CNN

# Directory the trained model is saved to; exist_ok=True makes re-running the cell safe.
os.makedirs("Model", exist_ok=True)

losses, val_losses = siamese_net.train(
    anchors_train, pos_train, neg_train, Y_train,
    epochs=20, save_model_name="Model/embedding_model", alpha=0.2,
)
plot(losses, val_losses)

# Test image models trained on all categories

Prerequisites: ResNet was used as the embedding model and trained with triplet loss on the data of all categories.  
The trained model should be saved in Model/CNN_trained_with_all_categories

In [None]:
# Locations used by the all-categories evaluation cell below.
image_path = "Images/"
input_path = "TrainingData/AllCategories/"
# Folder the trained embedding models are loaded from.
model_path = "Model/CNN_trained_with_all_categories/"

In [None]:
# Imported explicitly so the cell does not depend on names leaking in through star imports.
import matplotlib.pyplot as plt
import numpy as np

## Data of all categories
(
    X_train_image_left, X_train_image_right,
    X_test_image_left, X_test_image_right,
    Y_train, Y_test,
) = load_and_resize_image(
    input_path + "train.jsonl",
    input_path + "test.jsonl",
    image_path,
    triplet=False,
)

# Load the model: use "embedding_self_all_02" for the self-defined model, "embedding_resnet_02" for the ResNet model
img_model = ImageClassifier(model_path + "embedding_resnet_02", load=True)

# Fit on the training data; the returned optimal threshold is reused for testing below.
(
    Precision, Recall, interpolated_precision,
    F1, optimal_threshold, scores,
) = img_model.train(X_train_image_left, X_train_image_right, Y_train)
# scores.dump('result/img_cosine_sia_resnet_allcat.txt')
print(f"Optimal threshold for image similarity search: {optimal_threshold}")
print(f"Maximal F1 of image similarity search: {np.max(F1)}")

# The `plot(losses, val_losses)` helper defined in this notebook accepts only two
# curves, so calling it with four metrics raises a TypeError; draw them directly.
# NOTE(review): assumes each metric is a 1-D sequence — confirm against ImageClassifier.train.
for curve, label in (
    (F1, "F1"),
    (Precision, "Precision"),
    (Recall, "Recall"),
    (interpolated_precision, "Interpolated precision"),
):
    plt.plot(curve, label=label)
plt.legend(loc='best')
plt.grid(True)
plt.show()

# Test with the threshold found during fitting
F1_test, Precision_test, Recall_test, Accuracy_test = img_model.test(
    X_test_image_left,
    X_test_image_right,
    Y_test,
    optimal_threshold,
)
print("Evaluate image model on test data")
print(f" F1 = {F1_test}\n Precision = {Precision_test}\n Recall = {Recall_test}\n Accuracy = {Accuracy_test}")

# Test CNN model

Prerequisites: A self-defined CNN was used as the embedding model and trained with triplet loss, using different margin values, on 
different product categories.  
The trained models are saved in Model/CNN_per_category

In [None]:
# Locations used by the per-category evaluation cells below.
image_path = "Images/"
input_path = "TrainingData/SingleCategory/"
# Root folder of the trained models; the evaluation loop appends alpha_<margin>/embedding_<category>.
model_path = "Model/CNN_per_category/"

In [None]:
# Product categories to evaluate; also used as the "category" column of the result table.
cats = ['art', 'pet', 'home', 'garden', 'sport', 'toy', 'tool']
# Margin identifiers; each one gets its own column in the result table.
margins = ["02", "06", "08", "09"]
# Result table: the category column plus one (initially empty) list per margin.
result = {"category": cats, **{margin_key: [] for margin_key in margins}}

In [None]:
# For every margin, evaluate the matching per-category model and collect the test F1 scores.
for margin in margins:
    F1_scores = []
    for cat in cats:
        # Load the train/test data of the current category.
        (
            X_train_image_left, X_train_image_right,
            X_test_image_left, X_test_image_right,
            Y_train, Y_test,
        ) = load_and_resize_image(
            input_path + f"{cat}_train.jsonl",
            input_path + f"{cat}_test.jsonl",
            image_path,
            triplet=False,
        )

        # Load the model saved for this margin and category.
        img_model = ImageClassifier(model_path + f"alpha_{margin}/embedding_{cat}", load=True)

        # Fit on the training data; the returned optimal threshold is reused for testing.
        (
            Precision, Recall, interpolated_precision,
            F1, optimal_threshold, scores,
        ) = img_model.train(X_train_image_left, X_train_image_right, Y_train)

        # Test with the threshold found during fitting.
        F1_test, Precision_test, Recall_test, Accuracy_test = img_model.test(
            X_test_image_left,
            X_test_image_right,
            Y_test,
            optimal_threshold,
        )
        print(f"Evaluate image model on test data of category {cat} and margin {margin}")
        print(f" F1 = {F1_test}\n Precision = {Precision_test}\n Recall = {Recall_test}\n Accuracy = {Accuracy_test}")
        F1_scores.append(F1_test)

    # Store the scores of this margin as one column of the result table.
    result[margin] = F1_scores

In [None]:
# Turn the collected scores into a table (one row per category, one column per margin)
# and leave it as the last expression so the notebook displays it.
df_result = pd.DataFrame(result)
df_result