# Import packages

In [1]:
import tensorflow as tf
import pandas as pd

In [2]:
from src.similarity_search.metric import *
from src.similarity_search.similarity_search_image import *
from src.similarity_search.similarity_search_text import *
from src.similarity_search.mixed_model import *
from src.similarity_search.utils import *

# Evaluate tf-idf, Siamese CNN and multi-modal baseline model

Prerequisites: `generate_training_data.ipynb` has been run and the training datasets have been generated.

In [7]:
# Directory of the per-category pair files, read below as "<cat>_train.jsonl" / "<cat>_test.jsonl".
input_path = "TrainingData/SingleCategory/"
# Directory handed to load_image_per_ID when the product images are loaded.
image_path = "Images/"
# Prefix of the saved per-category image embedding models ("embedding_<cat>").
model_path = "Model/TrainedModel/"
# Directory of the all-category "train.jsonl" / "test.jsonl" files that the Ditto outputs are joined with.
ditto_input_path = "TrainingData/AllCategories/"

In [None]:
# For every category: fit/evaluate the text model, the image model and their
# weighted combination, collecting everything in res[cat] for the results pickle.
cats = ['art', 'pet', 'home', 'garden', 'sport', 'toy', 'tool']
res = {}
image_size = (32, 32)  # target size the product images are loaded at

for cat in cats:
    print("category: ", cat)
    res[cat] = {}

    # ---- Load the labelled pairs of this category ---------------------------
    X_ID_left_train, X_ID_right_train, X_text_left_train, X_text_right_train, Y_train = \
        get_data(input_path + f"{cat}_train.jsonl")
    X_ID_left_test, X_ID_right_test, X_text_left_test, X_text_right_test, Y_test = \
        get_data(input_path + f"{cat}_test.jsonl")
    Y_train = np.asarray(Y_train)
    Y_test = np.asarray(Y_test)
    res[cat]["X_ID_left_train"] = X_ID_left_train
    res[cat]["X_ID_right_train"] = X_ID_right_train
    res[cat]["X_ID_left_test"] = X_ID_left_test
    res[cat]["X_ID_right_test"] = X_ID_right_test
    res[cat]["Y_train"] = Y_train
    res[cat]["Y_test"] = Y_test

    # ---- Text model: fit on the training pairs ------------------------------
    similarity_search_text_model = TextClassifier()
    Precision, Recall, interpolated_precision, F1, optimal_threshold_text, scores_text = \
        similarity_search_text_model.train(X_text_left_train, X_text_right_train, Y_train)
    res[cat]["Precision_text_train"] = Precision
    res[cat]["Recall_text_train"] = Recall
    res[cat]["interpolated_precision_text_train"] = interpolated_precision
    res[cat]["F1_text_train"] = F1
    res[cat]["optimal_threshold_text"] = optimal_threshold_text
    res[cat]["scores_text_train"] = scores_text
    # Fixed label: this is the threshold of the *text* model.
    print(f"Optimal threshold for text similarity search: {optimal_threshold_text}")
    print(f"Maximal F1 of text similarity search: {np.max(F1)}")

    # ---- Text model: evaluate on the test pairs with the fitted threshold ---
    metrics, scores_text_test = similarity_search_text_model.test(
        X_text_left_test, X_text_right_test,
        Y_test, optimal_threshold_text, return_score=True)
    F1_test, Precision_test, Recall_test, Accuracy_test = metrics
    res[cat]["F1_text_test"] = F1_test
    res[cat]["Precision_text_test"] = Precision_test
    res[cat]["Recall_text_test"] = Recall_test
    # Stored under a text-specific key so the image block below cannot overwrite it.
    res[cat]["Accuracy_text_test"] = Accuracy_test
    res[cat]["scores_text_test"] = scores_text_test
    print("Evaluate text model on test data")
    print(f" F1 = {F1_test}\n Precision = {Precision_test}\n Recall = {Recall_test}\n Accuracy = {Accuracy_test}")

    # ---- Image data ---------------------------------------------------------
    X_train_image_left = load_image_per_ID(X_ID_left_train, image_path, target_size=image_size)
    X_train_image_right = load_image_per_ID(X_ID_right_train, image_path, target_size=image_size)
    X_test_image_left = load_image_per_ID(X_ID_left_test, image_path, target_size=image_size)
    X_test_image_right = load_image_per_ID(X_ID_right_test, image_path, target_size=image_size)
    # Reshape (not resize): insert a singleton axis so every sample is (1, 32, 32, 3).
    X_train_image_left = X_train_image_left.reshape((len(X_train_image_left), 1, *image_size, 3))
    X_train_image_right = X_train_image_right.reshape((len(X_train_image_right), 1, *image_size, 3))
    X_test_image_left = X_test_image_left.reshape((len(X_test_image_left), 1, *image_size, 3))
    X_test_image_right = X_test_image_right.reshape((len(X_test_image_right), 1, *image_size, 3))

    # ---- Image model: load the saved embedding model and fit the threshold --
    img_model = ImageClassifier(f"{model_path}embedding_{cat}", load=True)
    Precision, Recall, interpolated_precision, F1, optimal_threshold_img, scores_img = \
        img_model.train(X_train_image_left, X_train_image_right, Y_train)
    print(f"Optimal threshold for image similarity search: {optimal_threshold_img}")
    # Fixed label: this is the F1 of the *image* model.
    print(f"Maximal F1 of image similarity search: {np.max(F1)}")
    res[cat]["Precision_img_train"] = Precision
    res[cat]["Recall_img_train"] = Recall
    res[cat]["interpolated_precision_img_train"] = interpolated_precision
    res[cat]["F1_img_train"] = F1
    res[cat]["optimal_threshold_img"] = optimal_threshold_img
    res[cat]["scores_img_train"] = scores_img

    # ---- Image model: evaluate on the test pairs ----------------------------
    metrics, scores_img_test = img_model.test(
        X_test_image_left, X_test_image_right,
        Y_test, optimal_threshold_img, return_score=True)
    F1_test, Precision_test, Recall_test, Accuracy_test = metrics
    res[cat]["F1_img_test"] = F1_test
    res[cat]["Precision_img_test"] = Precision_test
    res[cat]["Recall_img_test"] = Recall_test
    res[cat]["Accuracy_img_test"] = Accuracy_test
    # Legacy key kept for existing readers of the pickle; as before it ends up
    # holding the image model's accuracy.
    res[cat]["Accuracy_test"] = Accuracy_test
    res[cat]["scores_img_test"] = scores_img_test
    print("Evaluate image model on test data")
    print(f" F1 = {F1_test}\n Precision = {Precision_test}\n Recall = {Recall_test}\n Accuracy = {Accuracy_test}")

    # ---- Combined model: weights/threshold from the training scores ---------
    mixed_model = MixClassifier(text_model=similarity_search_text_model, image_model=img_model)
    max_f1, coef_text, coef_image, opt_theta, scores = find_optimal_coef(scores_text, scores_img, Y_train)
    res[cat]["max_f1_mixed"] = max_f1
    res[cat]["coef_text"] = coef_text
    res[cat]["coef_image"] = coef_image
    res[cat]["opt_theta"] = opt_theta
    res[cat]["scores_train"] = scores
    print("Optimal values:")
    print(f"Maximal F1 = {max_f1}")
    print(f"Coefficient text = {coef_text}")
    print(f"Coefficient image = {coef_image}")
    print(f"Threshold = {opt_theta}")

    # ---- Combined model: evaluate on the test pairs -------------------------
    # score_text / score_image are returned by the call but not stored.
    F1, Precision, Recall, Accuracy, score_text, score_image, scores_test = mixed_model.test_combine_model(
        X_text_left_test, X_text_right_test,
        X_test_image_left, X_test_image_right,
        coef_text, coef_image, opt_theta, Y_test, return_score=True)
    res[cat]["F1_mixed"] = F1
    res[cat]["Precision_mixed"] = Precision
    res[cat]["Recall_mixed"] = Recall
    res[cat]["Accuracy_mixed"] = Accuracy
    res[cat]["scores_test_mixed"] = scores_test
    print("Evaluate mixed model on test data")
    print(f" F1 = {F1}\n Precision = {Precision}\n Recall = {Recall}\n Accuracy = {Accuracy}")

In [None]:
# One row per category: the category name followed by every value collected in res[cat],
# with the column names taken from the keys recorded for "art".
result_columns = ["category", *res["art"]]
df = pd.DataFrame(columns=result_columns)
for category_name, category_results in res.items():
    df.loc[len(df)] = [category_name, *category_results.values()]
df.to_pickle("Result/tfidf_cnn_multimodalbaseline.pkl")

# Evaluate Ditto per category

Prerequisites: the Ditto model has been trained and run on the generated train and test datasets, and its outputs have been saved in the `Result` folder.

In [None]:
def map_ditto_output(input_filename, output_filename):
    """Join Ditto predictions with the labelled pairs they were produced for.

    The two JSON-lines files are aligned by position: record i of
    ``output_filename`` is used as the prediction for record i of
    ``input_filename``.

    Args:
        input_filename: JSON-lines file whose records carry "ID1" and "ID2"
            (URLs containing ``https://www.amazon.com/dp/<id>``) and "label".
        output_filename: JSON-lines file whose records carry "match" and
            "match_confidence".

    Returns:
        dict mapping ``(id1, id2)`` to ``[label, score, pred]`` where ``pred``
        is ``int(match)`` and ``score`` is ``match_confidence`` when
        ``pred == 1`` and ``1 - match_confidence`` otherwise (presumably
        turning the confidence in the prediction into a match score).
        If the same ID pair occurs more than once, the last record wins.

    Raises:
        ValueError: if the two files hold a different number of records, or
            an ID does not contain the expected product URL.
    """
    import re  # local import so the function does not rely on a star import providing `re`

    product_url = re.compile(r'https://www\.amazon\.com/dp/(.+)')

    def _product_id(url):
        # Extract the part after ".../dp/"; fail with a readable message instead
        # of an AttributeError on None when the URL has another shape.
        found = product_url.search(url)
        if found is None:
            raise ValueError(f"unexpected product URL in {input_filename}: {url!r}")
        return found.group(1)

    preds = []
    scores = []
    with jsonlines.open(output_filename) as fh:
        for line in fh:
            pred = int(line["match"])
            confidence = line["match_confidence"]
            preds.append(pred)
            scores.append(confidence if pred == 1 else 1 - confidence)

    with jsonlines.open(input_filename) as fh:
        pairs = list(fh)

    # The join is purely positional, so a length mismatch means the files do
    # not belong together and every later row would be misaligned.
    if len(pairs) != len(preds):
        raise ValueError(
            f"{input_filename} has {len(pairs)} records but "
            f"{output_filename} has {len(preds)} predictions"
        )

    mapper = {}
    for line, score, pred in zip(pairs, scores, preds):
        key = (_product_id(line['ID1']), _product_id(line['ID2']))
        mapper[key] = [line["label"], score, pred]
    return mapper

In [None]:
# Build the (ID1, ID2) -> [label, score, pred] lookups by joining the all-category
# test/train pair files with the saved Ditto outputs for the same files.
mapper_test = map_ditto_output(f"{ditto_input_path}test.jsonl", "Result/ditto_output_test.jsonl")
mapper_train = map_ditto_output(f"{ditto_input_path}train.jsonl", "Result/ditto_output_train.jsonl")

In [None]:
# Split the all-category Ditto results into per-category train/test lists by
# looking up every (left ID, right ID) pair of the category files in the mappers.
cats = ['art', 'pet', 'home', 'garden', 'sport', 'toy', 'tool']
res = {}

for cat in cats:
    res[cat] = {}
    # The text columns are returned by get_data but not needed in this cell.
    X_ID_left_train, X_ID_right_train, X_text_left_train, X_text_right_train, Y_train = get_data(
        input_path + f"{cat}_train.jsonl")
    X_ID_left_test, X_ID_right_test, X_text_left_test, X_text_right_test, Y_test = get_data(
        input_path + f"{cat}_test.jsonl")
    Y_train = np.asarray(Y_train)
    Y_test = np.asarray(Y_test)

    # Each mapper entry is [label, score, pred]; a missing pair raises KeyError.
    test_entries = [mapper_test[(X_ID_left_test[i], X_ID_right_test[i])]
                    for i in range(len(X_ID_left_test))]
    train_entries = [mapper_train[(X_ID_left_train[i], X_ID_right_train[i])]
                     for i in range(len(X_ID_left_train))]

    ditto_preds_test = [entry[2] for entry in test_entries]
    ditto_scores_test = [entry[1] for entry in test_entries]
    ditto_preds_train = [entry[2] for entry in train_entries]
    ditto_scores_train = [entry[1] for entry in train_entries]

    res[cat].update({
        "ditto_scores_train": ditto_scores_train,
        "ditto_preds_train": ditto_preds_train,
        "Y_train": Y_train,
        "ditto_scores_test": ditto_scores_test,
        "ditto_preds_test": ditto_preds_test,
        "Y_test": Y_test,
    })

In [None]:
# One row per category holding the Ditto scores, predictions and labels for
# the train and test pairs collected in res.
df = pd.DataFrame(columns=['Category', "ditto_scores_train", "ditto_preds_train", "Y_train",
                           "ditto_scores_test", "ditto_preds_test", "Y_test"])
for cat in cats:
    df.loc[len(df)] = [cat, res[cat]["ditto_scores_train"], res[cat]["ditto_preds_train"], res[cat]["Y_train"],
                       res[cat]["ditto_scores_test"], res[cat]["ditto_preds_test"], res[cat]["Y_test"]]
# Written into Result/ because that is where the multi-modal evaluation reads
# "Result/ditto_result_per_cat.pkl" from (previously saved to the working directory).
df.to_pickle("Result/ditto_result_per_cat.pkl")

# Evaluate multi-modal model

In [None]:
# Per-category results of the tf-idf / Siamese CNN / multi-modal baseline evaluation;
# the image test scores and the text, image and baseline F1 values are read from it below.
df_img = pd.read_pickle("Result/tfidf_cnn_multimodalbaseline.pkl")
# Per-category Ditto scores, predictions and labels.
df_ditto = pd.read_pickle("Result/ditto_result_per_cat.pkl")

In [None]:
def eval_simple(labels, preds):
    """Score hard predictions against the ground-truth labels.

    Args:
        labels: ground-truth labels.
        preds: predicted labels, in the same order as ``labels``.

    Returns:
        Tuple ``(F1, Precision, Recall, Accuracy)`` as produced by
        ``f1_score``, ``precision_score``, ``recall_score`` and
        ``accuracy_score`` called with ``(labels, preds)``.
    """
    return (
        f1_score(labels, preds),
        precision_score(labels, preds),
        recall_score(labels, preds),
        accuracy_score(labels, preds),
    )

In [None]:
# Combine the Ditto scores with the image scores per category and collect the
# resulting metrics next to the single-model and baseline F1 values.
cats = ['art', 'pet', 'home', 'garden', 'sport', 'toy', 'tool']
res = {}


def _row_for_category(frame, column, category):
    """Return the single row of `frame` whose `column` equals `category`."""
    matches = frame.loc[frame[column] == category]
    if len(matches) != 1:
        raise KeyError(f"expected exactly one row with {column}={category!r}, found {len(matches)}")
    return matches.iloc[0]


for cat in cats:
    print("category: ", cat)
    res[cat] = {}

    # Select the rows by category name rather than by position, so the two
    # frames cannot be silently paired up wrongly if their row order differs.
    ditto_row = _row_for_category(df_ditto, "Category", cat)
    img_row = _row_for_category(df_img, "category", cat)

    scores_ditto = np.asarray(ditto_row["ditto_scores_test"])
    scores_img = img_row["scores_img_test"]
    Y_test = ditto_row["Y_test"]

    # NOTE(review): the mixing weights and the threshold are tuned here on the
    # *test* scores and labels and then evaluated on that same test data, whereas
    # the tf-idf/CNN baseline tunes them on the training scores. Both frames also
    # carry the train-side columns (ditto_scores_train, scores_img_train, Y_train)
    # - confirm whether tuning on the training split was intended.
    max_f1, coef_text, coef_image, opt_theta, scores = find_optimal_coef(scores_ditto, scores_img, Y_test)
    res[cat]["max_f1_mixed"] = max_f1
    res[cat]["coef_text"] = coef_text
    res[cat]["coef_image"] = coef_image
    res[cat]["opt_theta"] = opt_theta
    res[cat]["scores_ditto"] = scores_ditto
    res[cat]["scores_img"] = scores_img
    res[cat]["scores_mixed"] = scores
    res[cat]["Y_test"] = Y_test

    # Weighted sum of the two score vectors, thresholded at opt_theta by evaluate().
    scores_test = scores_ditto*coef_text + scores_img*coef_image
    F1, Precision, Recall, Accuracy = evaluate(Y_test, scores_test, opt_theta)

    # Ditto on its own, scored from its hard predictions.
    F1_ditto_test, Precision_ditto_test, Recall_ditto_test, Accuracy_ditto_test = eval_simple(
        Y_test, ditto_row["ditto_preds_test"])
    # F1 values carried over from the tf-idf / CNN / baseline evaluation.
    F1_text_test = img_row['F1_text_test']
    F1_img_test = img_row['F1_img_test']
    F1_mixed_text_img_test = img_row['F1_mixed']

    res[cat]["F1_mixed_ditto_img_test"] = F1
    res[cat]["Precision_mixed_ditto_img_test"] = Precision
    res[cat]["Recall_mixed_ditto_img_test"] = Recall
    res[cat]["Accuracy_mixed_ditto_img_test"] = Accuracy
    res[cat]["scores_test_mixed_ditto_img_test"] = scores_test

    res[cat]["F1_ditto_test"] = F1_ditto_test
    res[cat]["Precision_ditto_test"] = Precision_ditto_test
    res[cat]["Recall_ditto_test"] = Recall_ditto_test
    res[cat]["Accuracy_ditto_test"] = Accuracy_ditto_test

    res[cat]["F1_text_test"] = F1_text_test
    res[cat]["F1_img_test"] = F1_img_test
    res[cat]["F1_mixed_text_img_test"] = F1_mixed_text_img_test
    print("Optimal values:")
    print(f"Maximal F1 = {max_f1}")
    print(f"Coefficient text = {coef_text}")
    print(f"Coefficient image = {coef_image}")
    print(f"Threshold = {opt_theta}")


In [None]:
# One row per category: the category name followed by every value collected in res[cat],
# with the column names taken from the keys recorded for "art".
result_columns = ["category", *res["art"]]
df = pd.DataFrame(columns=result_columns)
for category_name, category_results in res.items():
    df.loc[len(df)] = [category_name, *category_results.values()]
df.to_pickle("Result/ditto_cnn_multimodalbaseline.pkl")

# Results of all models

#### Table 1 left

In [3]:
# Table 1 (left): test F1 of every model per category.
# NOTE(review): "Result/result_all_models.pkl" is not written by any cell of this
# notebook - presumably a precomputed results file; confirm where it comes from.
model_labels = {
    "F1_text_test": "tf-idf",
    "F1_img_test": "Siamese CNN",
    "F1_ditto_test": "Ditto",
    "F1_mixed_text_img_test": "Multi-modal baseline",
    "F1_mixed_ditto_img_test": "Multi-modal model",
}
df = pd.read_pickle("Result/result_all_models.pkl").rename(columns=model_labels)
df[['category', *model_labels.values()]]

Unnamed: 0,category,tf-idf,Siamese CNN,Ditto,Multi-modal baseline,Multi-modal model
0,art,0.880325,0.885375,0.947791,0.965235,0.979839
1,pet,0.640693,0.582781,0.743875,0.680244,0.754717
2,home,0.656904,0.588921,0.840764,0.691511,0.864035
3,garden,0.707566,0.585714,0.829365,0.742972,0.825462
4,sport,0.794926,0.636364,0.836292,0.829569,0.873418
5,toy,0.654397,0.57223,0.808853,0.697769,0.822511
6,tool,0.653061,0.535613,0.831224,0.644,0.840708


#### Table 1 right

In [4]:
# Table 1 (right): weight of the text score (β) and of the image score (1-β) per category.
weight_labels = {"coef_text": "β", "coef_image": "1-β"}
df = pd.read_pickle("Result/result_all_models.pkl").rename(columns=weight_labels)
df[["category", *weight_labels.values()]]

Unnamed: 0,category,β,1-β
0,art,0.47,0.53
1,pet,0.84,0.16
2,home,0.66,0.34
3,garden,0.72,0.28
4,sport,0.65,0.35
5,toy,0.67,0.33
6,tool,0.63,0.37
