## Evaluation Metrics for Ingredients and Cooking Instructions

### Importing Libraries

In [None]:
import evaluate
import glob
import numpy as np
from tqdm import tqdm
import json

### Evaluation Metrics

In [None]:
# Load both text-generation metrics once at module level; evaluate_recipe
# reads them as the globals `rouge` and `bleu`.
rouge, bleu = (evaluate.load(metric_id) for metric_id in ('rouge', 'sacrebleu'))

### Loading the Ingredient Mapping Used to Resolve Ingredient Synonyms (e.g. cheddar cheese --> cheese)

In [None]:
# Read the ingredient mapping. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's default locale encoding (which would
# mis-decode non-ASCII ingredient names on some systems).
# The loop below expects each key to map to an iterable of name strings.
with open('ingrs_mapping.json', encoding='utf-8') as json_file:
    original_dict = json.load(json_file)

# Invert the mapping into a name -> key lookup. Names are normalized
# (underscores become spaces, then lower-cased) before being used as lookup
# keys. If the same normalized name appears under several keys, the last one
# in iteration order wins.
new_dict = {
    value.replace('_', ' ').lower(): key
    for key, values in original_dict.items()
    for value in values
}

### Evaluation for Ingredients 

In [None]:
def evaluate_ingredients(actual_file, pred_file, ret_metrics):
    """Summary: Calculate the metrics for the ingredients of one recipe

    Each file is read as one ingredient per line; lines are stripped and
    blank lines are ignored. A ground-truth ingredient is replaced by the
    name the module-level ``new_dict`` maps it to whenever that mapped name
    is among the predictions, so that a known synonym counts as a match
    (e.g. cheddar cheese --> cheese).

    Nothing is recorded when either file contains no ingredients, or when
    the first predicted line is '-1' (presumably the marker for a failed
    prediction -- confirm with whatever produces the prediction files).

    Args:
        actual_file: path to the file containing actual ingredients
        pred_file: path to the file containing predicted ingredients
        ret_metrics: dictionary of lists; one value is appended to each of
            'ingredient_accuracy', 'ingredient_precision',
            'ingredient_recall', 'ingredient_f1', 'ingredient_dice' and
            'ingredient_iou'

    Returns:
        None. The results are appended to ``ret_metrics`` in place.

    Raises:
        KeyError: if ``ret_metrics`` lacks one of the lists named above.
    """
    with open(actual_file, 'r') as f:
        actual_ingredients = [name for name in (line.strip() for line in f) if name]

    with open(pred_file, 'r') as f:
        predicted_ingredients = [name for name in (line.strip() for line in f) if name]

    # Nothing to compare if either side is empty.
    if not actual_ingredients or not predicted_ingredients:
        return

    if predicted_ingredients[0] == '-1':
        return

    predicted_set = set(predicted_ingredients)

    # Synonym replacement: only swap in the mapped name when the prediction
    # actually used it, otherwise keep the ground-truth wording.
    actual_set = set()
    for ingredient in actual_ingredients:
        mapped = new_dict.get(ingredient)
        if mapped is not None and mapped in predicted_set:
            ingredient = mapped
        actual_set.add(ingredient)

    # Both sets are non-empty here, so none of the divisions below can fail
    # except the F1 one, which is guarded explicitly.
    overlap = len(actual_set & predicted_set)

    recall = overlap / len(actual_set)
    precision = overlap / len(predicted_set)
    # "Accuracy" is defined here as the fraction of actual ingredients that
    # were predicted, i.e. it is numerically the same as recall.
    accuracy = recall

    # With no overlap precision + recall is 0; F1 is 0 in that case. Without
    # this guard the sample would raise ZeroDivisionError and be left out of
    # every metric, inflating the averages.
    if overlap:
        f1 = 2 * precision * recall / (precision + recall)
    else:
        f1 = 0.0

    dice = 2 * overlap / (len(actual_set) + len(predicted_set))
    iou = overlap / len(actual_set | predicted_set)

    # All values are computed before anything is appended so the metric
    # lists always stay the same length.
    ret_metrics['ingredient_accuracy'].append(accuracy)
    ret_metrics['ingredient_precision'].append(precision)
    ret_metrics['ingredient_recall'].append(recall)
    ret_metrics['ingredient_f1'].append(f1)
    ret_metrics['ingredient_dice'].append(dice)
    ret_metrics['ingredient_iou'].append(iou)


### Evaluation for cooking instructions

In [None]:
def evaluate_recipe(actual_file, pred_file, ret_metrics):
    """Summary: Calculate ROUGE and BLEU for the cooking instructions of one recipe

    Both files are flattened into a single string: every line is stripped
    and followed by one space. The predicted string is then scored against
    the actual string with the module-level ``rouge`` and ``bleu`` metrics.

    Nothing is recorded when the prediction file is the single line '-1'
    (presumably a failed-prediction marker -- confirm with the producer of
    the prediction files) or when either text is blank. Any exception raised
    while scoring or recording is printed together with both texts, and the
    function then returns normally.

    Args:
        actual_file: path to the file containing actual recipe
        pred_file: path to the file containing predicted recipe
        ret_metrics: dictionary of lists; one value is appended to each of
            'recipe_rouge1', 'recipe_rouge2', 'recipe_rougeL' and
            'recipe_bleu'
    """

    def _flatten(path):
        # Strip each line and glue the lines together, one space after each.
        with open(path, 'r') as handle:
            return "".join(row.strip() + " " for row in handle)

    reference_text = _flatten(actual_file)
    candidate_text = _flatten(pred_file)

    # Guard clauses: the '-1' marker, then blank text on either side.
    if candidate_text == '-1 ':
        return
    if not (reference_text.strip() and candidate_text.strip()):
        return

    try:
        rouge_result = rouge.compute(predictions=[candidate_text], references=[[reference_text]])
        bleu_result = bleu.compute(predictions=[candidate_text], references=[[reference_text]])

        for variant in ('rouge1', 'rouge2', 'rougeL'):
            ret_metrics['recipe_' + variant].append(rouge_result[variant])
        ret_metrics['recipe_bleu'].append(bleu_result['score'])
    except Exception as e:
        print(e)
        print(f'predicted: {candidate_text} \n actual: {reference_text}')
    

### Running the evaluation for ingredients and cooking instructions of each recipe in test dataset

In [None]:
def evaluate_metrics(GT, PRED):
    """Summary: Calculate the averaged metrics for ingredients and recipe

    The files matched by the two glob patterns are sorted and paired up in
    order. evaluate_ingredients and evaluate_recipe are both run on every
    pair, and each collected metric list is then reduced to its mean.

    Args:
        GT: glob pattern matching the files containing actual ingredients/recipe
        PRED: glob pattern matching the files containing predicted ingredients/recipe

    Returns:
        dictionary mapping each metric name to its mean over the pairs that
        produced a value, or to 0 when no pair produced one.
    """
    metric_names = (
        'recipe_bleu', 'recipe_rouge1', 'recipe_rouge2', 'recipe_rougeL',
        'ingredient_accuracy', 'ingredient_precision', 'ingredient_recall',
        'ingredient_f1', 'ingredient_dice', 'ingredient_iou',
    )
    ret_metrics = {name: [] for name in metric_names}

    gt_files = sorted(glob.glob(GT))
    pred_files = sorted(glob.glob(PRED))

    # Pairing is purely positional, so a count mismatch means zip() will drop
    # the surplus files and may pair unrelated files. Make that visible.
    if len(gt_files) != len(pred_files):
        print(f'WARNING: {len(gt_files)} ground-truth files but {len(pred_files)} predicted files; '
              f'only the first {min(len(gt_files), len(pred_files))} sorted pairs are evaluated')

    # zip() has no length, so give tqdm the total explicitly.
    pairs = zip(gt_files, pred_files)
    for actual_file, pred_file in tqdm(pairs, total=min(len(gt_files), len(pred_files))):
        evaluate_ingredients(actual_file, pred_file, ret_metrics)
        evaluate_recipe(actual_file, pred_file, ret_metrics)

    # np.mean([]) is nan (with a RuntimeWarning), so report 0 for a metric
    # that collected no values instead of averaging an empty list.
    for k, v in ret_metrics.items():
        ret_metrics[k] = np.mean(v) if len(v) > 0 else 0
    print("FINAL", ret_metrics)

    return ret_metrics


In [None]:
# Glob patterns passed to evaluate_metrics: ground-truth files first,
# predicted files second.
GT_PATH, PRED_PATH = "GT/ingredients/*txt", "Predicted/ingredients/*"

# Run the evaluation and print the dictionary of averaged metrics it returns.
print(evaluate_metrics(GT_PATH, PRED_PATH))