In [None]:
def evaluate_model(test_data, actual_data, nlp):
    """Score a pipeline's predicted entities against reference annotations.

    Args:
        test_data: Iterable of raw texts that are run through ``nlp``.
        actual_data: Iterable of items whose element ``[0]`` is the text and
            element ``[1]`` is the annotation dict handed to
            ``Example.from_dict``. Paired positionally with ``test_data``
            via ``zip``, so extra items on either side are ignored.
        nlp: Pipeline object; called on each text and also used for
            ``make_doc``.

    Returns:
        The result of ``Scorer().score`` over the constructed examples.
    """
    # Phase 1: run the pipeline on every text and keep only the character
    # offsets and label of each predicted entity.
    predicted_data = []
    for raw_text in test_data:
        parsed = nlp(raw_text)
        found = [
            (ent.start_char, ent.end_char, ent.label_) for ent in parsed.ents
        ]
        predicted_data.append((raw_text, {"entities": found}))

    # Phase 2: build one Example per (reference, prediction) pair. Both docs
    # are created from the reference text.
    examples = []
    for gold, guess in zip(actual_data, predicted_data):
        gold_text = gold[0]
        gold_anns = gold[1]
        guess_anns = guess[1]

        example = Example.from_dict(nlp.make_doc(gold_text), gold_anns)

        guess_doc = nlp.make_doc(gold_text)
        candidate_spans = (
            guess_doc.char_span(start, end, label=label)
            for start, end, label in guess_anns["entities"]
        )
        # Offsets for which char_span yields None are dropped.
        guess_doc.ents = [span for span in candidate_spans if span is not None]

        example.predicted = guess_doc
        examples.append(example)

    return Scorer().score(examples)
    
def print_metrics(metrics):
    """Print overall token/entity scores followed by a per-entity-type table.

    Args:
        metrics: Mapping providing the keys ``token_acc``, ``token_p``,
            ``token_r``, ``token_f``, ``ents_p``, ``ents_r``, ``ents_f`` and
            ``ents_per_type``. ``ents_per_type`` maps an entity label to a
            mapping with ``p``, ``r`` and ``f``; it may also be ``None``
            (e.g. when the scorer had no entity annotations to score), in
            which case an empty table is printed.

    Returns:
        None. Output goes to stdout only.
    """
    print("Model Metrics:")
    print(f"Token Accuracy: {metrics['token_acc']}")
    print(f"Token Precision: {metrics['token_p']}")
    print(f"Token Recall: {metrics['token_r']}")
    print(f"Token F1 Score: {metrics['token_f']}")
    print(f"Entity Precision: {metrics['ents_p']}")
    print(f"Entity Recall: {metrics['ents_r']}")
    print(f"Entity F1 Score: {metrics['ents_f']}")

    # Guard against a missing per-type breakdown so the summary above is not
    # followed by an AttributeError on ``None.items()``.
    per_type = metrics['ents_per_type'] or {}

    # One table row per entity label.
    entity_data = [
        [entity, scores['p'], scores['r'], scores['f']]
        for entity, scores in per_type.items()
    ]

    print(tabulate(entity_data, headers=["Entity", "Precision", "Recall", "F1 Score"], tablefmt="grid"))
    print("\n")

def save_metrics_and_plot(parameter_sets, all_metrics):
    """Append entity scores to ``metrics_summary.csv`` and return them as lists.

    Despite its name, this function does not draw anything; it returns the
    score lists so the caller can plot them.

    Args:
        parameter_sets: Sequence indexed in parallel with ``all_metrics``;
            entry ``i`` is written (via ``str``) into the ``parameter``
            column of row ``model_{i + 1}``.
        all_metrics: Sequence of mappings, each providing ``ents_p``,
            ``ents_r`` and ``ents_f``.

    Returns:
        A ``(precisions, recalls, f1_scores)`` tuple of lists, one value per
        entry of ``all_metrics`` and in the same order.

    Raises:
        IndexError: If ``all_metrics`` has more entries than
            ``parameter_sets``.
    """
    import csv  # local import: keeps this function self-contained

    summary_path = "metrics_summary.csv"

    # The header is needed when the file is missing *or* exists but is empty.
    has_content = (
        os.path.isfile(summary_path) and os.path.getsize(summary_path) > 0
    )

    with open(summary_path, "a", newline="", encoding="utf-8") as f:
        # csv.writer quotes the parameter field, whose text contains commas
        # that would otherwise be read as extra columns.
        writer = csv.writer(f, lineterminator="\n")
        if not has_content:
            writer.writerow(
                ["model", "parameter", "precision", "recall", "f1_score"]
            )

        for i, metric in enumerate(all_metrics):
            writer.writerow([
                f"model_{i + 1}",
                parameter_sets[i],
                metric["ents_p"],
                metric["ents_r"],
                metric["ents_f"],
            ])

    # Collect per-model scores; each comprehension reads its own loop
    # variable so every model contributes its own value.
    precisions = [metric["ents_p"] for metric in all_metrics]
    recalls = [metric["ents_r"] for metric in all_metrics]
    f1_scores = [metric["ents_f"] for metric in all_metrics]

    return precisions, recalls, f1_scores

# One settings dict per model: entry i is recorded alongside model_{i + 1}
# in the metrics summary.
# NOTE(review): presumably the hyperparameters each ner_model_{i} was trained
# with - confirm against the training code.
parameter_sets = [
    {
        "max_iterations": 40,
        "initial_lr": 1e-3,
        "dropout_rate": 0.5,
        "min_loss_improvement": 7000,
        "patience": 6,
        "decay_interval": 5,
    },
    {
        "max_iterations": 35,
        "initial_lr": 5e-3,
        "dropout_rate": 0.5,
        "min_loss_improvement": 7500,
        "patience": 6,
        "decay_interval": 5,
    },
    {
        "max_iterations": 35,
        "initial_lr": 1e-2,
        "dropout_rate": 0.5,
        "min_loss_improvement": 5000,
        "patience": 6,
        "decay_interval": 5,
    },
    {
        "max_iterations": 35,
        "initial_lr": 1e-2,
        "dropout_rate": 0.6,
        "min_loss_improvement": 6000,
        "patience": 5,
        "decay_interval": 4,
    },
]

# Entity-level scores, one value per evaluated model (filled in after the
# loop below).
precisions = []
recalls = []
f1_scores = []
all_metrics = []

# The test texts do not depend on the model, so extract them once.
test_texts = test_df['test_recipe'].tolist()

# Models are stored as ner_model_1 .. ner_model_N, one per parameter set.
for i in range(1, len(parameter_sets) + 1):
    print(f"Evaluating model: ner_model_{i}")
    model_path = f"ner_model_{i}"
    nlp = spacy.load(model_path)

    metrics = evaluate_model(test_texts, actual_data, nlp)

    # Call the custom metrics printing function
    print_metrics(metrics)
    all_metrics.append(metrics)

# Persist and collect the scores once, after every model has been evaluated.
# Doing this inside the loop re-appended earlier models' rows to the CSV on
# each iteration and produced nested lists instead of one score per model.
precisions, recalls, f1_scores = save_metrics_and_plot(parameter_sets, all_metrics)