In [1]:
import spacy
from spacy import displacy

# Load medium English model
nlp = spacy.load("en_core_web_md")

In [3]:
import spacy
import json
import csv
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Load spaCy model
#nlp = spacy.load("en_core_web_md")

# Inference Function
def infer(sentence, target_noun):
    doc = nlp(sentence)
    
    root = [token for token in doc if token.head == token][0]
    
    related_adjs = []
    related_adps = []
    is_part_of_root_noun = False

    target_noun = target_noun.lower().strip()

    for chunk in doc.noun_chunks:
        chunk_text = chunk.text.lower().strip()
        
        if target_noun in chunk_text:
            for token in chunk:
                if token == root or token.head == root:
                    is_part_of_root_noun = True

            for token in chunk:
                for child in token.children:
                    if child.pos_ == "ADJ":
                        related_adjs.append(child.text.lower().strip())
                    if child.pos_ == "ADP":
                        related_adps.append(child.text.lower().strip())
    
    related_adjs = list(set(related_adjs))
    related_adps = list(set(related_adps))

    return {
        "adj": related_adjs,
        "adp": related_adps,
        "part_of_root": is_part_of_root_noun
    }

# Load Ground Truth Data
with open('test_json.json', 'r') as f:
    test_data = json.load(f)

# Containers for Metrics and Reports
true_adj = []
pred_adj = []
adj_report = []

true_adp = []
pred_adp = []
adp_report = []

true_root = []
pred_root = []
root_report = []

# Evaluation Loop
for item in test_data:
    query = item['input']
    gt_nouns = item['nouns']

    for gt in gt_nouns:
        target_noun = gt['noun']
        
        pred = infer(query, target_noun)
        
        gt_adjs = [a.lower().strip() for a in gt['adj']]
        gt_adps = [a.lower().strip() for a in gt['adp']]

        pred_adjs = [a.lower().strip() for a in pred['adj']]
        pred_adps = [a.lower().strip() for a in pred['adp']]

        # Adjective Evaluation & Report
        for adj in gt_adjs:
            true_adj.append(1)
            pred_adj.append(1 if adj in pred_adjs else 0)
            adj_report.append({
                "input_query": query,
                "noun": target_noun,
                "gt_adj": adj,
                "pred_adj": ", ".join(pred_adjs),
                "match": adj in pred_adjs
            })

        # Adposition Evaluation & Report
        for adp in gt_adps:
            true_adp.append(1)
            pred_adp.append(1 if adp in pred_adps else 0)
            adp_report.append({
                "input_query": query,
                "noun": target_noun,
                "gt_adp": adp,
                "pred_adp": ", ".join(pred_adps),
                "match": adp in pred_adps
            })

        # Part of Root Evaluation & Report
        true_root.append(gt['part_of_root'])
        pred_root.append(pred['part_of_root'])
        root_report.append({
            "input_query": query,
            "noun": target_noun,
            "gt_part_of_root": gt['part_of_root'],
            "pred_part_of_root": pred['part_of_root'],
            "match": gt['part_of_root'] == pred['part_of_root']
        })

# Metric Helper
def print_metrics(name, true_list, pred_list):
    if len(true_list) == 0:
        print(f"{name} Metrics: No ground truth labels provided.\n")
        return
    print(f"{name} Metrics:")
    print(f"Precision: {precision_score(true_list, pred_list):.2f}")
    print(f"Recall: {recall_score(true_list, pred_list):.2f}")
    print(f"F1 Score: {f1_score(true_list, pred_list):.2f}")
    print(f"Accuracy: {accuracy_score(true_list, pred_list):.2f}\n")

# 🎯 Output Metrics
print_metrics("Adjective", true_adj, pred_adj)
print_metrics("Adposition", true_adp, pred_adp)
print_metrics("Part of Root", [int(r) for r in true_root], [int(r) for r in pred_root])

# Write CSV Reports
with open("adj_report.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["input_query", "noun", "gt_adj", "pred_adj", "match"])
    writer.writeheader()
    writer.writerows(adj_report)

with open("adp_report.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["input_query", "noun", "gt_adp", "pred_adp", "match"])
    writer.writeheader()
    writer.writerows(adp_report)

with open("part_of_root_report.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["input_query", "noun", "gt_part_of_root", "pred_part_of_root", "match"])
    writer.writeheader()
    writer.writerows(root_report)

print("✅ Reports generated: adj_report.csv, adp_report.csv, part_of_root_report.csv")


Adjective Metrics:
Precision: 1.00
Recall: 0.76
F1 Score: 0.87
Accuracy: 0.76

Adposition Metrics:
Precision: 0.00
Recall: 0.00
F1 Score: 0.00
Accuracy: 0.00

Part of Root Metrics:
Precision: 1.00
Recall: 0.80
F1 Score: 0.89
Accuracy: 0.91

✅ Reports generated: adj_report.csv, adp_report.csv, part_of_root_report.csv


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
