In [38]:
import ast
import json

import pandas as pd

# -------------------------------
# Helper: Parse compound spans
# -------------------------------
def parse_spans(analysis_json):
    """
    Extract compound spans as a list of (start, end, label) tuples.

    Args:
        analysis_json: a JSON string, a Python-literal string (for example
            the single-quoted repr of a dict that a CSV round-trip produces),
            an already-parsed dict, or a missing value (None / NaN).

    Returns:
        List of (int start, int end, str label) tuples, one per usable entry
        under the "compounds" key. Returns [] when the input is missing,
        cannot be parsed, or does not decode to a dict. Entries that are not
        dicts, or whose "span" is not a list/tuple of at least two values
        convertible to int, are skipped.
    """
    if isinstance(analysis_json, str):
        try:
            analysis = json.loads(analysis_json)
        except (ValueError, RecursionError):
            # Fallback for Python-literal text (single quotes, None/True/False).
            # literal_eval accepts literals only, so unlike eval() it cannot
            # execute code embedded in a CSV cell or a model response.
            try:
                analysis = ast.literal_eval(analysis_json)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                return []
    else:
        analysis = analysis_json

    # Covers None / NaN / pd.NA as well as payloads that decode to a list,
    # string or number, none of which carry a "compounds" key.
    if not isinstance(analysis, dict):
        return []

    compounds = analysis.get("compounds")
    if not isinstance(compounds, (list, tuple)):
        # Missing key, explicit null, or a value that is not a sequence of entries.
        return []

    spans = []
    for compound in compounds:
        if not isinstance(compound, dict):
            continue
        span = compound.get("span")
        if not isinstance(span, (list, tuple)) or len(span) < 2:
            continue
        try:
            start, end = int(span[0]), int(span[1])
        except (TypeError, ValueError, OverflowError):
            # Non-numeric or non-finite boundary: skip this entry only.
            continue
        label = str(compound.get("label", "")).strip()
        spans.append((start, end, label))
    return spans


# -------------------------------
# Helper: Compute metrics for one sentence
# -------------------------------
def span_metrics(pred_spans, gold_spans):
    """
    Score one sample's predicted spans against its gold spans.

    Both arguments are iterables of (start, end, label) tuples; duplicates
    are collapsed because the comparison is done on sets.

    Returns:
        (USS, LSS, EM) where
        - USS is 2 * |gold ∩ pred| / (|gold| + |pred|) over (start, end) pairs,
        - LSS is the same ratio over full (start, end, label) triples,
        - EM is 1.0 when the labeled sets are identical, else 0.0.

    Note: when both sides are empty, USS and LSS are 0.0 while EM is 1.0.
    """
    def overlap_score(reference, predicted):
        # Dice-style ratio; defined as 0.0 when there is nothing on either side.
        total = len(predicted) + len(reference)
        if total == 0:
            return 0.0
        return 2 * len(reference & predicted) / total

    gold_pairs = set((start, end) for start, end, _ in gold_spans)
    pred_pairs = set((start, end) for start, end, _ in pred_spans)
    gold_triples = set(gold_spans)
    pred_triples = set(pred_spans)

    unlabeled_score = overlap_score(gold_pairs, pred_pairs)
    labeled_score = overlap_score(gold_triples, pred_triples)
    exact_match = float(gold_triples == pred_triples)

    return unlabeled_score, labeled_score, exact_match


# -------------------------------
# Main evaluation function
# -------------------------------
def evaluate_inplace(df, gold_col, pred_col, out_prefix="eval"):
    """
    Score every row of a DataFrame and attach the per-row metrics to it.

    Each row's gold and predicted cells are parsed with parse_spans and
    compared with span_metrics. Three columns are written onto `df` itself:
    "<out_prefix>_USS", "<out_prefix>_LSS" and "<out_prefix>_EM".

    Args:
        df: pandas DataFrame holding both columns; it is modified in place.
        gold_col: name of the column with the gold JSON.
        pred_col: name of the column with the predicted JSON.
        out_prefix: prefix for the three metric columns.

    Returns:
        dict with keys "USS", "LSS", "EM": the mean of each metric over all
        rows, rounded to 4 decimals (0.0 for an empty frame).

    Raises:
        ValueError: if either column is missing from `df`.
    """
    if any(name not in df.columns for name in (gold_col, pred_col)):
        raise ValueError(f"Columns '{gold_col}' or '{pred_col}' not found in DataFrame")

    # One (USS, LSS, EM) triple per row, in row order.
    row_scores = []
    for _, record in df.iterrows():
        gold = parse_spans(record[gold_col])
        pred = parse_spans(record[pred_col])
        row_scores.append(span_metrics(pred, gold))

    row_count = len(df)
    summary = {}
    for position, metric in enumerate(("USS", "LSS", "EM")):
        values = [scores[position] for scores in row_scores]
        df[f"{out_prefix}_{metric}"] = values
        summary[metric] = round(sum(values) / row_count, 4) if row_count > 0 else 0.0

    return summary

In [39]:
import pandas as pd

In [40]:
# Load the evaluation CSV. The preview below shows `sentence` and `gold`
# columns plus two leftover "Unnamed" index columns.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
df = pd.read_csv("/home/shivraj-pg/DEPNECT/DATASETS/without_context_coarse_ashtangrudyam.csv")

In [41]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0.1,Unnamed: 0,sentence,gold
0,0,रागआदिरोगान् सततअनुषक्तान् अशेषकायप्रसृतान् अश...,"{'tokens': ['राग', 'आदि', 'रोगान्', 'सतत', 'अन..."
1,1,अपूर्ववैद्याय नमः अस्तु तस्मै .,"{'tokens': ['अ', 'पूर्व', 'वैद्याय', 'नमः', 'अ..."
2,2,अथ अतः आयुष्कामीयम् अध्यायम् व्याख्यास्यामः .,"{'tokens': ['अथ', 'अतः', 'आयुष्कामीयम्', 'अध्य..."
3,3,इति ह स्म आहुः आत्रेयआदयः महाऋषयः .,"{'tokens': ['इति', 'ह', 'स्म', 'आहुः', 'आत्रेय..."
4,4,आयुः कामयमानेन धर्मअर्थसुखसाधनम् आयुःवेदउपदेशे...,"{'tokens': ['आयुः', 'कामयमानेन', 'धर्म', 'अर्थ..."


In [42]:
# Sanity-check setup: use the gold annotations as the "model" output, so the
# prediction column is identical to the gold column.
df["model-output"] = df['gold']

In [43]:
# Keep only the first five rows for a quick smoke test. .copy() detaches the
# result from the full frame, so evaluate_inplace can add its metric columns
# without pandas raising SettingWithCopyWarning on a slice.
df = df.head().copy()

In [44]:
# Display the five-row frame, now including the model-output column.
df

Unnamed: 0.1,Unnamed: 0,sentence,gold,model-output
0,0,रागआदिरोगान् सततअनुषक्तान् अशेषकायप्रसृतान् अश...,"{'tokens': ['राग', 'आदि', 'रोगान्', 'सतत', 'अन...","{'tokens': ['राग', 'आदि', 'रोगान्', 'सतत', 'अन..."
1,1,अपूर्ववैद्याय नमः अस्तु तस्मै .,"{'tokens': ['अ', 'पूर्व', 'वैद्याय', 'नमः', 'अ...","{'tokens': ['अ', 'पूर्व', 'वैद्याय', 'नमः', 'अ..."
2,2,अथ अतः आयुष्कामीयम् अध्यायम् व्याख्यास्यामः .,"{'tokens': ['अथ', 'अतः', 'आयुष्कामीयम्', 'अध्य...","{'tokens': ['अथ', 'अतः', 'आयुष्कामीयम्', 'अध्य..."
3,3,इति ह स्म आहुः आत्रेयआदयः महाऋषयः .,"{'tokens': ['इति', 'ह', 'स्म', 'आहुः', 'आत्रेय...","{'tokens': ['इति', 'ह', 'स्म', 'आहुः', 'आत्रेय..."
4,4,आयुः कामयमानेन धर्मअर्थसुखसाधनम् आयुःवेदउपदेशे...,"{'tokens': ['आयुः', 'कामयमानेन', 'धर्म', 'अर्थ...","{'tokens': ['आयुः', 'कामयमानेन', 'धर्म', 'अर्थ..."


In [45]:
# Save the smoke-test frame to disk. index=False stops pandas from writing the
# row index as an extra unnamed column, the likely origin of the
# "Unnamed: 0" / "Unnamed: 0.1" columns seen in the loaded data.
df.to_csv("test.csv", index=False)

In [46]:
# Run evaluation: adds eval_USS / eval_LSS / eval_EM columns to df and returns
# the rounded per-metric means.
summary = evaluate_inplace(df, gold_col="gold", pred_col="model-output", out_prefix="eval")


In [47]:
# Show the aggregate scores returned by evaluate_inplace.
summary

{'USS': 1.0, 'LSS': 1.0, 'EM': 1.0}

In [48]:
# Run the external scores.py script on a test.csv with the same two column names.
# NOTE(review): this absolute path is presumably the file written by
# df.to_csv("test.csv") above — confirm the notebook's working directory.
! python3 scores.py /home/shivraj-pg/DEPNECT/Scores/test.csv gold model-output