# Human Scoring Results - Analyzer

In [None]:
import pandas as pd
from pathlib import Path

# Workbook to analyze; the path is relative, so it is resolved against the
# current working directory. Change it to point at your own file.
excel_path = Path("Weed-VLM Data Set/Comprehensive_WeedVLM.xlsx")

# Worksheet to read: either a sheet name (str) or a sheet index (int), e.g. "Sheet1" or 0.
sheet_name = "Gemini_Flash_2.5"

# Echo the effective configuration so the saved output records which inputs were used.
print(
    f"Configured Excel path: {excel_path.resolve()}",
    f"Sheet name: {sheet_name}",
    sep="\n",
)


Configured Excel path: D:\Khalifa University 2024\Conferences - Research\Al-Ain 2025\Weed-VLM Data Set\Comprehensive_WeedVLM.xlsx
Sheet name: Gemini_Flash_2.5


In [None]:
# --- Load the configured sheet and keep only the scored rows ---
# `excel_path` and `sheet_name` come from the configuration cell.
df = pd.read_excel(excel_path, sheet_name=sheet_name)
df = df.iloc[0:441].copy()  # first 441 data rows, i.e. Excel rows 2..442 (row 1 is the header)

# Print the headers with 1-based positions so the positional fallbacks below
# can be checked against the actual sheet layout.
print("Column headers (in order):")
for i, col in enumerate(df.columns, start=1):
    print(f"{i:>3}: {col}")


# --- Locate the Yes/No filter column (Excel column AB) ---
# Prefer a header literally named "AB"; otherwise fall back to the 28th column
# (A=1 -> AB=28, zero-based index 27).
if "AB" in df.columns:
    ab_series = df["AB"]
elif df.shape[1] > 27:
    ab_series = df.iloc[:, 27]
    print("Note: 'AB' not found by name. Used positional column 28 (index 27) as fallback.")
else:
    raise KeyError("Could not find 'AB' by name or position. "
                   "Please ensure your dataset has a column named 'AB' or that AB is the 28th column.")

# Normalize to trimmed lowercase text so "Yes", " yes " and "YES" all match.
ab_series = ab_series.astype(str).str.strip().str.lower()
mask_yes = ab_series.eq("yes")

# --- Determine the five score columns (Excel columns W..AA) ---
# Preferred: headers literally named 'W','X','Y','Z','AA'.
preferred_cols = ["W", "X", "Y", "Z", "AA"]
available_preferred = [c for c in preferred_cols if c in df.columns]

if len(available_preferred) == 5:
    score_cols = available_preferred
else:
    # Fallback: Excel letters W..AA are the 23rd..27th columns, zero-based 22..26.
    score_cols = list(df.columns[22:27])
    if len(score_cols) != 5:
        raise KeyError("Could not determine columns W..AA by name or position. "
                       "Ensure your data either has headers named W..AA or that these are columns 23..27.")
    print(f"Note: Using positional columns W..AA -> indices 22..26 mapped to {score_cols}")

# --- Filter and compute means ---
# Coerce every score column to numeric first. Without this, a column that was
# read as text (e.g. because of one stray non-numeric cell) would be silently
# dropped by mean(numeric_only=True) instead of being averaged.
filtered = df.loc[mask_yes, score_cols].copy()
for c in score_cols:
    numeric = pd.to_numeric(filtered[c], errors="coerce")
    # Values that were present but could not be parsed become NaN; report them
    # so the exclusion from the mean is visible rather than silent.
    n_bad = int((numeric.isna() & filtered[c].notna()).sum())
    if n_bad:
        print(f"Note: {n_bad} non-numeric value(s) in {c!r} were treated as missing.")
    filtered[c] = numeric

means = filtered.mean(numeric_only=True)  # NaN entries are skipped by mean()

print("Count of rows where AB == 'Yes':", mask_yes.sum())
print("\nColumn means for W..AA (based on AB == 'Yes'):\n")
display(means.to_frame("mean").T)


Column headers (in order):
  1: #
  2: Image Name
  3: Image
  4: Weed Visualizer
  5: Weed Detection (GT)
  6: Weed Location (GT)
  7: Crop Growth (GT)
  8: Crop Type (GT)
  9: Gemini_Response
 10: Weed Detection (Prediction)
 11: Weed Location (Prediction)
 12: Crop Growth (Prediction)
 13: Crop Type (Prediction)
 14: Reasoning (Prediction)
 15: V2_Weed Detection (Prediction)
 16: V2_Weed Location (Prediction)
 17: V2_Crop Growth (Prediction)
 18: V2_Crop Type (Prediction)
 19: V2_Reasoning (Prediction)
 20: Weed Detection Penalty (Prompt Entry)
 21: Penalty Details
 22: Human Review Score
 23: Grounding
(0-5)
 24: Specificity
(0-5)
 25: Plausibility
(0-5)
 26: Non-Hallucination
(0-5)
 27: Actionability
(0-5)
 28: Original Prompt
 29: EPP Prompt
Note: 'AB' not found by name. Used positional column 28 (index 27) as fallback.
Note: Using positional columns W..AA -> indices 22..26 mapped to ['Grounding\n(0-5)', 'Specificity\n(0-5)', 'Plausibility\n(0-5)', 'Non-Hallucination\n(0-5)', 'Ac

Unnamed: 0,Grounding\n(0-5),Specificity\n(0-5),Plausibility\n(0-5),Non-Hallucination\n(0-5),Actionability\n(0-5)
mean,2.243161,2.066869,4.537994,4.893617,2.620061


### Version 2.0

In [None]:
import pandas as pd
from pathlib import Path

# --- Config ---
# Workbook path (relative to the working directory) and the worksheet to read;
# the sheet may be given by name (str) or by index (int).
excel_path = Path("Weed-VLM Data Set/Comprehensive_WeedVLM.xlsx")  # <- change this to your actual file name
sheet_name = "ChatGPT_4o"


def _as_number(column):
    """Return `column` converted to numeric, with unparseable entries as NaN.

    Values are stringified, stripped of "," and "%" characters and of
    surrounding whitespace, then parsed with ``pd.to_numeric(errors="coerce")``.
    """
    # NOTE(review): removing "," means a decimal-comma entry such as "4,5"
    # would parse as 45 — confirm the sheet contains no such values.
    text = column.astype(str)
    for unwanted in (",", "%"):
        text = text.str.replace(unwanted, "", regex=False)
    return pd.to_numeric(text.str.strip(), errors="coerce")


# --- Load & Trim ---
# Keep the first 441 data rows (Excel rows 2-442).
df = pd.read_excel(excel_path, sheet_name=sheet_name).iloc[0:441].copy()

# --- Filter ---
# Rows whose "Original Prompt" cell reads "yes" after trimming and lowercasing.
prompt_flag = df["Original Prompt"].astype(str).str.strip().str.lower()
mask = prompt_flag == "yes"

# --- Score Columns ---
# Headers contain an embedded newline, exactly as they appear in the sheet.
score_cols = [
    "Grounding\n(0-5)",
    "Specificity\n(0-5)",
    "Plausibility\n(0-5)",
    "Non-Hallucination\n(0-5)",
    "Actionability\n(0-5)",
]

# --- Clean and Convert ---
filtered = df.loc[mask, score_cols].copy()
for c in score_cols:
    filtered[c] = _as_number(filtered[c])

# --- Compute Means ---
means = filtered.mean(numeric_only=True)
print(
    f"Count of rows where Original Prompt == 'Yes': {mask.sum()}\n",
    "Column means:\n",
    sep="\n",
)
display(means.to_frame("mean").T)


Count of rows where Original Prompt == 'Yes': 198

Column means:



Unnamed: 0,Grounding\n(0-5),Specificity\n(0-5),Plausibility\n(0-5),Non-Hallucination\n(0-5),Actionability\n(0-5)
mean,1.373737,1.257576,4.176768,4.949495,1.914141
