In [23]:
from pathlib import Path
import pandas as pd
import json, re
import numpy as np

# === config ===
# Folder that holds this quiz's input and output files.
quizPath = Path(r"C:\Users\Panos\OneDrive - nyu.edu\Desktop\CP-lab-2025\quiz5")

# Inputs: responses CSV and answer-key JSON live in the quiz folder; the
# roster JSON lives one directory above it. The grades CSV is written back
# into the quiz folder.
responses_path = quizPath.joinpath("quiz5_responses.csv")
answer_key_path = quizPath.joinpath("quiz5_answers.json")
roster_path = quizPath.parent.joinpath("studentsList.json")
output_path = quizPath.joinpath("quiz5_grades.csv")

# === normalization helpers ===
# Matches any run of whitespace (spaces, tabs, newlines, ...).
_whitespace_re = re.compile(r'\s+')

def norm_text(s: str) -> str:
    """Return a canonical form of a header or question text for matching.

    Surrounding whitespace and trailing asterisks are removed, every run of
    internal whitespace is collapsed to a single space, and the result is
    lower-cased.

    Args:
        s: The text to normalize.

    Returns:
        The normalized text, or "" when ``s`` is not a string.
    """
    if not isinstance(s, str):
        return ""
    # Strip a second time after dropping the asterisks: in "Question *" the
    # space before the marker would otherwise survive as a trailing space and
    # break exact-match lookups such as "full name:".
    s = re.sub(r'\*+$', '', s.strip()).strip()
    s = _whitespace_re.sub(' ', s)
    return s.lower()

def norm_netid(raw: str) -> str:
    """Reduce a NetID or e-mail address to a bare, lower-case NetID.

    Non-string input is converted with ``str()``. The value is stripped and
    lower-cased, anything from the first '@' onward is discarded, and all
    remaining whitespace is removed.
    """
    text = raw if isinstance(raw, str) else str(raw)
    # partition() returns the whole string as the first item when there is
    # no '@', so plain NetIDs pass through unchanged.
    local_part = text.strip().lower().partition('@')[0]
    return _whitespace_re.sub('', local_part)

def split_multi(ans) -> list:
    """Split a multi-select answer into a sorted list of lower-case choices.

    Choices are separated by ';' or ','. Each choice is stripped and
    lower-cased, and empty pieces are dropped.

    Args:
        ans: The raw answer. Non-string values are converted with ``str()``.
            ``None`` and float NaN (e.g. a blank cell in a pandas row) are
            treated as "no answer".

    Returns:
        The sorted list of choices; empty when nothing was answered.
    """
    # NaN is the only float that is not equal to itself. Without this check a
    # blank cell would be stringified into the bogus choice "nan".
    if ans is None or (isinstance(ans, float) and ans != ans):
        return []
    choices = (piece.strip().lower() for piece in re.split(r'[;,]', str(ans)))
    return sorted(choice for choice in choices if choice)

# === load data ===
# Read every cell as raw text. With pandas' default type inference a numeric
# answer column that contains a blank becomes float ("3" -> 3.0 -> "3.0"),
# and answers such as "None", "NA" or "null" are parsed as missing values;
# either way a correct answer would fail the string comparison used for
# grading. With these options blank cells are loaded as "".
df = pd.read_csv(responses_path, engine="python", dtype=str, keep_default_na=False)

# Answer key: iterated with .items() below, so it is expected to be a JSON
# object of question text -> correct answer (a string or a list of choices).
with open(answer_key_path, "r", encoding="utf-8") as f:
    key_raw = json.load(f)

with open(roster_path, "r", encoding="utf-8") as f:
    roster_list = json.load(f)

# roster dict: netid -> official name
# Two roster layouts are accepted: a {netid: name} object, or a list of
# records carrying "netid" and "name" keys. NetIDs are normalized so they
# compare equal to the normalized NetIDs taken from the responses.
if isinstance(roster_list, dict):
    roster = {norm_netid(k): str(v).strip() for k, v in roster_list.items()}
else:
    roster = {norm_netid(str(r.get("netid", ""))): str(r.get("name", "")).strip()
              for r in roster_list}

# normalize answer key
# Normalized question text -> expected answer. A list in the key becomes a
# sorted list of stripped, lower-cased choices; any other value becomes a
# single stripped, lower-cased string.
key_norm = {
    norm_text(question): (
        sorted(str(choice).strip().lower() for choice in expected)
        if isinstance(expected, list)
        else str(expected).strip().lower()
    )
    for question, expected in key_raw.items()
}

# map headers
# Normalized header text -> original column name, so columns can be found
# regardless of case, spacing, or trailing asterisks in the header.
col_map = {norm_text(c): c for c in df.columns}

# The submitted-name column is optional; None when no known header is present.
name_col = (
    col_map.get("full name:") or
    col_map.get("full name")  or
    col_map.get("name")       or
    None
)

# The NetID column is required. Try the known headers first, then fall back
# to the first column whose header looks like a NetID or e-mail field.
netid_col = (
    col_map.get("net id:") or
    col_map.get("net id")  or
    col_map.get("netid")   or
    next((c for c in df.columns if re.search(r'net.?id|email', c, flags=re.I)), None)
)

if not netid_col:
    raise ValueError("Could not locate a NetID/Email column. Please rename accordingly.")

# question columns
# Only columns whose normalized header appears in the answer key are graded.
qcols = [orig for normed, orig in col_map.items() if normed in key_norm]
n_questions = len(qcols)
if n_questions == 0:
    raise ValueError("No question columns matched the answer key.")

# Percentages are computed over the matched columns only, so an answer-key
# question with no matching column would otherwise be skipped without notice.
unmatched_questions = sorted(q for q in key_norm if q not in col_map)
if unmatched_questions:
    print("⚠️  These answer-key questions matched no response column and were not graded:")
    for q in unmatched_questions:
        print(" -", q)

# === grade ===
def _cell_text(value) -> str:
    """Return a response cell as stripped text; missing cells become ''."""
    # pd.isna is True for None/NaN, so a blank cell is not turned into the
    # literal text "nan" by str().
    return "" if pd.isna(value) else str(value).strip()

graded_rows = []        # one dict per submission, in response order
unknown_netids = set()  # submitted NetIDs/e-mails with no roster entry

# The expected answer depends only on the column, so look it up once per
# column instead of once per cell.
expected_by_col = [(qc, key_norm[norm_text(qc)]) for qc in qcols]

for _, row in df.iterrows():
    raw_net = _cell_text(row.get(netid_col, ""))
    netid = norm_netid(raw_net)
    # A blank NetID never identifies a roster entry.
    official_name = roster.get(netid, np.nan) if netid else np.nan

    if pd.isna(official_name):
        unknown_netids.add(raw_net or "(blank)")

    submitted_name = _cell_text(row.get(name_col, "")) if name_col else ""

    rec = {
        "NetID": netid,
        "Name (official)": official_name,
        "Name (submitted)": submitted_name
    }

    total = 0
    for qc, correct in expected_by_col:
        answer = _cell_text(row.get(qc, ""))

        if isinstance(correct, list):
            # Multi-select: full credit only when the set of choices matches
            # the key exactly (both sides are sorted lists).
            score = int(split_multi(answer) == correct)
        else:
            score = int(answer.lower() == correct)

        rec[qc] = score
        total += score

    rec["Total"] = total
    rec["Percent Correct"] = round(100 * total / n_questions, 2)
    graded_rows.append(rec)

# === create DataFrame and fill in missing students ===
# One row per graded submission, with the columns in a fixed order.
out_cols = ["NetID", "Name (official)", "Name (submitted)", *qcols, "Total", "Percent Correct"]
out_df = pd.DataFrame(graded_rows, columns=out_cols)

# Add missing students (those in roster but not in quiz)
# Left-joining the submissions onto the roster gives every rostered student a
# row; students without a submission get empty score columns.
# NOTE(review): submissions whose NetID is not in the roster have no roster
# row to attach to, so they are not written to the CSV; they are only listed
# in the warning printed below.
roster_df = pd.DataFrame({
    "NetID": [*roster],
    "Name (official)": [*roster.values()],
})
merged_df = roster_df.merge(out_df, how="left", on=["NetID", "Name (official)"])

# Sort alphabetically by official name
merged_df = merged_df.sort_values(by="Name (official)", ignore_index=True)

# Save output
merged_df.to_csv(output_path, index=False)

if unknown_netids:
    print("⚠️  These NetIDs/emails weren’t found in the roster:")
    for unknown in sorted(unknown_netids):
        print(f" - {unknown}")
print(f"✅ Saved {output_path.resolve()}")


✅ Saved C:\Users\Panos\OneDrive - nyu.edu\Desktop\CP-lab-2025\quiz5\quiz5_grades.csv
