Imports

In [1]:
from dotenv import load_dotenv
load_dotenv()
import os
import re
import json
import fitz  
from PIL import Image
from io import BytesIO
from urllib.parse import urlparse
import spacy
import google.generativeai as genai
from dotenv import load_dotenv
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
nlp = spacy.load("en_core_web_sm")

  from .autonotebook import tqdm as notebook_tqdm


Configure Gemini

In [2]:
load_dotenv()
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)
text_model = genai.GenerativeModel("gemini-2.0-flash")
vision_model = genai.GenerativeModel("gemini-2.0-flash")

Rubric

In [3]:
rubric = {
    "architect_chosen": 5,
    "doc_and_slides": 5,
    "bio_750_words": 5,
    "bio_structure": 5,
    "bio_references": 5,
    "10_buildings_with_images": 5,
    "image_quality": 5,
    "image_citations": 5,
    "image_relevance": 5,
    "personal_bio_photo": 5,
    "presentation_polish": 5
}

rubric_descriptions = {
    "architect_chosen": "Is the architect selected from Book Two and clearly identified?",
    "doc_and_slides": "Is the document structured well with table of contents and all required sections?",
    "bio_750_words": "Does the biography meet the 750-word requirement?",
    "bio_structure": "Does the biography cover who they are, where they studied, etc.?",
    "bio_references": "Are there 5–10 APA references with DOIs and citation counts?",
    "10_buildings_with_images": "Are 10 buildings covered with names, locations, significance, and image suggestions?",
    "image_quality": "Are the images high-resolution and well-composed?",
    "image_citations": "Do all images have proper attribution (photographer/source)?",
    "image_relevance": "Do images clearly relate to the architect’s work?",
    "personal_bio_photo": "Is a professional student photo and 1–2 sentence bio included?",
    "presentation_polish": "Is the document polished, well-formatted, and web-publishable?"
}

 Extract text from PDF

In [4]:
pdf_path = "/Users/ziyaozhou/desktop/submissions/dasilvatheo_LATE_171930_14930244_HW A1.pdf"

In [5]:
def extract_text_from_pdf(pdf_path):
    print(f" Extracting text from: {pdf_path}")
    text = ""
    doc = fitz.open(pdf_path)
    for page in doc:
        text += page.get_text()
    print(" Extracted text from PDF")
    return text

 Extract images from PDF

In [6]:
def extract_images_from_pdf(pdf_path, min_width=1200, save_folder="/Users/ziyaozhou/Desktop/XR_Lab/Extracted_images"):
    print(f" Extracting images from: {pdf_path}")
    doc = fitz.open(pdf_path)
    os.makedirs(save_folder, exist_ok=True)
    image_data = []
    for page_index in range(len(doc)):
        page = doc[page_index]
        images = page.get_images(full=True)
        for img_index, img in enumerate(images):
            xref = img[0]
            base_image = doc.extract_image(xref)
            image_bytes = base_image["image"]
            img_pil = Image.open(BytesIO(image_bytes))
            width, height = img_pil.size
            img_pil.save(os.path.join(save_folder, f"page{page_index+1}_img{img_index+1}.png"))
            image_data.append({
                "page": page_index + 1,
                "width": width,
                "height": height,
                "coordinates": img[1:5],
                "image": img_pil,
                "filename": f"page{page_index+1}_img{img_index+1}.png",
                "is_high_res": width >= min_width
            })
    print(f" Extracted {len(image_data)} images")
    return image_data

Match Image with Nearby Text

In [7]:
def get_caption_candidates(text, image_data):
    print("Scanning for image captions...")
    lines = text.split("\n")
    results = []

    for img in image_data:
        context = {
            "page": img["page"],
            "image": img["filename"],
            "matched_caption": "",
            "has_citation": False,
            "has_building_name": False,
            "has_interior_note": False
        }
        for i, line in enumerate(lines):
            if f"{img['filename'].split('.')[0]}" in line:
                nearby_lines = lines[max(i-2, 0): i+3]
                caption_text = " ".join(nearby_lines)
                context["matched_caption"] = caption_text
                context["has_citation"] = any(x in caption_text.lower() for x in ["source", "http", "photographer"])
                context["has_building_name"] = bool(re.search(r"(building|tower|museum|villa|house|center)", caption_text, re.IGNORECASE))
                context["has_interior_note"] = bool(re.search(r"(interior|lobby|hall|inside)", caption_text, re.IGNORECASE))
                break
        results.append(context)
    return results

Evaluate Image Relevance, Building Match, and Attribution via Gemini

In [8]:
def evaluate_images_with_gemini(image_data, architect_name, debug=False):
    print(" Evaluating image content and relevance using Gemini...")
    enriched_image_feedback = []

    for img in tqdm(image_data, desc="Evaluating images"):
        prompt = f"""
You are reviewing an image submitted for a university architecture project about {architect_name}.
Please analyze this image and answer:

1. Does this image show a building designed by {architect_name}? If yes, specify the building.
2. Is this an interior or exterior shot?
3. Does this image clearly show architectural features (e.g., lighting, geometry, layout)?
4. How relevant is this image for an academic project about {architect_name}?

Give your feedback in the following JSON format:
{{
  "building_detected": "...",
  "interior_or_exterior": "...",
  "relevance_score": "x/10",
  "justification": "...",
  "architectural_features_visible": true/false
}}
"""
        try:
            response = vision_model.generate_content([img["image"], prompt])
            if debug:
                print(f"Image {img['filename']} feedback:\n", response.text)
            cleaned_text = response.text.strip()
            if cleaned_text.startswith("```"):
                cleaned_text = re.sub(r"```(?:json)?", "", cleaned_text)
                cleaned_text = cleaned_text.replace("```", "").strip()

            try:
                data = json.loads(cleaned_text)
            except Exception as e:
                print(f" Still failed to parse JSON from {img['filename']}: {e}")
                data = {
                    "building_detected": "Unknown",
                    "interior_or_exterior": "Unknown",
                    "relevance_score": "5/10",
                    "justification": "Could not parse feedback from Gemini.",
                    "architectural_features_visible": False
                }
        except Exception as e:
            print(f"⚠️ Error processing {img['filename']}: {e}")
            data = {
                "building_detected": "Unknown",
                "interior_or_exterior": "Unknown",
                "relevance_score": "5/10",
                "justification": "Could not extract structured feedback.",
                "architectural_features_visible": False
            }

        data.update({
            "filename": img["filename"],
            "page": img["page"],
            "width": img["width"],
            "height": img["height"],
            "is_high_res": img["is_high_res"]
        })
        enriched_image_feedback.append(data)
    return enriched_image_feedback

Score Image Citations and Structure Compliance

In [9]:
def evaluate_image_structure_and_captions(image_feedback, caption_context):
    print("Evaluating caption presence and structure")

    scores = []
    per_image_feedback = []

    for img in image_feedback:
        caption = next((c for c in caption_context if c["image"] == img["filename"]), {})
        has_citation = caption.get("has_citation", False)
        has_building_name = caption.get("has_building_name", False)
        has_interior_note = caption.get("has_interior_note", False)

        score = 0
        if has_citation: score += 3
        if has_building_name: score += 3
        if has_interior_note: score += 2
        if img["is_high_res"]: score += 2

        scores.append(score)
        per_image_feedback.append({
            "image": img["filename"],
            "page": img["page"],
            "relevance_score": img.get("relevance_score", "5/10"),
            "justification": img.get("justification", ""),
            "caption_found": caption.get("matched_caption", ""),
            "has_proper_caption": score >= 7,
            "score": score
        })

    avg_score = sum(scores) / len(scores) if scores else 0
    return {
        "score": int((avg_score / 10) * rubric["image_citations"]),
        "details": per_image_feedback
    }

Gemini-Based Rubric Evaluation with Chain-of-Thought

In [10]:
def gemini_detailed_rubric_eval(text, architect_name):
    print(" Gemini evaluating full rubric with explanations")

    prompt = f"""
You are evaluating a student's architecture assignment on the architect {architect_name}.

This is a formal submission for university credit. You are receiving the full document as **images**, so you can directly observe the formatting, embedded images, captions, structure, and layout.

---

###  How to Grade:

- Be **fair and constructive**. If formatting is inconsistent, information is missing, or citations are weak, **please call it out clearly**.
- Do not sugarcoat — students are expected to revise based on your feedback.
- If something is strong, note it. If it's flawed, critique it.
- When scoring, **prioritize**:
  - Accuracy of academic citations
  - Caption and image attribution clarity
  - Clear distinction between interior vs exterior images
  - Overall layout and visual professionalism

---

###  Additional Clarifications:

-  Images are embedded (not just links)
-  Captions below images include attribution (URLs or photographer names)
-  A student photo and bio appear on Page 2
-  Table of Contents is present
-  10 buildings are described
-  Redundant links are likely citations, not missing content

---

###  RUBRIC CRITERIA

Please assess each of the following categories. For every criterion:

1. Give a **detailed justification** (1–2 paragraphs)
2. Assign a score **out of 5** based on the detailed rubric below

Format:
**[Category Name]**
Justification: ...
Score: x/5

---

###  Categories and Rubric Anchors

**1. Architect Selection & Scope**
- 5 = Clearly identifies one architect from Book Two, explicitly stated, on-topic
- 3–4 = Identifies Book Two architect, but clarity or justification could improve
- 1–2 = Architect unclear, off-topic, or not from Book Two

**2. Organization & Document Setup**
- 5 = Clear Table of Contents + labeled sections for bio, buildings, refs, student bio
- 3–4 = Minor issues with layout or missing headers
- 1–2 = Poor organization, missing sections, or hard to follow

**3. Biographical Content (750 words)**
- 5 = Covers who they are, achievements, education, significance, 1st building, typologies
- 3–4 = Mostly complete, with slight omissions or light detail
- 1–2 = Underdeveloped or below word count, missing major points

**4. Citation of Architect Biography**
- 5 = 5–10 academic references, correct APA formatting, includes DOIs and citation counts
- 3–4 = APA errors or missing DOIs, but still academically relevant
- 1–2 = Few or no academic references, poor or irrelevant sources

**5. Selection & Quality of Images**
- 5 = 10 buildings, 3+ exterior + 5+ interior per building, high-res
- 3–4 = Most buildings meet criteria; a few lack resolution or quantity
- 1–2 = Many buildings missing images or poor quality

**6. Image Citation & Attribution**
- 5 = Every image has clear, consistent source or photographer citation
- 3–4 = Most are cited but with some inconsistencies
- 1–2 = Citations mostly missing, inconsistent, or improperly formatted

**7. Coverage of 10 Famous Buildings**
- 5 = All 10 named + location + significance statement (1–2 sentences)
- 3–4 = Buildings listed but some lack significance or location
- 1–2 = Several missing or incomplete building descriptions

**8. Image Relevance**
- 5 = All images relate directly to described buildings, match descriptions, show architectural value
- 3–4 = Most images relevant, some generic or misaligned
- 1–2 = Several images are off-topic or not associated with described buildings

**9. Personal Bio & Photo**
- 5 = Professional photo and bio (1–2 sentences), correctly placed after TOC
- 3–4 = Present but minor formatting/image issues
- 1–2 = Photo or bio is low quality, misplaced, or absent

**10. Overall Completeness & Presentation**
- 5 = Fully polished, clean layout, minimal repetition, suitable for web/publication
- 3–4 = Clear submission, but lacks design polish or has formatting repetition
- 1–2 = Sloppy or rushed presentation; visual issues hurt readability

---

 Please start your rubric-based analysis below:
"""

    doc = fitz.open(pdf_path)
    all_pages_as_images = [page.get_pixmap(dpi=300).pil_tobytes("png") for page in doc]
    response = vision_model.generate_content(
         [prompt] + [Image.open(BytesIO(img_bytes)) for img_bytes in all_pages_as_images]
      )

    print(response.text)

    def extract_score(label, out_of):
        match = re.search(label + r".*?Score:\s*(\d+)/" + str(out_of), response.text, re.IGNORECASE | re.DOTALL)
        return int(match.group(1)) if match else 0

    return {
        "architect_chosen": {"score": extract_score("Architect Selection", 5)},
        "doc_and_slides": {"score": extract_score("Organization", 5)},
        "bio_750_words": {"score": extract_score("Biographical Content", 5)},
        "bio_structure": {"score": extract_score("Biographical Structure", 5)},
        "bio_references": {"score": extract_score("Citation of Architect Biography", 5)},
        "10_buildings_with_images": {"score": extract_score("Coverage of 10 Famous Buildings", 5)},
        "image_quality": {"score": extract_score("Selection & Quality of Images", 5)},
        "image_citations": {"score": extract_score("Image Citation & Attribution", 5)},
        "image_relevance": {"score": extract_score("Image Relevance", 5)},
        "personal_bio_photo": {"score": extract_score("Personal Bio", 5)},
        "presentation_polish": {"score": extract_score("Presentation Polish", 5)},
    }

Final Aggregator + Full Diagnostic Report

In [11]:
# def generate_detailed_scorecard(scores, image_caption_details=None):
#     print(" Compiling final scorecard")

#     # Total and max only for defined rubric keys
#     total = sum([scores[k]["score"] for k in scores if k in rubric])
#     max_total = sum([rubric[k] for k in scores if k in rubric])
#     final_percentage = (total / max_total) * 100 if max_total else 0

#     grade = "A" if final_percentage >= 50 else "B" if final_percentage >= 46 else "C" if final_percentage >= 42 else "D"

#     # print(f"Final Grade: {grade} ({round(final_percentage, 2)}%)")
#     rubric_table = pd.DataFrame([
#         {
#             "Criterion": k.replace("_", " ").title(),
#             "Score": scores[k]["score"],
#             "Max": rubric[k],
#             "Description": rubric_descriptions.get(k, "")
#         }
#         for k in rubric if k in scores
#     ])
#     display(rubric_table)
#     if image_caption_details:
#         print("\n Image Caption & Relevance Feedback:")
#         df = pd.DataFrame(image_caption_details["details"])
#         display(df)

#     return {
#         "rubric_scores": {k: scores[k]["score"] for k in rubric if k in scores},
#         "final_percent": round(final_percentage, 2),
#         "grade": grade,
#         "image_feedback_table": image_caption_details
#     }

def generate_detailed_scorecard(scores, image_caption_details=None):
    print(" Compiling final scorecard")

    # Total and max only for defined rubric keys
    total = sum([scores[k]["score"] for k in scores if k in rubric])
    max_total = sum([rubric[k] for k in scores if k in rubric])
    final_percentage = (total / max_total) * 100 if max_total else 0

    grade = "A" if final_percentage >= 50 else "B" if final_percentage >= 46 else "C" if final_percentage >= 42 else "D"

    # print(f"Final Grade: {grade} ({round(final_percentage, 2)}%)")
    rubric_table = pd.DataFrame([
        {
            "Criterion": k.replace("_", " ").title(),
            "Score": scores[k]["score"],
            "Max": rubric[k],
            "Description": rubric_descriptions.get(k, "")
        }
        for k in rubric if k in scores
    ])
    return rubric_table

In [12]:
def extract_references_from_text(text):
    print(" Extracting references from text")
    lines = text.split("\n")
    references = []
    for line in lines:
        if re.search(r"\(\d{4}\)", line) and any(x in line.lower() for x in ["doi", "archdaily", "e-architect", "https://", "http://"]):
            references.append(line.strip())
    return references


In [13]:
def evaluate_biography(text):
    print(" Evaluating biography: checking word count and required sections")
    result = {}
    doc = nlp(text)
    result["word_count"] = len([token.text for token in doc if token.is_alpha])

    required_sections = [
        "who they are",
        "famous for",
        "studied",
        "significance",
        "influence",
        "types of buildings",
        "first building"
    ]

    section_hits = sum([1 for section in required_sections if section.lower() in text.lower()])
    result["structure_score"] = int((section_hits / len(required_sections)) * rubric["bio_structure"])
    result["score"] = rubric["bio_750_words"] if result["word_count"] >= 700 else int((result["word_count"] / 750) * rubric["bio_750_words"])

    return result


In [14]:
def evaluate_image_quality(image_data):
    print(" Evaluating image resolution")
    high_res_count = sum(1 for img in image_data if img["is_high_res"])
    total_images = len(image_data)
    
    quality_score = int((high_res_count / max(1, total_images)) * rubric["image_quality"])
    
    print(f" {high_res_count}/{total_images} images are high resolution")
    return {"high_res_count": high_res_count, "score": quality_score}

Main Runner Pipeline

In [18]:
# def run_autograder_full(pdf_path, architect_name="Bjarke Ingels", debug=False):
#     print("Starting full grading pipeline")
#     text = extract_text_from_pdf(pdf_path)
#     images = extract_images_from_pdf(pdf_path)
#     references = extract_references_from_text(text)
#     image_caption_context = get_caption_candidates(text, images)
#     enriched_feedback = evaluate_images_with_gemini(images, architect_name, debug)
#     image_caption_score = evaluate_image_structure_and_captions(enriched_feedback, image_caption_context)
#     gemini_scores = gemini_detailed_rubric_eval(text, architect_name)
#     manual_scores = {}
#     renamed_gemini = {
#         "architect_chosen": gemini_scores["architect_chosen"],
#         "doc_and_slides": gemini_scores["doc_and_slides"],
#         "personal_bio_photo": gemini_scores["personal_bio_photo"],
#         "presentation_polish": gemini_scores["presentation_polish"]
#     }
#     all_scores = {**manual_scores, **renamed_gemini}
#     result = generate_detailed_scorecard(all_scores, image_caption_score)
#     return result

def run_autograder_full(pdf_path, architect_name="Bjarke Ingels", debug=False):
    print("Starting full grading pipeline")
    text = extract_text_from_pdf(pdf_path)
    images = extract_images_from_pdf(pdf_path)
    references = extract_references_from_text(text)
    image_caption_context = get_caption_candidates(text, images)
    enriched_feedback = evaluate_images_with_gemini(images, architect_name, debug)
    image_caption_score = evaluate_image_structure_and_captions(enriched_feedback, image_caption_context)
    gemini_scores = gemini_detailed_rubric_eval(text, architect_name)
    manual_scores = {}
    renamed_gemini = {
        "architect_chosen":           gemini_scores["architect_chosen"],
        "doc_and_slides":             gemini_scores["doc_and_slides"],
        "bio_750_words":              gemini_scores["bio_750_words"],
        "bio_structure":              gemini_scores["bio_structure"],
        "bio_references":             gemini_scores["bio_references"],
        "10_buildings_with_images":   gemini_scores["10_buildings_with_images"],
        "image_quality":              gemini_scores["image_quality"],
        "image_citations":            gemini_scores["image_citations"],
        "image_relevance":            gemini_scores["image_relevance"],
        "personal_bio_photo":         gemini_scores["personal_bio_photo"],
        "presentation_polish":        gemini_scores["presentation_polish"],
    }
    all_scores = {**manual_scores, **renamed_gemini}
    result = generate_detailed_scorecard(all_scores, image_caption_score)
    return result

Run 

In [19]:
result = run_autograder_full("/Users/ziyaozhou/desktop/submissions/dasilvatheo_LATE_171930_14930244_HW A1.pdf", "Bjarke Ingels", debug=True)

Starting full grading pipeline
 Extracting text from: /Users/ziyaozhou/desktop/submissions/dasilvatheo_LATE_171930_14930244_HW A1.pdf
 Extracted text from PDF
 Extracting images from: /Users/ziyaozhou/desktop/submissions/dasilvatheo_LATE_171930_14930244_HW A1.pdf
 Extracted 76 images
 Extracting references from text
Scanning for image captions...
 Evaluating image content and relevance using Gemini...


Evaluating images:   1%|▏         | 1/76 [00:02<03:11,  2.55s/it]

Image page1_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "N/A",
  "relevance_score": "1/10",
  "justification": "The image shows a person wearing a graduation cap and gown, likely related to a graduation ceremony. It does not depict any architectural structure, let alone one designed by Bjarke Ingels. It is highly irrelevant to an architecture project about Bjarke Ingels.",
  "architectural_features_visible": false
}
```


Evaluating images:   3%|▎         | 2/76 [00:04<02:55,  2.37s/it]

Image page8_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "The image shows the interior of the Cathedral of Brasília, designed by Oscar Niemeyer, not Bjarke Ingels. Thus, it is not relevant to a project about Bjarke Ingels' architecture. Architectural features are clearly visible (roof structure, layout, sculptures), but they're relevant to Niemeyer, not Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:   4%|▍         | 3/76 [00:07<02:52,  2.37s/it]

Image page9_img1.png feedback:
 {"building_detected": "No", "interior_or_exterior": "Interior", "relevance_score": "1/10", "justification": "The building in this image is the Cathedral of Brasilia, designed by Oscar Niemeyer. It has nothing to do with Bjarke Ingels. This image is not relevant for a project on Bjarke Ingels. The image does showcase prominent architectural features, like the geometry of the roof, the use of stained glass, and the layout of the space.", "architectural_features_visible": true}


Evaluating images:   5%|▌         | 4/76 [00:08<02:35,  2.16s/it]

Image page9_img2.png feedback:
 ```json
{
  "building_detected": "Cathedral of Brasília",
  "interior_or_exterior": "exterior",
  "relevance_score": "1/10",
  "justification": "The image shows the Cathedral of Brasília, designed by Oscar Niemeyer, not Bjarke Ingels. Therefore, it is not relevant to a project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:   7%|▋         | 5/76 [00:10<02:17,  1.93s/it]

Image page10_img1.png feedback:
 {"building_detected": "No",
  "interior_or_exterior": "Exterior",
  "relevance_score": "1/10",
  "justification": "This image shows the Cathedral of Brasilia designed by Oscar Niemeyer, not a building by Bjarke Ingels. Therefore, it is not relevant for an academic project about Bjarke Ingels.",
  "architectural_features_visible": true
}


Evaluating images:   8%|▊         | 6/76 [00:11<02:03,  1.77s/it]

Image page10_img2.png feedback:
 ```json
{
  "building_detected": "Brasilia Cathedral",
  "interior_or_exterior": "Exterior",
  "relevance_score": "0/10",
  "justification": "The image shows the Brasilia Cathedral, which was designed by Oscar Niemeyer, not Bjarke Ingels. Therefore, it is irrelevant for a project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:   9%|▉         | 7/76 [00:14<02:17,  1.99s/it]

Image page11_img1.png feedback:
 {"building_detected": "No", "interior_or_exterior": "exterior", "relevance_score": "1/10", "justification": "The image shows the Cathedral of Brasília, designed by Oscar Niemeyer, not Bjarke Ingels. The architectural features (geometry, structural elements) are visible. Thus, the image has low relevance for a project focused on Bjarke Ingels' work.", "architectural_features_visible": true}


Evaluating images:  11%|█         | 8/76 [00:16<02:09,  1.91s/it]

Image page11_img2.png feedback:
 ```json
{
  "building_detected": "São Francisco de Assis Church, Pampulha",
  "interior_or_exterior": "Exterior",
  "relevance_score": "1/10",
  "justification": "The building is the São Francisco de Assis Church, Pampulha, designed by Oscar Niemeyer. This image is irrelevant to a project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  12%|█▏        | 9/76 [00:18<02:10,  1.95s/it]

Image page12_img1.png feedback:
 {"building_detected": "No", "interior_or_exterior": "Interior", "relevance_score": "1/10", "justification": "This image does not depict any recognizable building designed by Bjarke Ingels. The interior space has architectural elements, but they are not associated with Ingels' work. The wall mural and tilework are not typical of his designs.", "architectural_features_visible": true}


Evaluating images:  13%|█▎        | 10/76 [00:21<02:31,  2.30s/it]

Image page12_img2.png feedback:
 {"building_detected": "No", "interior_or_exterior": "Exterior", "relevance_score": "1/10", "justification": "This image does not show a building designed by Bjarke Ingels. The architectural style is different and not representative of his designs. The building appears to be the Pombahila Palace in Brazil designed by Oscar Niemeyer, which is not relevant to a project about Bjarke Ingels.", "architectural_features_visible": true}


Evaluating images:  14%|█▍        | 11/76 [00:23<02:35,  2.40s/it]

Image page13_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Exterior",
  "relevance_score": "1/10",
  "justification": "The building in the image is not designed by Bjarke Ingels. It appears to be the Church of Saint Francis of Assisi in Pampulha, Brazil, designed by Oscar Niemeyer. Thus, the image is not relevant to a project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  16%|█▌        | 12/76 [00:25<02:25,  2.27s/it]

Image page13_img2.png feedback:
 ```json
{
  "building_detected": "Casa da Cultura (House of Culture) in Paraty, Brazil by Studio MK27",
  "interior_or_exterior": "Interior",
  "relevance_score": "2/10",
  "justification": "This image is not relevant to Bjarke Ingels as it shows a building designed by Studio MK27. While it is architecturally interesting and shows interior features, it doesn't align with the stated project's focus.",
  "architectural_features_visible": true
}
```


Evaluating images:  17%|█▋        | 13/76 [00:28<02:32,  2.42s/it]

Image page14_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "2/10",
  "justification": "The image depicts the interior of a church, specifically the Igreja de São Francisco de Assis in Pampulha, Brazil. This church was designed by Oscar Niemeyer, not Bjarke Ingels.  Therefore, it has low relevance for a project focused on Bjarke Ingels. While the image shows the layout, artwork, and seating arrangement, it doesn't align with the architectural subject.",
  "architectural_features_visible": true
}
```


Evaluating images:  18%|█▊        | 14/76 [00:32<02:50,  2.74s/it]

Image page14_img2.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "The image does not depict a building designed by Bjarke Ingels. It shows the interior of what appears to be a building with curved architectural elements. It does not align with the common characteristics of BIG's design.",
  "architectural_features_visible": true
}
```


Evaluating images:  20%|█▉        | 15/76 [00:34<02:40,  2.63s/it]

Image page15_img1.png feedback:
 {"building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "The image does not depict any building designed by Bjarke Ingels. It shows the interior of a building with a gold-tiled wall, which doesn't align with BIG's architectural style. Therefore, the image is not relevant to an academic project about Bjarke Ingels.",
  "architectural_features_visible": true
}


Evaluating images:  21%|██        | 16/76 [00:38<03:04,  3.08s/it]

Image page15_img2.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "This image does not appear to be a building designed by Bjarke Ingels. The architectural style is very different from his designs. The architecture in this picture is reminiscent of Oscar Niemeyer and likely taken inside the Planalto Palace in Brasilia. Therefore, it is not relevant for a project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  22%|██▏       | 17/76 [00:40<02:37,  2.67s/it]

Image page16_img1.png feedback:
 {"building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "This image shows the interior of a library or office space, but it does not appear to be designed by Bjarke Ingels. It is more likely to be a traditional library setting, with a large collection of books and classic furniture. Bjarke Ingels is known for more contemporary and innovative designs.",
  "architectural_features_visible": true
}


Evaluating images:  24%|██▎       | 18/76 [00:44<02:57,  3.07s/it]

Image page16_img2.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "The image depicts an interior space with large windows looking out onto a landscape. The architectural style is not characteristic of Bjarke Ingels' work. It appears to be a modernist building, potentially by Oscar Niemeyer based on the visual cues (columns and curves in the facade). Hence, it is not relevant to a project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  25%|██▌       | 19/76 [00:47<02:51,  3.00s/it]

Image page17_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "This image does not appear to show any building designed by Bjarke Ingels. While it is an interior shot with some architectural features visible, it lacks any discernible elements associated with Ingels' design style. The image depicts a formal room with a long table, chairs, and a unique lighting fixture, but the style is not reminiscent of his projects. Therefore, its relevance to a project about Bjarke Ingels is very low.",
  "architectural_features_visible": true
}
```


Evaluating images:  26%|██▋       | 20/76 [00:49<02:37,  2.81s/it]

Image page17_img2.png feedback:
 ```json
{
  "building_detected": "No. This is the Palácio da Alvorada in Brasília, Brazil designed by Oscar Niemeyer.",
  "interior_or_exterior": "Exterior",
  "relevance_score": "0/10",
  "justification": "This image depicts a building designed by Oscar Niemeyer, not Bjarke Ingels. Therefore, it is completely irrelevant to an academic project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  28%|██▊       | 21/76 [00:51<02:21,  2.58s/it]

Image page18_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Exterior",
  "relevance_score": "1/10",
  "justification": "The building in the image is not designed by Bjarke Ingels. It is the Palácio da Alvorada, designed by Oscar Niemeyer in Brasilia. Therefore, the image is not relevant to an academic project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  29%|██▉       | 22/76 [00:53<02:12,  2.45s/it]

Image page18_img2.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Exterior",
  "relevance_score": "1/10",
  "justification": "The building depicted in the image is not designed by Bjarke Ingels. It appears to be the Palácio da Alvorada in Brasilia, Brazil, designed by Oscar Niemeyer. Therefore, the image is not relevant to an academic project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  30%|███       | 23/76 [00:55<02:00,  2.27s/it]

Image page19_img1.png feedback:
 ```json
{
  "building_detected": "Brazilian National Congress",
  "interior_or_exterior": "exterior",
  "relevance_score": "1/10",
  "justification": "This image shows the Brazilian National Congress in Brasilia, designed by Oscar Niemeyer. It has absolutely no relevance to an architectural project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  32%|███▏      | 24/76 [00:58<02:06,  2.43s/it]

Image page19_img2.png feedback:
 ```json
{
  "building_detected": "National Congress of Brazil",
  "interior_or_exterior": "Exterior",
  "relevance_score": "1/10",
  "justification": "The National Congress of Brazil was designed by Oscar Niemeyer, not Bjarke Ingels. Therefore, it is not relevant to an academic project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  33%|███▎      | 25/76 [01:01<02:17,  2.70s/it]

Image page20_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Exterior",
  "relevance_score": "0/10",
  "justification": "This image depicts the National Congress of Brazil in Brasília, designed by Oscar Niemeyer. It is not related to the work of Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  34%|███▍      | 26/76 [01:04<02:20,  2.81s/it]

Image page20_img2.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "This image shows the interior of the National Congress of Brazil, designed by Oscar Niemeyer. It has nothing to do with Bjarke Ingels. Therefore, the image has extremely low relevance for an academic project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  36%|███▌      | 27/76 [01:08<02:25,  2.96s/it]

Image page21_img1.png feedback:
 {"building_detected": "No", "interior_or_exterior": "Exterior", "relevance_score": "1/10", "justification": "The building in the image is the National Congress of Brazil, designed by Oscar Niemeyer, not Bjarke Ingels. Therefore, the image is not relevant to a project about Bjarke Ingels.", "architectural_features_visible": true}


Evaluating images:  37%|███▋      | 28/76 [01:09<02:06,  2.63s/it]

Image page21_img2.png feedback:
 {"building_detected": "National Congress of Brazil",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "This building was designed by Oscar Niemeyer, not Bjarke Ingels. Therefore it is not relevant.",
  "architectural_features_visible": true
}


Evaluating images:  38%|███▊      | 29/76 [01:12<01:57,  2.51s/it]

Image page22_img1.png feedback:
 ```json
{
  "building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "This image shows the interior of a legislative assembly, specifically the Brazilian National Congress building in Brasília. This building was not designed by Bjarke Ingels. Therefore, it is not relevant to an academic project about his work.",
  "architectural_features_visible": true
}
```


Evaluating images:  39%|███▉      | 30/76 [01:14<01:56,  2.52s/it]

Image page22_img2.png feedback:
 {"building_detected": "No",
  "interior_or_exterior": "Interior",
  "relevance_score": "1/10",
  "justification": "This image shows the interior of a legislative building that is not designed by Bjarke Ingels. It appears to be the National Congress of Brazil in Brasilia. The relevance to an academic project about Bjarke Ingels is very low unless it's used as a contrast to his style.",
  "architectural_features_visible": true
}


Evaluating images:  41%|████      | 31/76 [01:17<02:00,  2.69s/it]

Image page23_img1.png feedback:
 ```json
{
  "building_detected": "Niteroi Contemporary Art Museum (MAC) by Oscar Niemeyer",
  "interior_or_exterior": "exterior",
  "relevance_score": "1/10",
  "justification": "The image shows the Niteroi Contemporary Art Museum, which was designed by Oscar Niemeyer, not Bjarke Ingels. Therefore, it is not relevant for an architecture project about Bjarke Ingels.",
  "architectural_features_visible": true
}
```


Evaluating images:  41%|████      | 31/76 [01:19<01:54,  2.55s/it]


KeyboardInterrupt: 

In [None]:
import pandas as pd
from pathlib import Path

# 1. Your mapping of PDF filenames to chosen architects
pdf_dict = {
    "dasilvatheo_LATE_171930_14930244_HW A1.pdf": "Oscar Niemeyer",
    "davidmatthew_LATE_134808_14949557_COGS 160_ A1.pdf": "Kazuyo Sejima",
    "delacruzrenier_LATE_226065_14930691_Paul Rudolph_ Life, Work, and Enduring Influence.pdf": "Paul Rudolph",
    "emralinolalaine_LATE_162831_14938886_Glenn Murcutt_ “Touching the Earth Lightly”.pdf": "Glenn Murcutt",
    "hsucalvin_166834_14967682_Cogs 160 Slide and Doc Links for Turn-in.pdf": "Frank Lloyd Wright",
    "khirwadkarisha_166304_14925482_Report.pdf": "Ken Yeang",
    "krukjulia_LATE_198551_15046680_Eero Saarinen_ Cogs 160 Research Document.pdf": "Eero Saarinen",
    "liangmichael_188529_14924464_COGS 160 - Docs.pdf": "Zaha Hadid",
    "mainayardaniel_127050_14924649_COGS 160 Le Corbusier Doc.pdf": "Le Corbusier",
    "marvanalicia_212624_14925657_A.Marvan_LuisBarragan.pdf": "Luis Barragan",
    "spavenchristine_LATE_96300_14929508_COGS 160_1 (1).pdf": "Richard Rogers",
    "vidyalatanvi_LATE_218146_14937211_COGS 160 A1-1.docx": "Norman Robert Foster",
    "wucynthia_LATE_167097_15019933_COGS 160.pdf": "Philip Johnson",
    "yangheiman_LATE_190478_14963531_COGS160.pdf": "Sou Fujimoto",
}

def batch_scorecards(pdf_dict, submissions_folder, debug=False):
    submissions_folder = Path(submissions_folder)
    records = []

    for filename, architect in pdf_dict.items():
        pdf_path = submissions_folder / filename
        if not pdf_path.exists():
            print(f"⚠️ Warning: file not found: {pdf_path}")
            continue

        rubric_df = run_autograder_full(str(pdf_path), architect, debug=debug)
        rubric_df = rubric_df.assign(File=filename, Architect=architect)
        records.append(rubric_df)

    long_df = pd.concat(records, ignore_index=True)
    return long_df

submissions_dir = "/Users/ziyaozhou/desktop/submissions"
long_df = batch_scorecards(pdf_dict, submissions_dir, debug=False)
print(long_df.head())

wide_df = long_df.pivot_table(
    index=["File", "Architect"],
    columns="Criterion",
    values="Score"
).reset_index()

wide_df.columns.name = None
print(wide_df.head())


Starting full grading pipeline
 Extracting text from: /Users/ziyaozhou/desktop/submissions/dasilvatheo_LATE_171930_14930244_HW A1.pdf
 Extracted text from PDF
 Extracting images from: /Users/ziyaozhou/desktop/submissions/dasilvatheo_LATE_171930_14930244_HW A1.pdf
 Extracted 76 images
 Extracting references from text
Scanning for image captions...
 Evaluating image content and relevance using Gemini...


Evaluating images:  13%|█▎        | 10/76 [00:19<02:20,  2.13s/it]

 Still failed to parse JSON from page12_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  49%|████▊     | 37/76 [01:27<01:48,  2.79s/it]

 Still failed to parse JSON from page26_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  66%|██████▌   | 50/76 [02:01<01:05,  2.53s/it]

 Still failed to parse JSON from page32_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  78%|███████▊  | 59/76 [02:33<01:11,  4.18s/it]

 Still failed to parse JSON from page37_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  92%|█████████▏| 70/76 [03:05<00:14,  2.47s/it]

 Still failed to parse JSON from page42_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  95%|█████████▍| 72/76 [03:09<00:09,  2.33s/it]

 Still failed to parse JSON from page43_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 76/76 [03:20<00:00,  2.64s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I've reviewed the student's submission on Oscar Niemeyer. Here's my rubric-based analysis:

**1. Architect Selection & Scope**
Justification: The student has selected Oscar Niemeyer, a well-known architect included in Book Two, making the selection appropriate and on-topic. This is explicitly stated as well.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document contains sections for a student bio, buildings, and references. There isn't a Table of Contents, nor is there a dedicated Biography Section. Sections are somewhat labeled (e.g., "Step 1"), but this is not as effective as dedicated headings. The organization could be improved with consistent headings and a TOC.
Score: 3/5

**3. Biographical Content (750 words)**
Justification: A biographical section seems to be present under the heading "Step 3: Bibliography". The writing there covers key aspects of Niemeyer's l

Evaluating images:   6%|▌         | 4/71 [00:11<03:28,  3.12s/it]

 Still failed to parse JSON from page6_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  51%|█████     | 36/71 [01:49<01:36,  2.77s/it]

 Still failed to parse JSON from page24_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  94%|█████████▍| 67/71 [03:41<00:12,  3.21s/it]

 Still failed to parse JSON from page41_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  97%|█████████▋| 69/71 [03:47<00:06,  3.09s/it]

 Still failed to parse JSON from page42_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 71/71 [03:53<00:00,  3.29s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I've reviewed the document. Here's my rubric-based analysis:

**1. Architect Selection & Scope**
Justification: The student has selected Oscar Niemeyer. While the submission does not *explicitly* state this is an architect found in "Book Two," this is an incredibly famous architect, and highly plausible to be found in the required book. Therefore, I'm willing to assume the student is following the instructions.
Score: 4/5

**2. Organization & Document Setup**
Justification: There isn't a Table of Contents, which is a clear failure. The document is split into sections, but the labeling is strange (e.g. "Step 1," "Step 2," "Step 3," "Step 4"). The sections themselves are not consistently labeled or logically organized. It goes straight from Niemeyer's basic information to "10 famous buildings." There's no proper section for the biographical information.
Score: 1/5

**3. Biographical Content (

Evaluating images: 0it [00:00, ?it/s]

Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations





Okay, here's a breakdown of the student's submission based on the provided rubric.

**1. Architect Selection & Scope**
Justification: The student chose Oscar Niemeyer. Based on the prompt ("Book Two"), this is a valid architect. The submission is entirely focused on Niemeyer's work, making it very much on-topic.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document includes a student bio and photo, sections describing buildings, and a reference section. The Table of Contents is missing which is a critical component and there is a weird labeling "Step 1: Personal Information" and so on.
Score: 3/5

**3. Biographical Content (750 words)**
Justification: The biographical content appears at the very end before the references. It likely meets the 750-word requirement. However, it's difficult to ascertain whether it covers all the specified areas (achievements, education, significance, 1st building, typologies) without a close reading. From a scan, it does address achi

Evaluating images:  74%|███████▍  | 23/31 [01:07<00:26,  3.29s/it]

 Still failed to parse JSON from page9_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 31/31 [01:39<00:00,  3.22s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will evaluate the submission based on the provided rubric and images.

**1. Architect Selection & Scope**
Justification: The student selected Oscar Niemeyer. While there is no explicit claim that he is "from Book Two," Niemeyer is a very famous architect and a reasonable choice. The student focuses entirely on buildings designed by Niemeyer, making the scope appropriate.
Score: 5/5

**2. Organization & Document Setup**
Justification: The organization is unusual but functional. There is no clear table of contents to start. However, there are distinct labeled sections for the student bio, 10 buildings, and bibliography. The student uses "Step" numbers as section headers instead of standard TOC-style labels. It's usable, but very non-standard and therefore loses some points.
Score: 3/5

**3. Biographical Content (750 words)**
Justification: The biographical content appears only on the final 

Evaluating images:   5%|▌         | 5/100 [00:20<06:57,  4.39s/it]

 Still failed to parse JSON from page7_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  29%|██▉       | 29/100 [02:04<05:20,  4.52s/it]

 Still failed to parse JSON from page20_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  30%|███       | 30/100 [02:08<04:48,  4.12s/it]

 Still failed to parse JSON from page21_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  35%|███▌      | 35/100 [02:31<05:17,  4.89s/it]

 Still failed to parse JSON from page23_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  49%|████▉     | 49/100 [03:25<03:18,  3.89s/it]

 Still failed to parse JSON from page31_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  52%|█████▏    | 52/100 [03:34<02:37,  3.28s/it]

 Still failed to parse JSON from page33_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  62%|██████▏   | 62/100 [04:14<02:45,  4.37s/it]

 Still failed to parse JSON from page39_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  64%|██████▍   | 64/100 [04:21<02:23,  3.97s/it]

 Still failed to parse JSON from page40_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  68%|██████▊   | 68/100 [04:35<01:58,  3.70s/it]

 Still failed to parse JSON from page42_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  74%|███████▍  | 74/100 [04:59<01:36,  3.72s/it]

 Still failed to parse JSON from page46_img3.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  86%|████████▌ | 86/100 [05:49<01:10,  5.00s/it]

 Still failed to parse JSON from page49_img3.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  94%|█████████▍| 94/100 [06:20<00:23,  3.88s/it]

 Still failed to parse JSON from page51_img4.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 100/100 [06:43<00:00,  4.03s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will assess this submission based on the provided rubric.

**1. Architect Selection & Scope**
Justification: The submission focuses on Oscar Niemeyer, who is a well-known architect and would be considered within the scope of "Book Two." The architect is clearly identified at the start of the "Step 2: 10 famous buildings" section, and the entire report is dedicated to his work.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document is structured with clear sections, including the student bio/photo, the discussion of the 10 buildings, and a bibliography. The sections are labeled with headers like "Step 1: Personal Information," "Step 2: 10 famous buildings," "Step 3: Bibliography," and "Step 4: Reference." However, there's no table of contents.
Score: 4/5

**3. Biographical Content (750 words)**
Justification: The biographical content appears in the "Step 3: Bibliograp

Evaluating images:   2%|▏         | 1/52 [00:04<03:57,  4.66s/it]

 Still failed to parse JSON from page3_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  17%|█▋        | 9/52 [00:35<03:02,  4.24s/it]

 Still failed to parse JSON from page11_img3.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  25%|██▌       | 13/52 [00:49<02:20,  3.60s/it]

 Still failed to parse JSON from page13_img3.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  29%|██▉       | 15/52 [00:56<02:11,  3.56s/it]

 Still failed to parse JSON from page14_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  33%|███▎      | 17/52 [01:04<02:10,  3.73s/it]

 Still failed to parse JSON from page15_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  40%|████      | 21/52 [01:20<02:01,  3.91s/it]

 Still failed to parse JSON from page17_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  42%|████▏     | 22/52 [01:27<02:28,  4.94s/it]

 Still failed to parse JSON from page18_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  48%|████▊     | 25/52 [01:42<02:15,  5.02s/it]

 Still failed to parse JSON from page19_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  56%|█████▌    | 29/52 [01:59<01:44,  4.53s/it]

 Still failed to parse JSON from page21_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  58%|█████▊    | 30/52 [02:03<01:37,  4.42s/it]

 Still failed to parse JSON from page22_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  67%|██████▋   | 35/52 [02:21<01:00,  3.58s/it]

 Still failed to parse JSON from page24_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  77%|███████▋  | 40/52 [02:39<00:44,  3.75s/it]

 Still failed to parse JSON from page27_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  88%|████████▊ | 46/52 [03:00<00:21,  3.56s/it]

 Still failed to parse JSON from page30_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  90%|█████████ | 47/52 [03:04<00:17,  3.57s/it]

 Still failed to parse JSON from page31_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 52/52 [03:22<00:00,  3.89s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will now assess this student's submission based on the provided rubric and images.

**1. Architect Selection & Scope**
Justification: The student chose Oscar Niemeyer. I do not have Book Two available to confirm, but based on his fame and contributions, it is HIGHLY likely he appears there. The selection is definitely on-topic for an architectural assignment.
Score: 5/5

**2. Organization & Document Setup**
Justification: There is an attempt at organization, but it's not well-executed. There's no Table of Contents. There are "Step" labels (Personal Information, 10 Famous Buildings, Bibliography, References) that attempt to delineate sections, but they are inconsistently applied and don't function like true section headers. The "steps" are also numbered strangely, starting with "Step 1: Personal Information" on page 3. The student bio is present with a photo, but it is on page 2 of the doc

Evaluating images:   2%|▏         | 2/128 [00:04<04:57,  2.36s/it]

 Still failed to parse JSON from page3_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  37%|███▋      | 47/128 [02:24<06:01,  4.47s/it]

 Still failed to parse JSON from page20_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  39%|███▉      | 50/128 [02:36<05:41,  4.38s/it]

 Still failed to parse JSON from page21_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  45%|████▍     | 57/128 [02:59<03:57,  3.35s/it]

 Still failed to parse JSON from page23_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  55%|█████▌    | 71/128 [03:46<03:56,  4.15s/it]

 Still failed to parse JSON from page27_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  71%|███████   | 91/128 [04:43<01:48,  2.93s/it]

 Still failed to parse JSON from page33_img5.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  91%|█████████ | 116/128 [06:08<00:37,  3.10s/it]

 Still failed to parse JSON from page40_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 128/128 [06:50<00:00,  3.20s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will evaluate the student submission based on the provided rubric and images.

**1. Architect Selection & Scope**
Justification: The student selected Oscar Niemeyer. I don't have Book Two to confirm that Niemeyer is in there, but he is a well-known architect and this is likely correct. The architect is clearly stated.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document includes sections for personal information (student bio), 10 buildings and a bibliography. However, there is no table of contents.
Score: 3/5

**3. Biographical Content**
Justification: The student's bio is roughly one page long, and includes basic biographical information. The quality of the writing is not high, and the information is presented as bullet points. The information about Niemeyer is spread across two pages in the "Bibliography" section, and the bio is only roughly 300 words.
Score: 2/5

Evaluating images:  15%|█▌        | 14/91 [00:46<03:44,  2.91s/it]

 Still failed to parse JSON from page9_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  23%|██▎       | 21/91 [01:10<03:37,  3.11s/it]

 Still failed to parse JSON from page13_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  31%|███       | 28/91 [01:35<03:56,  3.75s/it]

 Still failed to parse JSON from page17_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  96%|█████████▌| 87/91 [05:18<00:13,  3.29s/it]

 Still failed to parse JSON from page52_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 91/91 [05:31<00:00,  3.64s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will evaluate the submission based on the provided rubric.

**1. Architect Selection & Scope**
Justification: The student has selected Oscar Niemeyer. While I don't have access to "Book Two," Oscar Niemeyer is a major architect and the selection is relevant.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document includes labeled sections: Personal Information, 10 famous buildings, Bibliography, and Reference. There is no table of contents. This reduces readability and makes navigating the document more difficult. The stepping of "Step 1" etc is also awkward, instead of using standard headings.
Score: 3/5

**3. Biographical Content (750 words)**
Justification: The document includes biographical information on the architect on page 6. It covers his life, style and design contributions. The content is less than 750 words. It seems to cover most of the requirements in te

Evaluating images:   8%|▊         | 6/79 [00:16<03:28,  2.85s/it]

 Still failed to parse JSON from page7_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  19%|█▉        | 15/79 [00:52<04:04,  3.83s/it]

 Still failed to parse JSON from page12_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  32%|███▏      | 25/79 [01:30<03:30,  3.90s/it]

 Still failed to parse JSON from page18_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  42%|████▏     | 33/79 [02:05<03:27,  4.51s/it]

 Still failed to parse JSON from page23_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  43%|████▎     | 34/79 [02:09<03:24,  4.55s/it]

 Still failed to parse JSON from page23_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  80%|███████▉  | 63/79 [03:48<00:53,  3.37s/it]

 Still failed to parse JSON from page42_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 79/79 [04:33<00:00,  3.46s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will now provide a detailed rubric-based analysis of the submitted assignment.

**1. Architect Selection & Scope**
Justification: The student chose Oscar Niemeyer, who would have been listed under the broad architecture category of Book Two. This choice aligns with the instructions. Niemeyer is a suitable architect for the assignment.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document lacks a proper Table of Contents. It has labeled sections (Personal Information, Buildings, Bio, References), but the formatting is not professional and the section titles are oddly labelled as "Step 1," "Step 2," etc. which is unconventional for a formal academic submission. The image layout is also problematic (each image appears on its own page).
Score: 2/5

**3. Biographical Content (750 words)**
Justification: The student includes a short biographical summary of Oscar Niemeyer 

Evaluating images:  20%|██        | 1/5 [00:03<00:12,  3.24s/it]

 Still failed to parse JSON from page1_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 5/5 [00:12<00:00,  2.57s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I've reviewed the student's submission on Oscar Niemeyer. Here's my rubric-based analysis:

**1. Architect Selection & Scope**
Justification: The student has correctly identified Oscar Niemeyer, a prominent architect from Book Two. The submission is clearly focused on his work and relevant to the assignment's scope.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document is organized into sections, but there is no table of contents. The sections are labelled, however, the labels don't follow a conventional structure. (e.g. "Step 1: Personal Information", "Step 2: 10 Famous Buildings", "Step 3: Bibliography", "Step 4: Reference")
Score: 2/5

**3. Biographical Content (750 words)**
Justification: The student has included a biographical section. However, the word count is significantly below the required 750 words. The content covers basic information about Oscar Niemeyer 

Evaluating images:  46%|████▋     | 32/69 [01:52<02:18,  3.74s/it]

 Still failed to parse JSON from page19_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  51%|█████     | 35/69 [02:05<02:22,  4.20s/it]

 Still failed to parse JSON from page20_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  70%|██████▉   | 48/69 [02:53<01:37,  4.64s/it]

 Still failed to parse JSON from page28_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  72%|███████▏  | 50/69 [03:01<01:20,  4.26s/it]

 Still failed to parse JSON from page29_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  90%|████████▉ | 62/69 [03:48<00:32,  4.66s/it]

 Still failed to parse JSON from page36_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  91%|█████████▏| 63/69 [03:52<00:27,  4.56s/it]

 Still failed to parse JSON from page37_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  93%|█████████▎| 64/69 [03:57<00:23,  4.76s/it]

 Still failed to parse JSON from page37_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 69/69 [04:18<00:00,  3.75s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will now assess the student's submission based on the provided rubric.

**1. Architect Selection & Scope**
Justification: The student chose Oscar Niemeyer, who is definitely an architect covered in Book Two. The selection is clearly stated and on-topic.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document is set up with identifiable sections, including a personal bio & photo, buildings, and references. However, the layout is not consistent and is confusing. There isn't a table of contents, the document uses Step 1-4, instead of titles and there isn't clear headings that are easily discernible.
Score: 2/5

**3. Biographical Content (750 words)**
Justification: The biographical content appears to be in the "Step 3: Bibliography" section, which is a mislabeling. The length appears adequate, exceeding the word count. However, it's less about a structured biography and 

Evaluating images: 0it [00:00, ?it/s]

Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations





Okay, I will evaluate this student's submission on Oscar Niemeyer according to the rubric provided.

**1. Architect Selection & Scope**
Justification: The student has clearly selected Oscar Niemeyer, a very appropriate choice from "Book Two". This is explicitly stated in multiple locations. The entire report focuses solely on Niemeyer's work, fulfilling the "on-topic" requirement.
Score: 5/5

**2. Organization & Document Setup**
Justification: The organization is somewhat present, but it's far from clear. There isn't a proper Table of Contents. There are headers, but they are very basic (e.g., "Step 1: Personal Information", "Step 2: 10 famous buildings", "Step 3: Bibliography", "Step 4: Reference") and not descriptive enough to serve as section titles in a TOC. The student bio is present, as are the sections on buildings and references. There is no Architect Bio at all, only a very small snippet introducing his bibliography. A real, dedicated architect biography section is missing.
Sc

Evaluating images:   5%|▍         | 4/81 [00:11<03:32,  2.75s/it]

 Still failed to parse JSON from page4_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  10%|▉         | 8/81 [00:20<02:57,  2.43s/it]

 Still failed to parse JSON from page6_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  44%|████▍     | 36/81 [01:35<01:43,  2.30s/it]

 Still failed to parse JSON from page20_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  56%|█████▌    | 45/81 [01:57<01:27,  2.44s/it]

 Still failed to parse JSON from page24_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  95%|█████████▌| 77/81 [03:40<00:11,  2.96s/it]

 Still failed to parse JSON from page42_img2.png: Expecting value: line 1 column 1 (char 0)


Evaluating images:  98%|█████████▊| 79/81 [03:45<00:05,  2.82s/it]

 Still failed to parse JSON from page43_img1.png: Expecting value: line 1 column 1 (char 0)


Evaluating images: 100%|██████████| 81/81 [03:50<00:00,  2.85s/it]


Evaluating caption presence and structure
 Gemini evaluating full rubric with explanations
Okay, I will evaluate the student's submission based on the provided rubric and images.

**1. Architect Selection & Scope**
Justification: The student chose Oscar Niemeyer, who is a well-known architect. Assuming this architect is included in "Book Two" (which I cannot verify), the selection is appropriate and clearly stated.
Score: 5/5

**2. Organization & Document Setup**
Justification: The document is *partially* organized. There is a rudimentary list of buildings, and a section for the student bio. However, there is no Table of Contents, and there are no clear headers for the biography, building descriptions, or reference sections *in the main body*. The student uses titles such as "Step 1: Personal Information," which is an odd, informal approach. This needs a formal Table of Contents (which is missing) and better section labeling.
Score: 2/5

**3. Biographical Content (750 words)**
Justific

In [None]:
import pandas as pd

def compute_weights_by_variance(long_df, score_col="Score", criterion_col="Criterion"):
    var_series = long_df.groupby(criterion_col)[score_col].var(ddof=0)
    variance_df = var_series.reset_index(name="Variance")
    total_var = variance_df["Variance"].sum()
    if total_var == 0:
        variance_df["Weight"] = 1.0 / len(variance_df)
    else:
        variance_df["Weight"] = variance_df["Variance"] / total_var
    return variance_df.sort_values("Variance", ascending=False).reset_index(drop=True)

def apply_weighted_grades(wide_df, variance_df):
    weight_map = dict(zip(variance_df["Criterion"], variance_df["Weight"]))
    criterion_cols = [c for c in wide_df.columns if c not in ["File", "Architect"]]
    wide_df["WeightedScore"] = 0.0
    for c in criterion_cols:
        weight = weight_map.get(c, 0.0)
        wide_df["WeightedScore"] += (wide_df[c] / 5.0) * weight
    wide_df["WeightedScorePercent"] = wide_df["WeightedScore"] * 100
    return wide_df

variance_df = compute_weights_by_variance(long_df)
wide_df = apply_weighted_grades(wide_df, variance_df)

print(variance_df)
print(wide_df[["File", "Architect", "WeightedScorePercent"]])
wide_df.to_csv('wide_df.csv', index=False)

criterion_cols = [c for c in wide_df.columns if c not in ["File", "Architect", "WeightedScore", "WeightedScorePercent"]]
wide_df['RawScorePercent'] = wide_df[criterion_cols].sum(axis=1) / (5 * len(criterion_cols)) * 100

wide_df['RawRank']      = wide_df['RawScorePercent'].rank(ascending=False, method='dense').astype(int)
wide_df['WeightedRank'] = wide_df['WeightedScorePercent'].rank(ascending=False, method='dense').astype(int)

print(wide_df[['File','Architect','RawScorePercent','RawRank','WeightedScorePercent','WeightedRank']])

wide_df.to_csv('wide_df_with_raw_and_weighted_ranks.csv', index=False)

                   Criterion  Variance    Weight
0              Bio 750 Words  0.698225  0.196013
1         Personal Bio Photo  0.686391  0.192691
2             Doc And Slides  0.556213  0.156146
3             Bio References  0.390533  0.109635
4            Image Citations  0.378698  0.106312
5   10 Buildings With Images  0.366864  0.102990
6              Image Quality  0.236686  0.066445
7            Image Relevance  0.177515  0.049834
8           Architect Chosen  0.071006  0.019934
9              Bio Structure  0.000000  0.000000
10       Presentation Polish  0.000000  0.000000
                                                 File           Architect  \
0          dasilvatheo_LATE_171930_14930244_HW A1.pdf      Oscar Niemeyer   
1   davidmatthew_LATE_134808_14949557_COGS 160_ A1...       Kazuyo Sejima   
2   delacruzrenier_LATE_226065_14930691_Paul Rudol...        Paul Rudolph   
3   emralinolalaine_LATE_162831_14938886_Glenn Mur...       Glenn Murcutt   
4   hsucalvin_166834_149676