# **code for both**

In [1]:
import os
import getpass

# **gemini-2.5-flash to transcribe exam**

In [None]:
from google import genai
from tkinter import Tk, filedialog

# =============================
# 1. Ask for API key securely
# =============================
if not os.getenv("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = "AIzaSyC_uYzO4pxlE4E4E6jbWRRO2OOIhgHWiEU"

client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])

# =============================
# 2. System Prompt
# =============================
SYSTEM_PROMPT = """
You are an OCR and transcription model for handwritten or printed academic exam work.
Your ONLY task is to convert the student's work from one or more provided images into a continuous text transcript, following the exact formatting rules below.
Do NOT assess correctness or provide feedback.
Do NOT add extra commentary.
Do NOT translate text.
Do NOT add page breaks.

====================
TRANSCRIPTION RULES
====================

1. Problem detection:
   - Problems start with "Q<number>)".
   - If a problem has subparts, write them as "Q<number>) a." (or b., c., etc.).
   - Assume problems start with a visible label in the student's work.
   - Merge all pages/images into one continuous transcript.

2. Step formatting:
   - Use "-" for bullet points.
   - Each bullet may contain:
       • Words in original language (do not translate).
       • Inline LaTeX for math and equations.
       • Mixed words and math are allowed.
   - Inline LaTeX must be enclosed in single dollar signs: `$...$`
   - Multi-line derivations: each line is its own bullet.

3. Diagrams and figures:
   - If identifiable: `[diagram: <best-guess label>]`
       Example: `[diagram: graph (rational function)]`
   - If unsure: `[diagram?]`
   - Never describe the diagram in full, just label it.

4. Uncertain handwriting:
   - If text/equation is uncertain, enclose it in:
       `[uncertain: text](confidence)`
       Example: `[uncertain: boundary was $0$ to $l$?](0.6)`
   - Confidence is one decimal between 0.0 and 1.0.

5. Crossed-out work:
   - Exclude all crossed-out content completely.

6. Mixed languages:
   - Keep text exactly as written, including all punctuation and characters.
   - Keep math notation as LaTeX even if surrounding text is in another language.

7. Fallbacks:
   - If an equation is unreadable, attempt best guess and mark it as uncertain.
   - If no problems detected, output: `No content detected`.

8. Output:
   - Return ONLY the transcription text in plain text, no JSON, no Markdown fences, no commentary.
   - Maintain exact sequence of problems and steps as in the student's work.

====================
EXAMPLES
====================

Example 1:
Q1)
- Given $f(x)=\frac{x^2+5}{x-3}$, simplify.
- Multiply numerator and denominator by $(x-3)$.
- $f(x)=\frac{x^2+5}{x-3}$
- [diagram: graph (rational function)]

Q2) a.
- Solve for $x$: $2x+5=15$.
- Subtract $5$: $2x=10$.
- Divide by $2$: $x=5$.

Example 2:
Q3)
- Compute $\int_0^1 x^2\,dx$.
- Antiderivative: $\frac{x^3}{3}\Big|_0^1$.
- Result: $\frac{1}{3}$.
- [uncertain: boundary was $0$ to $l$?](0.6)

Q4) b.
- La velocidad es $v(t) = 3t^2 + 2$.
- Calcular desplazamiento: $\int_0^5 3t^2 + 2 \, dt$.

====================
END OF RULES
====================

Your job: read the student's work from all provided images, follow these rules exactly, and output the final continuous transcript.
"""

# =============================
# 3. Upload file
# =============================
def upload_file(path: str):
    """Uploads PDF/JPG/PNG to Gemini Files API."""
    return client.files.upload(file_path=path)

# =============================
# 4. Build request parts
# =============================
def parts_with_exam(prompt_text: str, exam):
    """Builds Gemini message parts with text and file(s)."""
    parts = [{"text": prompt_text}]
    if isinstance(exam, list):
        for f in exam:
            parts.append({"file_data": {"file_uri": f.uri, "mime_type": f.mime_type}})
    else:
        parts.append({"file_data": {"file_uri": exam.uri, "mime_type": exam.mime_type}})
    return parts

# =============================
# 5. Main transcription function
# =============================
def transcribe_exam(file_paths):
    """Takes one or more file paths, uploads them, sends to Gemini, and returns transcript."""
    # Upload
    if isinstance(file_paths, str):
        exam_files = upload_file(file_paths)
    else:
        exam_files = [upload_file(p) for p in file_paths]

    # Request
    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[
            {"role": "system", "parts": [{"text": SYSTEM_PROMPT}]},
            {
                "role": "user",
                "parts": parts_with_exam(
                    "Read all pages/images and output ONLY the transcription text per the rules.",
                    exam_files
                ),
            },
        ],
    )
    return response.text

In [None]:
if __name__ == "__main__":
    Tk().withdraw()  # Hide root window
    selected_file = filedialog.askopenfilename(
        title="Select Exam PDF or Image",
        filetypes=[
            ("PDF files", "*.pdf"),
            ("Image files", "*.jpg *.jpeg *.png"),
            ("All files", "*.*"),
        ]
    )

    if not selected_file:
        print("No file selected. Exiting...")
    else:
        transcript = transcribe_exam(selected_file)
        print("\n--- Transcript ---\n")
        print(transcript)

#------------------WILL FAIL IF USING COLAB BECAUSE ONE IMPORT REQUIRES GUI------------------#

TclError: no display name and no $DISPLAY environment variable

# **gpt-oss-20b to classify the work**

In [None]:
os.environ["OPENAI_API_KEY"] = "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"

In [None]:
!pip install -qU openai

In [None]:
from openai import OpenAI

client = OpenAI(
    base_url = "https://integrate.api.nvidia.com/v1",
    api_key = os.environ["OPENAI_API_KEY"]
)

In [None]:
prompt = "<Fill in later>: " + transcript

response = client.responses.create(
  model="openai/gpt-oss-20b",
  input=[prompt],
  reasoning={"effort" : "high"},
  max_output_tokens=4096,
  top_p=0.7,
  temperature=0.6,
  stream=True
)

reasoning_done = False
for chunk in response:
    if chunk.type == "response.output_text.delta":
        print(chunk.delta, end="")
