In [None]:
!pip install python-docx

In [None]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, inspect

load_dotenv()

# Build the PostgreSQL URL from environment settings (the original note says the
# host/port refer to a locally-forwarded port).
user     = os.getenv("DB_USER")
pw       = os.getenv("DB_PASSWORD")
host     = os.getenv("DB_HOST")
port     = os.getenv("DB_PORT")
db       = os.getenv("DB_NAME")

# os.getenv returns None for unset variables; interpolating that would put the
# literal text "None" into the URL. Fail early and name what is missing.
_missing_settings = [
    name
    for name, value in (
        ("DB_USER", user),
        ("DB_PASSWORD", pw),
        ("DB_HOST", host),
        ("DB_PORT", port),
        ("DB_NAME", db),
    )
    if value is None
]
if _missing_settings:
    raise RuntimeError(
        "Missing database settings in the environment: " + ", ".join(_missing_settings)
    )

# Percent-encode the credentials so characters such as '@', ':', '/' or '#'
# in the user name or password cannot be mistaken for URL delimiters.
engine   = create_engine(
    f"postgresql://{quote(user, safe='')}:{quote(pw, safe='')}@{host}:{port}/{db}"
)

In [None]:
# Read every row of the text_snapshots table into a DataFrame, then preview it.
snapshots = pd.read_sql(sql="SELECT * FROM text_snapshots;", con=engine)
snapshots.head(5)

In [None]:
# Read every row of the survey_responses table into a DataFrame, then preview it.
surveys = pd.read_sql(sql="SELECT * FROM survey_responses;", con=engine)
surveys.head(5)

In [None]:
# Load participant IDs from pid_accepted.txt, one ID per line.
# Surrounding whitespace is stripped and blank lines are skipped, so an empty
# string can never end up in the list and be treated as a participant ID.
with open("pid_accepted.txt", "r") as fle:
    pid_list = [line.strip() for line in fle if line.strip()]

# Show how many IDs were read and what the first one looks like.
len(pid_list), pid_list[0]

In [None]:
# Essay prompt text keyed by prompt ID; looked up with the `prompt_id` value
# taken from the survey responses when assembling the grading sample.
essay_prompts = dict(
    a1="Should public schools ban smartphones during the school day, or permit limited use for learning and emergencies?",
    a2="Should we bring back extinct species like woolly mammoths using genetic engineering, or leave extinction as a natural boundary that shouldn't be crossed?",
    a3="Should large employers require workers to return to the office several days a week, or allow fully remote schedules by default?",
    b1="Should parents have the right to genetically edit their unborn children to prevent diseases, or should we ban genetic modifications to preserve natural human diversity?",
    b2="Should cities ban gas-powered leaf blowers to reduce noise and pollution, or leave equipment choices to homeowners and landscapers?",
    b3="Should the federal government broaden student-loan forgiveness programs, or prioritize other ways of addressing education debt?",
)

essay_prompts

In [None]:
def get_final_essay(pid):
    """Return the final revision-stage essay text for one participant.

    Searches the module-level ``snapshots`` DataFrame for rows whose
    ``participant_id`` equals ``pid``, whose ``type`` is ``"final"`` and whose
    ``stage`` is ``"revision"``.

    Args:
        pid: Participant ID to look up.

    Returns:
        The ``text_content`` value of the first matching row. When no row
        matches, an error message is printed and the string ``"error"`` is
        returned. When several rows match, a warning is printed and the first
        match is still used.
    """
    is_final_revision = (
        (snapshots["participant_id"] == pid)
        & (snapshots["type"] == "final")
        & (snapshots["stage"] == "revision")
    )
    matches = snapshots[is_final_revision]
    match_count = len(matches)

    # Nothing submitted: report it and hand back the sentinel string.
    if match_count == 0:
        print("error: no final submission")
        return "error"

    # Duplicates are tolerated; the first matching row wins.
    if match_count > 1:
        print("warning: more than one final submission")

    first_row = matches.iloc[0]
    return first_row["text_content"]

# Quick check: print the essay returned for the first ID in pid_list.
print(get_final_essay(pid_list[0]))

In [None]:
import random

# Number of essays to draw, and the seed that makes the draw repeatable so a
# fresh "Restart & Run All" selects the same participants again.
n = 50
SAMPLE_SEED = 42

# random.sample raises ValueError when asked for more items than exist, so the
# request is capped at the number of available participant IDs.
sample_size = min(n, len(pid_list))

pid_sample = []

# A dedicated Random instance gives a reproducible draw without reseeding the
# global random module.
for pid in random.Random(SAMPLE_SEED).sample(pid_list, sample_size):
    filtered = surveys.loc[surveys["participant_id"] == pid]

    # Without a survey row there is no prompt_id to look up; report and skip
    # rather than failing with an IndexError on .iloc[0].
    if filtered.empty:
        print(f"warning: no survey response for {pid}; skipping")
        continue

    prompt_id = filtered.iloc[0]["prompt_id"]
    data = {
        "pid": pid,
        "prompt_id": prompt_id,
        "essay_prompt": essay_prompts[prompt_id],
        # get_final_essay returns the string "error" when no final text exists;
        # such entries are kept as-is so they remain visible downstream.
        "essay": get_final_essay(pid)
    }
    pid_sample.append(data)


In [None]:
from docx import Document
from docx.enum.text import WD_BREAK

# Write every sampled essay to a Word document, one essay per page.
document = Document()

for index, data in enumerate(pid_sample):
    # Start each essay after the first on a fresh page. Breaking *before* an
    # entry (rather than after it) avoids a trailing blank page at the end.
    if index > 0:
        document.add_paragraph().add_run().add_break(WD_BREAK.PAGE)

    document.add_paragraph(f"ID: {data['pid']}")
    document.add_paragraph(f"PROMPT: {data['essay_prompt']}")
    document.add_paragraph("---")
    document.add_paragraph(data['essay'])
    document.add_paragraph("------------------")

# Save the document
document.save("essays_for_grading.docx")