In [1]:
import os
import base64
from typing import Dict, List
from app.schemas.mark_bisep_subjective_sheet import MarkSubjectiveSheetRequest, MarkSubjectiveSheetResponse, QuestionResponse
from app.database.mongodb import get_answer_sheet
from app.core.extract_pages import extract_pages_from_pdf
from app.core.ocr_answer_sheet import ocr_answer_sheet
from app.core.mark_answer_sheet import mark_answer_sheet
from app.core.crop_answer_sheet import crop_pdf_pages
from app.core.filter_attempted import filter_attempted_questions
from temp_data.sample_request import sample_request as request
from typing import List, Tuple, Optional, Dict, Any
import json

In [2]:
def save_mark_sheet_to_json(mark_sheet: Dict[str, Any], filename: str = "temp_data/mark_sheet.json") -> None:
    """Serialise ``mark_sheet`` to ``filename`` as indented UTF-8 JSON.

    Args:
        mark_sheet: Mapping to dump; it must be JSON-serialisable.
        filename: Destination path. Missing parent directories are created
            so that a fresh checkout (no ``temp_data/`` yet) does not fail.

    Raises:
        TypeError: Propagated from ``json.dump`` for non-serialisable values.
        OSError: If the directory or file cannot be created/written.
    """
    parent_dir = os.path.dirname(filename)
    # dirname() is "" for a bare file name and os.makedirs("") raises,
    # so only create the directory when there actually is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        json.dump(mark_sheet, f, indent=2, ensure_ascii=False)
    print(f"Mark sheet saved to {filename}")

def save_mark_sheet_to_markdown(mark_sheet: Dict[str, Any], filename: str = "temp_data/mark_sheet.md") -> None:
    """Render ``mark_sheet`` as a Markdown report and write it to ``filename``.

    Each entry of ``mark_sheet`` becomes one "Question" section showing the
    awarded marks, one bullet per rubric point, and the feedback text.

    Args:
        mark_sheet: Mapping of question number to a result mapping that
            provides ``'marks'``, ``'rubrics'`` (an iterable of
            ``(awarded, justification)`` pairs) and ``'feedback'``.
        filename: Destination path. Missing parent directories are created.

    Raises:
        KeyError: If a result lacks ``'marks'``, ``'rubrics'`` or ``'feedback'``.
        OSError: If the directory or file cannot be created/written.
    """
    lines = ["# Mark Sheet\n"]
    for question_number, result in mark_sheet.items():
        lines.append(f"## Question {question_number}\n")
        lines.append(f"**Total Marks Awarded:** {result['marks']}\n")
        lines.append("### Rubric Evaluation:\n")
        # Rubric points are numbered from 1 in the report.
        for i, (awarded, justification) in enumerate(result['rubrics'], 1):
            lines.append(f"- **Point {i}:** {awarded} marks — {justification}")
        lines.append("\n### Feedback:\n")
        lines.append(f"{result['feedback']}\n")
        lines.append("---\n")

    parent_dir = os.path.dirname(filename)
    # dirname() is "" for a bare file name and os.makedirs("") raises,
    # so only create the directory when there actually is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"Mark sheet saved as Markdown to {filename}")

def convert_mark_sheet_to_response(mark_sheet: Dict[str, Any]) -> MarkSubjectiveSheetResponse:
    """Build the API response model from a marked sheet.

    Args:
        mark_sheet: Mapping whose keys are convertible with ``int()`` (the
            question numbers) and whose values provide ``"rubrics"``,
            ``"feedback"`` and ``"marks"``; ``"presentation_score"`` and
            ``"grammer_score"`` are optional and default to ``0`` / ``0.0``.

    Returns:
        A ``MarkSubjectiveSheetResponse`` holding one ``QuestionResponse`` per
        entry (in mapping order) and the sum of their ``total_marks``.
    """
    question_responses: List[QuestionResponse] = [
        QuestionResponse(
            question_number=int(key),
            rubrics_marks=entry["rubrics"],
            feedback=entry["feedback"],
            presentation_score=entry.get("presentation_score", 0),
            grammer_score=entry.get("grammer_score", 0.0),
            total_marks=entry["marks"],
        )
        for key, entry in mark_sheet.items()
    ]

    # The paper total is derived from the per-question totals.
    paper_total = sum(item.total_marks for item in question_responses)

    return MarkSubjectiveSheetResponse(
        list_of_questions=question_responses,
        total_paper_marks=paper_total,
    )

def write_ocr_to_markdown(ocr_result: dict, output_dir: str):
    """Dump OCR output to disk, one sub-directory per question.

    For every ``question_number`` in ``ocr_result`` a directory
    ``<output_dir>/question_<n>/`` is created containing
    ``question_<n>.md``. The file holds the entry's ``"markdown"`` strings
    joined by blank lines. When the entry has a truthy ``"image"`` value it is
    base64-decoded into ``diagram.png`` next to the Markdown file, and an image
    link to it is appended to the Markdown text.

    Args:
        ocr_result: Mapping of question number to a mapping with optional
            ``"markdown"`` (iterable of strings) and ``"image"`` (base64 data).
        output_dir: Root directory for the output; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    for qn, entry in ocr_result.items():
        stem = f"question_{qn}"
        target_dir = os.path.join(output_dir, stem)
        os.makedirs(target_dir, exist_ok=True)

        body = "\n\n".join(entry.get("markdown", []))

        encoded_image = entry.get('image')
        if encoded_image:
            # Write the diagram first, then reference it by relative name.
            with open(os.path.join(target_dir, 'diagram.png'), "wb") as png_file:
                png_file.write(base64.b64decode(encoded_image))
            body = body + "\n\n![Image](diagram.png)"

        with open(os.path.join(target_dir, stem + ".md"), "w", encoding="utf-8") as md_out:
            md_out.write(body)

In [3]:
# Step 1: fetch the answer sheet for the id carried by `request`
# (the sample request imported from temp_data.sample_request) using
# get_answer_sheet, which is imported from app.database.mongodb.
sheet_stream = get_answer_sheet(request.answer_sheet_id)
print(f"✅ 1. Answer Sheet with id {request.answer_sheet_id} Retrived from database")

✅ 1. Answer Sheet with id biology2 Retrived from database


In [4]:
# Step 2: crop the fetched sheet. page_indices is 3..29 (range end is
# exclusive); the same margin is passed for left/right (65) and for
# top/bottom (130).
# NOTE(review): the margin units and whether page indices are 0-based are
# defined by crop_pdf_pages, which is not visible here — confirm there.
cropped_sheet_stream = crop_pdf_pages(sheet_stream,page_indices=list(range(3,30)),left=65,right=65,top=130,bottom=130)
print("✅ 2. Answer Sheet cropped")

✅ 2. Answer Sheet cropped


In [5]:
# Step 3: extract pages from the cropped PDF stream; the request is passed
# along to extract_pages_from_pdf as well.
# NOTE(review): presumably images_dict maps questions/pages to page images —
# confirm against extract_pages_from_pdf.
images_dict = extract_pages_from_pdf(cropped_sheet_stream, request)
print("✅ 3. Pages Extracted from pdf")

✅ 3. Pages Extracted from pdf


In [6]:
# Step 4: run OCR over the extracted pages and dump the result as Markdown
# files under temp_data/ocr_output for inspection.
# NOTE(review): the recorded output of this cell is
# "ValueError: max() arg is an empty sequence", while later cells still use
# ocr_result — presumably from an earlier successful run. Verify the notebook
# with Restart Kernel -> Run All.
ocr_result = ocr_answer_sheet(images_dict)
write_ocr_to_markdown(ocr_result,"temp_data/ocr_output")
print("✅ 4. OCR Performed")

ValueError: max() arg is an empty sequence

In [18]:
# Step 5: derive filter_qns from the OCR result via filter_attempted_questions;
# it is passed on to the marking step.
# NOTE(review): presumably this identifies the questions the student
# attempted — confirm the return shape in filter_attempted_questions.
filter_qns = filter_attempted_questions(ocr_result)
print("✅ 5. Attempted Questions Filtered")

✅ 5. Attempted Questions Filtered


In [19]:
# Step 6: mark the sheet from the OCR result, the request and the filtered
# questions. The resulting mark_sheet is what the save/convert helpers consume.
mark_sheet = mark_answer_sheet(ocr_result, request, filter_qns)
print("✅ 6. All Answer Sheet Marked")

✅ 6. All Answer Sheet Marked


In [None]:
# Step 7: persist the marked sheet as JSON and Markdown under temp_data/,
# convert it to the response model, and print that model as indented JSON.
save_mark_sheet_to_json(mark_sheet, "temp_data/evaluated_sheet.json")
save_mark_sheet_to_markdown(mark_sheet, "temp_data/evaluated_sheet.md")
response_model = convert_mark_sheet_to_response(mark_sheet)
print("✅ 7. Response Object generated from Marked Sheet")
print(response_model.model_dump_json(indent=2))