In [13]:
from PIL import Image, ImageOps
import io
import base64
from pillow_heif import register_heif_opener
from together import Together
import json
import pandas as pd

# Register the pillow_heif opener so that PIL's Image.open() can read
# HEIF/.HEIC images in addition to the formats Pillow supports natively.
register_heif_opener()

def _normalize_answer(answer):
    """Return *answer* as an upper-cased, whitespace-stripped string.

    ``None`` becomes the empty string so that a missing/null answer never
    raises and never matches a real answer letter.
    """
    if answer is None:
        return ""
    return str(answer).strip().upper()


def calculate_score(student_answers, correct_answers):
    """Grade a student's answers against the answer key.

    Answers are compared case-insensitively and ignoring surrounding
    whitespace. Non-string answers (e.g. ``None`` or numbers coming from a
    JSON payload) are tolerated instead of raising ``AttributeError``.

    Args:
        student_answers: Mapping of question label -> the student's answer.
        correct_answers: Mapping of question label -> the correct answer.

    Returns:
        A tuple ``(score, incorrect_questions, skipped_questions)`` where
        ``score`` is the number of matching answers, ``incorrect_questions``
        maps each wrongly answered question to a dict with the keys
        ``"Student Answer"`` and ``"Correct Answer"`` (raw, un-normalized
        values), and ``skipped_questions`` is the set of questions in the key
        that the student did not answer at all.
    """
    score = 0
    incorrect_questions = {}
    skipped_questions = set(correct_answers) - set(student_answers)

    for question, correct_answer in correct_answers.items():
        if question not in student_answers:
            continue  # Already accounted for in skipped_questions.

        student_answer = student_answers[question]
        if _normalize_answer(student_answer) == _normalize_answer(correct_answer):
            score += 1
        else:
            # Report the raw answer so the caller sees exactly what was read.
            incorrect_questions[question] = {
                "Student Answer": student_answer,
                "Correct Answer": correct_answer,
            }
    return score, incorrect_questions, skipped_questions

def _encode_image_as_jpeg_base64(image_path, max_size=1024):
    """Load an image, normalize it, and return it as a base64 JPEG string.

    The image is rotated according to its EXIF orientation, converted to a
    JPEG-compatible mode, and shrunk so neither side exceeds *max_size*.
    """
    # The context manager closes the underlying file even if processing fails.
    with Image.open(image_path) as source:
        # Handle image orientation (fix rotation issues)
        image = ImageOps.exif_transpose(source)

        # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...); saving
        # such an image raises OSError, so convert it first.
        if image.mode not in ("RGB", "L"):
            image = image.convert("RGB")

        # Resize the image to a manageable size; thumbnail() keeps the aspect
        # ratio and never enlarges.
        image.thumbnail((max_size, max_size))

        buffered = io.BytesIO()
        image.save(buffered, format="JPEG")

    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def _parse_student_answers(response_content):
    """Parse the model reply into a ``{question: answer}`` dict, or ``None``.

    Despite the prompt, the model may wrap the JSON in code fences or add
    surrounding text, so when the whole reply is not valid JSON the outermost
    ``{...}`` span is tried as a fallback. Answers are stripped of whitespace;
    null or blank answers are dropped so they count as skipped.
    """
    text = (response_content or "").strip()
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            return None
        try:
            parsed = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return None

    if not isinstance(parsed, dict):
        return None

    answers = {}
    for question, answer in parsed.items():
        if answer is None:
            continue
        cleaned = str(answer).strip()
        if cleaned:
            answers[str(question).strip()] = cleaned
    return answers


def process_image(image_path, correct_answers=None):
    """Grade a photographed multiple-choice answer sheet.

    The image is encoded as a base64 JPEG and sent, together with an
    extraction prompt, to a vision model through the Together API. The JSON
    reply is parsed into the student's answers, scored with
    ``calculate_score`` and summarized as a table.

    Args:
        image_path: Path of the image to grade (anything ``Image.open``
            accepts).
        correct_answers: Optional mapping of question label -> correct
            answer. When omitted, the predefined 25-question key below is
            used.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Question``,
        ``Student Answer``, ``Correct Answer`` and ``Result``, or ``None``
        when the reply cannot be parsed or any other error occurs (the error
        is printed rather than raised).
    """
    try:
        img_base64 = _encode_image_as_jpeg_base64(image_path)

        # Initialize Together client
        client = Together()

        # Define the query
        query = """
        Extract the student's multiple-choice answers from the image and provide them as a valid JSON object. The answers may appear in different vietnamese formats (e.g., Câu 1: A, 1. A, Câu 1 - A, Bài 1: A, etc.).
        Ignore any background noise, blurred lines, or artifacts, and focus only on the clearly written text.
        Treat all formats like Bài 1: A, Câu 1: A, or 1. A as referring to Câu X: A for consistency.
        Ensures alignment between question numbers and answers
        
        Rules:

        Output only the JSON object, with no additional text, explanations, or formatting.
        Do not include backticks, code blocks, or language specifiers.
        Example output:
        {
            "Câu 1": "A",
            "Câu 2": "B",
            "Câu 3": "C",
            "Câu 4": "D"
        }
        Strictly return the JSON object, and nothing else (e.g opening something before json object).
        """

        # Send the image and query to the Together API
        response = client.chat.completions.create(
            model="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo",  # Replace with your desired model
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": query},  # Query
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_base64}"  # Base64-encoded image
                            }
                        }
                    ]
                }
            ],
            max_tokens=1000
        )

        # Load student answers from the response
        response_content = response.choices[0].message.content
        print("API Response Content:")
        print(response_content)

        student_answers = _parse_student_answers(response_content)
        if student_answers is None:
            print("Invalid response format. Please check the input.")
            return None

        # Predefined correct answers (used when the caller supplies no key)
        if correct_answers is None:
            correct_answers = {
                "Câu 1": "D",
                "Câu 2": "C",
                "Câu 3": "D",
                "Câu 4": "C",
                "Câu 5": "C",
                "Câu 6": "C",
                "Câu 7": "A",
                "Câu 8": "A",
                "Câu 9": "B",
                "Câu 10": "B",
                "Câu 11": "C",
                "Câu 12": "D",
                "Câu 13": "B",
                "Câu 14": "B",
                "Câu 15": "B",
                "Câu 16": "A",
                "Câu 17": "B",
                "Câu 18": "A",
                "Câu 19": "C",
                "Câu 20": "C",
                "Câu 21": "B",
                "Câu 22": "A",
                "Câu 23": "B",
                "Câu 24": "D",
                "Câu 25": "B"
            }

        # Calculate the score; only the total is reported here, the per-row
        # verdicts are rebuilt below for the summary table.
        score, _, _ = calculate_score(student_answers, correct_answers)

        # Display results
        print("\nExam analysis complete!")
        print(f"Student Score: {score}/{len(correct_answers)}")

        # Build the summary table; a skipped question is shown as "Skipped"
        # and marked wrong.
        results = [
            {
                "Question": question,
                "Student Answer": student_answers.get(question, "Skipped"),
                "Correct Answer": correct_answer,
                "Result": (
                    "✅ Đúng"
                    if student_answers.get(question, "").upper() == str(correct_answer).strip().upper()
                    else "❌ Sai"
                ),
            }
            for question, correct_answer in correct_answers.items()
        ]

        results_df = pd.DataFrame(results)

        print("\nSummary Table:")
        return results_df

    except Exception as e:
        # Top-level boundary for interactive use: report the failure instead
        # of raising, and make the "no result" return value explicit.
        print(f"An error occurred: {e}")
        return None

# Example usage: grade a single answer-sheet photo.
# Replace the path below with your own image.
image_path = "test_image.jpeg"

results_df = process_image(image_path)
results_df  # Last expression of the cell, so the notebook renders the table

API Response Content:
{"Câu 1": "D", "Câu 2": "C", "Câu 3": "D", "Câu 4": "C", "Câu 5": "C", "Câu 6": "A", "Câu 7": "A", "Câu 8": "B", "Câu 9": "B", "Câu 10": "C", "Câu 11": "D", "Câu 12": "B", "Câu 13": "B", "Câu 14": "B", "Câu 15": "A", "Câu 16": "A", "Câu 17": "B", "Câu 18": "A", "Câu 19": "C", "Câu 20": "D", "Câu 21": "B", "Câu 22": "A", "Câu 23": "B", "Câu 24": "D"}

Exam analysis complete!
Student Score: 17/25

Summary Table:


Unnamed: 0,Question,Student Answer,Correct Answer,Result
0,Câu 1,D,D,✅ Đúng
1,Câu 2,C,C,✅ Đúng
2,Câu 3,D,D,✅ Đúng
3,Câu 4,C,C,✅ Đúng
4,Câu 5,C,C,✅ Đúng
5,Câu 6,A,C,❌ Sai
6,Câu 7,A,A,✅ Đúng
7,Câu 8,B,A,❌ Sai
8,Câu 9,B,B,✅ Đúng
9,Câu 10,C,B,❌ Sai


1. Extract the student's multiple-choice answers from the image and provide them as a valid JSON object. The answers may appear in different Vietnamese formats (e.g., "Câu 1: A", "1. A", "Câu 1 - A", "Bài 1: A", etc.).
2. Treat all formats like "Bài 1: A", "Câu 1: A", "Câu 1 A", "1 A" or "1. A" as referring to "Câu X: A" for consistency.
3. Ignore any background noise, blurred lines, or artifacts, and focus only on clearly legible text.
4. Ensure alignment between question numbers and their corresponding answers, even if the text is spread across multiple columns.
5. Pay special attention to multi-column layouts. Extract answers from all columns and ensure they are correctly mapped to their respective question numbers.
6. If the image contains instructions, headers, or other non-answer text, ignore them completely.
7. If a student crosses out their first choice and writes another option next to it, prioritize the **last clearly written option** as the final answer for that question.
