In [1]:
import os
import io
import csv
import base64
import openai
from dotenv import load_dotenv
import nbformat
from nbconvert import PDFExporter
from pdf2image import convert_from_path
import tempfile
from docx import Document
from pptx import Presentation
from PIL import Image, ImageDraw
from io import BytesIO
load_dotenv()
HOMEWORK_DIR = os.path.abspath(os.getenv('HOMEWORK_DIR'))
# 確認路徑是否存在
if not os.path.exists(HOMEWORK_DIR):
    raise FileNotFoundError(f"The directory {HOMEWORK_DIR} does not exist. Current path: {HOMEWORK_DIR}")
openai.api_key = os.getenv('OPENAI_API_KEY')

In [2]:
with open('assignment_requirements.txt', 'r', encoding='utf-8') as f:
    assignment_requirements = f.read()

In [3]:
def compress_image(image, max_size_mb=20):
    """Compress image to a maximum size."""
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    size_kb = len(buffered.getvalue()) / 1024
    
    if size_kb > max_size_mb * 1024:
        scale_factor = (max_size_mb * 1024) / size_kb
        new_width = int(image.width * scale_factor)
        new_height = int(image.height * scale_factor)
        image = image.resize((new_width, new_height), Image.LANCZOS)
        
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        
    return buffered
# 定義函數：將 ipynb 轉換為 PDF
def convert_ipynb_to_pdf(ipynb_path):
    try:
        nb = nbformat.read(ipynb_path, as_version=4)
        pdf_exporter = PDFExporter()
        pdf_exporter.template_name = 'classic'
        pdf_data, _ = pdf_exporter.from_notebook_node(nb)
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tf:
            tf.write(pdf_data)
            pdf_path = tf.name
        return pdf_path
    except Exception as e:
        print(f"無法讀取的檔案: {str(e)}")


# 將 PDF 轉換為 Base64 圖片
def convert_pdf_to_base64_images(pdf_path):
    try:
        images = convert_from_path(pdf_path)
        base64_images = []
        for image in images:
            buffered = compress_image(image)
            img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
            base64_images.append(img_base64)
        return base64_images
    except Exception as e:
        print(f"無法轉換的檔案: {str(e)}")


# 將 .docx 轉換為 Base64 圖片
def convert_docx_to_base64_images(docx_path):
    try:
        doc = Document(docx_path)
        base64_images = []
        for paragraph in doc.paragraphs:
            # 將每段文字轉換成圖片的邏輯
            img = Image.new('RGB', (800, 100), color='white')
            d = ImageDraw.Draw(img)
            d.text((10, 10), paragraph.text, fill='black')
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
            base64_images.append(img_base64)
        return base64_images
    except Exception as e:
        print(f"無法讀取檔案: {docx_path}, 錯誤: {str(e)}")


# 將 .pptx 轉換為 Base64 圖片
def convert_pptx_to_base64_images(pptx_path):
    try:
        prs = Presentation(pptx_path)
        base64_images = []
        for slide in prs.slides:
            img = Image.new('RGB', (800, 600), color='white')
            d = ImageDraw.Draw(img)
            slide_text = '\n'.join([shape.text for shape in slide.shapes if hasattr(shape, "text")])
            d.text((10, 10), slide_text, fill='black')
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
            base64_images.append(img_base64)
        return base64_images
    except Exception as e:
        print(f"無法讀取的檔案: {str(e)}")


# 將圖片轉換為 Base64 字符串
def image_to_base64(image):
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

In [4]:
# 讀取學生檔案並分類
def read_files(student):
    image_base64_list = []  # 用於存放Base64格式的圖片
    text_contents = []  # 用於存放文本內容或錯誤訊息
    
    for text_file in student['texts']:
        text_path = os.path.join(student['path'], text_file)
        
        # texts 型態檔案處理
        if text_file.lower().endswith(('.py', '.java', '.cpp', '.txt')):
            try:
                with open(text_path, 'r', encoding='utf-8') as file_content:
                    text_contents.append(file_content.read())
            except Exception as e:
                print(f"檔案無法讀取: {text_path,str(e)}")
    
    # 處理直接放在 images 資料夾裡的圖片 (.png, .jpg, .jpeg)
    for image_file in student['images']:
        image_path = os.path.join(student['path'], image_file)
        if image_file.lower().endswith('.ipynb'):
            pdf_path = convert_ipynb_to_pdf(image_file)
            if "無法讀取的檔案" in pdf_path:
                text_contents.append(pdf_path)  # 添加錯誤信息
            else:
                base64_images = convert_pdf_to_base64_images(image_path)
                image_base64_list.extend(base64_images)  # 加入Base64圖片列表
        
        elif image_file.lower().endswith('.docx'):
            base64_images = convert_docx_to_base64_images(image_path)
            image_base64_list.extend(base64_images)  # 加入Base64圖片列表
        
        elif image_file.lower().endswith('.pptx'):
            base64_images = convert_pptx_to_base64_images(image_path)
            image_base64_list.extend(base64_images)  # 加入Base64圖片列表
        elif image_file.lower().endswith('.pdf'):
            base64_images = convert_pdf_to_base64_images(image_path)
            image_base64_list.extend(base64_images)  # 加入Base64圖片列表
            
        else:
            try:
                with Image.open(image_path) as img:
                    buffered = BytesIO()
                    img.save(buffered, format="PNG")
                    img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
                    image_base64_list.append(img_base64)
            except Exception as e:
                print(f"圖片檔案無法讀取: {image_path,str(e)}")


    return image_base64_list, text_contents


In [5]:
SystemPrompt = '''
As an impartial and neutral educator, 
your objective is to evaluate students' submitted assignments according to the specified grading criteria. 
For each student, please assign a score out of 100 based on the quality of their work and adherence to the assignment requirements. 
Additionally, provide a brief comment in Traditional Chinese that reflects your assessment and is limited to 10 characters. 
Ensure that your output follows this structured format: "Student ID, score, comment." 
Refrain from including any additional text or information
'''


In [6]:
from langchain_openai import ChatOpenAI

# 定義評分函數，根據檔案類型進行不同處理
def grade_assignment(student, image_base64_list, text_contents):
    messages = [
        {"role": "system", "content": SystemPrompt}
    ]
    
    # 添加作業要求
    messages.append({"role": "user", "content": f"作業要求：\n{assignment_requirements}"})
    
    # 添加學生資訊
    messages.append({"role": "user", "content": f"學生資訊：{student['id']} - {student['name']}"})
    
    # 添加 images 型態的圖片（作業內容）
    for image_base64 in image_base64_list:
        if "無法轉換的檔案" in image_base64:
            messages.append({"role": "user", "content": image_base64})
        else:
            messages.append({
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{image_base64}",
                    }
                },
            ],
        })
    
    # 添加 texts 型態的文本內容（程式碼或錯誤信息）
    for content in text_contents:
        messages.append({"role": "user", "content": content})
    # 調用 OpenAI API
    llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0,max_tokens=20)
    response = llm.invoke(messages)
    text = response.content
    # 解析回應
    lines = text.strip().split(',')
    score = 0
    comment = ''
    studentID = lines[0]
    score = lines[1]
    comment = lines[2]
    return studentID, score, comment


In [7]:
# 遍歷學生資料夾
students = []
step=0
for student_folder in os.listdir(HOMEWORK_DIR):
    student_path = os.path.join(HOMEWORK_DIR, student_folder)
    if os.path.isdir(student_path):
        try:
            student_id, student_name = student_folder.split('_', 1)
        except ValueError:
            continue
        # 如果資料夾名稱以 "無附件" 結尾
        if student_folder.endswith('無附件'):
            students.append({
                'id': student_id,
                'name': student_name,
                'images': [],
                'texts': [],
                'path': student_path,
                'has_attachments': False
            })
        else:
            files = os.listdir(student_path)
            images = [f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg','.pdf','.ipynb','.docx', '.pptx'))]
            texts = [f for f in files if f.lower().endswith(('.py', '.java', '.cpp',  '.txt',))]
            students.append({
                'id': student_id,
                'name': student_name,
                'images': images,
                'texts': texts,
                'path': student_path,
                'has_attachments': True
            })
    # step+=1
    # if step == 5:
    #     break

In [9]:
# 開始評分
with open('grading_results.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['學號', '姓名', '分數', '評語']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for student in students:
        if not student['has_attachments']:
            # 若無附件，設定分數為 0 並給予評語
            writer.writerow({'學號': student['id'], '姓名': student['name'], '分數': 0, '評語': '未繳交'})
            print({'學號': student['id'], '姓名': student['name'], '分數': 0, '評語': '未繳交'})
        else:
            image_base64_list, text_contents = read_files(student)
            if image_base64_list or text_contents:
                #continue
                # score=100
                # comment = "ok"
                studentID,score, comment = grade_assignment(student, image_base64_list, text_contents)
                print({'學號': student['id'], '姓名': student['name'], '分數': score, '評語': comment})
            else:
                score = 100
                comment = '讀取異常'
                print({'學號': student['id'], '姓名': student['name'], '分數': score, '評語': '讀取異常'})
            writer.writerow({'學號': student['id'], '姓名': student['name'], '分數': score, '評語': comment})

{'學號': '412777343', '姓名': '張仕敬', '分數': ' 85', '評語': ' 表達清晰。'}
{'學號': '411770026', '姓名': '李佩芸', '分數': ' 85', '評語': ' 表達清晰。'}
{'學號': '411770661', '姓名': '郭翰楀', '分數': ' 70', '評語': ' 內容清晰。'}
{'學號': '412777269', '姓名': '江慶澤', '分數': ' 75', '評語': ' 表達清晰。'}
{'學號': '411770844', '姓名': '王志中', '分數': ' 75', '評語': ' 表達清晰。'}
{'學號': '411771107', '姓名': '陳星男', '分數': ' 70', '評語': ' 內容簡單。'}
{'學號': '411770349', '姓名': '黃妤涵', '分數': ' 85', '評語': ' 表達清晰。'}
{'學號': '411770984', '姓名': '王冠曄', '分數': ' 80', '評語': ' 表達清晰。'}
{'學號': '411770042', '姓名': '劉大維', '分數': ' 75', '評語': ' 表達清晰。'}
{'學號': '411770364', '姓名': '黃國維', '分數': ' 40', '評語': ' 不夠完整。'}
{'學號': '411770273', '姓名': '何長均', '分數': ' 75', '評語': ' 表達清晰。'}
{'學號': '411770968', '姓名': '曾煜翔', '分數': ' 75', '評語': ' 表達清晰。'}
{'學號': '412777160', '姓名': '劉亭妤', '分數': ' 75', '評語': ' 內容清晰。'}
{'學號': '411770646', '姓名': '王奕璿', '分數': ' 60', '評語': ' 內容不足。'}
{'學號': '411770406', '姓名': '陳之桓', '分數': ' 75', '評語': ' 內容清晰。'}
{'學號': '411770901', '姓名': '陳毅豪', '分數': ' 75', '評語': ' 表達清晰。'}
{'學號': '

In [10]:
!