## 1) 导入 & 配置
- `AI_API_KEY` **只从环境变量读取**（建议写在 `.env`）
- `HOMEWORK_URL`、`MODEL_NAME` 按需修改
- `AI_BASE_URL` 可选，同样从环境变量读取；未设置时使用代码中的默认地址

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

import time
import os
import glob
import re

from openai import OpenAI
from dotenv import load_dotenv

# Presumably loads variables from a local .env file into the environment so
# the os.getenv() lookups below can see them (python-dotenv) — confirm.
load_dotenv()

# Page opened by the browser-launch cell.
HOMEWORK_URL = "https://next.jinshuju.net/forms/NHaQvT/entries"
# Model name passed to the chat-completions call.
MODEL_NAME = "gpt-5-mini"

# Read only from environment variables (hard-coding is no longer allowed).
API_KEY = os.getenv("AI_API_KEY")
# Falls back to the default endpoint when AI_BASE_URL is unset or empty.
BASE_URL = os.getenv("AI_BASE_URL") or "https://api.openai-proxy.org/v1"
# Downloads go to ./downloads relative to the current working directory.
DOWNLOAD_DIR = os.path.join(os.getcwd(), "downloads")

# System prompt for the grader; it is sent to the model verbatim.
SCORING_CRITERIA = """
你是C++作业评分助教，按以下标准评分（满分10分，平均分8分）：
1. 代码逻辑正确性：是否符合作业需求，逻辑无漏洞；
2. 代码规范性：命名规范、缩进整齐、结构清晰；
3. 注释完整性：关键步骤有注释，便于理解；
4. 代码简洁性：无冗余代码，实现高效。
评分输出格式：
第一行：分数（仅数字，例如：8.5）
第二行：简短评语（例如：代码逻辑正确，命名规范，注释完整，建议优化循环结构以提升简洁性）
"""

# Create the download directory up front; harmless if it already exists.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
print('DOWNLOAD_DIR =', DOWNLOAD_DIR)
# Print only whether the key is set, never the key itself.
print('AI_API_KEY present =', bool(API_KEY))

DOWNLOAD_DIR = c:\workspace\workspace4python\selenium_operator\downloads
AI_API_KEY present = True


## 2) 函数定义（集中）
把所有函数定义集中在一个代码块里；后面每个代码块只做一步测试，便于逐步调试。

In [None]:
# 所有函数定义集中在此处（建议先运行这一格）

def setup_driver():
    """Start a Chrome WebDriver that saves downloads into DOWNLOAD_DIR.

    Returns:
        The started Chrome driver, with a 10-second implicit wait applied.
    """
    opts = Options()
    opts.add_experimental_option(
        'prefs',
        {
            # Save straight into DOWNLOAD_DIR without asking where to save.
            'download.default_directory': DOWNLOAD_DIR,
            'download.prompt_for_download': False,
            'download.directory_upgrade': True,
            'safebrowsing.enabled': True,
        },
    )

    browser = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=opts,
    )
    browser.implicitly_wait(10)
    return browser


def wait_for_grid(driver):
    """Wait until the AG Grid root and its body viewport are present.

    Args:
        driver: Selenium WebDriver showing the entries page.

    Returns:
        The element with class ``ag-body-viewport``.

    Raises:
        TimeoutException: if either element is still missing after 20 seconds.
    """
    def _await_class(class_name):
        # One 20-second explicit wait per element, same as waiting in sequence.
        locator = (By.CLASS_NAME, class_name)
        return WebDriverWait(driver, 20).until(
            EC.presence_of_element_located(locator)
        )

    _await_class('ag-root')
    return _await_class('ag-body-viewport')


def get_visible_rows(driver):
    """Return the row elements currently present in the grid's center columns.

    Args:
        driver: Selenium WebDriver showing the entries page.

    Returns:
        A list of elements with ``role='row'`` inside the
        ``ag-center-cols-container`` element (empty if none are found).
    """
    row_xpath = (
        "//div[contains(@class, 'ag-center-cols-container')]"
        "//div[@role='row']"
    )
    return driver.find_elements(By.XPATH, row_xpath)


def clear_download_dir(download_dir=None):
    """Delete every entry in the download directory (best effort).

    Args:
        download_dir: Directory to clear. Defaults to the module-level
            ``DOWNLOAD_DIR`` when omitted.

    An entry that cannot be removed (locked file, sub-directory, ...) is
    reported instead of being silently ignored: a leftover file would
    otherwise look like a finished download to the next download wait.
    """
    target_dir = DOWNLOAD_DIR if download_dir is None else download_dir
    # Escape the directory part so characters like '[' in the path are not
    # interpreted as glob patterns.
    pattern = os.path.join(glob.escape(target_dir), '*')
    for path in glob.glob(pattern):
        try:
            os.remove(path)
        except OSError as exc:
            # Still best effort: keep going, but make the failure visible.
            print('清理下载目录失败:', path, exc)


def wait_download_complete(timeout=30, download_dir=None, poll_interval=1):
    """Wait until a finished download shows up in the download directory.

    Args:
        timeout: Maximum number of seconds to wait. The directory is always
            scanned at least once, even when this is 0.
        download_dir: Directory to watch. Defaults to the module-level
            ``DOWNLOAD_DIR`` when omitted.
        poll_interval: Seconds to sleep between directory scans.

    Returns:
        Path of the most recently modified finished file, or ``None`` if no
        finished file appeared before the timeout.
    """
    target_dir = DOWNLOAD_DIR if download_dir is None else download_dir
    pattern = os.path.join(glob.escape(target_dir), '*')
    # Names Chrome uses while a download is still in progress.
    # NOTE(review): '.tmp' is assumed to be a Chrome placeholder — confirm.
    partial_suffixes = ('.crdownload', '.tmp')
    # Monotonic clock: the deadline is unaffected by system clock changes.
    deadline = time.monotonic() + timeout

    while True:
        finished = [
            path
            for path in glob.glob(pattern)
            if os.path.isfile(path) and not path.endswith(partial_suffixes)
        ]
        if finished:
            # Prefer the newest file in case something older was left behind.
            return max(finished, key=os.path.getmtime)
        if time.monotonic() >= deadline:
            return None
        time.sleep(poll_interval)


def download_homework_file(driver, row, row_index):
    """Download the attachment found in a row's ``field_5`` cell.

    Args:
        driver: Selenium WebDriver showing the grid.
        row: Row element to download from.
        row_index: Zero-based index, used only for the progress messages.

    Returns:
        Whatever ``wait_download_complete`` returns: the downloaded file's
        path, or ``None`` when nothing arrived within 30 seconds.
    """
    def _js_click(element):
        # Click through JavaScript rather than a native WebDriver click.
        driver.execute_script("arguments[0].click();", element)

    # Start from an empty folder so the next finished file is this download.
    clear_download_dir()
    grid_index = row.get_attribute('row-index')

    try:
        attachment_cell = row.find_element(By.XPATH, ".//div[@col-id='field_5']")
    except NoSuchElementException:
        # Not found under the row element: look the cell up from the document
        # root using the row's own row-index attribute.
        attachment_cell = driver.find_element(
            By.XPATH,
            f"//div[@role='row' and @row-index='{grid_index}']//div[@col-id='field_5']",
        )

    driver.execute_script(
        "arguments[0].scrollIntoView({block: 'center', inline: 'center'});",
        attachment_cell,
    )
    time.sleep(0.5)

    try:
        link = attachment_cell.find_element(By.XPATH, ".//a[@href]")
        file_name = link.get_attribute('download') or '未知文件名'
        print(f'下载第 {row_index + 1} 行: {file_name}')
        _js_click(link)
    except NoSuchElementException:
        # No anchor inside the cell: fall back to clicking the cell itself.
        print(f'第 {row_index + 1} 行未找到 a 标签，尝试点击单元格')
        _js_click(attachment_cell)

    saved_path = wait_download_complete(timeout=30)
    if not saved_path:
        print('下载超时')
    else:
        print('下载完成:', os.path.basename(saved_path))
    return saved_path


def read_cpp_file(file_path):
    """Read a source file as text, trying several encodings in turn.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded text, or ``None`` when the file cannot be opened or read.
    """
    # - utf-8-sig reads plain UTF-8 too, but drops a leading BOM instead of
    #   leaving '\ufeff' at the start of the code.
    # - gb2312 is not listed: gbk is tried first and already decodes
    #   everything gb2312 can; gb18030 extends gbk further.
    # - latin-1 accepts any byte sequence, so it is the last-resort fallback.
    encodings = ('utf-8-sig', 'gbk', 'gb18030', 'latin-1')
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                content = f.read()
        except UnicodeDecodeError:
            continue
        except OSError as exc:
            # A missing or unreadable file is a read failure, which callers
            # already handle via the None return.
            print('读取文件失败:', exc)
            return None
        print('读取编码:', encoding)
        return content
    return None


def _parse_score_reply(reply):
    """Split the model's reply text into ``(score, comment)``.

    The score is the first number found when scanning line by line. Every
    other non-empty line that is not purely digits is joined, separated by
    single spaces, into the comment. ``score`` is ``None`` if no line
    contains a number.
    """
    score = None
    comment_parts = []
    for raw_line in reply.split('\n'):
        line = raw_line.strip()
        if score is None:
            match = re.search(r'\d+(?:\.\d+)?', line)
            if match:
                # The rest of the score line is intentionally not kept.
                score = match.group()
                continue
        if line and not line.isdigit():
            comment_parts.append(line)
    return score, ' '.join(comment_parts)


def score_homework_with_ai(cpp_code):
    """Ask the model to grade a piece of C++ code.

    Args:
        cpp_code: Source code text to grade.

    Returns:
        ``(score, comment)``. ``score`` is the number as a string. On any
        failure (missing API key, empty code, API error, empty reply, or no
        number in the reply) ``score`` is ``None``; for the first four cases
        ``comment`` holds the reason.
    """
    # Local import: the file only imports the OpenAI client class at the top.
    from openai import OpenAIError

    if not API_KEY:
        return None, '缺少 AI_API_KEY（环境变量/.env）'
    if not cpp_code or not cpp_code.strip():
        return None, '文件内容为空'

    try:
        client = OpenAI(api_key=API_KEY, base_url=BASE_URL)
        resp = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {'role': 'system', 'content': SCORING_CRITERIA},
                {'role': 'user', 'content': f'请评分以下C++代码：\n{cpp_code}'},
            ],
            timeout=30,
        )
    except OpenAIError as exc:
        # Network, timeout and API errors become a per-file failure so one
        # bad request does not abort a whole batch run.
        return None, f'AI 调用失败: {exc}'

    # The reply may have no choices, or a message whose content is None.
    content = resp.choices[0].message.content if resp.choices else None
    if not content or not content.strip():
        return None, 'AI 返回内容为空'

    return _parse_score_reply(content.strip())


def _edit_grid_cell(driver, row, col_id, input_xpath, text):
    """Open one cell of ``row`` for editing, replace its value and confirm."""
    cell = row.find_element(By.XPATH, f".//div[@col-id='{col_id}']")
    cell.click()
    time.sleep(1)

    # An edit ("修改") button may appear after the click; if none becomes
    # clickable within 3 seconds, carry on and look for the input directly.
    try:
        edit_btn = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), '修改')]"))
        )
        edit_btn.click()
        time.sleep(1)
    except TimeoutException:
        pass

    field = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, input_xpath))
    )
    # Select all + delete, so the new text replaces any existing value.
    field.send_keys(Keys.CONTROL + 'a')
    field.send_keys(Keys.DELETE)
    field.send_keys(text)

    # Confirm with the done/OK button if one is clickable, otherwise Enter.
    try:
        ok_btn = WebDriverWait(driver, 3).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), '完成') or contains(text(), '确定')]"))
        )
        ok_btn.click()
    except TimeoutException:
        field.send_keys(Keys.ENTER)

    time.sleep(1)


def fill_score_and_comment(driver, row, score, comment):
    """Write the score and the comment back into a grid row.

    Args:
        driver: Selenium WebDriver showing the grid.
        row: Row element to update.
        score: Score to enter; converted with ``str()`` before typing.
        comment: Comment text to enter.

    Raises:
        TimeoutException: if an input field does not appear within 10 seconds.
        NoSuchElementException: if the row has no ``field_11``/``field_12`` cell.
    """
    # Teacher score: col-id=field_11
    _edit_grid_cell(
        driver,
        row,
        'field_11',
        "//div[contains(@class, 'modal')]//input[@type='text' or @type='number'] | //input[contains(@class, 'ag-input-field-input')]",
        str(score),
    )

    # Teacher reply: col-id=field_12
    _edit_grid_cell(
        driver,
        row,
        'field_12',
        "//div[contains(@class, 'modal')]//textarea | //div[contains(@class, 'modal')]//input[@type='text'] | //textarea",
        comment,
    )

    print('回填完成')


def process_all_visible_then_scroll(driver, viewport, max_loops=9999):
    """Grade every row in the grid, scrolling down one page at a time.

    Each pass handles the rows not seen before (download, read, score,
    write back), then scrolls the viewport down by its own height. The loop
    ends when the viewport is at the bottom and a pass found no new row, or
    after ``max_loops`` passes.

    Args:
        driver: Selenium WebDriver showing the grid.
        viewport: Scrollable grid element (as returned by ``wait_for_grid``).
        max_loops: Upper bound on the number of passes.

    Returns:
        The set of row indexes (ints) that were visited.
    """
    def _grade_row(grid_row, index):
        # Full pipeline for one row; any failed step skips the rest.
        print(f"\n--- 处理第 {index + 1} 份作业 ---")
        file_path = download_homework_file(driver, grid_row, index)
        if not file_path:
            print('下载失败，跳过')
            return

        source = read_cpp_file(file_path)
        if not source:
            print('读取失败，跳过')
            return

        score, comment = score_homework_with_ai(source)
        if not score:
            print('评分失败，跳过：', comment)
            return

        print('score =', score)
        print('comment =', comment)
        fill_score_and_comment(driver, grid_row, score, comment)

    # True once the scroll position is within 50px of the end.
    at_bottom_js = (
        "return arguments[0].scrollTop + arguments[0].clientHeight >= arguments[0].scrollHeight - 50;"
    )
    seen = set()

    for _ in range(max_loops):
        fresh_count = 0

        for grid_row in get_visible_rows(driver):
            raw_index = grid_row.get_attribute('row-index')
            if not raw_index:
                continue
            index = int(raw_index)
            if index in seen:
                continue

            # Mark as visited before processing so a skipped row is not retried.
            seen.add(index)
            fresh_count += 1
            _grade_row(grid_row, index)

        reached_bottom = driver.execute_script(at_bottom_js, viewport)

        if reached_bottom and fresh_count == 0:
            print('已到底部，结束。总处理:', len(seen))
            break

        print('向下滚动加载更多...')
        driver.execute_script('arguments[0].scrollTop += arguments[0].clientHeight;', viewport)
        time.sleep(2)

    return seen


## 3) 初始化浏览器（可重复运行）
如果你多次运行导致残留浏览器窗口，先手动关闭或运行下面的关闭 cell。

In [None]:
# Before running this cell, run the "imports & config" and the
# "function definitions" cells first.

# Start with no browser session; the launch cell assigns the real driver.
driver = None


In [None]:
# Launch the browser and open the homework entries page.
# Login is done by hand in the opened window before the next cell is run.
driver = setup_driver()
driver.get(HOMEWORK_URL)
print('已打开页面：', HOMEWORK_URL)
print('请在浏览器中完成登录，然后再运行下一格。')

In [None]:
# Optional: close the browser (run only when needed).
if driver is not None:
    try:
        driver.quit()
    except Exception as e:
        # Report the failure but still fall through and reset `driver` below.
        print('quit error:', e)
# Drop the reference so the launch cell can start a fresh session.
driver = None
print('driver closed')

## 4) 等待表格加载 + 定位滚动区域
这一步用于确认 AG Grid 的关键节点都能找到。

In [None]:
# Wait for the grid to appear and keep its scrollable viewport element for
# the later scrolling step.
viewport = wait_for_grid(driver)
print('AG Grid 已就绪')


## 5) 读取当前可视区域的行（只取中间滚动列）
先用这个确认你能拿到 row-index、以及行内是否包含 `field_5/field_11/field_12`。

In [None]:
# Fetch the rows currently present in the grid and report how many there are.
rows = get_visible_rows(driver)
print('当前可见行数:', len(rows))

# Optional: list the col-id of every cell in the first row, to confirm which
# column ids exist.
if rows:
    first = rows[0]
    col_ids = [c.get_attribute('col-id') for c in first.find_elements(By.XPATH, ".//div[@role='gridcell']")]
    print('第一行 col-id:', col_ids)


In [None]:
# Check that the first row has the key columns (for debugging the XPath).
if not rows:
    raise RuntimeError('当前没拿到可视行：请确认已登录且表格已加载')

sample = rows[0]
# field_5 is the cell the download step reads; field_11 / field_12 are the
# score and reply cells that the write-back step edits.
for col in ['field_5', 'field_11', 'field_12']:
    try:
        _ = sample.find_element(By.XPATH, f".//div[@col-id='{col}']")
        print('found col:', col)
    except Exception as e:
        # Only the exception type is printed, to keep the output short.
        print('missing col:', col, 'err=', type(e).__name__)

## 6) 下载、读文件、AI 评分（逐步测试）
建议先只处理 1 行，确认下载与评分链路是通的。

In [None]:
# Process only the first row currently visible (change to rows[n] if needed).
rows = get_visible_rows(driver)
if not rows:
    raise RuntimeError('当前无可视行')

row = rows[0]
row_index = int(row.get_attribute('row-index'))

# Reset results from any earlier run of this cell. Without this, a failed
# download or read would leave the previous score/comment in place, and the
# write-back cell would put them on the row selected above.
score, comment = None, ''

downloaded = download_homework_file(driver, row, row_index)
if downloaded:
    cpp_code = read_cpp_file(downloaded)
    print('代码长度:', 0 if not cpp_code else len(cpp_code))
    if cpp_code:
        score, comment = score_homework_with_ai(cpp_code)
        print('score =', score)
        print('comment =', comment)

## 7) 回填评分/评语（逐步测试）
这一格只负责把上一步得到的 `score/comment` 写回表格。

In [None]:
# Run this cell only after you have checked that score/comment look right.
# It depends on row/score/comment produced by the previous section, so all
# three are checked here; a missing `row` would otherwise surface as a bare
# NameError in the call below.
missing = [name for name in ('row', 'score', 'comment') if name not in globals()]
if missing:
    raise RuntimeError('请先运行上一节得到 row/score/comment')
if not score:
    raise RuntimeError('score 为空，取消回填')

fill_score_and_comment(driver, row, score, comment)

## 8) 批量处理（循环滚动）
当单行链路都跑通后，再用这一格批量处理。

In [None]:
# Batch run (only after login is confirmed and the single-row test passed).
# Re-acquire the viewport in case the page was reloaded since the earlier cell.
viewport = wait_for_grid(driver)
processed = process_all_visible_then_scroll(driver, viewport)
# Last expression of the cell: the notebook displays the set of visited row indexes.
processed