In [148]:
import json
import ics
import re
import datetime
import zoneinfo
import requests

from typing import Tuple, List, Self, Any
from dataclasses import dataclass

In [149]:
# Shape of the value expected in COOKIE (placeholder values only).
COOKIE_EXAMPLE = 'route=xyz; _gscu_xyz=xyz; _gscbrs_xyz=xyz; JSESSIONID=xyz'
# Sent verbatim as the 'Cookie' request header by the fetch cell.
COOKIE = '' # most recently obtained cookie

# Form data posted with the schedule request.
REQUEST_DATA_EXAMPLE = { 'xn': '2023-2024', 'xq': '2' }
REQUEST_DATA = { 'xn': '2023-2024', 'xq': '2' } # selects the academic year and term

# The calendar cell treats this date as day 1 of week 1 when computing event dates.
START_DATE = datetime.date(2024, 3, 4) # start date of the term
# Used as the stem of the written '<name>.ics' file and as the event category.
CALENDAR_NAME = '2024 春季学期课表' # ICS file name

# Local file the fetched response is written to and the schedule is loaded from.
JSON_PATH = './general_schedule.json'
CALENDAR_CATEGORIES = [CALENDAR_NAME]
# NOTE(review): confirm whether the installed ics version fires a positive
# 15-minute trigger before or after the event start.
ALARMS = [ics.DisplayAlarm(datetime.timedelta(minutes=15))] # event reminders

In [150]:
def to_time_span(from_hr, from_min, to_hr, to_min):
    """Build a (start, end) pair of timezone-aware times of day.

    Args:
        from_hr: Hour of the start time.
        from_min: Minute of the start time.
        to_hr: Hour of the end time.
        to_min: Minute of the end time.

    Returns:
        A 2-tuple of ``datetime.time`` objects (start, end), both with zero
        seconds and the 'Asia/Shanghai' time zone attached.
    """
    shanghai = zoneinfo.ZoneInfo('Asia/Shanghai')
    begins = datetime.time(hour=from_hr, minute=from_min, second=0, tzinfo=shanghai)
    ends = datetime.time(hour=to_hr, minute=to_min, second=0, tzinfo=shanghai)
    return begins, ends

# Maps a slot number (1-12) to its (start, end) times. Slots come in
# consecutive pairs (1-2, 3-4, ...) and both slots of a pair share one span.
time_slot_mapping = {
    slot: to_time_span(*bounds)
    for first_slot, bounds in zip(
        range(1, 13, 2),
        (
            (8, 30, 10, 15),
            (10, 30, 12, 15),
            (14, 0, 15, 45),
            (16, 0, 17, 45),
            (18, 45, 20, 30),
            (20, 45, 22, 30),
        ),
    )
    for slot in (first_slot, first_slot + 1)
}
time_slot_mapping[1]

(datetime.time(8, 30, tzinfo=zoneinfo.ZoneInfo(key='Asia/Shanghai')),
 datetime.time(10, 15, tzinfo=zoneinfo.ZoneInfo(key='Asia/Shanghai')))

In [151]:
# Fetch the latest schedule and refresh the local cache at JSON_PATH.
# Any failure (network error, non-OK status, non-JSON body) leaves the
# existing cache file untouched so the rest of the notebook can still run
# from the last good copy.
try:
    response = requests.post(
        url='http://jw.hitsz.edu.cn/xszykb/queryxszykbzong',
        headers={
            'Accept': '*/*',
            'Cache-Control': 'no-cache',
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Cookie': COOKIE,
            'DNT': '1',
            'Origin': 'http://jw.hitsz.edu.cn',
            'Pragma': 'no-cache',
            'Proxy-Connection': 'keep-alive',
            'Referer': 'http://jw.hitsz.edu.cn/authentication/main',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest'
        },
        data=REQUEST_DATA,
        # NOTE(review): verify=False disables TLS certificate checks; it only
        # matters if the request ends up on HTTPS. Kept as in the original.
        verify=False,
        # Without a timeout the cell can hang forever on an unresponsive server.
        timeout=30
    )
except requests.RequestException as error:
    response = None
    print(f'Request failed: {error}')

if response is not None:
    if response.ok:
        try:
            # Validate before writing so a non-JSON body (e.g. an HTML page
            # returned for an expired cookie) cannot clobber a good cache.
            json.loads(response.text)
        except ValueError:
            print(f'Response is not valid JSON; keeping existing {JSON_PATH}')
        else:
            with open(JSON_PATH, 'w', encoding='utf-8') as json_file:
                json_file.write(response.text)
            print(f'{JSON_PATH} is up-to-date')
    else:
        print(f'Request error: {response.status_code}')

# Always load from the cache file, whether or not it was just refreshed.
with open(JSON_PATH, 'r', encoding='utf-8') as json_file:
    schedule_json = json.load(json_file)

# Exclude MOOC: drop every entry whose 'SKSJ' text contains the '备注:' marker.
note_pattern = re.compile(r'备注:')
schedule_json = [
    entry for entry in schedule_json
    if note_pattern.search(entry['SKSJ']) is None
]
schedule_json[:3]

Request error: 500


[{'KCWZSM': None,
  'RWH': '2023-2024-2-COMP3052-002',
  'SFFXEXW': None,
  'FILEURL': None,
  'SKSJ': '计算机系统\n[卢瑶]\n[5-14,16周][T5206]\n第3-4节',
  'XB': 4,
  'SKSJ_EN': 'Computer System\n[LuYao]\n[5-14,16Week][T5206]\n3-4',
  'KEY': 'xq1_jc2'},
 {'KCWZSM': None,
  'RWH': None,
  'SFFXEXW': '0',
  'FILEURL': None,
  'SKSJ': '【实验】人工智能\n[5-6节][11-11周]\n[T2608]',
  'XB': 2,
  'SKSJ_EN': '【Experiment】Artificial Intelligence[11-11Week]\n[T2608]\n[5-6]',
  'KEY': 'xq1_jc3'},
 {'KCWZSM': None,
  'RWH': None,
  'SFFXEXW': '0',
  'FILEURL': None,
  'SKSJ': '【实验】人工智能\n[7-8节][13-13周]\n[T2608]',
  'XB': 2,
  'SKSJ_EN': '【Experiment】Artificial Intelligence[13-13Week]\n[T2608]\n[7-8]',
  'KEY': 'xq1_jc4'}]

In [152]:
def dump_group(pattern: re.Pattern, text: str) -> str:
    """Return the first capture group of the first match of ``pattern`` in ``text``.

    Args:
        pattern: Compiled regular expression with at least one capture group.
        text: String to search.

    Returns:
        The text captured by group 1, or an empty string when the pattern
        does not match anywhere in ``text``.
    """
    found = pattern.search(text)
    return found.group(1) if found is not None else ''


# Teacher and classroom pattern: any bracketed text that does not end in a
# digit followed by 周 or 节 (those brackets hold week / time-slot spans).
teacher_classroom_pattern = re.compile(r'\[([^\[\]]+)(?<!\d[周节])\]')
def extract_teacher_classroom(text: str) -> Tuple[str, str]:
    """Pull the teacher and classroom out of a schedule entry's text.

    Args:
        text: Text to scan for bracketed fields.

    Returns:
        ``(teacher, classroom)``. With two or more bracketed fields the first
        two are used in that order; a single field is taken as the classroom
        with an empty teacher; no fields gives two empty strings.
    """
    hits = teacher_classroom_pattern.findall(text)
    if len(hits) >= 2:
        return hits[0], hits[1]
    # Zero or one hit: there is no teacher, and the lone hit (if any) is the room.
    classroom = hits[0] if hits else ''
    return '', classroom


# Patterns for the individual fields of a schedule entry.
# Name: everything from the start of the text up to a line break.
name_pattern = re.compile(r'^([\w\W]+?)\n')
# Week spans: digits, commas and hyphens inside brackets, followed by 周.
week_spans_pattern = re.compile(r'\[([\d\,\-]+)周\]')
# Time slot: digits and hyphens preceded by 第 or '[' and followed by 节.
time_slot_pattern = re.compile(r'[第\[]([\d\-]+)节')
# Day in a week: the digits after 'xq' in the entry's 'KEY'.
day_week_pattern = re.compile(r'xq(\d+)')

# Try every pattern on the first entry as a quick sanity check.
name = dump_group(name_pattern, schedule_json[0]['SKSJ'])
week_spans = dump_group(week_spans_pattern, schedule_json[0]['SKSJ'])
time_slot = dump_group(time_slot_pattern, schedule_json[0]['SKSJ'])
day_week = dump_group(day_week_pattern, schedule_json[0]['KEY'])
teacher, classroom = extract_teacher_classroom(schedule_json[0]['SKSJ'])
print(name, week_spans, time_slot, day_week, teacher, classroom, sep='\n')

计算机系统
5-14,16
3-4
1
卢瑶
T5206


In [153]:
# Convert spans text to ranges
def to_ranges(text: str) -> List[Tuple[int, int]]:
    """Parse a comma-separated span list such as ``'5-14,16'`` into ranges.

    Args:
        text: Comma-separated segments, each either a single integer
            (``'16'``) or a hyphenated span (``'5-14'``). May be empty.

    Returns:
        One inclusive ``(first, last)`` tuple per non-empty segment, in input
        order. A single integer yields ``(n, n)``. Empty input or empty
        segments (e.g. a trailing comma) contribute nothing.

    Raises:
        ValueError: If a segment contains something that is not an integer.
    """
    ranges = []
    for segment in text.split(','):
        segment = segment.strip()
        if not segment:
            # ''.split(',') yields [''], and int('') would raise; an empty
            # segment simply carries no span.
            continue
        bounds = [int(part) for part in segment.split('-')]
        # A lone number has identical first and last elements.
        ranges.append((bounds[0], bounds[-1]))
    return ranges


print(to_ranges('5-14,16'))

[(5, 14), (16, 16)]


In [154]:
@dataclass
class CourseFragment:
    """One schedule entry, parsed into structured fields."""
    # Course name taken from the entry text.
    name: str
    # Inclusive (first_week, last_week) ranges.
    week_ranges: List[Tuple[int, int]]
    # Inclusive (first_slot, last_slot) ranges.
    time_slot_ranges: List[Tuple[int, int]]
    teacher: str
    classroom: str
    # Number parsed from the 'xq<N>' part of the entry's 'KEY'.
    day_week: int

    @classmethod
    def from_json(cls, obj: Any) -> Self:
        """Build a fragment from one decoded schedule entry.

        Args:
            obj: Mapping with an 'SKSJ' text field and a 'KEY' field.

        Returns:
            A ``CourseFragment`` populated from the patterns applied to
            ``obj['SKSJ']`` and ``obj['KEY']``.
        """
        sksj = obj['SKSJ']
        course_name = dump_group(name_pattern, sksj)
        weeks_text = dump_group(week_spans_pattern, sksj)
        slots_text = dump_group(time_slot_pattern, sksj)
        who, where = extract_teacher_classroom(sksj)
        weekday = int(dump_group(day_week_pattern, obj['KEY']))
        return cls(
            name=course_name,
            week_ranges=to_ranges(weeks_text),
            time_slot_ranges=to_ranges(slots_text),
            teacher=who,
            classroom=where,
            day_week=weekday,
        )


course_fragments = list(map(CourseFragment.from_json, schedule_json))
course_fragments[:8]


[CourseFragment(name='计算机系统', week_ranges=[(5, 14), (16, 16)], time_slot_ranges=[(3, 4)], teacher='卢瑶', classroom='T5206', day_week=1),
 CourseFragment(name='【实验】人工智能', week_ranges=[(11, 11)], time_slot_ranges=[(5, 6)], teacher='', classroom='T2608', day_week=1),
 CourseFragment(name='【实验】人工智能', week_ranges=[(13, 13)], time_slot_ranges=[(7, 8)], teacher='', classroom='T2608', day_week=1),
 CourseFragment(name='人工智能', week_ranges=[(3, 12)], time_slot_ranges=[(7, 8)], teacher='陈科海', classroom='T5405', day_week=1),
 CourseFragment(name='嵌入式计算', week_ranges=[(11, 16)], time_slot_ranges=[(3, 4)], teacher='张春慨', classroom='T2404', day_week=2),
 CourseFragment(name='【实验】人工智能', week_ranges=[(14, 14)], time_slot_ranges=[(3, 4)], teacher='', classroom='T2608', day_week=3),
 CourseFragment(name='人工智能', week_ranges=[(3, 8), (10, 12)], time_slot_ranges=[(3, 4)], teacher='陈科海', classroom='T5405', day_week=3),
 CourseFragment(name='【实验】计算机网络 1/第2组', week_ranges=[(13, 13)], time_slot_ranges=[(5, 6)], 

In [155]:
# Expand every course fragment into one calendar event per teaching week.
cal = ics.Calendar()

for fragment in course_fragments:
    # Only the first time-slot range of a fragment is used: the event starts
    # when its first slot starts and ends when its last slot ends.
    first_slot, last_slot = fragment.time_slot_ranges[0]
    begins_at, ends_at = time_slot_mapping[first_slot][0], time_slot_mapping[last_slot][1]
    teaching_weeks = (
        week
        for first_week, last_week in fragment.week_ranges
        for week in range(first_week, last_week + 1)
    )
    for week in teaching_weeks:
        # Week 1 / day 1 falls on START_DATE itself.
        day = START_DATE + datetime.timedelta(weeks=week - 1, days=fragment.day_week - 1)
        cal.events.add(ics.Event(
            name=fragment.name,
            begin=datetime.datetime.combine(day, begins_at),
            end=datetime.datetime.combine(day, ends_at),
            description=f'授课人：{fragment.teacher}',
            location=fragment.classroom,
            categories=CALENDAR_CATEGORIES,
            alarms=ALARMS
        ))

In [156]:
# Write the calendar next to the notebook. The encoding is pinned to UTF-8
# because the events contain Chinese text and the platform default encoding
# is not guaranteed to be able to represent it.
with open(f'{CALENDAR_NAME}.ics', 'w', encoding='utf-8') as ics_file:
    ics_file.write(cal.serialize())

