In [None]:
import json
import ics
import re
import datetime
import zoneinfo
import requests
import copy
import bs4

from typing import Tuple, List, Self, Any, Optional
from dataclasses import dataclass

In [None]:
# User-tunable configuration for the whole notebook.
# NOTE(review): avoid committing real credentials or cookies filled in here.
USERNAME = '' # Unified identity authentication username (student ID)
PASSWORD = '' # Unified identity authentication password

COOKIE_EXAMPLE = 'route=xyz; JSESSIONID=xyz'
COOKIE = '' # Most recently obtained cookie (used when password login fails)

REQUEST_DATA_EXAMPLE = { 'xn': '2023-2024', 'xq': '2' }
REQUEST_DATA = { 'xn': '2023-2024', 'xq': '2' } # Academic year and semester to query

START_DATE = datetime.date(2024, 3, 4) # Start date of the semester
CALENDAR_NAME = '2024春季学期课表' # Calendar name; also used as the ICS file name

SHOULD_INFER_SEMESTER = True # Whether to automatically switch to the current semester

JSON_PATH = './general_schedule.json'
ALARMS = [ics.DisplayAlarm(datetime.timedelta(minutes=-15))] # Reminders for regular class events
CRITICAL_ALARMS = [ics.DisplayAlarm(datetime.timedelta(minutes=-40)), ics.DisplayAlarm(datetime.timedelta(days=-1))] # Reminders for exam events

# Time zone attached to every generated event time.
ZONE = zoneinfo.ZoneInfo('Asia/Shanghai')

In [None]:
def to_time_span(from_hr, from_min, to_hr, to_min):
    """Build a (start, end) pair of timezone-aware ``datetime.time`` values.

    Both times have zero seconds and carry the notebook-wide ``ZONE``.
    """
    begins_at = datetime.time(hour=from_hr, minute=from_min, second=0, tzinfo=ZONE)
    ends_at = datetime.time(hour=to_hr, minute=to_min, second=0, tzinfo=ZONE)
    return begins_at, ends_at

# Maps each period number (1-12) to the (start, end) times of the two-period
# teaching block it belongs to: periods 1 and 2 share a span, 3 and 4 share
# the next one, and so on.
time_slot_mapping = {
    period: to_time_span(*bounds)
    for first_period, bounds in zip(
        range(1, 13, 2),
        (
            (8, 30, 10, 15),
            (10, 30, 12, 15),
            (14, 0, 15, 45),
            (16, 0, 17, 45),
            (18, 45, 20, 30),
            (20, 45, 22, 30),
        ),
    )
    for period in (first_period, first_period + 1)
}
time_slot_mapping[1]

In [None]:
# One shared HTTP session so cookies obtained while logging in are reused by
# every later request; the headers below become session defaults.
session = requests.Session()
for header_name, header_value in (
    ('Pragma', 'no-cache'),
    ('Proxy-Connection', 'keep-alive'),
    ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'),
    ('X-Requested-With', 'XMLHttpRequest'),
):
    session.headers[header_name] = header_value

In [None]:
# Log in through the CAS single sign-on form with USERNAME / PASSWORD.
# On any failure, fall back to the manually supplied COOKIE.
# NOTE(review): verify=False disables TLS certificate checks for the HTTPS
# login request; kept as in the original, confirm whether it is still needed.
try:
    # Visit the portal first, then the CAS entry point that serves the login form.
    response = session.get(
        url='http://jw.hitsz.edu.cn',
        verify=False
    )
    response = session.get(
        url='http://jw.hitsz.edu.cn/cas',
        verify=False
    )
    # The login form carries hidden fields that must be posted back unchanged.
    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    lt = soup.find('input', attrs={'name': 'lt'}).attrs['value']
    execution = soup.find('input', attrs={'name': 'execution'}).attrs['value']
    event_id = soup.find('input', attrs={'name': '_eventId'}).attrs['value']
    response = session.post(
        url='https://sso.hitsz.edu.cn:7002/cas/login?service=http%3A%2F%2Fjw.hitsz.edu.cn%2Fcas',
        verify=False,
        data={
            'username': USERNAME,
            'password': PASSWORD,
            'lt': lt,
            'rememberMe': 'on',
            'execution': execution,
            '_eventId': event_id
        }
    )
    # The page reached after login is expected to contain 'qxdm'; its absence
    # is treated as a failed login.
    if 'qxdm' not in response.text:
        raise RuntimeError("login response does not contain the expected 'qxdm' marker")
    print(response.url)
except Exception as error:
    # Catch Exception rather than everything, so Ctrl-C / kernel interrupts
    # still propagate, and report the cause instead of hiding it.
    session.headers.update({'Cookie': COOKIE})
    print('Cannot infer cookies from username and password')
    print(f'Reason: {error!r}')

In [None]:
# Try to fetch current semester information

# Copy the configured dict so REQUEST_DATA itself is never mutated; re-running
# this cell then always starts from the configured values.
request_data = dict(REQUEST_DATA)
calendar_name = CALENDAR_NAME

if SHOULD_INFER_SEMESTER:
    response = session.post(
        url='http://jw.hitsz.edu.cn/component/querydangqianxnxq',
        verify=False
    )

    if response.ok:
        try:
            # Read every field before touching request_data so a partial
            # response cannot leave it half-updated.
            json_obj = response.json()
            inferred_year = json_obj['XN']
            inferred_term = json_obj['XQ']
            inferred_label = json_obj['XNXQ']
        except (ValueError, KeyError, TypeError) as error:
            # A non-JSON body or missing keys: keep the configured values.
            print(f'Cannot infer current semester: {error!r}')
        else:
            print('Current semester is infered')
            request_data['xn'] = inferred_year
            request_data['xq'] = inferred_term
            # Double-quoted f-string: reusing the same quote inside the
            # replacement field is a syntax error before Python 3.12.
            calendar_name = f"{inferred_label}学期课表"
            print(request_data)
            print(calendar_name)
    else:
        print(f'Request error: {response.status_code}')

In [None]:
# Try to fetch semester start date

# Fallback used whenever the start date cannot be inferred from the server.
semester_start_date = START_DATE

# Query week 1 of the selected semester.
first_week_request_data = copy.copy(request_data)
first_week_request_data['djz'] = '1'

response = session.post(
    url='http://jw.hitsz.edu.cn/component/queryRlZcSj',
    data=first_week_request_data,
    verify=False
)

if response.ok:
    try:
        json_obj = response.json()
        # The first entry of 'content' is taken as the first day of week 1.
        start_date_text = json_obj['content'][0]['rq']
        # .date() keeps the same type as the START_DATE fallback.
        inferred_start_date = datetime.datetime.strptime(start_date_text, r'%Y-%m-%d').date()
    except (ValueError, KeyError, IndexError, TypeError) as error:
        # A non-JSON body, empty content or unexpected date text: keep START_DATE.
        print(f'Cannot infer start date: {error!r}')
    else:
        semester_start_date = inferred_start_date
        print('Start date is infered')
        print(semester_start_date)
else:
    print(f'Request error: {response.status_code}')

In [None]:
# Try to fetch general course schedule

response = session.post(
    url='http://jw.hitsz.edu.cn/xszykb/queryxszykbzong',
    data=request_data,
    verify=False
)

if response.ok:
    try:
        # Validate before overwriting the cache: a 200 response that is not
        # JSON (for example a login page) must not clobber a good cache file.
        response.json()
    except ValueError as error:
        print(f'Response is not valid JSON, keeping existing {JSON_PATH}: {error!r}')
    else:
        # Explicit UTF-8 so the cached text round-trips on every platform.
        with open(JSON_PATH, 'w', encoding='utf-8') as json_file:
            json_file.write(response.text)
        print(f'{JSON_PATH} is up-to-date')
else:
    print(f'Request error: {response.status_code}')

# Always load from the cache file, so a previously saved schedule is used
# when the request above failed.
with open(JSON_PATH, 'r', encoding='utf-8') as json_file:
    schedule_json = json.load(json_file)

# Exclude MOOC: drop every entry whose 'SKSJ' text contains a '备注:' note.
note_pattern = re.compile(r'备注:')
schedule_json = [
    entry for entry in schedule_json
    if note_pattern.search(entry['SKSJ']) is None
]

# Entries whose 'SKSJ' text contains '考试' are exams; the rest are courses.
exam_pattern = re.compile(r'考试')

# Courses
course_json = [
    entry for entry in schedule_json
    if exam_pattern.search(entry['SKSJ']) is None
]

# Exams
exam_json = [
    entry for entry in schedule_json
    if exam_pattern.search(entry['SKSJ']) is not None
]

course_json[:3]
exam_json[:3]

In [None]:
def dump_group(pattern: re.Pattern, text: str) -> str:
    """Return the first capture group of the first match of ``pattern`` in ``text``.

    Returns an empty string when the pattern does not match at all.
    """
    found = pattern.search(text)
    return found.group(1) if found is not None else ''


# Teacher and classroom pattern: any [bracketed] field whose content does not
# end in a digit followed by '周' or '节' (i.e. not a week or period field).
teacher_classroom_pattern = re.compile(r'\[([^\[\]]+)(?<!\d[周节])\]')
def extract_teacher_classroom(text: str) -> Tuple[str, str]:
    """Return ``(teacher, classroom)`` taken from the bracketed fields of ``text``.

    With no matching field both values are empty; with exactly one, it is
    treated as the classroom; otherwise the first two fields are used.
    """
    fields = teacher_classroom_pattern.findall(text)
    if not fields:
        return '', ''
    if len(fields) == 1:
        (only_field,) = fields
        return '', only_field
    teacher_field, classroom_field = fields[:2]
    return teacher_field, classroom_field


# All patterns are defined before any preview code runs, so later cells can
# rely on them even when there is no course entry to preview.

# Name pattern: everything up to the first newline
name_pattern = re.compile(r'^([\w\W]+?)\n')

# Week spans pattern: e.g. the '1-8,10' in '[1-8,10周]'
week_spans_pattern = re.compile(r'\[([\d\,\-]+)周\]')

# Time slot pattern: digits and dashes preceded by '第' or '[' and followed by '节'
time_slot_pattern = re.compile(r'[第\[]([\d\-]+)节')

# Day in a week pattern: the digits after 'xq' in an entry's KEY
day_week_pattern = re.compile(r'xq(\d+)')

# Exam related patterns
month_pattern = re.compile(r'(\d+)月')
day_pattern = re.compile(r'(\d+)日')
start_time_pattern = re.compile(r'([\d:]+)-')
end_time_pattern = re.compile(r'-([\d:]+)')
classroom_only_pattern = re.compile(r'\b([\w]+)$')

# Preview the patterns on the first course, if there is one. Indexing
# course_json[0] unguarded would raise IndexError on an empty schedule.
if course_json:
    sample_course = course_json[0]

    name = dump_group(name_pattern, sample_course['SKSJ'])
    print(name)

    week_spans = dump_group(week_spans_pattern, sample_course['SKSJ'])
    print(week_spans)

    time_slot = dump_group(time_slot_pattern, sample_course['SKSJ'])
    print(time_slot)

    day_week = dump_group(day_week_pattern, sample_course['KEY'])
    print(day_week)

    teacher, classroom = extract_teacher_classroom(sample_course['SKSJ'])
    print(teacher)
    print(classroom)
else:
    print('No course entries to preview')

In [None]:
# Convert spans text to ranges
def to_ranges(text: str) -> List[Tuple[int, int]]:
    """Parse comma-separated spans such as ``'5-14,16'`` into inclusive ranges.

    Each segment is either a single number ``n`` (giving ``(n, n)``) or a
    dash-separated span ``a-b`` (giving ``(a, b)``). Empty segments are
    skipped, so an empty string yields an empty list instead of raising.

    Raises:
        ValueError: if a non-empty segment contains a part that is not an integer.
    """
    ranges = []
    for segment in text.split(','):
        segment = segment.strip()
        if not segment:
            # ''.split(',') gives [''], and int('') would raise ValueError.
            continue
        bounds = [int(part) for part in segment.split('-')]
        # A single number gives first == last; a span uses its two ends.
        ranges.append((bounds[0], bounds[-1]))
    return ranges


print(to_ranges('5-14,16'))

In [None]:
@dataclass
class CourseFragment:
    """One recurring class entry parsed from a schedule JSON object."""

    # Course title (text before the first newline of 'SKSJ').
    name: str
    # Inclusive (first_week, last_week) ranges in which the class takes place.
    week_ranges: List[Tuple[int, int]]
    # Inclusive (first_period, last_period) ranges within the day.
    time_slot_ranges: List[Tuple[int, int]]
    teacher: str
    classroom: str
    # Day-of-week number taken from the digits after 'xq' in the entry's 'KEY'.
    day_week: int

    @classmethod
    def from_json(cls, obj: Any) -> Self:
        """Build a fragment from an entry's 'SKSJ' text and its 'KEY'."""
        description = obj['SKSJ']
        title = dump_group(name_pattern, description)
        weeks_text = dump_group(week_spans_pattern, description)
        periods_text = dump_group(time_slot_pattern, description)
        teacher_name, room = extract_teacher_classroom(description)
        weekday = int(dump_group(day_week_pattern, obj['KEY']))
        return cls(
            name=title,
            week_ranges=to_ranges(weeks_text),
            time_slot_ranges=to_ranges(periods_text),
            teacher=teacher_name,
            classroom=room,
            day_week=weekday,
        )

course_fragments = list(map(CourseFragment.from_json, course_json))
course_fragments[:8]


In [None]:
@dataclass
class ExamFragment:
    """One exam entry parsed from a schedule JSON object."""

    name: str
    year: int
    month: int
    day: int
    # Start and end times kept as the text captured from 'SKSJ'.
    start_time: str
    end_time: str
    classroom: str

    @classmethod
    def from_json(cls, obj: Any) -> Self:
        """Build a fragment from an entry's 'SKSJ' text.

        The exam text carries no year, so it is derived from
        ``semester_start_date``: a month earlier than the semester's start
        month is placed in the following calendar year.
        """
        description = obj['SKSJ']
        title = dump_group(name_pattern, description)
        exam_month = int(dump_group(month_pattern, description))
        exam_day = int(dump_group(day_pattern, description))
        begins = dump_group(start_time_pattern, description)
        ends = dump_group(end_time_pattern, description)
        exam_year = semester_start_date.year
        if exam_month < semester_start_date.month:
            exam_year += 1
        room = dump_group(classroom_only_pattern, description)
        return cls(
            name=title,
            year=exam_year,
            month=exam_month,
            day=exam_day,
            start_time=begins,
            end_time=ends,
            classroom=room,
        )

exam_fragments = list(map(ExamFragment.from_json, exam_json))
exam_fragments[:3]

In [None]:
cal = ics.Calendar()

# Regular classes: one event per teaching week of every course fragment.
for fragment in course_fragments:
    # Only the first period range is used; the event runs from the start of
    # its first period to the end of its last period.
    first_slot, last_slot = fragment.time_slot_ranges[0]
    time_start = time_slot_mapping[first_slot][0]
    time_end = time_slot_mapping[last_slot][1]
    for first_week, last_week in fragment.week_ranges:
        for week in range(first_week, last_week + 1):
            # Week 1 starts on semester_start_date; day_week 1 is that same day.
            days_from_start = 7 * (week - 1) + (fragment.day_week - 1)
            date = semester_start_date + datetime.timedelta(days=days_from_start)
            cal.events.add(ics.Event(
                name=fragment.name,
                begin=datetime.datetime.combine(date, time_start),
                end=datetime.datetime.combine(date, time_end),
                description=f'授课人：{fragment.teacher}',
                location=fragment.classroom,
                categories=[calendar_name],
                alarms=ALARMS
            ))

# Exams: a single event each, with the more insistent reminders.
date_format = r'%Y-%m-%d %H:%M'
for fragment in exam_fragments:
    day_text = f'{fragment.year}-{fragment.month}-{fragment.day}'
    start_date = datetime.datetime.strptime(f'{day_text} {fragment.start_time}', date_format)
    end_date = datetime.datetime.strptime(f'{day_text} {fragment.end_time}', date_format)
    cal.events.add(ics.Event(
        name=fragment.name,
        begin=start_date.replace(tzinfo=ZONE),
        end=end_date.replace(tzinfo=ZONE),
        location=fragment.classroom,
        categories=[calendar_name],
        alarms=CRITICAL_ALARMS
    ))

In [None]:
# Write the calendar next to the notebook as '<calendar_name>.ics'.
# encoding='utf-8': the calendar contains Chinese text, and the platform
# default encoding is not UTF-8 everywhere.
# newline='': write the serialized text as-is, so its line endings are not
# translated a second time on platforms whose default newline is '\r\n'.
with open(f'{calendar_name}.ics', 'w', encoding='utf-8', newline='') as ics_file:
    ics_file.write(cal.serialize())