In [1]:
import requests as rq
from bs4 import BeautifulSoup as bs
import json
import re
import pandas as pd
import numpy as np
from tqdm import tqdm

In [3]:
# get all classrooms

all_classroom_url = 'https://gra206.aca.ntu.edu.tw/classrm/index.php/acarm/webcr-use-new?SYearDDL=1111&BuildingDDL=%25&RoomDDL=%28%E7%89%99%29%E8%A8%8E%E8%AB%96%E4%B8%80&SelectButton=%E6%9F%A5%E8%A9%A2'

all_bs_obj = bs(rq.get(all_classroom_url).text)

all_classroom_names = [a['value'] for a in all_bs_obj.select('select#RoomDDL option')]

# get url for all classroom
url_all_classroom = [f"https://gra206.aca.ntu.edu.tw/classrm/index.php/acarm/webcr-use-new?SYearDDL=1111&BuildingDDL=%25&RoomDDL={room.strip()}&SelectButton=%E6%9F%A5%E8%A9%A2" for room in all_classroom_names]


# regex pattern for extracting classroom data
data_pattern = re.compile(r"var timeDT = (\[.*?\]);")

def get_obj_in_script(url):
    script_text = bs(rq.get(url).text).select("#ContentPlaceHolder1>script")
    data_script_text = script_text[0].string.strip()
    data_extracted = data_pattern.match(data_script_text).group(1)
    return json.loads(data_extracted.encode('utf8').decode('unicode_escape'))


courses = dict()

for url in tqdm(url_all_classroom):

    for w in get_obj_in_script(url):
        wt = week_text = w['Item']
        wt = wt[wt.find("(")+1:wt.find(")")]       #星期幾

        for i in w.keys():
            if 'Info' not in i: continue
            no = i.split('Info')[1]
            course_code = w[i][0]['cr_cono']+ " " + w[i][0]['cr_clas']
            if course_code in courses.keys(): 
                session = courses[course_code]['時間'] 
                if wt in session.keys():      # 這禮拜已經加了
                    courses[course_code]['時間'][wt].append(no)
                else:
                    courses[course_code]['時間'][wt] = [no]
                continue

            
            courses[course_code] = {
                '課號'      : w[i][0]['cr_cono'],
                '班次'      : w[i][0]['cr_clas'],
                '課程名稱'   : w[i][0]['cr_cnam'],
                '老師'      : w[i][0]['cr_tenam'],
                '教室'      : w[i][0]['cr_no'],
                '時間'      : {wt:[no]}
            }

all_lecture_df = pd.DataFrame.from_dict(courses, orient='index')

  k = json.loads(data_extracted.encode('utf8').decode('unicode_escape'))
100%|██████████| 1036/1036 [11:48<00:00,  1.46it/s]


In [9]:
all_lecture_df

Unnamed: 0,課號,班次,課程名稱,老師,教室,時間
0,002 51260,02,網球初級,連玉輝,(醫)網球場,"{'三': ['3', '4']}"
1,002 50780,13,羽球初級,黃國恩,(醫)館1F,"{'一': ['1', '2']}"
2,002 50780,14,羽球初級,黃國恩,(醫)館1F,"{'一': ['3', '4']}"
3,002 50780,94,羽球初級,洪巧菱,(醫)館1F,"{'三': ['6', '7']}"
4,002 50780,95,羽球初級,洪巧菱,(醫)館1F,"{'三': ['8', '9']}"
...,...,...,...,...,...,...
3987,426 D4430,,健康與疾病之調適,羅美芳,護綜討室,"{'三': ['7', '8']}"
3988,406 001A0,,服務學習甲,胡文郁,護綜討室,"{'三': ['9', 'X']}"
3989,426 M4520,,進階臨床護理學(I),賴裕和,護綜討室,"{'四': ['3', '4']}"
3990,426 M4320,,領導與管理,張榮珍,護綜討室,"{'五': ['3', '4']}"


In [10]:
all_lecture_df.to_excel('111-1課表.xlsx', index=False)

# 靠二進位進行搜尋



In [11]:
all_lecture_df = pd.read_excel('111-1課表.xlsx', engine = 'openpyxl')

In [14]:
def course_bin_to_int(l:list) -> int:
    replacement = {'X':10, 'A':11, 'B':12, 'C':13, 'D':14}
    l = [int(replacement.get(s,s)) for s in l]
    return np.sum((2**np.array(l)), dtype='int16')

def course_session_parser(row):
    import ast
    weeks = [*"一二三四五六日"]
    course_session = ast.literal_eval(row['時間'])
    for week in weeks:
        if week in course_session:
            row[week] = course_bin_to_int(course_session[week])
        else: row[week] = int(0)
    return row


In [15]:
class_table = all_lecture_df.apply(course_session_parser, axis=1)

In [18]:
def search_class(weekday, session, classroom):
    _session = course_bin_to_int
    return class_table[
        (class_table[weekday]&_session(session)>0) & 
        class_table['教室'].str.contains(classroom)
        ]


In [19]:

search_class('二', [4,5,6,7,8], '社科')

Unnamed: 0,課號,班次,課程名稱,老師,教室,時間,一,二,三,四,五,六,日
1297,302 10410,1.0,行政學一,吳舜文,社科102,"{'二': ['3', '4']}",0,24,0,0,0,0,0
1298,303 20050,2.0,統計學暨實習,楊睿中,社科102,"{'二': ['6', '7'], '三': ['2', '3', '4']}",0,192,28,0,0,0,0
1310,302 10410,2.0,行政學一,蘇彩足,社科201,"{'二': ['3', '4']}",0,24,0,0,0,0,0
1311,302 51900,,國際關係概論,唐豪駿,社科201,"{'二': ['6', '7']}",0,192,0,0,0,0,0
1312,302 20810,,政黨與選舉制度,王業立,社科201,"{'二': ['8', '9']}",0,768,0,0,0,0,0
1321,303 20050,1.0,統計學暨實習,陳旭昇,社科202,"{'二': ['3', '4', '5'], '一': ['3', '4']}",24,56,0,0,0,0,0
1322,341 11740,,歐盟莫內講座—中國大陸研究學程導論,葉國俊,社科202,"{'二': ['6', '7']}",0,192,0,0,0,0,0
1334,302 30130,,西洋政治哲學概論,陳嘉銘,社科303,"{'二': ['3', '4']}",0,24,0,0,0,0,0
1335,322 U2370,,東亞民主化專題,黃旻華,社科303,"{'二': ['8', '9']}",0,768,0,0,0,0,0
1349,322 U2030,,比較行政專題,洪美仁,社科401,"{'二': ['3', '4']}",0,24,0,0,0,0,0
