# 國立陽明交通大學課程爬蟲 - 基本資訊版

此 Notebook 用於爬取 NYCU 課程的**基本資訊**，包括：
- 課程代碼、課程名稱
- 授課教師、學分、時數
- 上課時間、教室
- 人數限制、選課人數
- 課程類型、英文授課標記

**優點**：速度快，約 2-3 分鐘完成

**不包含**：課程綱要、先修科目、評分方式等詳細資訊

## 步驟 1: 參數設定

修改下方的學年度和學期參數：

In [None]:
# ============= Parameters =============
YEAR = 114          # Academic year to crawl
SEMESTER = 1        # Semester (1 = first semester, 2 = second semester)
# ===================================

## 步驟 2: 安裝必要套件

執行此儲存格安裝 requests 套件：

In [None]:
!pip install requests -q

## 步驟 3: 爬蟲程式碼

執行此儲存格載入爬蟲類別：

In [None]:
import json
import re
import requests
import warnings
from datetime import datetime

# The crawler sends its requests with verify=False, which makes urllib3 emit
# an InsecureRequestWarning ("Unverified HTTPS request ...") on every call.
# Silence only that message instead of hiding every warning in the notebook.
warnings.filterwarnings('ignore', message='Unverified HTTPS request')

class NYCUCrawler:
    """Crawl basic course information from the NYCU timetable site.

    The crawler walks the site's type -> category -> college -> department
    hierarchy, requests the course list of every department once, and
    collects one record per course in ``course_data``.

    Attributes:
        year: Academic year passed by the caller.
        semester: Semester passed by the caller.
        acysem: ``str(year) + str(semester)``, sent as the semester key.
        flang: Language code sent with hierarchy requests.
        headers: HTTP headers sent with every request.
        timeout: Timeout in seconds handed to ``requests`` for every call.
        dep_list: Department ids already requested, in visiting order.
        course_data: Mapping of course key -> course record dict.
    """

    DEFAULT_TIMEOUT = 30

    _BASE_URL = "https://timetable.nycu.edu.tw/"
    # One weekday letter followed by one or more period codes, e.g. "M56".
    _TIME_TOKEN = re.compile(r"[MTWRFSU][1-9yznabcd]+")
    # Keys of a department payload that hold course dicts (e.g. "1", "2").
    _SECTION_KEY = re.compile(r"^[1-2]+$")

    def __init__(self, year, semester, timeout=DEFAULT_TIMEOUT):
        """Store the target semester and initialise empty result containers.

        Args:
            year: Academic year.
            semester: Semester within that year.
            timeout: Seconds to wait on each HTTP request before
                ``requests`` raises a timeout error instead of hanging.
        """
        self.year = year
        self.semester = semester
        self.acysem = str(year) + str(semester)
        self.flang = "zh-tw"
        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }
        self.timeout = timeout
        self.dep_list = []
        self.course_data = {}

    # ------------------------------------------------------------------
    # Parsing helpers
    # ------------------------------------------------------------------
    def parse_time(self, tc):
        """Expand a time-classroom string into single-period slots.

        Args:
            tc: Comma-separated ``<time>-<classroom>`` segments, for example
                ``"M56R2-EC115[GF],T34-ED201"``. ``None`` is treated as "".

        Returns:
            A list such as ``["M5", "M6", "R2", "T3", "T4"]``: one entry per
            weekday/period pair, in the order they appear.
        """
        slots = []
        for segment in (tc or "").split(","):
            time_part = segment.partition("-")[0]
            for token in self._TIME_TOKEN.findall(time_part):
                weekday = token[0]
                slots.extend(weekday + period for period in token[1:])
        return slots

    def parse_classroom(self, tc):
        """Extract the classroom part of every time-classroom segment.

        Args:
            tc: Comma-separated ``<time>-<classroom>`` segments. ``None`` is
                treated as "".

        Returns:
            One string per segment; "" when the segment has no classroom.
            Everything after the first hyphen is kept, so a classroom code
            that itself contains a hyphen is not truncated.
        """
        return [segment.partition("-")[2] for segment in (tc or "").split(",")]

    @staticmethod
    def _as_dict(value):
        """Return *value* if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    # ------------------------------------------------------------------
    # HTTP helpers
    # ------------------------------------------------------------------
    def _fetch_json(self, action, form=None):
        """Call ``?r=main/<action>`` and return the decoded JSON body.

        A GET is issued when *form* is None, otherwise a POST with *form* as
        the form data.
        """
        url = f"{self._BASE_URL}?r=main/{action}"
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept as in the original code; confirm whether it is still required.
        options = {"headers": self.headers, "verify": False, "timeout": self.timeout}
        if form is None:
            res = requests.get(url, **options)
        else:
            res = requests.post(url, data=form, **options)
        return res.json()

    def _hierarchy_form(self, **fields):
        """Build the form for a hierarchy request: *fields* plus semester keys."""
        return {
            **fields,
            'flang': self.flang,
            'acysem': self.acysem,
            'acysemend': self.acysem,
        }

    def get_type(self):
        """Return the decoded response of the ``get_type`` endpoint."""
        return self._fetch_json('get_type')

    def get_category(self, ftype):
        """Return the decoded ``get_category`` response for *ftype*."""
        return self._fetch_json('get_category', self._hierarchy_form(ftype=ftype))

    def get_college(self, fcategory, ftype):
        """Return the decoded ``get_college`` response for a category."""
        return self._fetch_json(
            'get_college',
            self._hierarchy_form(fcategory=fcategory, ftype=ftype),
        )

    def get_dep(self, fcollege, fcategory, ftype):
        """Return the decoded ``get_dep`` response for a college/category."""
        return self._fetch_json(
            'get_dep',
            self._hierarchy_form(fcollege=fcollege, fcategory=fcategory, ftype=ftype),
        )

    # ------------------------------------------------------------------
    # Course list
    # ------------------------------------------------------------------
    def get_cos(self, dep):
        """Fetch the course list of department *dep* into ``course_data``.

        Courses whose key is already present in ``course_data`` are left
        untouched. A non-OK HTTP status is reported and the department is
        skipped; nothing is returned.
        """
        url = f"{self._BASE_URL}?r=main/get_cos_list"
        data = {
            "m_acy": self.year, "m_sem": self.semester,
            "m_acyend": self.year, "m_semend": self.semester,
            "m_dep_uid": dep, "m_group": "**", "m_grade": "**",
            "m_class": "**", "m_option": "**", "m_crsname": "**",
            "m_teaname": "**", "m_cos_id": "**", "m_cos_code": "**",
            "m_crstime": "**", "m_crsoutline": "**", "m_costype": "**",
            "m_selcampus": "**"
        }

        r = requests.post(url, headers=self.headers, verify=False, data=data,
                          timeout=self.timeout)
        if r.status_code != requests.codes.ok:
            # Report the skipped department instead of dropping it silently.
            print(f"  警告: 系所 {dep} 回應狀態碼 {r.status_code}，已略過")
            return

        self._store_courses(json.loads(r.text))

    def _store_courses(self, raw_data):
        """Merge one decoded ``get_cos_list`` payload into ``course_data``.

        The payload maps a department key to a dict that mixes course
        sections (keys made of the digits 1/2) with the auxiliary
        "language" and "brief" tables. Non-dict payloads or sections (such
        as an empty JSON array) are ignored.
        """
        for dep_payload in self._as_dict(raw_data).values():
            dep_payload = self._as_dict(dep_payload)
            language = self._as_dict(dep_payload.get("language"))
            briefs = self._as_dict(dep_payload.get("brief"))

            for section_key, courses in dep_payload.items():
                if self._SECTION_KEY.match(section_key) is None:
                    continue
                for cos_id, raw_cos in self._as_dict(courses).items():
                    if cos_id in self.course_data:
                        continue
                    self.course_data[cos_id] = self._build_course(
                        raw_cos,
                        self._as_dict(language.get(cos_id)),
                        self._as_dict(briefs.get(cos_id)),
                    )

    def _build_course(self, raw_cos, language_entry, brief_entry):
        """Build the stored record for one course.

        Missing language/brief entries yield ``english=False`` and an empty
        ``brief`` list instead of raising ``KeyError``.
        """
        if brief_entry:
            # Only the first brief code of the course is used.
            first_brief = self._as_dict(next(iter(brief_entry.values())))
            brief = (first_brief.get('brief') or '').split(',')
        else:
            brief = []

        cos_time = raw_cos["cos_time"]
        name = raw_cos["cos_cname"].replace("(英文授課)", '').replace("(英文班)", '')

        return {
            "id": raw_cos["cos_id"],
            "num_limit": raw_cos["num_limit"],
            "reg_num": raw_cos["reg_num"],
            "name": name,
            "credit": raw_cos["cos_credit"],
            "hours": raw_cos["cos_hours"],
            "teacher": raw_cos["teacher"],
            "time": self.parse_time(cos_time),
            "classroom": self.parse_classroom(cos_time),
            "time-classroom": cos_time,
            "english": language_entry.get("授課語言代碼") == "en-us",
            "brief": brief,
            "type": raw_cos["cos_type"],
        }

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------
    def _crawl_dep(self, dep):
        """Fetch department *dep* unless it has already been requested."""
        if dep in self.dep_list:
            return
        self.dep_list.append(dep)
        self.get_cos(dep)

    def crawl(self):
        """Crawl every department of the configured semester.

        Progress and a final summary are printed. Returns ``course_data``,
        the mapping of course key -> course record.
        """
        print("=" * 70)
        print(f"NYCU 課程爬蟲 - {self.year} 學年度第 {self.semester} 學期 (基本資訊)")
        print("=" * 70)

        start_time = datetime.now()

        print("\n取得課程基本資料...")
        for course_type in self.get_type():
            ftype = course_type["uid"]
            print(f"  處理: {course_type['cname']}")
            # Iterating the response directly works for a dict (keys) and
            # for an empty JSON array alike.
            categories = self.get_category(ftype) or ()

            if course_type["cname"] == "其他課程":
                # For this type the category ids are requested directly as
                # department ids.
                for fcategory in categories:
                    self._crawl_dep(fcategory)
                continue

            for fcategory in categories:
                colleges = self.get_college(fcategory, ftype)
                # Categories without colleges are queried with an empty
                # college id.
                for fcollege in (colleges or [""]):
                    for fdep in self.get_dep(fcollege, fcategory, ftype) or ():
                        self._crawl_dep(fdep)

        end_time = datetime.now()
        elapsed = end_time - start_time

        print(f"\n已取得 {len(self.course_data)} 門課程的基本資料")
        print(f"花費時間: {elapsed}")
        print("=" * 70)

        return self.course_data

# Notebook feedback: confirms the cell that defines NYCUCrawler has run.
print("✓ 爬蟲類別載入完成")

## 步驟 4: 執行爬蟲

開始爬取課程資料：

In [None]:
# Create a crawler for the configured year/semester and run the full crawl.
# crawl() returns the crawler's course_data dict (course key -> course record).
crawler = NYCUCrawler(YEAR, SEMESTER)
course_data = crawler.crawl()

## 步驟 5: 查看結果

顯示統計資訊和範例資料：

In [None]:
# Summary statistics for the crawled data.
from collections import Counter

total_courses = len(course_data)
print(f"總課程數: {total_courses}")

if total_courses == 0:
    # Percentages and the sample record are undefined for an empty result;
    # report it instead of failing with ZeroDivisionError / IndexError.
    print("尚未取得任何課程資料，請確認學年度與學期設定後重新執行爬蟲")
else:
    # Share of courses flagged as taught in English.
    english_courses = sum(1 for c in course_data.values() if c.get('english', False))
    print(f"英文授課: {english_courses} ({english_courses/total_courses*100:.1f}%)")

    # Number of courses per course type, most frequent first.
    course_types = Counter(c.get('type', '未知') for c in course_data.values())

    print("\n課程選別:")
    for ctype, count in course_types.most_common():
        print(f"  {ctype}: {count} ({count/total_courses*100:.1f}%)")

    # Show the complete record of the first course as a sample.
    print("\n範例課程資料:")
    first_id = next(iter(course_data))
    print(json.dumps(course_data[first_id], ensure_ascii=False, indent=2))

## 步驟 6: 下載資料

將資料儲存為 JSON 檔案並下載（下載功能使用 `google.colab`，需在 Google Colab 環境中執行）：

In [None]:
# os is needed for the file-size report below and is not imported elsewhere
# in the notebook.
import os

# Save the crawled data as a UTF-8 JSON file.
filename = f"{YEAR}-{SEMESTER}_data.json"
with open(filename, 'w', encoding='utf-8') as f:
    json.dump(course_data, f, ensure_ascii=False, indent=2)

print(f"✓ 資料已儲存至: {filename}")
print(f"檔案大小: {os.path.getsize(filename)/1024:.2f} KB")

# Trigger a browser download when running in Google Colab. Elsewhere the
# google.colab package is unavailable, so keep the saved file and say so
# instead of failing after the data has already been written.
try:
    from google.colab import files
except ImportError:
    print("\n非 Colab 環境，略過下載；檔案已儲存於目前工作目錄")
else:
    files.download(filename)
    print("\n✓ 檔案下載已開始")

## 資料結構說明

每門課程包含以下欄位：

```json
{
  "id": "課程代碼",
  "name": "課程名稱",
  "teacher": "授課教師",
  "credit": "學分",
  "hours": "時數",
  "time": ["上課時間列表"],
  "classroom": ["教室列表"],
  "time-classroom": "時間教室字串",
  "num_limit": "人數上限",
  "reg_num": "已選人數",
  "english": "是否英文授課",
  "type": "必修/選修",
  "brief": ["課程標籤"]
}
```

## 時間代碼對照

- **星期**: M=一, T=二, W=三, R=四, F=五, S=六, U=日
- **節次**: y=06:00-06:50, z=07:00-07:50, 1=08:00-08:50, ..., d=21:30-22:20（節次代碼包含 y、z、1-9、n、a、b、c、d）

例如：`"T56"` = 星期二第5,6節 (13:20-15:10)