## Game ID 爬蟲

In [55]:
import os
import requests
import json
import time
from pathlib import Path
from dotenv import load_dotenv
from datetime import datetime

load_dotenv()

True

In [None]:
# ---------------------------------------------------------------------------
# Steam app-id crawler.
# Pages through IStoreService/GetAppList, checkpoints the collected apps into
# chunked JSON files (game_id_<n>.json) after every successful request, and
# finally writes one metadata summary describing the run.
# ---------------------------------------------------------------------------

# Search parameters
api_key = os.environ.get("STEAM_API_KEY")
if not api_key:
    # Fail fast with a clear message instead of spending every retry (and
    # its back-off sleeps) on requests that cannot be authorised.
    raise RuntimeError("STEAM_API_KEY is not set; add it to the environment or the .env file")

url = "https://api.steampowered.com/IStoreService/GetAppList/v1/"
now_date = datetime.now().strftime("%Y-%m-%d")
max_result = 10         # page size requested from the API
max_save = 15           # rotate to a new output file once a chunk holds this many apps
max_searches = 6        # cap on successful requests per run
max_tries = 5           # consecutive failures tolerated before giving up
request_timeout = 30    # seconds; without it a stalled connection blocks forever
request_interval = 3    # seconds to wait between successful requests
last_id = 0
# True  -> the API reported no more results
# False -> aborted after max_tries consecutive failures
# None  -> stopped because max_searches was reached
search_result = None

# Tracking / accumulated state
game_list = []
start_time = datetime.now().strftime("%H:%M:%S")
search_times = 0
data_count = 0          # number of apps actually received across all pages
file_num = 1
tries = 1
# NOTE(review): machine-specific absolute path; consider deriving it from the
# project root so the notebook runs on other machines.
folder = Path(r"C:\Users\add41\Documents\Data_Engineer\Project\Steam-Games-Database-with-RAG\data\raw\game_id")
metadata_folder = folder / "metadata"

# Make sure both output locations exist before the first write.
folder.mkdir(parents=True, exist_ok=True)
metadata_folder.mkdir(parents=True, exist_ok=True)

# Crawl loop
while True:
    params = {
        'key': api_key,
        'include_games': 'true',     # include games
        'include_dlc': 'false',      # exclude DLC
        'include_software': 'false', # exclude software
        'include_videos': 'false',   # exclude videos
        'max_results': max_result,
        'last_appid': last_id
    }

    try:
        # Query the API
        print(f"開始第{search_times+1}次資料搜尋...")
        res = requests.get(url, params=params, timeout=request_timeout)
        res.raise_for_status()  # turn HTTP 4xx/5xx into a retryable error
        response = res.json().get("response") or {}

        # A missing/empty "apps" entry means an empty page, not a failure.
        apps = response.get("apps") or []
        game_list.extend(apps)
        data_count += len(apps)

        # Keep the previous cursor when the response does not carry a new one.
        if response.get("last_appid") is not None:
            last_id = response.get("last_appid")
        more_results = response.get("have_more_results")

        # Attach date/time information to the chunk
        now_time = datetime.now().strftime("%H:%M:%S")
        data = {
            "update_date": now_date,
            "update_time": now_time,
            "data": game_list
        }

        # Checkpoint the current chunk (rewritten after every page)
        file = f"game_id_{file_num}.json"
        save_path = folder / file

        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

        search_times += 1
        tries = 1  # a success resets the consecutive-failure counter
        print(f"第{search_times}次搜尋資料儲存完畢！")

        # Once the chunk is large enough, move on to the next file number and
        # start accumulating a fresh chunk.
        if len(game_list) >= max_save:
            file_num += 1
            game_list.clear()

        if not more_results:
            search_result = True
            break

        if search_times >= max_searches:
            break

        # Throttle only when another request will actually follow.
        time.sleep(request_interval)

    except Exception as e:
        print(f"資料爬取發生錯誤: {e}")
        if tries < max_tries:
            # Linear back-off: 10s, 20s, 30s, ...
            print(f"等待{tries*10}秒後再次繼續...")
            time.sleep(tries*10)
            tries += 1
        else:
            print("已達重試次數上限，終止程式")
            search_result = False
            break


# Crawl finished: build the run metadata
now_date_filename = datetime.now().strftime("%Y%m%d")
end_time = datetime.now().strftime("%H:%M:%S")

metadata = {
    "update_date": now_date,
    "start_time": start_time,
    "end_time": end_time,
    "search_result": search_result,
    "max_result": max_result,
    "search_times": search_times,
    # Real number of apps received; max_result * search_times overstates it
    # whenever a page comes back short.
    "data_count": data_count,
    "last_appid": last_id
}

# Save the metadata under a date-stamped file name
metadata_file = f"{now_date_filename}_metadata_game_id.json"
metadata_path = metadata_folder / metadata_file

with open(metadata_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)

開始搜尋第1筆資料...
第1筆資料儲存完畢！
開始搜尋第2筆資料...
第2筆資料儲存完畢！
開始搜尋第3筆資料...
第3筆資料儲存完畢！
開始搜尋第4筆資料...
第4筆資料儲存完畢！
開始搜尋第5筆資料...
第5筆資料儲存完畢！
開始搜尋第6筆資料...
第6筆資料儲存完畢！


## Game Info 爬蟲

In [57]:
import requests
import json
import time
from pathlib import Path
from dotenv import load_dotenv
from datetime import datetime

load_dotenv()

True

In [58]:
# ---------------------------------------------------------------------------
# Steam game-info crawler.
# Reads the game_id_<n>.json chunks one after another, fetches the store
# "appdetails" payload for every app id, checkpoints the results into chunked
# game_info_<n>.json files, and finally writes a metadata summary of the run.
# ---------------------------------------------------------------------------

# Input (app-id chunks) and output (game-info chunks) locations
# NOTE(review): machine-specific absolute paths; consider deriving them from
# the project root so the notebook runs on other machines.
id_file_num = 1
id_folder = Path(r"C:\Users\add41\Documents\Data_Engineer\Project\Steam-Games-Database-with-RAG\data\raw\game_id")
info_file_num = 1
info_folder = Path(r"C:\Users\add41\Documents\Data_Engineer\Project\Steam-Games-Database-with-RAG\data\raw\game_info")
metadata_folder = info_folder / "metadata"

# Make sure both output locations exist before the first write.
info_folder.mkdir(parents=True, exist_ok=True)
metadata_folder.mkdir(parents=True, exist_ok=True)

# Accumulated state
game_info_list = []     # payloads of the chunk currently being filled
data_count = 0          # apps fetched and saved successfully
failed_list = []        # app ids skipped after exhausting all retries
failed_count = 0

max_result = 10         # payloads per output file
max_tries = 5           # attempts per app before it is recorded as failed
request_timeout = 30    # seconds; without it a stalled connection blocks forever
request_interval = 3    # seconds to wait after each successful request
tries = 1
last_id = None          # last app id that was saved successfully

now_date = datetime.now().strftime("%Y-%m-%d")
start_time = datetime.now().strftime("%H:%M:%S")


while True:
    id_file = f"game_id_{id_file_num}.json"
    id_path = id_folder / id_file

    # The first missing chunk number marks the end of the input.
    if not id_path.exists():
        search_result = True
        break

    with open(id_path, 'r', encoding='utf-8') as f:
        game_list_data = json.load(f)

    print(f"開始讀取{id_file}資料...")
    # Tolerate a chunk without a "data" entry instead of crashing the loop.
    game_list = game_list_data.get("data") or []

    for game in game_list:
        print(f"[{id_file}]開始搜尋第{data_count+1}筆資料...")

        app_id = game.get("appid")
        url = f"https://store.steampowered.com/api/appdetails?appids={app_id}"

        for tries in range(1, max_tries + 1):
            try:
                res = requests.get(url, timeout=request_timeout)
                res.raise_for_status()  # turn HTTP 4xx/5xx into a retryable error

                game_info = res.json()
                if not isinstance(game_info, dict):
                    # Do not persist null / malformed payloads; retry instead.
                    raise ValueError(f"unexpected appdetails payload for appid {app_id}: {game_info!r}")

                now_time = datetime.now().strftime("%H:%M:%S")
                data = {
                    "update_date": now_date,
                    "update_time": now_time,
                    # Build the snapshot without mutating the chunk yet, so a
                    # failed write followed by a retry cannot store this app twice.
                    "data": game_info_list + [game_info]
                }

                info_file = f"game_info_{info_file_num}.json"
                info_save_path = info_folder / info_file

                with open(info_save_path, 'w', encoding='utf-8') as f:
                    json.dump(data, f, ensure_ascii=False, indent=2)

                # Commit to in-memory state only after the write succeeded.
                game_info_list.append(game_info)
                last_id = app_id
                data_count += 1
                print(f"[{id_file}]第{data_count}筆資料儲存完畢！")
                time.sleep(request_interval)

                # Chunk is full: move on to the next output file.
                if len(game_info_list) >= max_result:
                    info_file_num += 1
                    game_info_list.clear()

                break

            except Exception as e:
                print(f"資料爬取發生錯誤: {e}")
                if tries < max_tries:
                    # Linear back-off: 10s, 20s, 30s, ...
                    print(f"等待{tries*10}秒後再次繼續...")
                    time.sleep(tries*10)
                else:
                    print("已達重試次數上限，跳過此筆資料")
                    failed_count += 1
                    failed_list.append(app_id)

    id_file_num += 1


# Crawl finished: build the run metadata
now_date_filename = datetime.now().strftime("%Y%m%d")
end_time = datetime.now().strftime("%H:%M:%S")

metadata = {
    "update_date": now_date,
    "start_time": start_time,
    "end_time": end_time,
    "failed_count": failed_count,
    "failed_list": failed_list,
    "data_count": data_count,
    "last_appid": last_id
}

# Save the metadata under a date-stamped file name
metadata_file = f"{now_date_filename}_metadata_game_info.json"
metadata_path = metadata_folder / metadata_file

with open(metadata_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)

開始讀取game_id_1.json資料...
[game_id_1.json]開始搜尋第1筆資料...
[game_id_1.json]第1筆資料儲存完畢！
[game_id_1.json]開始搜尋第2筆資料...
[game_id_1.json]第2筆資料儲存完畢！
[game_id_1.json]開始搜尋第3筆資料...
[game_id_1.json]第3筆資料儲存完畢！
[game_id_1.json]開始搜尋第4筆資料...
[game_id_1.json]第4筆資料儲存完畢！
[game_id_1.json]開始搜尋第5筆資料...
[game_id_1.json]第5筆資料儲存完畢！
[game_id_1.json]開始搜尋第6筆資料...
[game_id_1.json]第6筆資料儲存完畢！
[game_id_1.json]開始搜尋第7筆資料...
[game_id_1.json]第7筆資料儲存完畢！
[game_id_1.json]開始搜尋第8筆資料...
[game_id_1.json]第8筆資料儲存完畢！
[game_id_1.json]開始搜尋第9筆資料...
[game_id_1.json]第9筆資料儲存完畢！
[game_id_1.json]開始搜尋第10筆資料...
[game_id_1.json]第10筆資料儲存完畢！
[game_id_1.json]開始搜尋第11筆資料...
[game_id_1.json]第11筆資料儲存完畢！
[game_id_1.json]開始搜尋第12筆資料...
[game_id_1.json]第12筆資料儲存完畢！
[game_id_1.json]開始搜尋第13筆資料...
[game_id_1.json]第13筆資料儲存完畢！
[game_id_1.json]開始搜尋第14筆資料...
[game_id_1.json]第14筆資料儲存完畢！
[game_id_1.json]開始搜尋第15筆資料...
[game_id_1.json]第15筆資料儲存完畢！
[game_id_1.json]開始搜尋第16筆資料...
[game_id_1.json]第16筆資料儲存完畢！
[game_id_1.json]開始搜尋第17筆資料...
[game_id_1.json]第17筆資料儲存完畢！
[game_id

In [None]:
import sys
from pathlib import Path

# The `src` package is only importable when the directory that contains it is
# on sys.path; running the notebook from a sub-folder otherwise ends in
# "ModuleNotFoundError: No module named 'src'". Walk up from the working
# directory and register the first ancestor that holds a `src` directory.
# NOTE(review): assumes `src/` sits at the project root — confirm the layout.
for candidate in (Path.cwd(), *Path.cwd().parents):
    if (candidate / "src").is_dir():
        if str(candidate) not in sys.path:
            sys.path.insert(0, str(candidate))
        break

from src.mod import general as gr

# Presumably resolves the project's root directory — verify against src/mod/general.py.
root = gr.get_root_dir()

root

ModuleNotFoundError: No module named 'src'