In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Base URL and request headers for the schedule page.
base_url = "https://statiz.sporki.com/schedule/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Year and months to scrape.
year = 2024
months = range(3, 12)  # March through November

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Column order of the resulting table. Passing it to the DataFrame explicitly
# keeps the CSV header intact even when no game was scraped.
COLUMNS = ["Date", "Team1", "Score1", "Team2", "Score2"]


def fetch_month_html(session, year, month):
    """Download the schedule page for one month.

    Args:
        session: ``requests.Session`` used to issue the GET request.
        year: Value sent as the ``year`` query parameter.
        month: Value sent as the ``month`` query parameter.

    Returns:
        The response body as text, or ``None`` when the request raised a
        ``requests.RequestException`` or the status code was not 200. A
        failure message is printed in both cases.
    """
    params = {"year": year, "month": month}
    try:
        response = session.get(
            base_url, headers=headers, params=params, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        # A single unreachable month must not discard the months already
        # collected, so report the problem and let the caller continue.
        print(f"Failed to fetch data for {year}-{month}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {year}-{month}: HTTP {response.status_code}")
        return None
    return response.text


def parse_month_schedule(html, year, month):
    """Extract game results from one month's schedule page.

    Args:
        html: HTML text of the schedule page.
        year: Year used to build each record's ``Date``.
        month: Month number used to build each record's ``Date``.

    Returns:
        A list of dicts with the keys ``Date``, ``Team1``, ``Score1``,
        ``Team2`` and ``Score2``. Team names and scores are the stripped
        text of the matching ``span`` elements (scores stay strings).
    """
    soup = BeautifulSoup(html, "html.parser")
    records = []

    # Each table row is treated as one week of the calendar.
    for row in soup.select("table tbody tr"):
        # td:nth-child(2) .. td:nth-child(8) are the Monday..Sunday cells.
        for day_idx in range(2, 9):
            day_cell = row.select_one(f"td:nth-child({day_idx})")
            if day_cell is None:
                continue

            # Cells without a day label carry no date and are skipped.
            date_tag = day_cell.select_one("span.day")
            if date_tag is None:
                continue

            day = date_tag.text.strip()
            date = f"{year}-{month:02d}-{int(day):02d}"

            # Every link in the cell's game list is one game.
            for game in day_cell.select("div.games > ul > li > a"):
                teams = game.find_all("span", class_="team")
                scores = game.find_all("span", class_="score")

                # Only keep entries that have exactly two teams and two scores.
                if len(teams) != 2 or len(scores) != 2:
                    continue

                records.append({
                    "Date": date,
                    "Team1": teams[0].text.strip(),
                    "Score1": scores[0].text.strip(),
                    "Team2": teams[1].text.strip(),
                    "Score2": scores[1].text.strip(),
                })

    return records


# Accumulates one record per scraped game.
all_data = []

# A single session reuses the underlying connection across the monthly requests.
with requests.Session() as session:
    for month in months:
        print(f"Fetching data for {year}-{month}...")
        html = fetch_month_html(session, year, month)
        if html is not None:
            all_data.extend(parse_month_schedule(html, year, month))

# Convert all records to a DataFrame.
df = pd.DataFrame(all_data, columns=COLUMNS)

# Print and save the result.
print(df)
df.to_csv("statiz_game_schedule_2024.csv", index=False, encoding="utf-8-sig")
print("2024년 전체 데이터가 statiz_game_schedule_2024.csv로 저장되었습니다.")


Fetching data for 2024-3...
Fetching data for 2024-4...
Fetching data for 2024-5...
Fetching data for 2024-6...
Fetching data for 2024-7...
Fetching data for 2024-8...
Fetching data for 2024-9...
Fetching data for 2024-10...
Fetching data for 2024-11...
           Date Team1 Score1 Team2 Score2
0    2024-03-23    한화      2    LG      8
1    2024-03-23    롯데      3   SSG      5
2    2024-03-23    삼성      6    KT      2
3    2024-03-23    키움      5   KIA      7
4    2024-03-23    두산      3    NC      4
..          ...   ...    ...   ...    ...
722  2024-10-17    삼성      0    LG      1
723  2024-10-19    삼성      1    LG      0
724  2024-10-23    삼성      3   KIA      8
725  2024-10-25   KIA      2    삼성      4
726  2024-10-26   KIA      9    삼성      2

[727 rows x 5 columns]
2024년 전체 데이터가 statiz_game_schedule_2024.csv로 저장되었습니다.


In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Base URL and request headers for the schedule page.
base_url = "https://statiz.sporki.com/schedule/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

# Year range and months to scrape.
start_year = 2015
end_year = 2024
months = range(3, 12)  # March through November

# Seconds to wait for the server before giving up on a request; without a
# timeout a stalled connection would block the whole multi-year run.
REQUEST_TIMEOUT = 10

# Column order of the resulting table. Passing it to the DataFrame explicitly
# keeps the CSV header intact even when no game was scraped.
COLUMNS = ["Date", "Team1", "Score1", "Team2", "Score2"]


def fetch_month_html(session, year, month):
    """Download the schedule page for one month.

    Args:
        session: ``requests.Session`` used to issue the GET request.
        year: Value sent as the ``year`` query parameter.
        month: Value sent as the ``month`` query parameter.

    Returns:
        The response body as text, or ``None`` when the request raised a
        ``requests.RequestException`` or the status code was not 200. A
        failure message is printed in both cases.
    """
    params = {"year": year, "month": month}
    try:
        response = session.get(
            base_url, headers=headers, params=params, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        # One unreachable month must not discard everything collected so far,
        # so report the problem and let the caller continue.
        print(f"Failed to fetch data for {year}-{month}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {year}-{month}: HTTP {response.status_code}")
        return None
    return response.text


def parse_month_schedule(html, year, month):
    """Extract game results from one month's schedule page.

    Args:
        html: HTML text of the schedule page.
        year: Year used to build each record's ``Date``.
        month: Month number used to build each record's ``Date``.

    Returns:
        A list of dicts with the keys ``Date``, ``Team1``, ``Score1``,
        ``Team2`` and ``Score2``. Team names and scores are the stripped
        text of the matching ``span`` elements (scores stay strings).
    """
    soup = BeautifulSoup(html, "html.parser")
    records = []

    # Each table row is treated as one week of the calendar.
    for row in soup.select("table tbody tr"):
        # td:nth-child(2) .. td:nth-child(8) are the Monday..Sunday cells.
        for day_idx in range(2, 9):
            day_cell = row.select_one(f"td:nth-child({day_idx})")
            if day_cell is None:
                continue

            # Cells without a day label carry no date and are skipped.
            date_tag = day_cell.select_one("span.day")
            if date_tag is None:
                continue

            day = date_tag.text.strip()
            date = f"{year}-{month:02d}-{int(day):02d}"

            # Every link in the cell's game list is one game.
            for game in day_cell.select("div.games > ul > li > a"):
                teams = game.find_all("span", class_="team")
                scores = game.find_all("span", class_="score")

                # Only keep entries that have exactly two teams and two scores.
                if len(teams) != 2 or len(scores) != 2:
                    continue

                records.append({
                    "Date": date,
                    "Team1": teams[0].text.strip(),
                    "Score1": scores[0].text.strip(),
                    "Team2": teams[1].text.strip(),
                    "Score2": scores[1].text.strip(),
                })

    return records


# Accumulates one record per scraped game.
all_data = []

# A single session reuses the underlying connection across all requests.
with requests.Session() as session:
    for year in range(start_year, end_year + 1):
        for month in months:
            print(f"Fetching data for {year}-{month}...")
            html = fetch_month_html(session, year, month)
            if html is not None:
                all_data.extend(parse_month_schedule(html, year, month))

# Convert all records to a DataFrame.
df = pd.DataFrame(all_data, columns=COLUMNS)

# Print and save the result.
print(df)
df.to_csv("statiz_game_schedule_10_years.csv", index=False, encoding="utf-8-sig")
print("10년치 데이터가 statiz_game_schedule_10_years.csv로 저장되었습니다.")


Fetching data for 2015-3...
Fetching data for 2015-4...
Fetching data for 2015-5...
Fetching data for 2015-6...
Fetching data for 2015-7...
Fetching data for 2015-8...
Fetching data for 2015-9...
Fetching data for 2015-10...
Fetching data for 2015-11...
Fetching data for 2016-3...
Fetching data for 2016-4...
Fetching data for 2016-5...
Fetching data for 2016-6...
Fetching data for 2016-7...
Fetching data for 2016-8...
Fetching data for 2016-9...
Fetching data for 2016-10...
Fetching data for 2016-11...
Fetching data for 2017-3...
Fetching data for 2017-4...
Fetching data for 2017-5...
Fetching data for 2017-6...
Fetching data for 2017-7...
Fetching data for 2017-8...
Fetching data for 2017-9...
Fetching data for 2017-10...
Fetching data for 2017-11...
Fetching data for 2018-3...
Fetching data for 2018-4...
Fetching data for 2018-5...
Fetching data for 2018-6...
Fetching data for 2018-7...
Fetching data for 2018-8...
Fetching data for 2018-9...
Fetching data for 2018-10...
Fetching data