In [None]:
import requests
from bs4 import BeautifulSoup
import re
from bs4 import XMLParsedAsHTMLWarning
import warnings
import pandas as pd
import numpy as np

warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)

In [None]:
class Racer_racelist_info:
    """Fields parsed from one racer container of the race-list page.

    The container's text is split on whitespace and the first 23 tokens are
    mapped positionally onto attributes. Every value is kept as a string.
    """

    def __init__(self, racer_container):
        """Parse one racer container.

        Args:
            racer_container: Object exposing ``get_text()`` (the callers in
                this notebook pass a BeautifulSoup ``tbody`` element).

        Raises:
            ValueError: If the text yields fewer than 23 whitespace-separated
                tokens (raised by the tuple unpacking).
        """
        racer_basic_infos = racer_container.get_text().split()
        (
            self.wakuban, self.racer_id, _, self.grade, _, _, _, age_weight,
            self.f_num, self.l_num, self.st_ave,
            self.g_winrate, self.g_2rate, self.g_3rate,
            self.l_winrate, self.l_2rate, self.l_3rate,
            self.m_id, self.m_2rate, self.m_3rate,
            self.b_id, self.b_2rate, self.b_3rate,
        ) = racer_basic_infos[:23]

        # Token shape: "<age><suffix>/<weight>kg". The suffix after the age is
        # matched generically so the pattern does not depend on how that
        # character is encoded in this file; the decimal point is escaped.
        match = re.search(r'(\d+)[^\d/]*/(\d+(?:\.\d+)?)kg', age_weight)
        # Group 2 carries the "kg" unit, so it is the weight; group 1 is the age.
        self.weight = match.group(2) if match else None
        self.age = match.group(1) if match else None

    def print(self):
        """Print every attribute value on one line, separated by spaces."""
        print(*self.__dict__.values())
        
class Racer_before_info:
    """Fields parsed from one racer container of the before-info page."""

    def __init__(self, racer_container):
        """Split the container text and keep four of its first 11 tokens.

        Token 0 becomes ``wakuban``, token 4 ``disp_time``, token 5 ``tilt``
        and token 10 ``adj_weight``; the remaining tokens are ignored.

        Raises:
            ValueError: If fewer than 11 whitespace-separated tokens exist.
        """
        tokens = racer_container.get_text().split()
        # Unpack first so a short row fails before any attribute is set.
        (
            wakuban, _, _, _,
            disp_time, tilt,
            _, _, _, _,
            adj_weight,
        ) = tokens[:11]
        self.wakuban = wakuban
        self.disp_time = disp_time
        self.tilt = tilt
        self.adj_weight = adj_weight

    def print(self):
        """Print every attribute value on one line, separated by spaces."""
        print(*vars(self).values())
        
class Race_result_info:
    """Fields parsed from one row of the race-result table."""

    def __init__(self, result_row):
        """Split the row text and keep three of its first 6 tokens.

        Token 0 becomes ``rank``, token 1 ``wakuban`` and token 5 ``time``;
        tokens 2-4 are ignored.

        Raises:
            ValueError: If fewer than 6 whitespace-separated tokens exist.
        """
        tokens = result_row.get_text().split()
        # Unpack first so a short row fails before any attribute is set.
        rank, wakuban, _, _, _, time = tokens[:6]
        self.rank = rank
        self.wakuban = wakuban
        self.time = time

    def print(self):
        """Print every attribute value on one line, separated by spaces."""
        print(*vars(self).values())

In [154]:
def create_hd_list(start_date, end_date):
    """Return every calendar day from start_date to end_date as ``YYYYMMDD``.

    Both bounds are inclusive and are passed unchanged to
    ``pd.date_range``. An end before the start yields an empty list.
    """
    days = pd.date_range(start=start_date, end=end_date)
    # Format the whole index at once rather than one timestamp at a time.
    return days.strftime("%Y%m%d").tolist()

In [155]:
# Zero-padded codes used to build the query strings below.
# range() excludes its stop value, so the stops are 13 and 25 to include
# race 12 and venue 24 (the previous stops ended at 11 and 23).
# NOTE(review): assumes 12 races per day and venue codes 01-24 — confirm
# against the site before a full run.
rno_list = [str(i).zfill(2) for i in range(1, 13)]
hd_list = create_hd_list("20250101", "20251231")
jcd_list = [str(i).zfill(2) for i in range(1, 25)]

In [None]:
def make_racelist_df(hd, jcd, rno, timeout=10):
    """Fetch the race-list page of one race and return one row per racer.

    Args:
        hd: Value of the ``hd`` query parameter (this notebook passes
            ``YYYYMMDD`` date strings).
        jcd: Value of the ``jcd`` query parameter.
        rno: Value of the ``rno`` query parameter.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        DataFrame with one row per ``tbody.is-fs12`` element, whose columns
        are the attributes of ``Racer_racelist_info``, plus a
        ``course_length`` column holding the digits of the first
        "<3-4 digits>m" found in the page header (as a string), or ``None``
        when the header or the distance is missing.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If a racer container has fewer tokens than expected.
    """
    prefix = "https://www.boatrace.jp/owpc/pc/race/racelist?"
    url = f"{prefix}rno={rno}&jcd={jcd}&hd={hd}"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    # The header or the distance may be absent; fall back to None instead of
    # failing with AttributeError on a None lookup.
    course_length = None
    header = soup.find('h3', class_='title16_titleDetail__add2020')
    if header is not None:
        match = re.search(r'(\d{3,4})m', header.get_text(strip=True))
        if match:
            course_length = match.group(1)

    racer_containers = soup.find_all('tbody', class_='is-fs12')
    info_list = [Racer_racelist_info(container) for container in racer_containers]

    racelist_df = pd.DataFrame([r.__dict__ for r in info_list])
    racelist_df['course_length'] = course_length
    return racelist_df

In [None]:
def make_before_df(hd, jcd, rno, timeout=10):
    """Fetch the before-info page of one race and return one row per racer.

    Args:
        hd: Value of the ``hd`` query parameter (this notebook passes
            ``YYYYMMDD`` date strings).
        jcd: Value of the ``jcd`` query parameter.
        rno: Value of the ``rno`` query parameter.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        DataFrame with the columns ``wakuban``, ``disp_time``, ``tilt`` and
        ``adj_weight``, one row per ``tbody.is-fs12`` element. The columns
        are present even when the page yields no rows, so the frame can
        always be merged on ``wakuban``.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If a racer container has fewer tokens than expected.
    """
    prefix = "https://www.boatrace.jp/owpc/pc/race/beforeinfo?"
    url = f"{prefix}rno={rno}&jcd={jcd}&hd={hd}"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    racer_containers = soup.find_all('tbody', class_='is-fs12')
    info_list = [Racer_before_info(container) for container in racer_containers]

    # Explicit columns keep the schema stable for an empty page.
    beforeinfo_df = pd.DataFrame(
        [r.__dict__ for r in info_list],
        columns=['wakuban', 'disp_time', 'tilt', 'adj_weight'],
    )
    return beforeinfo_df

In [None]:
def make_result_df(hd, jcd, rno, timeout=10):
    """Fetch the race-result page of one race and return one row per finisher.

    Args:
        hd: Value of the ``hd`` query parameter (this notebook passes
            ``YYYYMMDD`` date strings).
        jcd: Value of the ``jcd`` query parameter.
        rno: Value of the ``rno`` query parameter.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        DataFrame with the columns ``rank``, ``wakuban`` and ``time``, one
        row per ``tbody`` of the first ``table.is-w495``. When that table is
        missing the frame is empty but keeps the three columns, so it can
        always be merged on ``wakuban``.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If a result row has fewer tokens than expected.
    """
    prefix = "https://www.boatrace.jp/owpc/pc/race/raceresult?"
    url = f"{prefix}rno={rno}&jcd={jcd}&hd={hd}"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    table_container = soup.find('table', class_='is-w495')
    # No result table on the page: treat it as zero rows rather than
    # failing with AttributeError on None.
    rows = table_container.find_all('tbody') if table_container is not None else []
    info_list = [Race_result_info(row) for row in rows]

    # Explicit columns keep the schema stable for an empty page.
    result_df = pd.DataFrame(
        [r.__dict__ for r in info_list],
        columns=['rank', 'wakuban', 'time'],
    )
    return result_df

In [None]:
# Scrape every (date, venue, race) combination, merge the three pages of each
# race on ``wakuban`` and keep the merged frames instead of discarding them.
race_frames = []  # one merged frame per successfully scraped race
for hd in hd_list:
    for jcd in jcd_list:
        for rno in rno_list:
            raceid = hd + jcd + rno.zfill(2)
            try:
                racelist_df = make_racelist_df(hd, jcd, rno)
                before_df = make_before_df(hd, jcd, rno)
                result_df = make_result_df(hd, jcd, rno)
            except (requests.RequestException, AttributeError, ValueError) as exc:
                # A failed request, or a page lacking the expected elements
                # (presumably when no such race was held — TODO confirm),
                # must not abort the whole run: report it and move on.
                print(f"skipped {raceid}: {exc!r}")
                continue

            if "wakuban" not in racelist_df.columns:
                # No racer rows were parsed, so there is nothing to merge onto.
                print(f"skipped {raceid}: no racers found")
                continue

            # NFKC folds full-width digits to ASCII. Apply it to the merge key
            # of every frame so the keys compare equal regardless of which
            # form each page uses (it is a no-op on ASCII digits).
            racelist_df["wakuban"] = racelist_df["wakuban"].astype(str).str.normalize('NFKC')
            if "rank" in result_df.columns:
                result_df["rank"] = result_df["rank"].astype(str).str.normalize('NFKC')

            df = racelist_df
            for extra_df in (before_df, result_df):
                if "wakuban" not in extra_df.columns:
                    continue  # page produced no rows; nothing to join
                extra_df["wakuban"] = extra_df["wakuban"].astype(str).str.normalize('NFKC')
                df = pd.merge(df, extra_df, on='wakuban', how='left')

            df.insert(0, "raceid", raceid)
            race_frames.append(df)

# Build the combined frame once, after the loop, rather than growing it row by row.
all_races_df = pd.concat(race_frames, ignore_index=True) if race_frames else pd.DataFrame()
            

'20251231'