用語の英語（参考）  
https://www.jra.go.jp/keiba/overseas/yougo/

In [38]:
import numpy as np
import pandas as pd

import os
import re
import itertools
from collections import OrderedDict
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup

In [39]:
!python -V

Python 3.10.12


In [40]:
#!pip freeze

In [41]:
URL_DICT = {
    "top": "https://www.boatrace.jp/",
    "racersearch": "https://www.boatrace.jp/owpc/pc/data/racersearch/index",
    "stadium": "https://www.boatrace.jp/owpc/pc/data/racersearch/index",
    "record": "https://www.boatrace.jp/owpc/pc/data/record/index",
    "sch": {
        "sgpg1" : "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd=01",
        "g1g2"  : "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd=02",
        "g3"    : "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd=03",
        "venus" : "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd=04",
        "rookie": "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd=05",
        "master": "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd=06",
    }
}

In [42]:
#dfs = pd.read_html(URL_DICT["record"])

In [43]:
#display(dfs[0])
#display(dfs[1])

## 共通処理

In [44]:
import re

def string_formatting(src: str) -> str:
    dst = src.replace("\u3000", "").replace(" ", "")

    while "\n\n" in dst:
        dst = dst.replace("\n\n", "\n")

    return dst.strip()

def string_split(src: str) -> list[str]:
    fmt = string_formatting(src)
    dst = [token for token in re.split("[\n/]", fmt) if token.strip()]
    return dst

In [45]:
"""
def split_list_old(src_list, indices):
    dst_list = np.split(np.array(src_list, dtype=object), indices)
    dst_list = [dst.tolist() for dst in dst_list]
    return dst_list
"""

def split_list(src_list, n_divisions):
    n = len(src_list)
    if n % n_divisions:
        raise ValueError("array split does not result in an equal division")

    dst_list = [[] for _ in range(n_divisions)]
    w = n // n_divisions
    for idx, src in enumerate(src_list):
        dst_list[idx // w].append(src)
    return dst_list

In [46]:
CACHE_DIRPATH = "cache"

def get_beautiful_soup(url):
    cache_html_path = os.path.join(CACHE_DIRPATH, os.path.basename(url) + ".html")

    if os.path.isfile(cache_html_path):
        return BeautifulSoup(open(cache_html_path), 'html.parser')
    else:
        response = requests.get(url)

        os.makedirs(CACHE_DIRPATH, exist_ok=True)
        with open(cache_html_path, "w") as f:
            f.write(response.text)

    return BeautifulSoup(response.text, "html.parser")

## レース情報取得

1レース分のリザルト情報を取得。  
第4回BBCトーナメント最終日（2023/1/15）の12Rを対象に実装。



---

`[出馬表]`  
レースに参加するレーサー情報を取得する。  
- 出身 / 年齢 / 体重
- F数 / L数 / 平均ST

以下の情報は一旦考慮しない
- レースNo（艇番色）/ 進入コース/ STタイミング/ 成績

`[オッズ]`

そのまま読み込んで使うのは難しそうなので、適切な加工をする必要あり。  
単勝はそのまま使えそう。  
連単や連複は加工した方が良い。

`[直前情報]`

スタート位置、天候の情報を取得。  
レーサーの情報も取得する。（体重もここの方が信憑性ある？）  
前走成績は有るとき無いときあるので使用しない。

`[コンピュータ予想 / マイ予想]`  

使用しない。

`[結果]`

タイムと払い戻し情報を取得する。  
学習には使用せず、評価やロス算出に使用する。

`[ピットレポート]`

一応取っておく。出馬表のテーブルにくっつける。

---

### URL

In [47]:
SAMPLE_URL = {
    "racelist": "https://www.boatrace.jp/owpc/pc/race/racelist?rno=12&jcd=11&hd=20230115",
    "odds": {
        "trifecta":            "https://www.boatrace.jp/owpc/pc/race/odds3t?rno=12&jcd=11&hd=20230115",     # 三連単
        "trio":                "https://www.boatrace.jp/owpc/pc/race/odds3f?rno=12&jcd=11&hd=20230115",     # 三連複
        "exacta-and-quinella": "https://www.boatrace.jp/owpc/pc/race/odds2tf?rno=12&jcd=11&hd=20230115",     # 二連単 / 二連複
        "quinella-place":      "https://www.boatrace.jp/owpc/pc/race/oddsk?rno=12&jcd=11&hd=20230115",     # 拡連複
        "win-and-place":       "https://www.boatrace.jp/owpc/pc/race/oddstf?rno=12&jcd=11&hd=20230115",     # 単勝 / 複勝
    },
    "beforeinfo": "https://www.boatrace.jp/owpc/pc/race/beforeinfo?rno=12&jcd=11&hd=20230115",
    "raceresult": "https://www.boatrace.jp/owpc/pc/race/raceresult?rno=12&jcd=11&hd=20230115",
    "pitreport" : "https://www.boatrace.jp/owpc/pc/race/pitreport?rno=12&jcd=11&hd=20230115",
}


### pandas 読み込みテーブルの確認

体裁がかなり崩れるため、自作するのがよさそう、、

In [48]:
def disp_html_sample(url, n_head = 2):
    dfs = pd.read_html(url)
    print(f"table num : {len(dfs)}")

    for idx, df in enumerate(dfs):
        print(f"[{idx}]" + "-" * 50)
        display(df.head(n_head))
        print("")


In [49]:
disp_html_sample(SAMPLE_URL["racelist"])

table num : 2
[0]--------------------------------------------------


Unnamed: 0,レース,レース.1,1R,2R,3R,4R,5R,6R,7R,8R,9R,10R,11R,12R
0,締切予定時刻,締切予定時刻,10:35,11:03,11:31,11:59,12:27,12:55,13:24,13:53,14:26,15:00,15:34,16:15



[1]--------------------------------------------------


Unnamed: 0_level_0,枠,ボートレーサー,ボートレーサー,ボートレーサー,全国,当地,モーター,ボート,Unnamed: 8_level_0,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,早見
Unnamed: 0_level_1,枠,写真,登録番号/級別 氏名 支部/出身地 年齢/体重,F数 L数 平均ST,勝率 2連率 3連率,勝率 2連率 3連率,No 2連率 3連率,No 2連率 3連率,Unnamed: 8_level_1,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,レースNo（艇番色） 進入コース STタイミング 成績,早見
Unnamed: 0_level_2,枠,写真,登録番号/級別 氏名 支部/出身地 年齢/体重,F数 L数 平均ST,勝率 2連率 3連率,勝率 2連率 3連率,No 2連率 3連率,No 2連率 3連率,Unnamed: 8_level_2,初日,...,３日目.1,最終日,最終日.1,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,早見
0,１,,3415 / A1 松井 繁 大阪/大阪 53歳/52.0kg,F0 L0 0.16,6.81 37.19 62.81,6.21 42.11 47.37,19 35.94 50.78,58 30.66 43.07,,4,...,,,,,,,,,,
1,１,,3415 / A1 松井 繁 大阪/大阪 53歳/52.0kg,F0 L0 0.16,6.81 37.19 62.81,6.21 42.11 47.37,19 35.94 50.78,58 30.66 43.07,,1,...,,,,,,,,,,





In [50]:
#disp_html_sample(SAMPLE_URL["odds"])

In [51]:
disp_html_sample(SAMPLE_URL["beforeinfo"])

table num : 3
[0]--------------------------------------------------


Unnamed: 0,レース,レース.1,1R,2R,3R,4R,5R,6R,7R,8R,9R,10R,11R,12R
0,締切予定時刻,締切予定時刻,10:35,11:03,11:31,11:59,12:27,12:55,13:24,13:53,14:26,15:00,15:34,16:15



[1]--------------------------------------------------


Unnamed: 0_level_0,枠,写真,ボートレーサー,体重,展示 タイム,チルト,プロペラ,部品交換,前走成績,前走成績
Unnamed: 0_level_1,枠,写真,ボートレーサー,調整重量,展示 タイム,チルト,プロペラ,部品交換,前走成績,前走成績.1
0,1,,松井 繁,52.0kg,6.69,0.0,,,R,
1,1,,松井 繁,52.0kg,6.69,0.0,,,進入,



[2]--------------------------------------------------


Unnamed: 0_level_0,スタート展示,スタート展示,スタート展示
Unnamed: 0_level_1,コース,並び,ST
0,1 .01,1 .01,1 .01
1,2 F.02,2 F.02,2 F.02





In [52]:
disp_html_sample(SAMPLE_URL["raceresult"])

table num : 7
[0]--------------------------------------------------


Unnamed: 0,レース,レース.1,1R,2R,3R,4R,5R,6R,7R,8R,9R,10R,11R,12R
0,締切予定時刻,締切予定時刻,10:35,11:03,11:31,11:59,12:27,12:55,13:24,13:53,14:26,15:00,15:34,16:15



[1]--------------------------------------------------


Unnamed: 0,着,枠,ボートレーサー,レースタイム
0,１,1,3415 松井 繁,"1'48""0"
1,２,2,4686 丸野 一樹,"1'49""3"



[2]--------------------------------------------------


Unnamed: 0,スタート情報
0,1 .11 逃げ
1,2 .17



[3]--------------------------------------------------


Unnamed: 0,勝式,組番,払戻金,人気
0,3連単,1-2-6,"¥1,280",4.0
1,3連単,,,



[4]--------------------------------------------------


Unnamed: 0,返還



[5]--------------------------------------------------


Unnamed: 0,決まり手
0,逃げ



[6]--------------------------------------------------


Unnamed: 0,備考





In [53]:
disp_html_sample(SAMPLE_URL["pitreport"])

table num : 2
[0]--------------------------------------------------


Unnamed: 0,レース,レース.1,1R,2R,3R,4R,5R,6R,7R,8R,9R,10R,11R,12R
0,締切予定時刻,締切予定時刻,10:35,11:03,11:31,11:59,12:27,12:55,13:24,13:53,14:26,15:00,15:34,16:15



[1]--------------------------------------------------


Unnamed: 0_level_0,枠,ボートレーサー,ボートレーサー,ピットレポート,前走 結果
Unnamed: 0_level_1,枠,写真,登録番号/級別 氏名 支部/出身地 年齢/体重,ピットレポート,前走 結果
0,1,,3415 / A1 松井 繁 大阪/大阪 53歳/52.0kg,足はいいと思ったし納得してる。短期決戦でレースに集中する方向で臨めた。いい緊張感を持って全力...,
1,2,,4686 / A1 丸野 一樹 滋賀/京都 31歳/52.0kg,しっかりとした足になっていると思います。出足に力強さがあります。ゾーンに入っていて、納得して...,





### 出馬表

#### 確認用コード

In [54]:
SAMPLE_URL["racelist"]

'https://www.boatrace.jp/owpc/pc/race/racelist?rno=12&jcd=11&hd=20230115'

In [55]:
soup = get_beautiful_soup(SAMPLE_URL["racelist"])

In [56]:
soup.find("h3").text.split()

['決勝戦', '1800m']

In [57]:
table_soup = soup.find("div", class_="table1 is-tableFixed__3rdadd").find("table")

In [58]:
tr_soups = table_soup.find("tbody").find_all("tr")

In [59]:
td_soups = tr_soups[0].find_all("td")

In [60]:
"""
TEST = {
    "racer": {
        "id", "class", "name", "branch", "birthplace", "age", "weight"
    }
}
"""

'\nTEST = {\n    "racer": {\n        "id", "class", "name", "branch", "birthplace", "age", "weight"\n    }\n}\n'

In [61]:
"""
class Header:
    name: str
    datatype: str   # text or attr


td_soups[:5]
"""

'\nclass Header:\n    name: str\n    datatype: str   # text or attr\n\n\ntd_soups[:5]\n'

#### 読み込み処理実装

In [62]:
"""
def get_racer_info(tbody_soup) -> dict:

    td_soups = tbody_soup.find_all("td")

    racer_data = string_split(td_soups[2].text) + string_split(td_soups[3].text)
    wide_data = string_split(td_soups[4].text)
    local_data = string_split(td_soups[5].text)
    motor_data = string_split(td_soups[6].text)
    boat_data = string_split(td_soups[7].text)

    data = {}
    data["frame"] = int(td_soups[0].text)

    data["racer-id"] = int(racer_data[0])
    data["racer-class"] = racer_data[1]
    data["racer-name"] = racer_data[2]
    data["racer-branch"] = racer_data[3]
    data["racer-birthplace"] = racer_data[4]
    data["racer-age"] = int(racer_data[5].replace("歳", ""))
    data["racer-weight"] = float(racer_data[6].replace("kg", ""))

    data["racer-fn"] = racer_data[7]
    data["racer-ln"] = racer_data[8]
    data["racer-avgst"] = float(racer_data[9])

    data["wide-1rate"] = float(wide_data[0])
    data["wide-2rate"] = float(wide_data[1])
    data["wide-3rate"] = float(wide_data[2])

    data["locale-1rate"] = float(local_data[0])
    data["locale-2rate"] = float(local_data[1])
    data["locale-3rate"] = float(local_data[2])

    data["motor-no"] = int(motor_data[0])
    data["motor-2rate"] = float(motor_data[1])
    data["motor-3rate"] = float(motor_data[2])

    data["boat-no"] = int(boat_data[0])
    data["boat-2rate"] = float(boat_data[1])
    data["boat-3rate"] = float(boat_data[2])

    return data
"""

"""
tbody_soups = table_soup.find_all("tbody", class_="is-fs12")

race_infos = [get_racer_info(tbody_soup) for tbody_soup in tbody_soups]

pd.DataFrame(race_infos).head(3)
"""

'\ntbody_soups = table_soup.find_all("tbody", class_="is-fs12")\n\nrace_infos = [get_racer_info(tbody_soup) for tbody_soup in tbody_soups]\n\npd.DataFrame(race_infos).head(3)\n'

In [63]:
def get_racer_info(tbody_soups) -> list:

    datas = []
    for tbody_soup in tbody_soups:

        td_soups = tbody_soup.find_all("td")

        racer_data = string_split(td_soups[2].text) + string_split(td_soups[3].text)
        wide_data = string_split(td_soups[4].text)
        local_data = string_split(td_soups[5].text)
        motor_data = string_split(td_soups[6].text)
        boat_data = string_split(td_soups[7].text)

        frame = int(td_soups[0].text)

        racer_id = int(racer_data[0])
        racer_class = racer_data[1]
        racer_name = racer_data[2]
        racer_branch = racer_data[3]
        racer_birthplace = racer_data[4]
        racer_age = int(racer_data[5].replace("歳", ""))
        racer_weight = float(racer_data[6].replace("kg", ""))

        racer_fn = racer_data[7]
        racer_ln = racer_data[8]
        racer_avgst = float(racer_data[9])

        wide_1rate = float(wide_data[0])
        wide_2rate = float(wide_data[1])
        wide_3rate = float(wide_data[2])

        locale_1rate = float(local_data[0])
        locale_2rate = float(local_data[1])
        locale_3rate = float(local_data[2])

        motor_no = int(motor_data[0])
        motor_2rate = float(motor_data[1])
        motor_3rate = float(motor_data[2])

        boat_no = int(boat_data[0])
        boat_2rate = float(boat_data[1])
        boat_3rate = float(boat_data[2])

        day_labels = [
            "day1-1st", "day1-2nd",
            "day2-1st", "day2-2nd",
            "day3-1st", "day3-2nd",
            "day4-1st", "day4-2nd",
            "day5-1st", "day5-2nd",
            "day6-1st", "day6-2nd",
            "day7-1st", "day7-2nd",
        ]
        max_result = len(day_labels)
        results = {}
        for idx, day_label in enumerate(day_labels):
            race_no = td_soups[9 + idx + max_result * 0].text.strip()
            entry_no = td_soups[9 + idx + max_result * 1].text.strip()
            st_timing = td_soups[9 + idx + max_result * 2].text.strip()
            rank = td_soups[9 + idx + max_result * 3].text.strip()

            race_no = race_no if not race_no else int(race_no)
            entry_no = entry_no if not entry_no else int(entry_no)
            st_timing = st_timing if not st_timing else float(st_timing)
            rank = rank if not rank else int(rank)

            results[f"{day_label}-race-no"] = race_no
            results[f"{day_label}-entry-no"] = entry_no
            results[f"{day_label}-st-timing"] = st_timing
            results[f"{day_label}-rank"] = rank

        datas.append({
            "frame": frame,
            "racer-id": racer_id,
            "racer-class": racer_class,
            "racer-name": racer_name,
            "racer-branch": racer_branch,
            "racer-birthplace": racer_birthplace,
            "racer-age": racer_age,
            "racer-weight": racer_weight,
            "racer-fn": racer_fn,
            "racer-ln": racer_ln,
            "racer-avgst": racer_avgst,
            "wide-1rate": wide_1rate,
            "wide-2rate": wide_2rate,
            "wide-3rate": wide_3rate,
            "locale-1rate": locale_1rate,
            "locale-2rate": locale_2rate,
            "locale-3rate": locale_3rate,
            "motor-no": motor_no,
            "motor-2rate": motor_2rate,
            "motor-3rate": motor_3rate,
            "boat-no": boat_no,
            "boat-2rate": boat_2rate,
            "boat-3rate": boat_3rate,
            **results,
        })

    return datas

In [64]:
tbody_soups = table_soup.find_all("tbody", class_="is-fs12")

race_infos = get_racer_info(tbody_soups)

pd.DataFrame(race_infos).head(3)

Unnamed: 0,frame,racer-id,racer-class,racer-name,racer-branch,racer-birthplace,racer-age,racer-weight,racer-fn,racer-ln,...,day6-2nd-st-timing,day6-2nd-rank,day7-1st-race-no,day7-1st-entry-no,day7-1st-st-timing,day7-1st-rank,day7-2nd-race-no,day7-2nd-entry-no,day7-2nd-st-timing,day7-2nd-rank
0,1,3415,A1,松井繁,大阪,大阪,53,52.0,F0,L0,...,,,,,,,,,,
1,2,4686,A1,丸野一樹,滋賀,京都,31,52.0,F1,L0,...,,,,,,,,,,
2,3,4719,A1,上條暢嵩,大阪,大阪,29,52.0,F0,L0,...,,,,,,,,,,


### コース情報

https://www.boatrace.jp/owpc/pc/data/stadium?jcd=01

https://www.boatrace.jp/owpc/pc/data/stadium?jcd=02

In [153]:
URL = "https://www.boatrace.jp/owpc/pc/extra/data/stadium/index.html"

In [154]:
soup = get_beautiful_soup(URL)

In [155]:
div_soups = soup.find_all("div", class_="tableType4")

In [180]:
tr_soups = []
for div_soup in div_soups:
    tr_soups += div_soup.find("tbody").find_all("tr")

for tr_soup in tr_soups:
    course_name = tr_soup.find("img").get("alt").replace("BOATRACE", "")
    course_url = tr_soup.find("a").get("href")
    couruse_no = course_url.split("=")[-1]

    print((course_name, course_url, couruse_no))

('桐生', '/owpc/pc/data/stadium?jcd=01', '01')
('戸田', '/owpc/pc/data/stadium?jcd=02', '02')
('江戸川', '/owpc/pc/data/stadium?jcd=03', '03')
('平和島', '/owpc/pc/data/stadium?jcd=04', '04')
('多摩川', '/owpc/pc/data/stadium?jcd=05', '05')
('浜名湖', '/owpc/pc/data/stadium?jcd=06', '06')
('蒲郡', '/owpc/pc/data/stadium?jcd=07', '07')
('常滑', '/owpc/pc/data/stadium?jcd=08', '08')
('津', '/owpc/pc/data/stadium?jcd=09', '09')
('三国', '/owpc/pc/data/stadium?jcd=10', '10')
('びわこ', '/owpc/pc/data/stadium?jcd=11', '11')
('住之江', '/owpc/pc/data/stadium?jcd=12', '12')
('尼崎', '/owpc/pc/data/stadium?jcd=13', '13')
('鳴門', '/owpc/pc/data/stadium?jcd=14', '14')
('丸亀', '/owpc/pc/data/stadium?jcd=15', '15')
('児島', '/owpc/pc/data/stadium?jcd=16', '16')
('宮島', '/owpc/pc/data/stadium?jcd=17', '17')
('徳山', '/owpc/pc/data/stadium?jcd=18', '18')
('下関', '/owpc/pc/data/stadium?jcd=19', '19')
('若松', '/owpc/pc/data/stadium?jcd=20', '20')
('芦屋', '/owpc/pc/data/stadium?jcd=21', '21')
('福岡', '/owpc/pc/data/stadium?jcd=22', '22')
('唐津'

In [182]:
URL = "https://www.boatrace.jp/owpc/pc/data/stadium?jcd=01"

In [200]:
soup = get_beautiful_soup(URL)

# --------------------------------------------------

table01_soup = soup.find("table", class_="is-w748")
table01_tbody_soups = table01_soup.find_all("tbody")

table01_datatypes = [
        ("course", int),
        ("1st", float),
        ("2nd", float),
        ("3rd", float),
        ("4th", float),
        ("5th", float),
        ("6th", float),
        ("wintric-eacape", float),           # 逃げ
        ("wintric-turnover", float),         # 捲り
        ("wintric-insert", float),           # 差し
        ("wintric-turnover-insert", float),  # 捲り差し
        ("wintric-overtake", float),         # 抜き
        ("wintric-blessed", float),          # 恵まれ
]

table01_datas = []
for table01_tbody_soup in table01_tbody_soups:
    td_soups = table01_tbody_soup.find("tr").find_all("td")

    data = {datatype[0]: datatype[1](td_soup.text) for datatype, td_soup in zip(table01_datatypes, td_soups)}

    table01_datas.append(data)

#table01_df = pd.DataFrame(table01_datas)
#display(table01_df)

# --------------------------------------------------

table02_soup = soup.find("table", class_="is-w413")
table02_tbody_soups = table02_soup.find_all("tbody")

table02_datatypes = [
        ("frame", int),
        ("course1", float),
        ("course2", float),
        ("course3", float),
        ("course4", float),
        ("course5", float),
        ("course6", float),
]

table02_datas = []
for table02_tbody_soup in table02_tbody_soups:
    td_soups = table02_tbody_soup.find("tr").find_all("td")

    data = {datatype[0]: datatype[1](td_soup.text) for datatype, td_soup in zip(table02_datatypes, td_soups)}

    table02_datas.append(data)

table02_df = pd.DataFrame(table02_datas)
display(table02_df)

# --------------------------------------------------

season_table_soups = soup.find_all("table", class_="is-w358")

season_datatypes = [
        ("course", int),
        ("1st", float),
        ("2nd", float),
        ("3rd", float),
        ("4th", float),
        ("5th", float),
        ("6th", float),
]

# --------------------------------------------------

frame_soup = soup.find("div", class_="frame10_inner")

dt_soups = frame_soup.find_all("dt")
dd_soups = frame_soup.find_all("dd")

memo_labels = [
            "place",
            "motor",
            "water-quality",
            "tidal-diff",
            "record"
        ]

memo_datas = {memo_label: dd_soup.text for memo_label, dd_soup in zip(memo_labels, dd_soups)}
memo_datas


Unnamed: 0,frame,course1,course2,course3,course4,course5,course6
0,1,98.4,1.3,0.0,0.0,0.1,0.0
1,2,0.7,89.0,9.4,0.1,0.0,0.5
2,3,0.3,3.6,86.6,7.2,1.3,0.7
3,4,0.1,2.2,2.2,85.0,6.8,3.4
4,5,0.1,1.7,0.9,5.7,80.5,10.8
5,6,0.0,2.0,1.1,1.8,11.5,83.3


{'place': '群馬県',
 'motor': '減音',
 'water-quality': '淡水',
 'tidal-diff': 'なし',
 'record': '1.42.8 石田\u3000章央 2004/10/27'}

In [202]:
type(int)

type

### オッズ

#### 3連単

##### 確認用コード

In [65]:
SAMPLE_URL["odds"]["trifecta"]

'https://www.boatrace.jp/owpc/pc/race/odds3t?rno=12&jcd=11&hd=20230115'

In [66]:
soup = get_beautiful_soup(SAMPLE_URL["odds"]["trifecta"])

In [67]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

In [68]:
tr_soups = tbody_soup.find_all("tr")

In [69]:
td_soups = tbody_soup.find_all("td")

data = []

N_HEAD_ITEMS = 18
N_LINE_ITEMS = 12

for no_1st in range(1, 7):
    idx_head_line = (no_1st - 1) * 3

    margin_2nd = N_HEAD_ITEMS + N_LINE_ITEMS * 3
    margin_row = no_1st

    for iter_block in range(5):

        idx_2nd = idx_head_line + iter_block * margin_2nd
        no_2nd = td_soups[idx_2nd].text

        idx_3rd = idx_2nd + 1
        for iter_line in range(4):
            no_3rd = td_soups[idx_3rd].text
            odds = td_soups[idx_3rd + 1].text

            data.append({
                "boat-no": [int(no_1st), int(no_2nd), int(no_3rd)],
                "odds": float(odds)
            })

            margin_3rd = (N_HEAD_ITEMS - margin_row) if iter_line == 0 else N_LINE_ITEMS
            idx_3rd += margin_3rd

data

[{'boat-no': [1, 2, 3], 'odds': 8.2},
 {'boat-no': [1, 2, 4], 'odds': 8.3},
 {'boat-no': [1, 2, 5], 'odds': 22.5},
 {'boat-no': [1, 2, 6], 'odds': 12.8},
 {'boat-no': [1, 3, 2], 'odds': 13.0},
 {'boat-no': [1, 3, 4], 'odds': 12.2},
 {'boat-no': [1, 3, 5], 'odds': 27.5},
 {'boat-no': [1, 3, 6], 'odds': 20.9},
 {'boat-no': [1, 4, 2], 'odds': 15.6},
 {'boat-no': [1, 4, 3], 'odds': 19.4},
 {'boat-no': [1, 4, 5], 'odds': 34.8},
 {'boat-no': [1, 4, 6], 'odds': 23.4},
 {'boat-no': [1, 5, 2], 'odds': 50.5},
 {'boat-no': [1, 5, 3], 'odds': 56.6},
 {'boat-no': [1, 5, 4], 'odds': 64.5},
 {'boat-no': [1, 5, 6], 'odds': 61.6},
 {'boat-no': [1, 6, 2], 'odds': 37.1},
 {'boat-no': [1, 6, 3], 'odds': 55.5},
 {'boat-no': [1, 6, 4], 'odds': 51.4},
 {'boat-no': [1, 6, 5], 'odds': 86.4},
 {'boat-no': [2, 1, 3], 'odds': 30.8},
 {'boat-no': [2, 1, 4], 'odds': 28.4},
 {'boat-no': [2, 1, 5], 'odds': 55.9},
 {'boat-no': [2, 1, 6], 'odds': 32.8},
 {'boat-no': [2, 3, 1], 'odds': 92.9},
 {'boat-no': [2, 3, 4], 'od

##### 読み込み処理実装

In [70]:
def get_trifecta_odds(tbody_soup) -> list:
    RACER_SIZE = 6
    TABLE_ROW_SIZE = 5
    BLOCK_LINE_SIZE = 4
    N_HEAD_ITEMS = 18
    N_LINE_ITEMS = 12

    td_soups = tbody_soup.find_all("td")

    data = []
    for no_1st in range(1, RACER_SIZE + 1):
        idx_head_line = (no_1st - 1) * 3

        margin_2nd = N_HEAD_ITEMS + N_LINE_ITEMS * (BLOCK_LINE_SIZE - 1)
        margin_row = no_1st

        for iter_block in range(TABLE_ROW_SIZE):

            idx_2nd = idx_head_line + iter_block * margin_2nd
            no_2nd = td_soups[idx_2nd].text.strip()

            idx_3rd = idx_2nd + 1
            for iter_line in range(BLOCK_LINE_SIZE):
                no_3rd = td_soups[idx_3rd].text.strip()
                value = td_soups[idx_3rd + 1].text.strip()

                data.append({
                    "boad-no": [int(no_1st), int(no_2nd), int(no_3rd)],
                    "odds": float(value)
                })
                """
                data.append({
                    "1st": int(no_1st),
                    "2nd": int(no_2nd),
                    "3rd": int(no_3rd),
                    "odds": float(value)
                })
                """

                margin_3rd = (N_HEAD_ITEMS - margin_row) if iter_line == 0 else N_LINE_ITEMS
                idx_3rd += margin_3rd
    return data

In [71]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

trifecta_odds = get_trifecta_odds(tbody_soup)
trifecta_odds_df = pd.DataFrame(trifecta_odds)
trifecta_odds_df.head(10)

Unnamed: 0,boad-no,odds
0,"[1, 2, 3]",8.2
1,"[1, 2, 4]",8.3
2,"[1, 2, 5]",22.5
3,"[1, 2, 6]",12.8
4,"[1, 3, 2]",13.0
5,"[1, 3, 4]",12.2
6,"[1, 3, 5]",27.5
7,"[1, 3, 6]",20.9
8,"[1, 4, 2]",15.6
9,"[1, 4, 3]",19.4


##### 確認用コード(改良)

In [72]:
SAMPLE_URL["odds"]["trifecta"]

'https://www.boatrace.jp/owpc/pc/race/odds3t?rno=12&jcd=11&hd=20230115'

In [73]:
soup = get_beautiful_soup(SAMPLE_URL["odds"]["trifecta"])

In [74]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

In [75]:
td_soups = tbody_soup.find_all("td")

In [76]:
len(td_soups)

270

In [77]:
td_soups = tbody_soup.find_all("td")
line_td_soups = split_list(td_soups, 6 - 1)

In [78]:
data = []

for line_td_soup in line_td_soups:

    fs14_td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" in td_soup.attrs["class"]]
    td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" not in td_soup.attrs["class"]]

    cell_td_soups = split_list(td_soups, len(td_soups) // 2)
    block_td_soups = [[td_soup for idx, td_soup in enumerate(cell_td_soups) if idx % 6 == block] for block in range(6)]

    for idx_1st, (soup_2nd, soup_3rds) in enumerate(zip(fs14_td_soups, block_td_soups)):
        no_1st = idx_1st + 1
        no_2nd = soup_2nd.text.strip()
        for no_soup, data_soup in soup_3rds:
            no_3rd = no_soup.text.strip()
            odds = data_soup.text.strip()

            data.append({
                "boad-no": [int(no_1st), int(no_2nd), int(no_3rd)],
                "odds": float(odds)
            })

data

[{'boad-no': [1, 2, 3], 'odds': 8.2},
 {'boad-no': [1, 2, 4], 'odds': 8.3},
 {'boad-no': [1, 2, 5], 'odds': 22.5},
 {'boad-no': [1, 2, 6], 'odds': 12.8},
 {'boad-no': [2, 1, 3], 'odds': 30.8},
 {'boad-no': [2, 1, 4], 'odds': 28.4},
 {'boad-no': [2, 1, 5], 'odds': 55.9},
 {'boad-no': [2, 1, 6], 'odds': 32.8},
 {'boad-no': [3, 1, 2], 'odds': 84.5},
 {'boad-no': [3, 1, 4], 'odds': 83.7},
 {'boad-no': [3, 1, 5], 'odds': 141.2},
 {'boad-no': [3, 1, 6], 'odds': 110.2},
 {'boad-no': [4, 1, 2], 'odds': 111.9},
 {'boad-no': [4, 1, 3], 'odds': 165.3},
 {'boad-no': [4, 1, 5], 'odds': 203.7},
 {'boad-no': [4, 1, 6], 'odds': 149.2},
 {'boad-no': [5, 1, 2], 'odds': 244.8},
 {'boad-no': [5, 1, 3], 'odds': 347.7},
 {'boad-no': [5, 1, 4], 'odds': 373.4},
 {'boad-no': [5, 1, 6], 'odds': 319.2},
 {'boad-no': [6, 1, 2], 'odds': 176.2},
 {'boad-no': [6, 1, 3], 'odds': 433.5},
 {'boad-no': [6, 1, 4], 'odds': 325.2},
 {'boad-no': [6, 1, 5], 'odds': 522.8},
 {'boad-no': [1, 3, 2], 'odds': 13.0},
 {'boad-no': 

##### 読み込み処理実装(改良)

In [79]:
def get_trifecta_odds(tbody_soup) -> list:

    RACER_SIZE = 6

    td_soups = tbody_soup.find_all("td")
    line_td_soups = split_list(td_soups, RACER_SIZE - 1)

    data = []
    for line_td_soup in line_td_soups:

        fs14_td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" in td_soup.attrs["class"]]
        td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" not in td_soup.attrs["class"]]

        cell_td_soups = split_list(td_soups, len(td_soups) // 2)
        block_td_soups = [[td_soup for idx, td_soup in enumerate(cell_td_soups) if idx % RACER_SIZE == block] for block in range(6)]

        for idx_1st, (soup_2nd, soup_3rds) in enumerate(zip(fs14_td_soups, block_td_soups)):
            no_1st = idx_1st + 1
            no_2nd = soup_2nd.text.strip()
            for no_soup, data_soup in soup_3rds:
                no_3rd = no_soup.text.strip()
                odds = data_soup.text.strip()

                data.append({
                    "boad-no": [int(no_1st), int(no_2nd), int(no_3rd)],
                    "odds": float(odds)
                })
    return data

In [80]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

trifecta_odds = get_trifecta_odds(tbody_soup)
trifecta_odds_df = pd.DataFrame(trifecta_odds)
trifecta_odds_df.head(10)

Unnamed: 0,boad-no,odds
0,"[1, 2, 3]",8.2
1,"[1, 2, 4]",8.3
2,"[1, 2, 5]",22.5
3,"[1, 2, 6]",12.8
4,"[2, 1, 3]",30.8
5,"[2, 1, 4]",28.4
6,"[2, 1, 5]",55.9
7,"[2, 1, 6]",32.8
8,"[3, 1, 2]",84.5
9,"[3, 1, 4]",83.7


#### 3連複

##### 確認用コード

In [81]:
SAMPLE_URL["odds"]["trio"]

'https://www.boatrace.jp/owpc/pc/race/odds3f?rno=12&jcd=11&hd=20230115'

In [82]:
soup = get_beautiful_soup(SAMPLE_URL["odds"]["trio"])

In [83]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

In [84]:
tr_soups = tbody_soup.find_all("tr")

In [85]:
td_soups = tbody_soup.find_all("td")


RACER_SIZE = 6
TABLE_ROW_SIZE = 4
N_HEAD_ITEMS = 18
N_LINE_ITEMS = 12

data = []

for no_1st in range(1, RACER_SIZE + 1):
    idx_head_line = (no_1st - 1) * 3
    margin_row = no_1st

    margin_2nd = 0
    for iter_block in range(TABLE_ROW_SIZE):
        block_line_size = TABLE_ROW_SIZE - iter_block
        idx_2nd = idx_head_line + margin_2nd
        no_2nd = td_soups[idx_2nd].text.strip()

        idx_3rd = idx_2nd + 1
        for iter_line in range(block_line_size):
            no_3rd = td_soups[idx_3rd].text.strip()
            odds = td_soups[idx_3rd + 1].text.strip()

            if odds:
                """
                for pick_1, pick_2, pick_3 in itertools.permutations([no_1st, no_2nd, no_3rd]):
                    data.append({
                        "pick1": int(pick_1),
                        "pick2": int(pick_2),
                        "pick3": int(pick_3),
                        "odds": float(value),
                    })
                """
                data.append({
                    "boat-no": set([int(no_1st), int(no_2nd), int(no_3rd)]),
                    "odds": float(odds),
                })

            margin_2nd += N_HEAD_ITEMS if iter_line == 0 else N_LINE_ITEMS
            margin_3rd = (N_HEAD_ITEMS - margin_row) if iter_line == 0 else N_LINE_ITEMS
            idx_3rd += margin_3rd


data

[{'boat-no': {1, 2, 3}, 'odds': 4.2},
 {'boat-no': {1, 2, 4}, 'odds': 4.6},
 {'boat-no': {1, 2, 5}, 'odds': 12.4},
 {'boat-no': {1, 2, 6}, 'odds': 7.0},
 {'boat-no': {1, 3, 4}, 'odds': 6.9},
 {'boat-no': {1, 3, 5}, 'odds': 18.0},
 {'boat-no': {1, 3, 6}, 'odds': 13.7},
 {'boat-no': {1, 4, 5}, 'odds': 18.1},
 {'boat-no': {1, 4, 6}, 'odds': 12.7},
 {'boat-no': {1, 5, 6}, 'odds': 25.3},
 {'boat-no': {2, 3, 4}, 'odds': 29.7},
 {'boat-no': {2, 3, 5}, 'odds': 62.5},
 {'boat-no': {2, 3, 6}, 'odds': 50.2},
 {'boat-no': {2, 4, 5}, 'odds': 49.6},
 {'boat-no': {2, 4, 6}, 'odds': 33.7},
 {'boat-no': {2, 5, 6}, 'odds': 58.7},
 {'boat-no': {3, 4, 5}, 'odds': 64.3},
 {'boat-no': {3, 4, 6}, 'odds': 52.1},
 {'boat-no': {3, 5, 6}, 'odds': 94.6},
 {'boat-no': {4, 5, 6}, 'odds': 31.0}]

##### 読み込み処理実装

In [86]:
def get_trio_odds(tbody_soup) -> list:
    RACER_SIZE = 6
    TABLE_ROW_SIZE = 4
    N_HEAD_ITEMS = 18
    N_LINE_ITEMS = 12

    td_soups = tbody_soup.find_all("td")

    data = []
    for no_1st in range(1, RACER_SIZE + 1):
        idx_head_line = (no_1st - 1) * 3
        margin_row = no_1st

        margin_2nd = 0
        for iter_block in range(TABLE_ROW_SIZE):
            block_line_size = TABLE_ROW_SIZE - iter_block
            idx_2nd = idx_head_line + margin_2nd
            no_2nd = td_soups[idx_2nd].text.strip()

            idx_3rd = idx_2nd + 1
            for iter_line in range(block_line_size):
                no_3rd = td_soups[idx_3rd].text.strip()
                value = td_soups[idx_3rd + 1].text.strip()

                if value:
                    data.append({
                        "boat-no": set([int(no_1st), int(no_2nd), int(no_3rd)]),
                        "odds": float(value),
                    })
                    """
                    for pick_1, pick_2, pick_3 in itertools.permutations([no_1st, no_2nd, no_3rd]):
                        data.append({
                            "pick1": int(pick_1),
                            "pick2": int(pick_2),
                            "pick3": int(pick_3),
                            "odds": float(value),
                        })
                    """


                margin_2nd += N_HEAD_ITEMS if iter_line == 0 else N_LINE_ITEMS
                margin_3rd = (N_HEAD_ITEMS - margin_row) if iter_line == 0 else N_LINE_ITEMS
                idx_3rd += margin_3rd
    return data

In [87]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

trio_odds = get_trio_odds(tbody_soup)
trio_odds_df = pd.DataFrame(trio_odds)
trio_odds_df.head(10)

Unnamed: 0,boat-no,odds
0,"{1, 2, 3}",4.2
1,"{1, 2, 4}",4.6
2,"{1, 2, 5}",12.4
3,"{1, 2, 6}",7.0
4,"{1, 3, 4}",6.9
5,"{1, 3, 5}",18.0
6,"{1, 3, 6}",13.7
7,"{1, 4, 5}",18.1
8,"{1, 4, 6}",12.7
9,"{1, 5, 6}",25.3


##### 確認用コード(改良)

In [88]:
SAMPLE_URL["odds"]["trio"]

'https://www.boatrace.jp/owpc/pc/race/odds3f?rno=12&jcd=11&hd=20230115'

In [89]:
soup = get_beautiful_soup(SAMPLE_URL["odds"]["trio"])

In [90]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

In [91]:
tr_soups = tbody_soup.find_all("tr")
td_soups = tbody_soup.find_all("td")

In [92]:
len(td_soups)

144

In [93]:
td_soups = tbody_soup.find_all("td")

group_no_list = []
for group_no, idx in enumerate(range(3, -1, -1)):
    n_items = 18 + 12 * idx
    group_no_list.extend([group_no] * n_items)

line_td_soups = [[] for _ in range(4)]
for group_no, td_soup in zip(group_no_list, td_soups):
    line_td_soups[group_no].append(td_soup)

In [94]:
line_td_soups[-1]

[<td class="is-fs14 is-boatColor5 is-borderLeftNone">5</td>,
 <td class="is-boatColor5">6</td>,
 <td class="oddsPoint">25.3</td>,
 <td class="is-fs14 is-boatColor5">5</td>,
 <td class="is-boatColor5">6</td>,
 <td class="oddsPoint">58.7</td>,
 <td class="is-fs14 is-boatColor5">5</td>,
 <td class="is-boatColor5">6</td>,
 <td class="oddsPoint">94.6</td>,
 <td class="is-fs14 is-boatColor5">5</td>,
 <td class="is-boatColor5">6</td>,
 <td class="oddsPoint">31.0</td>,
 <td class="is-disabled"> </td>,
 <td class="is-disabled"> </td>,
 <td class="is-disabled"> </td>,
 <td class="is-disabled"> </td>,
 <td class="is-disabled"> </td>,
 <td class="is-disabled"> </td>]

In [95]:
data = []

for idx_line, line_td_soup in enumerate(line_td_soups):
    n_block = idx_line + 1
    fs14_td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" in td_soup.attrs["class"]]
    td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" not in td_soup.attrs["class"] and "is-disabled"  not in td_soup.attrs["class"]]

    cell_td_soups = split_list(td_soups, len(td_soups) // 2)
    block_td_soups = [[td_soup for idx, td_soup in enumerate(cell_td_soups) if idx % n_block == block] for block in range(n_block)]

    for idx_1st, (soup_2nd, soup_3rds) in enumerate(zip(fs14_td_soups, block_td_soups)):
        no_1st = idx_1st + 1
        no_2nd = soup_2nd.text.strip()
        for no_soup, data_soup in soup_3rds:
            no_3rd = no_soup.text.strip()
            odds = data_soup.text.strip()

            data.append({
                "boad-no": set([int(no_1st), int(no_2nd), int(no_3rd)]),
                "odds": float(odds)
            })

data

[{'boad-no': {1, 2, 3}, 'odds': 4.2},
 {'boad-no': {1, 2, 4}, 'odds': 4.6},
 {'boad-no': {1, 2, 5}, 'odds': 12.4},
 {'boad-no': {1, 2, 6}, 'odds': 7.0},
 {'boad-no': {1, 3, 4}, 'odds': 6.9},
 {'boad-no': {1, 3, 5}, 'odds': 18.0},
 {'boad-no': {1, 3, 6}, 'odds': 13.7},
 {'boad-no': {2, 3, 4}, 'odds': 29.7},
 {'boad-no': {2, 3, 5}, 'odds': 62.5},
 {'boad-no': {2, 3, 6}, 'odds': 50.2},
 {'boad-no': {1, 4, 5}, 'odds': 18.1},
 {'boad-no': {1, 4, 6}, 'odds': 12.7},
 {'boad-no': {2, 4, 5}, 'odds': 49.6},
 {'boad-no': {2, 4, 6}, 'odds': 33.7},
 {'boad-no': {3, 4, 5}, 'odds': 64.3},
 {'boad-no': {3, 4, 6}, 'odds': 52.1},
 {'boad-no': {1, 5, 6}, 'odds': 25.3},
 {'boad-no': {2, 5, 6}, 'odds': 58.7},
 {'boad-no': {3, 5, 6}, 'odds': 94.6},
 {'boad-no': {4, 5, 6}, 'odds': 31.0}]

##### 読み込み処理実装(改良)

In [96]:
def get_trio_odds(tbody_soup) -> list:
    N_HEAD_ITEMS = 18
    N_LINE_ITEMS = 12
    TABLE_LINE_ROW_SIZE = 4

    td_soups = tbody_soup.find_all("td")

    group_no_list = []
    for group_no, idx in enumerate(reversed(range(TABLE_LINE_ROW_SIZE))):
        n_items = N_HEAD_ITEMS + N_LINE_ITEMS * idx
        group_no_list.extend([group_no] * n_items)

    line_td_soups = [[] for _ in range(TABLE_LINE_ROW_SIZE)]
    for group_no, td_soup in zip(group_no_list, td_soups):
        line_td_soups[group_no].append(td_soup)

    data = []
    for idx_line, line_td_soup in enumerate(line_td_soups):
        n_block = idx_line + 1
        fs14_td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" in td_soup.attrs["class"]]
        td_soups = [td_soup for td_soup in line_td_soup if "is-fs14" not in td_soup.attrs["class"] and "is-disabled"  not in td_soup.attrs["class"]]

        cell_td_soups = split_list(td_soups, len(td_soups) // 2)
        block_td_soups = [[td_soup for idx, td_soup in enumerate(cell_td_soups) if idx % n_block == block] for block in range(n_block)]

        for idx_1st, (soup_2nd, soup_3rds) in enumerate(zip(fs14_td_soups, block_td_soups)):
            no_1st = idx_1st + 1
            no_2nd = soup_2nd.text.strip()
            for no_soup, data_soup in soup_3rds:
                no_3rd = no_soup.text.strip()
                odds = data_soup.text.strip()

                data.append({
                    "boad-no": set([int(no_1st), int(no_2nd), int(no_3rd)]),
                    "odds": float(odds)
                })
    return data

In [97]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

trio_odds = get_trio_odds(tbody_soup)
trio_odds_df = pd.DataFrame(trio_odds)
trio_odds_df.head(10)

Unnamed: 0,boad-no,odds
0,"{1, 2, 3}",4.2
1,"{1, 2, 4}",4.6
2,"{1, 2, 5}",12.4
3,"{1, 2, 6}",7.0
4,"{1, 3, 4}",6.9
5,"{1, 3, 5}",18.0
6,"{1, 3, 6}",13.7
7,"{2, 3, 4}",29.7
8,"{2, 3, 5}",62.5
9,"{2, 3, 6}",50.2


#### 2連単・2連複

##### 確認用コード

In [98]:
SAMPLE_URL["odds"]["exacta-and-quinella"]

'https://www.boatrace.jp/owpc/pc/race/odds2tf?rno=12&jcd=11&hd=20230115'

In [99]:
soup = get_beautiful_soup(SAMPLE_URL["odds"]["exacta-and-quinella"])

In [100]:
exacta_tbody_soup, quinella_tbody_soup = soup.find_all("tbody", class_="is-p3-0")

In [101]:
data = []

exacta_tr_soups = exacta_tbody_soup.find_all("tr")
for exacta_tr_soup in exacta_tr_soups:
    exacta_td_soups = exacta_tr_soup.find_all("td")
    for idx_1st in range(6):
        no_1st = idx_1st + 1
        no_2nd = exacta_td_soups[idx_1st * 2].text.strip()
        odds = exacta_td_soups[idx_1st * 2 + 1].text.strip()
        data.append({
            "boat-no": [int(no_1st), int(no_2nd)],
            "odds": float(odds)
        })

data

[{'boat-no': [1, 2], 'odds': 2.9},
 {'boat-no': [2, 1], 'odds': 9.2},
 {'boat-no': [3, 1], 'odds': 24.1},
 {'boat-no': [4, 1], 'odds': 30.4},
 {'boat-no': [5, 1], 'odds': 58.9},
 {'boat-no': [6, 1], 'odds': 55.8},
 {'boat-no': [1, 3], 'odds': 4.2},
 {'boat-no': [2, 3], 'odds': 46.0},
 {'boat-no': [3, 2], 'odds': 75.1},
 {'boat-no': [4, 2], 'odds': 66.7},
 {'boat-no': [5, 2], 'odds': 102.1},
 {'boat-no': [6, 2], 'odds': 113.4},
 {'boat-no': [1, 4], 'odds': 5.7},
 {'boat-no': [2, 4], 'odds': 43.0},
 {'boat-no': [3, 4], 'odds': 69.2},
 {'boat-no': [4, 3], 'odds': 92.5},
 {'boat-no': [5, 3], 'odds': 157.1},
 {'boat-no': [6, 3], 'odds': 176.9},
 {'boat-no': [1, 5], 'odds': 15.4},
 {'boat-no': [2, 5], 'odds': 66.5},
 {'boat-no': [3, 5], 'odds': 107.5},
 {'boat-no': [4, 5], 'odds': 89.5},
 {'boat-no': [5, 4], 'odds': 145.2},
 {'boat-no': [6, 4], 'odds': 128.7},
 {'boat-no': [1, 6], 'odds': 14.1},
 {'boat-no': [2, 6], 'odds': 55.9},
 {'boat-no': [3, 6], 'odds': 97.2},
 {'boat-no': [4, 6], 'odd

In [102]:
data = []

quinella_tr_soups = quinella_tbody_soup.find_all("tr")
for quinella_tr_soup in quinella_tr_soups:
    quinella_td_soups = quinella_tr_soup.find_all("td")
    for idx_1st in range(6):
        no_1st = idx_1st + 1
        no_2nd = quinella_td_soups[idx_1st * 2].text.strip()
        odds = quinella_td_soups[idx_1st * 2 + 1].text.strip()

        if odds:
            data.append({
                "boat-no": set([int(no_1st), int(no_2nd)]),
                "odds": float(odds)
            })

data

[{'boat-no': {1, 2}, 'odds': 2.5},
 {'boat-no': {1, 3}, 'odds': 3.9},
 {'boat-no': {2, 3}, 'odds': 23.6},
 {'boat-no': {1, 4}, 'odds': 4.7},
 {'boat-no': {2, 4}, 'odds': 20.6},
 {'boat-no': {3, 4}, 'odds': 30.5},
 {'boat-no': {1, 5}, 'odds': 11.4},
 {'boat-no': {2, 5}, 'odds': 40.9},
 {'boat-no': {3, 5}, 'odds': 54.1},
 {'boat-no': {4, 5}, 'odds': 44.6},
 {'boat-no': {1, 6}, 'odds': 10.3},
 {'boat-no': {2, 6}, 'odds': 28.5},
 {'boat-no': {3, 6}, 'odds': 45.4},
 {'boat-no': {4, 6}, 'odds': 34.9},
 {'boat-no': {5, 6}, 'odds': 47.6}]

##### 読み込み処理実装

In [103]:
def get_exacta_odds(tbody_soup) -> list:
    RACER_SIZE = 6

    tr_soups = tbody_soup.find_all("tr")

    data = []
    for tr_soup in tr_soups:
        td_soups = tr_soup.find_all("td")
        for idx_1st in range(RACER_SIZE):
            no_1st = idx_1st + 1
            no_2nd = td_soups[idx_1st * 2].text.strip()
            odds = td_soups[idx_1st * 2 + 1].text.strip()
            data.append({
                "boat-no": [int(no_1st), int(no_2nd)],
                "odds": float(odds)
            })
    return data


def get_quinella_odds(tbody_soup) -> list:
    RACER_SIZE = 6

    tr_soups = tbody_soup.find_all("tr")

    data = []
    for tr_soup in tr_soups:
        td_soups = tr_soup.find_all("td")
        for idx_1st in range(RACER_SIZE):
            no_1st = idx_1st + 1
            no_2nd = td_soups[idx_1st * 2].text.strip()
            odds = td_soups[idx_1st * 2 + 1].text.strip()

            if odds:
                data.append({
                    "boat-no": set([int(no_1st), int(no_2nd)]),
                    "odds": float(odds)
                })
    return data

In [104]:
exacta_tbody_soup, quinella_tbody_soup = soup.find_all("tbody", class_="is-p3-0")

exacta_odds = get_exacta_odds(exacta_tbody_soup)
quinella_odds = get_quinella_odds(quinella_tbody_soup)

exacta_odds_df = pd.DataFrame(exacta_odds)
quinella_odds_df = pd.DataFrame(quinella_odds)

display(exacta_odds_df.head(5))
display(quinella_odds_df.head(5))

Unnamed: 0,boat-no,odds
0,"[1, 2]",2.9
1,"[2, 1]",9.2
2,"[3, 1]",24.1
3,"[4, 1]",30.4
4,"[5, 1]",58.9


Unnamed: 0,boat-no,odds
0,"{1, 2}",2.5
1,"{1, 3}",3.9
2,"{2, 3}",23.6
3,"{1, 4}",4.7
4,"{2, 4}",20.6


#### 拡連複

##### 確認用コード

In [105]:
SAMPLE_URL["odds"]["quinella-place"]

'https://www.boatrace.jp/owpc/pc/race/oddsk?rno=12&jcd=11&hd=20230115'

In [106]:
soup = get_beautiful_soup(SAMPLE_URL["odds"]["quinella-place"])

In [107]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

In [108]:
data = []

tr_soups = tbody_soup.find_all("tr")
for tr_soup in tr_soups:
    td_soups = tr_soup.find_all("td")
    for idx_1st in range(6):
        no_1st = idx_1st + 1
        no_2nd = td_soups[idx_1st * 2].text.strip()
        minmax_odds = td_soups[idx_1st * 2 + 1].text.strip().split("-")
        if minmax_odds != [""]:
            data.append({
                "boat-no": set([int(no_1st), int(no_2nd)]),
                "min-odds": float(minmax_odds[0]),
                "max-odds": float(minmax_odds[1]),
            })

data

[{'boat-no': {1, 2}, 'min-odds': 1.3, 'max-odds': 1.4},
 {'boat-no': {1, 3}, 'min-odds': 2.0, 'max-odds': 2.4},
 {'boat-no': {2, 3}, 'min-odds': 3.7, 'max-odds': 5.2},
 {'boat-no': {1, 4}, 'min-odds': 1.6, 'max-odds': 2.0},
 {'boat-no': {2, 4}, 'min-odds': 3.2, 'max-odds': 4.7},
 {'boat-no': {3, 4}, 'min-odds': 4.2, 'max-odds': 5.3},
 {'boat-no': {1, 5}, 'min-odds': 3.5, 'max-odds': 4.2},
 {'boat-no': {2, 5}, 'min-odds': 6.6, 'max-odds': 8.6},
 {'boat-no': {3, 5}, 'min-odds': 9.9, 'max-odds': 11.4},
 {'boat-no': {4, 5}, 'min-odds': 7.4, 'max-odds': 8.7},
 {'boat-no': {1, 6}, 'min-odds': 2.6, 'max-odds': 3.2},
 {'boat-no': {2, 6}, 'min-odds': 4.5, 'max-odds': 6.1},
 {'boat-no': {3, 6}, 'min-odds': 9.4, 'max-odds': 11.1},
 {'boat-no': {4, 6}, 'min-odds': 6.0, 'max-odds': 7.3},
 {'boat-no': {5, 6}, 'min-odds': 10.2, 'max-odds': 11.3}]

##### 読み込み処理実装

In [109]:
def get_quinella_place_odds(tbody_soup) -> list:
    RACER_SIZE = 6

    tr_soups = tbody_soup.find_all("tr")

    data = []
    for tr_soup in tr_soups:
        td_soups = tr_soup.find_all("td")
        for idx_1st in range(RACER_SIZE):
            no_1st = idx_1st + 1
            no_2nd = td_soups[idx_1st * 2].text.strip()
            minmax_odds = td_soups[idx_1st * 2 + 1].text.strip().split("-")
            if minmax_odds != [""]:
                data.append({
                    "boat-no": set([int(no_1st), int(no_2nd)]),
                    "min-odds": float(minmax_odds[0]),
                    "max-odds": float(minmax_odds[1]),
                })
    return data

In [110]:
tbody_soup = soup.find("tbody", class_="is-p3-0")

odds = get_quinella_place_odds(tbody_soup)
odds_df = pd.DataFrame(odds)

odds_df.head(5)

Unnamed: 0,boat-no,min-odds,max-odds
0,"{1, 2}",1.3,1.4
1,"{1, 3}",2.0,2.4
2,"{2, 3}",3.7,5.2
3,"{1, 4}",1.6,2.0
4,"{2, 4}",3.2,4.7


#### 単勝・複勝

##### 確認用コード

In [111]:
SAMPLE_URL["odds"]["win-and-place"]

'https://www.boatrace.jp/owpc/pc/race/oddstf?rno=12&jcd=11&hd=20230115'

In [112]:
soup = get_beautiful_soup(SAMPLE_URL["odds"]["win-and-place"])

In [113]:
win_table_soup, place_table_soup = soup.find_all("table", class_="is-w495")

win_tbody_soups = win_table_soup.find_all("tbody")
place_tbody_soups = place_table_soup.find_all("tbody")

In [114]:
data = []

for win_tbody_soup in win_tbody_soups:
    boat_no = win_tbody_soup.find("td", class_="is-fs14").text.strip()
    odds = win_tbody_soup.find("td", class_="oddsPoint").text.strip()

    data.append({
        "boat_no": int(boat_no),
        "odds": float(odds)
    })

data

[{'boat_no': 1, 'odds': 1.2},
 {'boat_no': 2, 'odds': 5.9},
 {'boat_no': 3, 'odds': 10.9},
 {'boat_no': 4, 'odds': 9.1},
 {'boat_no': 5, 'odds': 11.4},
 {'boat_no': 6, 'odds': 15.5}]

In [115]:
data = []

for place_tbody_soup in place_tbody_soups:
    boat_no = place_tbody_soup.find("td", class_="is-fs14").text.strip()
    minmax_odds = place_tbody_soup.find("td", class_="oddsPoint").text.strip().split("-")

    data.append({
        "boat_no": int(boat_no),
        "min_odds": float(minmax_odds[0]),
        "max_odds": float(minmax_odds[1])
    })

data

[{'boat_no': 1, 'min_odds': 1.0, 'max_odds': 1.1},
 {'boat_no': 2, 'min_odds': 1.2, 'max_odds': 1.9},
 {'boat_no': 3, 'min_odds': 2.7, 'max_odds': 4.6},
 {'boat_no': 4, 'min_odds': 2.9, 'max_odds': 4.9},
 {'boat_no': 5, 'min_odds': 3.4, 'max_odds': 5.7},
 {'boat_no': 6, 'min_odds': 2.8, 'max_odds': 4.6}]

##### 読み込み処理実装

In [116]:
def get_win_odds(table_soup) -> list:

    tbody_soups = table_soup.find_all("tbody")

    data = []
    for tbody_soup in tbody_soups:
        boat_no = tbody_soup.find("td", class_="is-fs14").text.strip()
        odds = tbody_soup.find("td", class_="oddsPoint").text.strip()

        data.append({
            "boat_no": int(boat_no),
            "odds": float(odds)
        })
    return data

def get_place_odds(table_soup) -> list:

    tbody_soups = table_soup.find_all("tbody")

    data = []
    for tbody_soup in tbody_soups:
        boat_no = tbody_soup.find("td", class_="is-fs14").text.strip()
        minmax_odds = tbody_soup.find("td", class_="oddsPoint").text.strip().split("-")

        data.append({
            "boat-no": int(boat_no),
            "min-odds": float(minmax_odds[0]),
            "max-odds": float(minmax_odds[1])
        })
    return data

In [117]:
win_table_soup, place_table_soup = soup.find_all("table", class_="is-w495")

win_odds = get_win_odds(win_table_soup)
place_odds = get_place_odds(place_table_soup)

win_odds_df = pd.DataFrame(win_odds)
place_odds_df = pd.DataFrame(place_odds)

display(win_odds_df)
display(place_odds_df)

Unnamed: 0,boat_no,odds
0,1,1.2
1,2,5.9
2,3,10.9
3,4,9.1
4,5,11.4
5,6,15.5


Unnamed: 0,boat-no,min-odds,max-odds
0,1,1.0,1.1
1,2,1.2,1.9
2,3,2.7,4.6
3,4,2.9,4.9
4,5,3.4,5.7
5,6,2.8,4.6


### 直線情報

In [118]:
SAMPLE_URL["beforeinfo"]

'https://www.boatrace.jp/owpc/pc/race/beforeinfo?rno=12&jcd=11&hd=20230115'

In [119]:
soup = get_beautiful_soup(SAMPLE_URL["beforeinfo"])

In [120]:
racer_soup = soup.find("table", class_="is-w748")
start_soup = soup.find("table", class_="is-w238")
weather_soup = soup.find("div", class_="weather1")

In [121]:
racer_tbody_soups = racer_soup.find_all("tbody")

racer_datas = []
for racer_tbody_soup in racer_tbody_soups:
    td_soups = racer_tbody_soup.find_all("td")

    frame = int(td_soups[0].text.strip())
    name = td_soups[2].text.strip().replace("　", "")
    weight = float(td_soups[3].text.strip().replace("kg", ""))
    ajust_weight = float(td_soups[12].text.strip())
    exhibit_time = float(td_soups[4].text.strip())
    tilt = float(td_soups[5].text.strip())

    before_result_r = td_soups[9].text.strip()
    before_result_entry = td_soups[11].text.strip()
    before_result_st = td_soups[14].text.strip()
    before_result_rank = td_soups[16].text.strip()

    before_result_r = before_result_r if not before_result_r else int(before_result_r)
    before_result_entry = before_result_entry if not before_result_entry else int(before_result_entry)
    before_result_st = before_result_st if not before_result_st else float(before_result_st)
    before_result_rank = before_result_rank if not before_result_rank else int(before_result_rank)

    racer_datas.append({
        "frame": frame,
        "name": name,
        "weight": weight,
        "ajust-weight": ajust_weight,
        "exhibit-time": exhibit_time,
        "tilt": tilt,
        "before-result-r":  before_result_r,
        "before-result-entry":  before_result_entry,
        "before-result-st":  before_result_st,
        "before-result-rank":  before_result_rank,
    })

pd.DataFrame(racer_datas)

Unnamed: 0,frame,name,weight,ajust-weight,exhibit-time,tilt,before-result-r,before-result-entry,before-result-st,before-result-rank
0,1,松井繁,52.0,0.0,6.69,0.0,,,,
1,2,丸野一樹,52.0,0.0,6.71,0.0,,,,
2,3,上條暢嵩,52.0,0.0,6.74,-0.5,,,,
3,4,桐生順平,52.0,0.0,6.82,-0.5,,,,
4,5,深谷知博,52.0,0.0,6.8,-0.5,,,,
5,6,池田浩二,52.0,0.0,6.74,-0.5,,,,


In [122]:
start_tr_soups = start_soup.find("tbody").find_all("tr")

start_datas = []
for start_tr_soup in start_tr_soups:
    frame = int(start_tr_soup.find("span", class_="table1_boatImage1Number").text.strip())
    st = start_tr_soup.find("span", class_="table1_boatImage1Time").text.strip()

    st = float(st.replace("F", "-")) if "F" in st else float(st)

    start_datas.append({
        "frame": frame,
        "st": st
    })

pd.DataFrame(start_datas)

Unnamed: 0,frame,st
0,1,0.01
1,2,-0.02
2,3,-0.01
3,4,0.12
4,5,0.1
5,6,0.05


In [123]:
# 結果の「水面気象情報」と同じ処理で取得できる
weather_unit_soups = weather_soup.find_all("div", class_="weather1_bodyUnit")

temperature = float(weather_unit_soups[0].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("℃", ""))
weather = weather_unit_soups[1].find("span", class_="weather1_bodyUnitLabelTitle").text.strip()
wind_velocity = float(weather_unit_soups[2].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("m", ""))
wind_direction = weather_unit_soups[3].find("p").attrs["class"][-1].replace("is-", "")
water_temperature = float(weather_unit_soups[4].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("℃", ""))
wave_height = float(weather_unit_soups[5].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("cm", ""))

weather_data = {
    "temperature": temperature,
    "weather": weather,
    "wind-velocity": wind_velocity,
    "wind-direction": wind_direction,
    "water-temperature": water_temperature,
    "wave-height": wave_height
}

pd.DataFrame([weather_data])

Unnamed: 0,temperature,weather,wind-velocity,wind-direction,water-temperature,wave-height
0,9.0,曇り,0.0,wind17,7.0,1.0


### 結果

In [124]:
SAMPLE_URL["raceresult"]

'https://www.boatrace.jp/owpc/pc/race/raceresult?rno=12&jcd=11&hd=20230115'

In [125]:
soup = get_beautiful_soup(SAMPLE_URL["raceresult"])

In [126]:
time_soup, start_soup, return_soup, _ = soup.find_all("table", class_="is-w495")
weather_soup = soup.find("div", class_="weather1")
factor_soup = soup.find("table", class_="is-h108__3rdadd")

In [127]:
time_tbody_soups = time_soup.find_all("tbody")

In [128]:
time_tbody_soups[0]

<tbody>
<tr>
<td class="is-fs14">１</td>
<td class="is-fs14 is-fBold is-boatColor1">1</td>
<td class="is-p10-0">
<span class="is-fs12">3415</span>　
                                    <span class="is-fs18 is-fBold is-lh24__3rdadd">松井　　　繁</span>
</td>
<td>1'48"0</td>
</tr>
</tbody>

In [129]:
time_data = []

for time_tbody_soup in time_tbody_soups:
    td_soups = time_tbody_soup.find_all("td")

    arrival = int(td_soups[0].text.strip())
    frame = int(td_soups[1].text.strip())

    racer_text = re.sub(r"[ \t\u3000]", "", td_soups[2].text.strip())
    racer_id, racer_name = racer_text.split()
    racetime = td_soups[3].text.strip()

    time_data.append({
        "arrival": arrival,
        "frame": frame,
        "racer-id": racer_id,
        "racer-name": racer_name,
        "racetime": racetime
    })

pd.DataFrame(time_data)

Unnamed: 0,arrival,frame,racer-id,racer-name,racetime
0,1,1,3415,松井繁,"1'48""0"
1,2,2,4686,丸野一樹,"1'49""3"
2,3,6,3941,池田浩二,"1'50""7"
3,4,5,4524,深谷知博,"1'52""4"
4,5,4,4444,桐生順平,
5,6,3,4719,上條暢嵩,


In [130]:
start_td_soups = start_soup.find_all("td")

In [131]:
start_data = []

for idx, start_td_soup in enumerate(start_td_soups):
    frame_class_name = "table1_boatImage1Number"
    timing_class_name = "table1_boatImage1TimeInner"

    frame_soup = start_td_soup.find("span", class_=frame_class_name)
    timing_soup = start_td_soup.find("span", class_=timing_class_name)

    frame = int(frame_soup.text.strip())
    start_timing = float(re.sub(r"[^.0-9]+", "", timing_soup.text.strip()))

    start_data.append({
        "frame": frame,
        "start-timing": start_timing,
    })

pd.DataFrame(start_data)

Unnamed: 0,frame,start-timing
0,1,0.11
1,2,0.17
2,3,0.16
3,4,0.16
4,5,0.13
5,6,0.16


In [132]:
return_tbody_soups = return_soup.find_all("tbody")

In [133]:
return_data = []

for return_tbody_soup in return_tbody_soups:
    td_soups = return_tbody_soup.find_all("td")

    bet_number = "".join([item.text.strip() for item in td_soups[1].find_all("span")])
    return_money = int(re.sub(r"[¥,]", "", td_soups[2].text.strip()))
    popularity = td_soups[3].text.strip()

    popularity = popularity if not popularity else int(popularity)

    if bet_number:
        return_data.append({
            "bet-number": bet_number,
            "return-money": return_money,
            "popularity": popularity
        })

pd.DataFrame(return_data)

Unnamed: 0,bet-number,return-money,popularity
0,1-2-6,1280,4.0
1,1=2=6,700,4.0
2,1-2,290,1.0
3,1=2,250,1.0
4,1=2,140,1.0
5,1,120,
6,1,100,


In [134]:
weather_unit_soups = weather_soup.find_all("div", class_="weather1_bodyUnit")

In [135]:
temperature = float(weather_unit_soups[0].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("℃", ""))
weather = weather_unit_soups[1].find("span", class_="weather1_bodyUnitLabelTitle").text.strip()
wind_velocity = float(weather_unit_soups[2].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("m", ""))
wind_direction = weather_unit_soups[3].find("p").attrs["class"][-1].replace("is-", "")
water_temperature = float(weather_unit_soups[4].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("℃", ""))
wave_height = float(weather_unit_soups[5].find("span", class_="weather1_bodyUnitLabelData").text.strip().replace("cm", ""))

weather_data = {
    "temperature": temperature,
    "weather": weather,
    "wind-velocity": wind_velocity,
    "wind-direction": wind_direction,
    "water-temperature": water_temperature,
    "wave-height": wave_height
}

pd.DataFrame([weather_data])

Unnamed: 0,temperature,weather,wind-velocity,wind-direction,water-temperature,wave-height
0,9.0,曇り,0.0,wind17,7.0,1.0


In [136]:
factor_soup.find("td", class_="is-fs16").text.strip()

'逃げ'

### ピットレポート

In [137]:
SAMPLE_URL["pitreport"]

'https://www.boatrace.jp/owpc/pc/race/pitreport?rno=12&jcd=11&hd=20230115'

In [138]:
soup = get_beautiful_soup(SAMPLE_URL["pitreport"])

In [139]:
table_soups = soup.find_all("div", class_="table1")
tbody_soups = table_soups[-1].find_all("tbody")

In [140]:
data = []

for tbody_soup in tbody_soups:
    td_soups = tbody_soup.find_all("td")

    frame = td_soups[0].text.strip()

    racer_text = re.sub(r"[ \t\u3000]", "", td_soups[2].text.strip())
    racer_data = [info for info in re.split(r"[\n/]", racer_text) if info]

    racer_id = int(racer_data[0])
    racer_class = racer_data[1]
    racer_name = racer_data[2]
    racer_branch = racer_data[3]
    racer_birthplace = racer_data[4]
    racer_age = int(racer_data[5].replace("歳", ""))
    racer_weight = float(racer_data[6].replace("kg", ""))

    pitreport = td_soups[3].text

    data.append({
        "frame": frame,
        "racer-id": racer_id,
        "racer-class": racer_class,
        "racer-name": racer_name,
        "racer-branch": racer_branch,
        "racer-birthplace": racer_birthplace,
        "racer-age": racer_age,
        "racer-weight": racer_weight,
        "pitreport": pitreport,
    })

#data

## レースID取得

朝、昼、夜に関しては、時刻で判断する。テーブルから取得はしない。

#### 確認用コード

In [141]:
# hcd=01はSG・PG1スケジュール
URL = "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd=01"

In [142]:
YEAR = 2024

RACE_RANGE = [1, 12]

In [143]:
soup = get_beautiful_soup(URL)

In [144]:
#table_soup = soup.find("table")
#tr_soups = table_soup.find_all("tr")

tbody_soups = soup.find_all("tbody")
tr_soups = []
for tbody_soup in tbody_soups:
    tr_soups += tbody_soup.find_all("tr")

In [145]:
data = []

for tr_soup in tr_soups:
    td_soups = tr_soup.find_all("td")

    offset_idx = 0 if len(td_soups) >= 8 else -1
    begin_date, end_date = td_soups[1 + offset_idx].text.strip().split("-")

    place_img_path = td_soups[2 + offset_idx].find("img").get("src")

    place_img_name, _ = os.path.splitext(os.path.basename(place_img_path))
    place_no = place_img_name.replace("text_place1_", "")

    grade = td_soups[3 + offset_idx].attrs["class"][-1].replace("is-", "")

    timezone = "" if ("class" not in td_soups[4 + offset_idx].attrs) else td_soups[4 + offset_idx].attrs["class"][0].replace("is-", "")

    title = td_soups[5 + offset_idx].text.strip()

    is_held = td_soups[6 + offset_idx].find("a").text.strip() != ""

    curr_datetime = datetime.strptime(f"{YEAR}/{begin_date}", "%Y/%m/%d")
    end_datetime = datetime.strptime(f"{YEAR}/{end_date}", "%Y/%m/%d")

    while curr_datetime <= end_datetime:
        for rno in range(RACE_RANGE[0], RACE_RANGE[1] + 1):
            data.append({
                "rno": int(rno),
                "jcd": int(place_no),
                "hd": curr_datetime.strftime("%Y%m%d"),
                "grade": grade,
                "timezone": timezone,
                "title": title,
                "is_held": is_held
            })
        curr_datetime += timedelta(days=1)

#data

In [146]:
pd.DataFrame(data)

Unnamed: 0,rno,jcd,hd,grade,timezone,title,is_held
0,1,24,20240111,G1a,nighter,BBCトーナメント,True
1,2,24,20240111,G1a,nighter,BBCトーナメント,True
2,3,24,20240111,G1a,nighter,BBCトーナメント,True
3,4,24,20240111,G1a,nighter,BBCトーナメント,True
4,5,24,20240111,G1a,nighter,BBCトーナメント,True
...,...,...,...,...,...,...,...
979,8,7,20241231,G1a,nighter,クイーンズクライマックス,False
980,9,7,20241231,G1a,nighter,クイーンズクライマックス,False
981,10,7,20241231,G1a,nighter,クイーンズクライマックス,False
982,11,7,20241231,G1a,nighter,クイーンズクライマックス,False


In [147]:
pd.DataFrame(data).shape

(984, 7)

#### 読み込み処理実装

In [148]:
def get_race_id(year, hcd="01"):
    TABLE_COL_SIZE = 8

    TABLE_COL_IDX_DATE = 1
    TABLE_COL_IDX_PLACE_IMG = 2
    TABLE_COL_IDX_GRADE = 3
    TABLE_COL_IDX_TIMEZONE = 4
    TABLE_COL_IDX_TITLE = 5
    TABLE_COL_IDX_WINNER = 6

    RACE_NO_RANGE = [1, 12]

    URL_FORMAT = "https://www.boatrace.jp/owpc/pc/race/gradesch?year={}&hcd={}"

    url = URL_FORMAT.format(year, hcd)
    soup = get_beautiful_soup(url)

    tbody_soups = soup.find_all("tbody")
    tr_soups = []
    for tbody_soup in tbody_soups:
        tr_soups += tbody_soup.find_all("tr")

    data = []
    for tr_soup in tr_soups:
        td_soups = tr_soup.find_all("td")

        offset_idx = 0 if len(td_soups) >= TABLE_COL_SIZE else -1
        begin_date, end_date = td_soups[TABLE_COL_IDX_DATE + offset_idx].text.strip().split("-")

        place_img_path = td_soups[TABLE_COL_IDX_PLACE_IMG + offset_idx].find("img").get("src")

        place_img_name, _ = os.path.splitext(os.path.basename(place_img_path))
        place_no = place_img_name.replace("text_place1_", "")

        grade = td_soups[TABLE_COL_IDX_GRADE + offset_idx].attrs["class"][-1].replace("is-", "")

        timezone = "" if ("class" not in td_soups[TABLE_COL_IDX_TIMEZONE + offset_idx].attrs) else td_soups[4 + offset_idx].attrs["class"][0].replace("is-", "")

        title = td_soups[TABLE_COL_IDX_TITLE + offset_idx].text.strip()

        is_held = td_soups[TABLE_COL_IDX_WINNER + offset_idx].find("a").text.strip() != ""

        curr_datetime = datetime.strptime(f"{year}/{begin_date}", "%Y/%m/%d")
        end_datetime = datetime.strptime(f"{year}/{end_date}", "%Y/%m/%d")

        while curr_datetime <= end_datetime:
            for rno in range(RACE_NO_RANGE[0], RACE_NO_RANGE[1] + 1):
                data.append({
                    "rno": int(rno),
                    "jcd": int(place_no),
                    "hd": curr_datetime.strftime("%Y%m%d"),
                    "grade": grade,
                    "timezone": timezone,
                    "title": title,
                    "is_held": is_held
                })
            curr_datetime += timedelta(days=1)

    return data

In [149]:
data = get_race_id(2024)

In [150]:
pd.DataFrame(data)

Unnamed: 0,rno,jcd,hd,grade,timezone,title,is_held
0,1,24,20240111,G1a,nighter,BBCトーナメント,True
1,2,24,20240111,G1a,nighter,BBCトーナメント,True
2,3,24,20240111,G1a,nighter,BBCトーナメント,True
3,4,24,20240111,G1a,nighter,BBCトーナメント,True
4,5,24,20240111,G1a,nighter,BBCトーナメント,True
...,...,...,...,...,...,...,...
979,8,7,20241231,G1a,nighter,クイーンズクライマックス,False
980,9,7,20241231,G1a,nighter,クイーンズクライマックス,False
981,10,7,20241231,G1a,nighter,クイーンズクライマックス,False
982,11,7,20241231,G1a,nighter,クイーンズクライマックス,False


- ron: レース番号
- jcd: 場コード
- hd: 開催日

## TODO
