In [1]:
#
import re
from datetime import datetime
import pandas as pd
from bs4 import BeautifulSoup
import requests
from IPython.display import HTML

# When True, read_match_data prints one line per parsed match row.
DEBUG = False

# J.League division to process (1 to 3).
CATEGORY = 1

# Height in pixels of one point in the chart; keep it equal to the height of
# draw_box (which is one third of the height of win_box).
BOX_HEIGHT = 25

# Set to True to show older fixtures at the bottom of each team's points column.
OLD_BOTTOM = True

# Ordering of the team columns:
#   'point'    - current points
#   'avlbl_pt' - maximum points still reachable, i.e. current points plus a win
#                in every remaining match
#TEAM_SORT_KEY = 'avlbl_pt'
TEAM_SORT_KEY = 'point'

# Column names for the tuples built by read_match_data; the two must be kept in sync.
col_names = ['match_date', 'section_no', 'match_index_in_section', 'start_time', 'stadium',
             'home_team', 'home_goal', 'away_goal', 'away_team']

def read_match_data(soup: BeautifulSoup):
    """Extract the match rows from one J.League section (matchday) page.

    Every ``<section class="matchlistWrap">`` element of the page is scanned, and
    each table row that contains a ``<td class="stadium">`` cell yields one tuple.

    Args:
        soup: Parsed HTML of a section page.

    Returns:
        A list of tuples whose fields follow ``col_names``:
        (match_date, section_no, match_index_in_section, start_time, stadium,
        home_team, home_goal, away_goal, away_team).
        ``match_date`` is a ``datetime``, or ``None`` when the block has no
        ``timeStamp`` header. All other fields except the running index are
        strings; ``start_time`` / ``stadium`` are ``''`` when the stadium cell
        does not contain the expected ``<br`` / ``</a`` markup.

    Raises:
        AttributeError, TypeError: if the section title (``leagAccTit`` / ``h5`` /
            '第N節') is missing from a block.
        ValueError: if the date header has no '(' or does not parse as
            '%Y年%m月%d日'.
    """
    # Raw-string patterns. The previous non-raw '\>' / '\<' / '\/' were invalid
    # string escapes (a SyntaxWarning on recent Python); the regexes match the
    # same text as before.
    start_time_pattern = re.compile(r'([^>]+)<br')
    stadium_pattern = re.compile(r'([^>]+)</a')

    def _first_group(pattern, text):
        """Return capture group 1 of the first match, or '' when nothing matches."""
        found = pattern.search(text)
        return found[1] if found else ''

    result_list = []

    # Running 1-based match counter; it keeps counting across all blocks of the page.
    i = 1
    for _section in soup.find_all('section', class_='matchlistWrap'):
        match_div = _section.find('div', class_='timeStamp')
        if match_div:
            # Only the text before '(' is the date itself.
            date_text = match_div.find('h4').text.strip()
            match_date = datetime.strptime(date_text[:date_text.index('(')], '%Y年%m月%d日')
        else:
            match_date = None

        section_title = _section.find('div', class_='leagAccTit').find('h5').text.strip()
        section_no = re.search('第(.+)節', section_title)[1]

        for _tr in _section.find_all('tr'):
            stadium_td = _tr.find('td', class_='stadium')
            if not stadium_td:
                # Not a match row.
                continue
            # The stadium cell holds the kick-off time followed by <br and the
            # stadium name inside a link; both are read from the cell's markup.
            stadium_html = str(stadium_td)
            start_time = _first_group(start_time_pattern, stadium_html)
            stadium = _first_group(stadium_pattern, stadium_html)
            home_team = _tr.find('td', class_='clubName rightside').text.strip()
            home_goal = _tr.find('td', class_='point rightside').text.strip()
            away_team = _tr.find('td', class_='clubName leftside').text.strip()
            away_goal = _tr.find('td', class_='point leftside').text.strip()
            if DEBUG:
                _str_match_date = (match_date.strftime("%Y/%m/%d") if match_date else '未定')
                print(f'{_str_match_date} {section_no}節 {i} {start_time} [{stadium}]' + \
                      f' {home_team} ({home_goal}) - ({away_goal}) {away_team}')
            # The tuple layout must match col_names (update both together).
            result_list.append((match_date, section_no, i, start_time, stadium,
                                home_team, home_goal, away_goal, away_team))
            i += 1
    return result_list

In [3]:
# Exclusive upper bound of the section numbers fetched for each division
# (range() stops one before it, e.g. J1 fetches sections 1..38).
# NOTE(review): confirm that each value is "last section + 1", in particular for J3.
match_counts = {1: 39, 2: 43, 3: 30}

# Seconds to wait for the server before a request is abandoned; without a
# timeout a stalled connection would block the notebook indefinitely.
REQUEST_TIMEOUT = 30

section_frames = []
for _i in range(1, match_counts[CATEGORY]):
    response = requests.get(f'https://www.jleague.jp/match/section/j{CATEGORY}/{_i}/',
                            timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error instead of parsing an error page into zero matches.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    result_list = read_match_data(soup)
    section_frames.append(pd.DataFrame(result_list, columns=col_names))

# Concatenate once instead of growing the frame inside the loop. Row labels are
# kept as they were: they restart at 0 for every section.
all_matches = pd.concat(section_frames)

# Snapshot of the scraped data, named after the division and today's date.
all_matches.to_csv(f'match_result-J{CATEGORY}-{datetime.now().strftime("%Y%m%d")}.csv')

all_matches

Unnamed: 0,match_date,section_no,match_index_in_section,start_time,stadium,home_team,home_goal,away_goal,away_team
0,2021-02-26,1,1,18:04,等々力,横浜FM,0,2,川崎Ｆ
1,2021-02-27,1,2,14:00,札幌ド,横浜FC,1,5,札幌
2,2021-02-27,1,3,14:00,埼玉,FC東京,1,1,浦和
3,2021-02-27,1,4,14:03,Ｅスタ,仙台,1,1,広島
4,2021-02-27,1,5,14:03,昭和電ド,徳島,1,1,大分
...,...,...,...,...,...,...,...,...,...
5,2021-12-04,38,6,未定,アイスタ,Ｃ大阪,,,清水
6,2021-12-04,38,7,未定,豊田ス,浦和,,,名古屋
7,2021-12-04,38,8,未定,パナスタ,湘南,,,Ｇ大阪
8,2021-12-04,38,9,未定,鳴門大塚,広島,,,徳島


In [None]:
def get_point_from_match(_row: pd.Series):
    """Return the league points earned from one match row.

    Args:
        _row: Mapping-like row providing 'has_result', 'goal_get' (goals scored)
            and 'goal_lose' (goals conceded). Goal values may be strings or
            numbers.

    Returns:
        3 for a win, 1 for a draw, 0 for a loss or when ``has_result`` is false.

    Raises:
        ValueError: if the row has a result but a goal value is not an integer.
    """
    if not _row['has_result']:
        return 0
    # The scraped scores are strings, and strings compare lexicographically:
    # '10' < '2', so a 10-2 win would be scored as a loss. Compare as integers.
    goals_for = int(_row['goal_get'])
    goals_against = int(_row['goal_lose'])
    if goals_for > goals_against:
        return 3
    if goals_for < goals_against:
        return 0
    return 1


def has_match_result(_row: pd.Series):
    """Tell whether a match row carries a score (final or in progress).

    The row counts as having a result only when both 'goal_get' and
    'goal_lose' are truthy, e.g. non-empty strings.
    """
    return bool(_row['goal_get']) and bool(_row['goal_lose'])

def make_team_df(all_matches: pd.DataFrame, target_team: str):
    """Build the fixture list of one team, seen from that team's side.

    Keeps the rows in which ``target_team`` is the home or the away team, orders
    them by ``match_date`` and adds the columns ``is_home``, ``opponent``,
    ``goal_get``, ``goal_lose``, ``has_result`` and ``point``. The home/away
    specific columns and ``match_index_in_section`` are dropped from the result.
    """
    plays_home = all_matches['home_team'] == target_team
    plays_away = all_matches['away_team'] == target_team
    team_df = all_matches[plays_home | plays_away].sort_values('match_date')

    def _home_or_away(col_if_home: str, col_if_away: str):
        """Make a row function reading col_if_home for home games, else col_if_away."""
        def _select(row):
            return row[col_if_home] if row['is_home'] else row[col_if_away]
        return _select

    team_df['is_home'] = team_df.apply(lambda row: row['home_team'] == target_team, axis=1)
    team_df['opponent'] = team_df.apply(_home_or_away('away_team', 'home_team'), axis=1)
    team_df['goal_get'] = team_df.apply(_home_or_away('home_goal', 'away_goal'), axis=1)
    team_df['goal_lose'] = team_df.apply(_home_or_away('away_goal', 'home_goal'), axis=1)

    # The team-relative columns above replace the home/away ones.
    obsolete_columns = ['home_team', 'away_team', 'home_goal', 'away_goal', 'match_index_in_section']
    team_df = team_df.drop(columns=obsolete_columns)

    team_df['has_result'] = team_df.apply(has_match_result, axis=1)
    team_df['point'] = team_df.apply(get_point_from_match, axis=1)
    return team_df


def get_available_point(_df: pd.DataFrame):
    """Compute the highest points total the team can still reach.

    This is the sum of the ``point`` column plus three points for every row
    whose ``has_result`` is False.
    """
    earned = _df['point'].sum()
    pending_matches = int(_df['has_result'].eq(False).sum())
    return earned + 3 * pending_matches


def make_html_column(_df: pd.DataFrame, target_team: str, max_point: int):
    """Render one team's stacked points column as an HTML fragment.

    Args:
        _df: Per-team frame; the columns 'has_result', 'point', 'match_date',
            'opponent', 'goal_get', 'goal_lose' and 'stadium' are read.
        target_team: Team name, used as the label text and as the CSS class of
            the team's boxes.
        max_point: Points height every column is padded to.

    Returns:
        A string made of a label box, one box per match that contributes height
        (plus an optional spacer), the label box again and a trailing newline.
        With ``OLD_BOTTOM`` set, the boxes are emitted in reverse row order so
        the first rows of ``_df`` end up at the bottom.
    """
    team_label = f'<div class="draw_box {target_team}">{target_team}</div>'
    rows_list = []
    for (_i, _row) in _df.iterrows():
        if _row['has_result']:
            box_points = _row['point']
            div_class = target_team
        else:
            # A match without a result reserves the height of a win and gets no
            # team-specific CSS class.
            box_points = 3
            div_class = ''
        if box_points == 0:
            # A loss adds no height to the column, so no box is emitted.
            continue

        date_value = _row['match_date']
        if isinstance(date_value, str):
            match_date = date_value
        elif pd.isnull(date_value):
            match_date = '未定 '
        else:
            match_date = date_value.strftime('%m/%d')

        if box_points == 3:
            # <br> is a void element; the former '</br>' end tag is invalid HTML.
            row_html = (
                f'<div class="win_box {div_class}">'
                + match_date + _row['opponent'] + '<br>'
                + _row['goal_get'] + '-' + _row['goal_lose'] + '<br>'
                + _row['stadium'] + '<br>'
                + '</div>'
            )
        else:
            row_html = (
                f'<div class="draw_box {div_class}">'
                + match_date + _row['opponent']
                + '</div>'
            )
        rows_list.append(row_html)

    # Pad the column up to max_point. A non-positive gap needs no spacer
    # (a negative height would not be valid CSS).
    space_cols = max_point - get_available_point(_df)
    if space_cols > 0:
        rows_list.append(f'<div class="space_box" style="height:{BOX_HEIGHT * space_cols}px">&nbsp;</div>')

    if OLD_BOTTOM:
        rows_list.reverse()
    return team_label + ''.join(rows_list) + team_label + '\n'


def get_point_column(max_point: int):
    """Build the HTML of a points scale column.

    The column holds one ``point_box`` per value from ``max_point`` down to 1,
    framed by a '勝点' label box at the top and an identical one at the bottom,
    all wrapped in a single ``<div>``.
    """
    label_box = '<div class="point_box">勝点</div>'
    number_boxes = ''.join(
        f'<div class="point_box">{value}</div>' for value in range(max_point, 0, -1)
    )
    return '<div>' + label_box + number_boxes + label_box + '</div>'

In [None]:
# Per-team data: team name -> {'df', 'point', 'avlbl_pt', 'html'}.
team_map = {}
# Largest 'avlbl_pt' over all teams; every team column is padded to this height.
max_point = 0
team_list = all_matches['home_team'].value_counts().keys()
for target_team in team_list:
    _df = make_team_df(all_matches, target_team)
    cur_point = _df['point'].sum()
    available_point = get_available_point(_df)
    team_map[target_team] = {'df': _df, 'point': cur_point, 'avlbl_pt': available_point}
    max_point = max(max_point, available_point)

# The columns can only be rendered once the common height (max_point) is known.
for target_team in team_list:
    target_html = make_html_column(team_map[target_team]['df'], target_team, max_point)
    team_map[target_team]['html'] = target_html

# The points scale is repeated after this many team columns.
insert_point_columns = [4, 10, 16]
point_column = '<div>' + get_point_column(max_point) + '</div>'
# Declare the charset explicitly: the page is written as UTF-8 below and
# contains Japanese text.
html_result = ['<html><head><meta charset="utf-8"><link rel="stylesheet" type="text/css" href=j_points.css></head><body><div class="boxContainer">']
html_result.append(point_column)
index = 0
for (target_team, team_info) in sorted(team_map.items(), key=lambda x: x[1][TEAM_SORT_KEY], reverse=True):
    html_result.append('<div>' + team_info['html'] + '</div>')
    index += 1
    if index in insert_point_columns:
        html_result.append(point_column)
html_result.append(point_column)

html_result.append('</div></body></html>')

# Fix the encoding instead of relying on the platform default, which may be
# unable to encode the Japanese text or may disagree with what the browser assumes.
with open('j_points.html', mode='w', encoding='utf-8') as _fp:
    _fp.write(''.join(html_result))

In [None]:
# Show the generated points table as this cell's rich output.
HTML(''.join(html_result))