In [1]:
# -*- coding: utf-8 -*-
import datetime
import os
import subprocess
import urllib
import urllib.request

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Every team's stats page lives at the same path; only the numeric team id differs.
_TEAM_PAGE = "https://baseballdata.jp/{}/ctop.html"

# Central League
giants_url = _TEAM_PAGE.format(1)
swallows_url = _TEAM_PAGE.format(2)
dena_url = _TEAM_PAGE.format(3)
dragons_url = _TEAM_PAGE.format(4)
tigers_url = _TEAM_PAGE.format(5)
carp_url = _TEAM_PAGE.format(6)

# Pacific League (note the ids are not contiguous)
lions_url = _TEAM_PAGE.format(7)
fighters_url = _TEAM_PAGE.format(8)
lotte_url = _TEAM_PAGE.format(9)
orix_url = _TEAM_PAGE.format(11)
hawks_url = _TEAM_PAGE.format(12)
eagles_url = _TEAM_PAGE.format(376)

In [3]:
def get_top_links(date):
    """Return the per-game player-stats ("top") links for the games held on ``date``.

    Requests the Yahoo! Japan NPB schedule page with ``date`` as the ``date``
    query parameter, collects every ``<a class="bb-score__content">`` anchor and
    replaces ``index`` with ``top`` in each anchor's ``href``.

    Args:
        date: Value sent as the ``date`` query parameter (this notebook passes a
            ``datetime.date``).

    Returns:
        list[str]: One link per matching anchor, in page order; empty when the
        page contains no such anchors.

    Raises:
        requests.HTTPError: If the schedule page answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    schedule_page = requests.get(
        'https://baseball.yahoo.co.jp/npb/schedule',
        params={'date': date},
        # Without a timeout requests waits indefinitely on a stalled connection.
        timeout=30,
    )
    # Fail loudly instead of parsing an error page into an empty list of games.
    schedule_page.raise_for_status()

    soup_schedule = BeautifulSoup(schedule_page.text, 'html.parser')
    game_link_elms = soup_schedule.find_all('a', class_='bb-score__content')
    return [elm['href'].replace('index', 'top') for elm in game_link_elms]

In [4]:
def data_generate(URL, debug=False):
    """Scrape one team's batting table and derive per-player outcome rates.

    The first ``<tbody>`` of the page at ``URL`` is read row by row. The row
    containing "選手名" supplies the column names; every other row is a player.

    Args:
        URL: Address of a team's batting-stats page (one of the ``*_url``
            constants defined in this notebook).
        debug: When true, display the first rows of the parsed table before any
            type conversion.

    Returns:
        tuple: ``(calc_df, team)``. ``calc_df`` is a DataFrame with the columns
        選手名, 凡退率, 単打率, 二塁打率, 三塁打率, 本塁打率, 四死球率, 盗塁成功率,
        犠打成功率, 併殺打率. ``team`` is the 球団 value of the first player row.
        Rates whose denominator is zero are left as NaN, except 盗塁成功率 and
        犠打成功率, which are filled with 0.

    Raises:
        ValueError: If the page has no ``<tbody>`` or no header row containing
            "選手名".
    """
    # BeautifulSoup consumes the whole response in its constructor, so the
    # connection can be closed as soon as parsing is done.
    with urllib.request.urlopen(URL) as response:
        soup = BeautifulSoup(response, "html.parser")

    if soup.tbody is None:
        raise ValueError("no <tbody> found at {}".format(URL))

    title = None
    tmp_data = []
    for tr in soup.tbody.find_all("tr"):
        cells = tr.getText().replace("\r", "").replace(" ", "").split("\n")
        row = [cell for cell in cells if cell != '']
        if "○" in row:
            # Player currently on the first-team roster: drop the "○" marker so
            # the row has the same width as the rows of players without it.
            row.pop(2)
        elif "選手名" in row:
            title = row
            continue
        # De-duplicate on the normalised row; comparing before the marker is
        # removed would never match an already stored first-team row.
        if row not in tmp_data:
            tmp_data.append(row)

    if title is None:
        raise ValueError("no header row containing '選手名' found at {}".format(URL))

    # These two header cells have no counterpart in the stored player rows.
    title.remove("調子")
    title.remove("一軍")

    df = pd.DataFrame(tmp_data)
    df.columns = title
    team = df.iloc[0]["球団"]

    if debug:
        display(df.head())

    # Scraped cells are strings; convert counting stats to int and ratios to float.
    for col in ["打点", "本塁打", "安打数", "単打", "2塁打", "3塁打", "得点圏打数", "得点圏安打",
                "UC本塁打", "試合数", "打席数", "打数", "得点", "四球", "死球", "企盗塁", "盗塁",
                "企犠打", "犠打", "犠飛", "代打数", "代打安打", "併殺", "失策", "三振"]:
        df[col] = df[col].astype("int")

    for col in ["打率", "出塁率", "長打率", "最近5試合", "OPS", "得点圏打率", "UC率"]:
        df[col] = df[col].astype("float")

    # Success rates; 0 attempts gives 0/0 = NaN, which is treated as a rate of 0.
    # Assigning the filled Series back avoids chained in-place fillna, which
    # does not update the frame under pandas copy-on-write.
    df["盗塁成功率"] = (df["盗塁"] / df["企盗塁"]).fillna(0)
    df["犠打成功率"] = (df["犠打"] / df["企犠打"]).fillna(0)

    # Clean the player name: keep the text between the first ":" and the next
    # ".", then cut everything from the team name onward.
    df["選手名"] = df["選手名"].apply(lambda x: x.split(":")[1].split(".")[0].split(team)[0])

    # Per-player outcome rates over plate appearances excluding sacrifice
    # bunts and sacrifice flies.
    denominator = df["打席数"] - df["犠打"] - df["犠飛"]
    df["単打率"] = df["単打"] / denominator
    df["二塁打率"] = df["2塁打"] / denominator
    df["三塁打率"] = df["3塁打"] / denominator
    df["本塁打率"] = df["本塁打"] / denominator
    df["四死球率"] = (df["四球"] + df["死球"]) / denominator
    df["併殺打率"] = df["併殺"] / denominator
    # Out rate is whatever remains after hits and walks / hit-by-pitch.
    df["凡退率"] = (denominator - df["単打"] - df["2塁打"] - df["3塁打"] - df["本塁打"]
                  - df["四球"] - df["死球"]) / denominator

    # Return an independent frame so callers can add columns without touching
    # (or being warned about) the scraped table.
    calc_df = df[["選手名", "凡退率", "単打率", "二塁打率", "三塁打率", "本塁打率",
                  "四死球率", "盗塁成功率", "犠打成功率", "併殺打率"]].copy()
    return calc_df, team

In [5]:
# https://stackoverflow.com/questions/31247198/python-pandas-write-content-of-dataframe-into-text-file
def make_df(url, players, opt):
    """Write the starting lineup's outcome rates to ``data/<opt>.txt``.

    Fetches the team table with ``data_generate(url)``, keeps only the players
    named in ``players``, orders them by their position in that list and writes
    the numeric columns (no header, no index, space separated).

    Args:
        url: Team stats page passed through to ``data_generate``.
        players: Starting lineup as a list of nine player names in batting order.
        opt: Output file stem; the file written is ``data/<opt>.txt``.

    Returns:
        None. The missing-player list is printed and the resulting table is
        displayed as side effects.

    Raises:
        AssertionError: If the result does not have exactly nine rows, or a
            row's first six rate columns do not sum to 1.
    """
    lineup_size = 9

    df, team = data_generate(url)
    df = df.copy()  # never mutate the frame handed back by data_generate

    # Keep the caller's list intact and build both lookups directly from it.
    order_by_name = {name: pos for pos, name in enumerate(players)}
    name_by_order = dict(enumerate(players))

    df['order'] = df['選手名'].map(order_by_name)

    # Lineup slots with no row in the scraped table (e.g. a player with no
    # first-team appearance yet this season). Listed in batting order so the
    # printed output is deterministic.
    found = set(df['order'].dropna().astype(int))
    missing_no = [pos for pos in range(lineup_size) if pos not in found]
    missing_players = [name_by_order[pos] for pos in missing_no]
    print(missing_players)

    if missing_no:
        # Placeholder rows: a player with no data is modelled as always out
        # (凡退率 = 1, every other rate 0). Built in one typed frame so the rate
        # columns stay float instead of degrading to object dtype.
        placeholder = pd.DataFrame({'選手名': missing_players, 'order': missing_no})
        for col in df.columns:
            if col not in ('選手名', 'order'):
                placeholder[col] = 0.0
        placeholder['凡退率'] = 1.0
        df = pd.concat([df, placeholder[df.columns]], axis=0, ignore_index=True)

    df = (
        df[df['order'].notna()]
        .sort_values('order', ascending=True)
        .drop('order', axis=1)
        .reset_index(drop=True)
    )

    # Players with a zero denominator have NaN rates: count them as always out.
    # fillna on the frame (not on a selected column in place) so the update is
    # applied regardless of pandas' copy-on-write mode.
    df = df.fillna({'凡退率': 1}).fillna(0)

    display(df.head(9))

    assert df.shape[0] == lineup_size
    # Columns 1..6 are 凡退率, 単打率, 二塁打率, 三塁打率, 本塁打率, 四死球率.
    assert np.allclose(df.iloc[:, 1:7].sum(axis=1), 1)

    os.makedirs('data', exist_ok=True)
    # Everything except the name column, in the table's column order.
    df.iloc[:, 1:].to_csv('data/' + str(opt) + '.txt', header=False, index=False, sep=' ', mode='w')

In [6]:
# Date whose games are processed.
d_today = datetime.date(2023,4,1)
game_links = get_top_links(d_today)

# Team name as it appears in the game-page title -> that team's stats page.
team_dic = {
    '読売ジャイアンツ':  giants_url ,
    '東京ヤクルトスワローズ': swallows_url,
    '横浜DeNAベイスターズ': dena_url,
    '中日ドラゴンズ': dragons_url,
    '阪神タイガース': tigers_url,
    '広島東洋カープ': carp_url,

    '埼玉西武ライオンズ': lions_url,
    '北海道日本ハムファイターズ': fighters_url,
    '千葉ロッテマリーンズ': lotte_url,
    'オリックス・バファローズ': orix_url,
    '福岡ソフトバンクホークス': hawks_url,
    '東北楽天ゴールデンイーグルス': eagles_url
}

for game_link in game_links:
    # Parse inside the context manager so the HTTP response is always closed.
    with urllib.request.urlopen(game_link) as response:
        soup = BeautifulSoup(response, "html.parser")

    # Team names: the second space-separated token of the page title is split
    # on 'vs.'; the first part is used as the home team, the second as the visitor.
    matchup = soup.find('title').text.split(" ")[1].split('vs.')
    home = matchup[0]
    visitor = matchup[1]

    # Extract the starting lineups from the first 20 text cells of the stats
    # table. Cells 1-9 are used for the home team and 11-19 for the visitor;
    # cells 0 and 10 are skipped.
    # NOTE(review): presumably cells 0 and 10 are non-batter rows - confirm
    # against the page layout.
    data = soup.find_all('td', class_='bb-splitsTable__data bb-splitsTable__data--text')
    data = [i.text.split('\n')[1] for i in data][:20]
    home_member = [i.replace(' ', '') for i in data[1:10]]
    visitor_member = [i.replace(' ', '') for i in data[11:20]]

    # Generate the input data: the visitor bats first ("top"), the home team second ("bottom").
    print("先攻: " +visitor+ ", 後攻: " +home)
    make_df(team_dic[visitor], visitor_member, "top")
    make_df(team_dic[home], home_member, "bottom")

    # Compute the win probabilities with the external ./calc executable and print its output.
    cmd = "./calc"
    c = subprocess.check_output(cmd).decode()
    print(c)

先攻: 中日ドラゴンズ, 後攻: 読売ジャイアンツ
['涌井秀章', 'カリステ']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,岡林勇希,0.6,0.2,0.0,0.2,0.0,0.0,1.0,0.0,0.0
1,大島洋平,0.0,0.4,0.2,0.0,0.0,0.4,0.0,0.0,0.0
2,高橋周平,0.75,0.0,0.25,0.0,0.0,0.0,0.0,1.0,0.0
3,アキーノ,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ビシエド,0.8,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,カリステ,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,木下拓哉,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,福永裕基,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,涌井秀章,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


['グリフィン']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,梶谷隆幸,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,吉川尚輝,0.25,0.25,0.0,0.0,0.0,0.5,0.0,0.0,0.25
2,丸佳浩,0.75,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
3,岡本和真,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,中田翔,0.25,0.0,0.0,0.25,0.25,0.25,0.0,0.0,0.0
5,坂本勇人,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,大城卓三,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,ブリンソン,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,グリフィン,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


試合開始の先攻の勝率: 0.128620
試合開始の後攻の勝率: 0.761156

先攻: 広島東洋カープ, 後攻: 東京ヤクルトスワローズ
['床田寛樹']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,菊池涼介,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,野間峻祥,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,秋山翔吾,0.333333,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,マクブルーム,0.666667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.333333
4,西川龍馬,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,デビッドソン,0.666667,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0
6,坂倉将吾,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333
7,小園海斗,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,床田寛樹,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


['ピーターズ']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,濱田太貴,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,青木宣親,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,山田哲人,0.5,0.5,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,村上宗隆,0.75,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
4,オスナ,0.25,0.25,0.25,0.0,0.25,0.0,0.0,0.0,0.0
5,中村悠平,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
6,内山壮真,0.75,0.25,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7,長岡秀樹,0.333333,0.0,0.0,0.0,0.0,0.666667,1.0,0.0,0.0
8,ピーターズ,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


試合開始の先攻の勝率: 0.000000
試合開始の後攻の勝率: 0.975301

先攻: 横浜DeNAベイスターズ, 後攻: 阪神タイガース
['ガゼルマン', '伊藤光']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,佐野恵太,0.6,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0
1,宮﨑敏郎,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,楠本泰史,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,牧秀悟,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ソト,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,桑原将志,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,伊藤光,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,森敬斗,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,ガゼルマン,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


['秋山拓巳']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,近本光司,0.666667,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0
1,中野拓夢,0.666667,0.0,0.0,0.0,0.0,0.333333,1.0,1.0,0.0
2,ノイジー,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,大山悠輔,0.666667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,佐藤輝明,0.5,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0
5,森下翔太,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,梅野隆太郎,0.5,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0
7,小幡竜平,0.25,0.75,0.0,0.0,0.0,0.0,1.0,0.0,0.0
8,秋山拓巳,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


試合開始の先攻の勝率: 0.004525
試合開始の後攻の勝率: 0.955562

先攻: 東北楽天ゴールデンイーグルス, 後攻: 北海道日本ハムファイターズ
['炭谷銀仁朗', '黒川史陽']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,辰己涼介,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25
1,小深田大翔,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,フランコ,0.25,0.25,0.25,0.0,0.25,0.0,0.0,0.0,0.25
3,浅村栄斗,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,島内宏明,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,阿部寿樹,0.75,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
6,山﨑剛,0.666667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,黒川史陽,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,炭谷銀仁朗,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


['矢澤宏太', '清水優心']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,矢澤宏太,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,石井一成,0.75,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
2,松本剛,0.75,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
3,野村佑希,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,清宮幸太郎,0.75,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
5,マルティネス,0.5,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0
6,上川畑大悟,0.75,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
7,清水優心,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,五十幡亮汰,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


試合開始の先攻の勝率: 0.257393
試合開始の後攻の勝率: 0.301671

先攻: オリックス・バファローズ, 後攻: 埼玉西武ライオンズ
['若月健矢']


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,野口智哉,0.8,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,福田周平,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,中川圭太,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,杉本裕太郎,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,森友哉,0.75,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
5,宗佑磨,0.5,0.25,0.0,0.0,0.25,0.0,0.0,0.0,0.25
6,ゴンザレス,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,若月健矢,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,茶野篤政,0.666667,0.333333,0.0,0.0,0.0,0.0,1.0,1.0,0.0


[]


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,ペイトン,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,マキノン,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,外崎修汰,0.0,0.6,0.0,0.0,0.0,0.4,0.0,0.0,0.0
3,山川穂高,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,中村剛也,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,愛斗,0.75,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0
6,柘植世那,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,山村崇嘉,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,金子侑司,0.5,0.25,0.0,0.0,0.0,0.25,1.0,0.0,0.0


試合開始の先攻の勝率: 0.022139
試合開始の後攻の勝率: 0.727721

先攻: 千葉ロッテマリーンズ, 後攻: 福岡ソフトバンクホークス
[]


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,荻野貴司,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,中村奨吾,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,山口航輝,0.75,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
3,ポランコ,0.75,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
4,安田尚憲,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,井上晴哉,0.75,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.25
6,田村龍弘,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,藤岡裕大,0.666667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,藤原恭大,0.666667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0


[]


Unnamed: 0,選手名,凡退率,単打率,二塁打率,三塁打率,本塁打率,四死球率,盗塁成功率,犠打成功率,併殺打率
0,周東佑京,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,近藤健介,0.5,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0
2,柳田悠岐,0.75,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
3,栗原陵矢,0.5,0.0,0.25,0.0,0.25,0.0,0.0,0.0,0.0
4,牧原大成,0.5,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0
5,中村晃,0.75,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0
6,正木智也,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,今宮健太,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,甲斐拓也,0.666667,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0


試合開始の先攻の勝率: 0.000247
試合開始の後攻の勝率: 0.991411

