In [287]:
import numpy as np
import scipy as sp
import pandas as pd
from pandas import Series, DataFrame

import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns


In [288]:
pd.set_option('display.max_columns', 50)

sns.set()
%matplotlib inline
%precision 6

'%.6f'

In [289]:
# Font used for all matplotlib text (family and base size set together).
plt.rcParams.update({
    'font.family': 'IPAexGothic',
    'font.size': 6,
})

In [290]:
# Target date (YYYY-MM-DD); interpolated into the input JSON and output CSV file names below.
date = '2023-04-02'

In [291]:
# Read the day's batting records and flatten nested objects into dotted
# column names (e.g. 'batter.id', 'before_count.o').
df = pd.read_json(f"../live-stats/{date}-bat.json")
bats = pd.json_normalize(data=df.to_dict("records"))

cols = [
    # game / plate-appearance identifiers
    'date', 'visitor', 'home', 'inning_num', 'top_bottom', 'batter_num',
    # score and count before the plate appearance
    'before_score.top', 'before_score.bottom',
    'before_count.b', 'before_count.s', 'before_count.o',
    # score and count after the plate appearance
    'after_score.top', 'after_score.bottom',
    'after_count.b', 'after_count.s', 'after_count.o',
    # runner on each base
    '1b.id', '1b.handed', '1b.name',
    '2b.id', '2b.handed', '2b.name',
    '3b.id', '3b.handed', '3b.name',
    # batter and pitcher
    'batter.id', 'batter.handed', 'batter.name',
    'pitcher.id', 'pitcher.handed', 'pitcher.name',
    # outcome
    'direction', 'result',
]

# Keep only the columns that are needed (copy so later edits never touch `bats`).
bat_results = bats.loc[:, cols].copy()

# Base occupancy flags: 1 when the base has a runner id, 0 when it is empty.
for base in ('1b', '2b', '3b'):
    bat_results[f'before_{base}'] = bat_results[f'{base}.id'].notna().astype('int64')

In [292]:
# Drop columns that are no longer needed: the per-base runner details
# (already summarized by the before_1b/2b/3b flags) and the ball/strike counts.
drop_cols = [
    f'{base}.{field}'
    for base in ('1b', '2b', '3b')
    for field in ('id', 'handed', 'name')
] + [
    'before_count.b', 'before_count.s',
    'after_count.b', 'after_count.s',
]
bat_results = bat_results.drop(columns=drop_cols)

In [293]:
# Build the "after" base state by shifting the "before" flags up one row, so
# each row receives the next row's before-state; the last row is filled with 0.
# NOTE(review): assumes rows are in chronological order and that the next
# row's before-state is this row's after-state — confirm for inning/game boundaries.
ren_cols = {f'before_{base}': f'after_{base}' for base in ('1b', '2b', '3b')}
after_base = (
    bat_results[list(ren_cols)]
    .shift(-1)
    .fillna(0)
    .rename(columns=ren_cols)
    .astype(int)
)

In [294]:
# Rename columns
ren_cols = {
    'visitor': 'team.top',
    'home': 'team.bottom',
    'inning_num': 'inning',
    'batter_num': 'batter_number',
    'before_count.o': 'before_out',
    'after_count.o': 'after_out',
}
bat_results = bat_results.rename(columns=ren_cols)

In [295]:
# Combine the data: place the after_* base columns next to the existing ones
# (rows are matched on the shared index).
bat_results = pd.concat(objs=[bat_results, after_base], axis='columns')

In [296]:
# Add scoring information
def point(bat):
    """Return the score change recorded on one row.

    `bat` is any mapping-like object indexed by column name (for example a
    DataFrame row). When `top_bottom` is 1 the result is
    `after_score.top - before_score.top`; when it is 2 the result is
    `after_score.bottom - before_score.bottom`; any other value yields 0.
    """
    side = {1: 'top', 2: 'bottom'}.get(bat['top_bottom'])
    if side is None:
        return 0
    return bat[f'after_score.{side}'] - bat[f'before_score.{side}']

# Score change per row, computed column-wise instead of calling point() on
# every row: rows with top_bottom == 1 use the .top score difference, rows
# with top_bottom == 2 use the .bottom score difference, all other rows get 0.
bat_results['point'] = np.select(
    [bat_results['top_bottom'] == 1, bat_results['top_bottom'] == 2],
    [
        bat_results['after_score.top'] - bat_results['before_score.top'],
        bat_results['after_score.bottom'] - bat_results['before_score.bottom'],
    ],
    default=0,
)

In [297]:
# Reorder the columns
re_cols = [
    # game / plate-appearance identifiers
    'date', 'team.top', 'team.bottom', 'inning', 'top_bottom', 'batter_number',
    # scores before and after, then the score change
    'before_score.top', 'before_score.bottom',
    'after_score.top', 'after_score.bottom',
    'point',
    # outs and base occupancy before, then after
    'before_out', 'before_1b', 'before_2b', 'before_3b',
    'after_out', 'after_1b', 'after_2b', 'after_3b',
    # batter and pitcher
    'batter.id', 'batter.handed', 'batter.name',
    'pitcher.id', 'pitcher.handed', 'pitcher.name',
    # outcome
    'direction', 'result',
]
bat_results = bat_results.reindex(re_cols, axis='columns')

In [298]:
# Save to file (the row index is not written)
csv_path = f"../live-stats/bat-{date}.csv"
bat_results.to_csv(csv_path, index=False)