In [1]:
import pandas as pd
import json

In [2]:
# Build a lookup table so that a team_id can be translated into its team_name.
# When the same team_id appears on several rows, the last row wins.
team_df = pd.read_csv('../data/input/team.csv')
team_dic = {
    tid: tname
    for tid, tname in zip(team_df['team_id'], team_df['team_name'])
}

In [3]:
# Column definitions used to count each kind of appearance.
# Legend of the marks found in the source data:
#   ○: played the full match, ▲: came on as a substitute, ▽: was substituted off,
#   ◆: came on and was later substituted off, ×: suspended,
#   ※: suspended because of another competition, B: on the bench
# '※' appears in the legend but is not counted below.
# NOTE(review): columns and marks are paired by position, so 'rat_out_play' counts
# '▲' (came on) and 'rat_in_play' counts '▽' (substituted off) — confirm that this
# naming is intended before relying on the column names.
tmp = [
    'rat_full_play',
    'rat_out_play',
    'rat_in_play',
    'rat_inout_play',
    'rat_bench_play',
    'rat_susp_play',
]
# Whole-season columns first, followed by the first-half and second-half variants.
colnames = [base + suffix for suffix in ('', '_first', '_second') for base in tmp]
# One mark per entry of `colnames`; the six marks repeat for each of the three groups.
characters = ['○', '▲', '▽', '◆', 'B', '×'] * 3

In [4]:
# Convert the 2015-2018 season appearance JSON files into one CSV per season.

# Matches per season: 34 for J1, 42 for every other division.
NUM_MATCH_J1 = 34
NUM_MATCH_OTHER = 42


def _read_play_json(path, columns=None):
    """Load one appearance JSON file into a DataFrame.

    The top-level JSON keys become the row index (they are later written out as
    the ``name`` column). Each value is presumably a list with one mark per
    match — TODO confirm against the input files.

    Parameters
    ----------
    path : str
        Path of the JSON file to read (decoded as UTF-8).
    columns : list, optional
        Explicit column labels; when omitted pandas numbers the columns 0..N-1.
    """
    with open(path, encoding='utf-8') as f:
        return pd.DataFrame.from_dict(json.load(f), columns=columns, orient='index')


def _load_season_frame(year, div, team_id, num_match, is_2stage):
    """Return the whole-season frame (rows: index keys, columns: matches) for one team.

    For a two-stage season the ``_s1`` and ``_s2`` files are read separately and
    joined side by side; the second-stage columns are labelled
    ``num_match // 2 .. num_match - 1`` so they follow the first stage's default
    0-based labels. Rows present in only one stage are kept (outer join) and
    padded with missing values.
    """
    stem = f'../data/input/event/event_play_{year}_{div}_{team_id}'
    if not is_2stage:
        return _read_play_json(f'{stem}.json')

    first_stage = _read_play_json(f'{stem}_s1.json')
    second_stage = _read_play_json(
        f'{stem}_s2.json', columns=list(range(num_match // 2, num_match))
    )
    return pd.concat([first_stage, second_stage], axis=1, join='outer')


def _count_mark(frame, mark):
    """Count, for every row of ``frame``, how many cells contain ``mark``.

    ``regex=False`` makes the match literal, and ``na=False`` treats missing
    cells as "no match" so the per-row sum is always an integer count.
    """
    hits = frame.apply(lambda col: col.str.contains(mark, regex=False, na=False))
    return hits.sum(axis=1).values


for year in range(2015, 2019):

    # Teams that took part in the season being processed.
    team_df_year = team_df[team_df['year'] == year]

    # Per-team results of this season, concatenated once at the end.
    dfs = []

    for div, team_id in zip(team_df_year['div'], team_df_year['team_id']):

        # J1 used a two-stage format before 2017 (input is split into _s1 / _s2 files).
        is_2stage = year < 2017 and div == 'J1'
        num_match = NUM_MATCH_J1 if div == 'J1' else NUM_MATCH_OTHER

        full_df = _load_season_frame(year, div, team_id, num_match, is_2stage)

        # Split by column position into the first and second half of the season.
        half = num_match // 2
        first_df = full_df.iloc[:, :half]
        second_df = full_df.iloc[:, half:]

        # Count each mark over the part of the season its column name refers to.
        counts = {}
        for col, cha in zip(colnames, characters):
            if 'first' in col:
                source = first_df
            elif 'second' in col:
                source = second_df
            else:
                source = full_df
            counts[col] = _count_mark(source, cha)

        # Convert the counts into per-match values.
        # NOTE(review): the *_first / *_second columns are also divided by the
        # full-season match count, not by the half-season count — confirm intended.
        df = pd.DataFrame(counts, columns=colnames) / num_match
        # NaN when the id is missing from the lookup (same as Series.map would give).
        df['team'] = team_dic.get(team_id, float('nan'))
        df['name'] = full_df.index

        dfs.append(df)

    # pd.concat raises ValueError on an empty list, so skip seasons without teams.
    if not dfs:
        print(f'No teams found for {year}; skipping CSV output.')
        continue

    # Write one CSV per season.
    df = pd.concat(dfs)
    df.to_csv(f'../data/work/event_play_{year}.csv', index=False)