### 강의에서 사용된 파이썬 주요 기능

- 경기 데이터 불러오기
  - pandas.set_option: https://pandas.pydata.org/docs/reference/api/pandas.set_option.html#pandas.set_option

- 경기 내 선수별 기록 집계
  - pandas.DataFrame.fillna: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.fillna.html
  - pandas.DataFrame.: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.astype.html
  - pandas.DataFrame.reset_index: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reset_index.html

- 경기 내 선수별 출전 시간 산출
  - numpy.ndarray.tolist: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.tolist.html
  - pandas.DataFrame.set_index: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.set_index.html
  - numpy.sort: https://numpy.org/doc/stable/reference/generated/numpy.sort.html
  - numpy.concatenate: https://numpy.org/doc/stable/reference/generated/numpy.concatenate.html
  - pandas.DataFrame.merge: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html

- 대회 전체 선수 기록 누적 집계
  - tqdm: https://tqdm.github.io

### 경기 데이터 불러오기

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

pd.set_option('display.max_columns', 40)

In [2]:
match_id = 2058017
match_events = pd.read_pickle(f'data/refined_events/World_Cup/{match_id}.pkl')
match_events

Unnamed: 0,match_id,event_id,period,time,team_id,team_name,player_id,player_name,event_type,sub_event_type,tags,start_x,start_y,end_x,end_y
0,2058017,263883958,1H,1.892,9598,Croatia,14943,M. Mandžukić,Pass,Simple pass,[],52.00,33.32,,
1,2058017,263883959,1H,3.889,9598,Croatia,69968,M. Brozović,Pass,Simple pass,[Accurate],40.56,32.64,35.36,17.68
2,2058017,263883960,1H,6.141,9598,Croatia,8287,L. Modrić,Pass,Simple pass,[Accurate],35.36,17.68,31.20,4.76
3,2058017,263883963,1H,9.227,9598,Croatia,69409,Š. Vrsaljko,Pass,Simple pass,[Accurate],31.20,4.76,11.44,23.80
4,2058017,263883964,1H,12.659,9598,Croatia,135747,D. Subašić,Pass,Launch,[Accurate],11.44,23.80,66.56,33.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1467,2058017,263885652,2H,2978.302,9598,Croatia,3476,I. Rakitić,Pass,Simple pass,[Accurate],47.84,54.40,66.56,63.92
1468,2058017,263885653,2H,2979.085,9598,Croatia,14812,I. Perišić,Others on the ball,Touch,[],66.56,63.92,85.28,66.64
1469,2058017,263885654,2H,2983.449,9598,Croatia,14812,I. Perišić,Pass,Cross,"[Left foot, High, Not accurate]",85.28,66.64,104.00,34.00
1470,2058017,263885613,2H,2985.869,4418,France,25381,H. Lloris,Goalkeeper leaving line,Goalkeeper leaving line,[],0.00,34.00,14.56,38.76


### 경기 내 선수별 기록 집계

##### (1) 이벤트 유형을 기준으로 추출되는 기록의 집계

- 슈팅 횟수

In [3]:
shot_records = match_events[
    (match_events['event_type'] == 'Shot') |
    (match_events['sub_event_type'].isin(['Free kick shot', 'Penalty']))
]
shots = shot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
shots.name = 'total_shots'
shots

team_id  team_name  player_id  player_name 
4418     France     3682       A. Griezmann    2
                    7936       P. Pogba        2
                    26010      O. Giroud       1
                    28115      N. Fekir        1
                    353833     K. Mbappé       2
9598     Croatia    3476       I. Rakitić      3
                    14812      I. Perišić      2
                    14943      M. Mandžukić    1
                    25393      D. Lovren       1
                    69396      D. Vida         2
                    69409      Š. Vrsaljko     2
                    69616      A. Rebić        3
Name: total_shots, dtype: int64

- 패스 횟수

In [4]:
pass_records = match_events[
    (match_events['event_type'] == 'Pass') |
    (match_events['sub_event_type'].isin(['Free kick', 'Free kick cross', 'corner']))
]
passes = pass_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
passes.name = 'total_passes'
passes

team_id  team_name  player_id  player_name 
4418     France     3309       R. Varane       25
                    3682       A. Griezmann    24
                    7936       P. Pogba        31
                    8200       S. N'Zonzi      14
                    25381      H. Lloris       18
                    25397      S. Umtiti       21
                    25437      B. Matuidi      22
                    26010      O. Giroud       21
                    28115      N. Fekir         2
                    31528      N. Kanté        13
                    209091     C. Tolisso       7
                    279545     L. Hernández    20
                    340646     B. Pavard       20
                    353833     K. Mbappé       14
9598     Croatia    3476       I. Rakitić      63
                    8287       L. Modrić       72
                    14812      I. Perišić      35
                    14943      M. Mandžukić    29
                    25393      D. Lovren       65
      

- 파울 횟수

In [5]:
foul_records = match_events[match_events['event_type'] == 'Foul']
fouls = foul_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
fouls.name = 'fouls'
fouls

team_id  team_name  player_id  player_name 
4418     France     7936       P. Pogba        2
                    25397      S. Umtiti       1
                    25437      B. Matuidi      1
                    26010      O. Giroud       3
                    28115      N. Fekir        1
                    31528      N. Kanté        3
                    279545     L. Hernández    1
                    340646     B. Pavard       1
9598     Croatia    3476       I. Rakitić      1
                    8287       L. Modrić       3
                    14812      I. Perišić      1
                    14943      M. Mandžukić    2
                    69409      Š. Vrsaljko     2
                    69616      A. Rebić        1
                    69968      M. Brozović     2
                    105361     I. Strinić      1
Name: fouls, dtype: int64

- 오프사이드 횟수

In [6]:
offside_records = match_events[match_events['event_type'] == 'Offside']
offsides = offside_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
offsides.name = 'offsides'
offsides

team_id  team_name  player_id  player_name 
4418     France     353833     K. Mbappé       1
9598     Croatia    14943      M. Mandžukić    1
Name: offsides, dtype: int64

##### (2) 태그 정보를 기준으로 추출되는 기록의 집계

- 유효슈팅 횟수

In [7]:
acc_shot_records = shot_records[shot_records['tags'].apply(lambda x: 'Accurate' in x)]
acc_shots = acc_shot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
acc_shots.name = 'shots_on_target'
acc_shots

team_id  team_name  player_id  player_name 
4418     France     3682       A. Griezmann    2
                    7936       P. Pogba        1
                    28115      N. Fekir        1
                    353833     K. Mbappé       2
9598     Croatia    14812      I. Perišić      1
                    14943      M. Mandžukić    1
                    69616      A. Rebić        1
Name: shots_on_target, dtype: int64

- 신체 부위별 슈팅 횟수

In [8]:
rshot_records = shot_records[shot_records['tags'].apply(lambda x: 'Right foot' in x)]
rshots = rshot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
rshots.name = 'rfoot_shots'

lshot_records = shot_records[shot_records['tags'].apply(lambda x: 'Left foot' in x)]
lshots = lshot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
lshots.name = 'lfoot_shots'

hshot_records = shot_records[shot_records['tags'].apply(lambda x: 'Head/body' in x)]
hshots = hshot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
hshots.name = 'header_shots'

shot_stats_list = [shots, acc_shots, rshots, lshots, hshots]
shot_stats = pd.concat(shot_stats_list, axis=1).fillna(0).astype(int)
shot_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots
team_id,team_name,player_id,player_name,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
4418,France,3682,A. Griezmann,2,2,0,2,0
4418,France,7936,P. Pogba,2,1,1,1,0
4418,France,26010,O. Giroud,1,0,0,1,0
4418,France,28115,N. Fekir,1,1,0,1,0
4418,France,353833,K. Mbappé,2,2,2,0,0
9598,Croatia,3476,I. Rakitić,3,0,1,2,0
9598,Croatia,14812,I. Perišić,2,1,0,1,1
9598,Croatia,14943,M. Mandžukić,1,1,1,0,0
9598,Croatia,25393,D. Lovren,1,0,1,0,0
9598,Croatia,69396,D. Vida,2,0,0,0,2


- 득점·도움·자책골 횟수

In [9]:
goal_records = match_events[match_events['tags'].apply(lambda x: 'Goal' in x)]
goals = goal_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
goals.name = 'goals'

assist_records = match_events[match_events['tags'].apply(lambda x: 'Assist' in x)]
assists = assist_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
assists.name = 'assists'

own_goal_records = match_events[match_events['tags'].apply(lambda x: 'Own goal' in x)]
own_goals = own_goal_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
own_goals.name = 'own_goals'

goal_stats_list = [goals, assists, own_goals]
goal_stats = pd.concat(goal_stats_list, axis=1).fillna(0).astype(int)
goal_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,goals,assists,own_goals
team_id,team_name,player_id,player_name,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4418,France,3682,A. Griezmann,1,0,0
4418,France,7936,P. Pogba,1,0,0
4418,France,353833,K. Mbappé,1,0,0
9598,Croatia,14812,I. Perišić,1,0,0
9598,Croatia,14943,M. Mandžukić,1,0,1
9598,Croatia,69396,D. Vida,0,1,0


- 성공한 패스 횟수 및 패스 성공률

In [10]:
acc_pass_records = pass_records[pass_records['tags'].apply(lambda x: 'Accurate' in x)]
acc_passes = acc_pass_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
acc_passes.name = 'acc_passes'

pass_stats = pd.concat([passes, acc_passes], axis=1).fillna(0).astype(int)
pass_stats['pass_accuracy'] = (pass_stats['acc_passes'] / pass_stats['total_passes']).round(2)
pass_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,total_passes,acc_passes,pass_accuracy
team_id,team_name,player_id,player_name,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4418,France,3309,R. Varane,25,19,0.76
4418,France,3682,A. Griezmann,24,19,0.79
4418,France,7936,P. Pogba,31,29,0.94
4418,France,8200,S. N'Zonzi,14,13,0.93
4418,France,25381,H. Lloris,18,12,0.67
4418,France,25397,S. Umtiti,21,18,0.86
4418,France,25437,B. Matuidi,22,19,0.86
4418,France,26010,O. Giroud,21,14,0.67
4418,France,28115,N. Fekir,2,1,0.5
4418,France,31528,N. Kanté,13,10,0.77


- 경고·퇴장 횟수

In [11]:
yellow_records = foul_records[foul_records['tags'].apply(lambda x: 'Yellow card' in x)]
yellows = yellow_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
yellows.name = 'yellow_cards'

red_records = foul_records[foul_records['tags'].apply(lambda x: 'Red card' in x)]
reds = red_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
reds.name = 'red_cards'

foul_stats = pd.concat([fouls, offsides, yellows, reds], axis=1).fillna(0).astype(int)
foul_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,fouls,offsides,yellow_cards,red_cards
team_id,team_name,player_id,player_name,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4418,France,7936,P. Pogba,2,0,0,0
4418,France,25397,S. Umtiti,1,0,0,0
4418,France,25437,B. Matuidi,1,0,0,0
4418,France,26010,O. Giroud,3,0,0,0
4418,France,28115,N. Fekir,1,0,0,0
4418,France,31528,N. Kanté,3,0,1,0
4418,France,279545,L. Hernández,1,0,1,0
4418,France,340646,B. Pavard,1,0,0,0
9598,Croatia,3476,I. Rakitić,1,0,0,0
9598,Croatia,8287,L. Modrić,3,0,0,0


##### (3) 선수별 통계 정리

In [12]:
player_stats = pd.concat([goal_stats, shot_stats, foul_stats, pass_stats], axis=1, sort=True)
player_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes,pass_accuracy
team_id,team_name,player_id,player_name,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
4418,France,3309,R. Varane,,,,,,,,,,,,,25,19,0.76
4418,France,3682,A. Griezmann,1.0,0.0,0.0,2.0,2.0,0.0,2.0,0.0,,,,,24,19,0.79
4418,France,7936,P. Pogba,1.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,2.0,0.0,0.0,0.0,31,29,0.94
4418,France,8200,S. N'Zonzi,,,,,,,,,,,,,14,13,0.93
4418,France,25381,H. Lloris,,,,,,,,,,,,,18,12,0.67
4418,France,25397,S. Umtiti,,,,,,,,,1.0,0.0,0.0,0.0,21,18,0.86
4418,France,25437,B. Matuidi,,,,,,,,,1.0,0.0,0.0,0.0,22,19,0.86
4418,France,26010,O. Giroud,,,,1.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,21,14,0.67
4418,France,28115,N. Fekir,,,,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,2,1,0.5
4418,France,31528,N. Kanté,,,,,,,,,3.0,0.0,1.0,0.0,13,10,0.77


In [13]:
player_stats = player_stats.fillna(0).reset_index()

for col in player_stats.columns[4:]:
    if col != 'pass_accuracy':
        player_stats[col] = player_stats[col].astype(int)

player_stats

Unnamed: 0,team_id,team_name,player_id,player_name,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes,pass_accuracy
0,4418,France,3309,R. Varane,0,0,0,0,0,0,0,0,0,0,0,0,25,19,0.76
1,4418,France,3682,A. Griezmann,1,0,0,2,2,0,2,0,0,0,0,0,24,19,0.79
2,4418,France,7936,P. Pogba,1,0,0,2,1,1,1,0,2,0,0,0,31,29,0.94
3,4418,France,8200,S. N'Zonzi,0,0,0,0,0,0,0,0,0,0,0,0,14,13,0.93
4,4418,France,25381,H. Lloris,0,0,0,0,0,0,0,0,0,0,0,0,18,12,0.67
5,4418,France,25397,S. Umtiti,0,0,0,0,0,0,0,0,1,0,0,0,21,18,0.86
6,4418,France,25437,B. Matuidi,0,0,0,0,0,0,0,0,1,0,0,0,22,19,0.86
7,4418,France,26010,O. Giroud,0,0,0,1,0,0,1,0,3,0,0,0,21,14,0.67
8,4418,France,28115,N. Fekir,0,0,0,1,1,0,1,0,1,0,0,0,2,1,0.5
9,4418,France,31528,N. Kanté,0,0,0,0,0,0,0,0,3,0,1,0,13,10,0.77


### 경기 내 선수별 출전 시간 산출

##### (1) 선수교체 및 퇴장 기록 필터링

In [14]:
player_change_records = match_events[
    (match_events['event_type'] == 'Substitution') |
    (match_events['tags'].apply(lambda x: 'Red card' in x))
]
player_change_records

Unnamed: 0,match_id,event_id,period,time,team_id,team_name,player_id,player_name,event_type,sub_event_type,tags,start_x,start_y,end_x,end_y
820,2058017,0,2H,540.0,4418,France,8200,S. N'Zonzi,Substitution,Player in,[31528],,,,
821,2058017,0,2H,540.0,4418,France,31528,N. Kanté,Substitution,Player out,[8200],,,,
1097,2058017,0,2H,1500.0,9598,Croatia,69411,A. Kramarić,Substitution,Player in,[69616],,,,
1098,2058017,0,2H,1500.0,9598,Croatia,69616,A. Rebić,Substitution,Player out,[69411],,,,
1114,2058017,0,2H,1620.0,4418,France,209091,C. Tolisso,Substitution,Player in,[25437],,,,
1115,2058017,0,2H,1620.0,4418,France,25437,B. Matuidi,Substitution,Player out,[209091],,,,
1252,2058017,0,2H,2100.0,9598,Croatia,135810,M. Pjaca,Substitution,Player in,[105361],,,,
1253,2058017,0,2H,2100.0,9598,Croatia,105361,I. Strinić,Substitution,Player out,[135810],,,,
1254,2058017,0,2H,2100.0,4418,France,28115,N. Fekir,Substitution,Player in,[26010],,,,
1255,2058017,0,2H,2100.0,4418,France,26010,O. Giroud,Substitution,Player out,[28115],,,,


##### (2) 선발 선수 추출

In [15]:
in_players = player_change_records[player_change_records['sub_event_type'] == 'Player in']['player_id'].tolist()
player_ids = [p for p in match_events['player_id'].unique() if not p in in_players]
player_ids

[14943,
 69968,
 8287,
 69409,
 135747,
 3309,
 14812,
 3476,
 105361,
 69396,
 25393,
 353833,
 340646,
 31528,
 7936,
 3682,
 25381,
 25397,
 25437,
 26010,
 69616,
 279545]

##### (3) 선수교체 및 퇴장 기록을 활용한 phase 구분

In [16]:
period_durations = match_events.groupby('period')['time'].max()
phase_record_list = []
phase = 1

for period in period_durations.index:
    change_times = player_change_records[player_change_records['period'] == period]['time'].unique().tolist()
    change_times.append(period_durations[period])
    if 0 not in change_times:
        change_times = [0] + change_times

    for i in range(len(change_times[:-1])):
        moment_records = player_change_records[
            (player_change_records['period'] == period) &
            (player_change_records['time'] == change_times[i])
        ]

        for _, record in moment_records.iterrows():
            if record['sub_event_type'] == 'Player out' or record['event_type'] == 'Foul':
                player_ids.remove(record['player_id'])
            else:
                player_ids.append(record['player_id'])

        phase_record = {
            'phase': phase,
            'period': period,
            'start_time': change_times[i],
            'end_time': change_times[i+1],
            'duration': change_times[i+1] - change_times[i],
            'player_ids': player_ids.copy()
        }
        phase += 1

        phase_record_list.append(phase_record)

phase_records = pd.DataFrame(phase_record_list).set_index('phase')
phase_records

Unnamed: 0_level_0,period,start_time,end_time,duration,player_ids
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1H,0.0,2905.875,2905.875,"[14943, 69968, 8287, 69409, 135747, 3309, 1481..."
2,2H,0.0,540.0,540.0,"[14943, 69968, 8287, 69409, 135747, 3309, 1481..."
3,2H,540.0,1500.0,960.0,"[14943, 69968, 8287, 69409, 135747, 3309, 1481..."
4,2H,1500.0,1620.0,120.0,"[14943, 69968, 8287, 69409, 135747, 3309, 1481..."
5,2H,1620.0,2100.0,480.0,"[14943, 69968, 8287, 69409, 135747, 3309, 1481..."
6,2H,2100.0,3002.149,902.149,"[14943, 69968, 8287, 69409, 135747, 3309, 1481..."


##### (4) 선수별 각 phase 출전 여부 판단

In [17]:
player_ids = np.sort(match_events['player_id'].unique())
for player_id in player_ids:
    phase_records[player_id] = 0

for phase in phase_records.index:
    for player_id in phase_records.at[phase, 'player_ids']:
        phase_records.at[phase, player_id] = 1

phase_records = phase_records[np.concatenate([phase_records.columns[:4], player_ids])]
phase_records

Unnamed: 0_level_0,period,start_time,end_time,duration,3309,3476,3682,7936,8200,8287,14812,14943,25381,25393,25397,25437,26010,28115,31528,69396,69409,69411,69616,69968,105361,135747,135810,209091,279545,340646,353833
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
1,1H,0.0,2905.875,2905.875,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1
2,2H,0.0,540.0,540.0,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,0,0,1,1,1
3,2H,540.0,1500.0,960.0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,1,1,1,1,0,0,1,1,1
4,2H,1500.0,1620.0,120.0,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,1,1,1
5,2H,1620.0,2100.0,480.0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,1,1,0,1,1,1,0,1,1,1,1
6,2H,2100.0,3002.149,902.149,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,1,1,1,0,1,0,1,1,1,1,1,1


##### (5) 선수별 출전 시간 산출

In [18]:
playing_times = pd.Series(index=player_ids, dtype='float')
for player_id in player_ids:
    playing_times[player_id] = phase_records[phase_records[player_id] == 1]['duration'].sum().round(1)
playing_times

3309      5908.0
3476      5908.0
3682      5908.0
7936      5908.0
8200      2462.1
8287      5908.0
14812     5908.0
14943     5908.0
25381     5908.0
25393     5908.0
25397     5908.0
25437     4525.9
26010     5005.9
28115      902.1
31528     3445.9
69396     5908.0
69409     5908.0
69411     1502.1
69616     4405.9
69968     5908.0
105361    5005.9
135747    5908.0
135810     902.1
209091    1382.1
279545    5908.0
340646    5908.0
353833    5908.0
dtype: float64

In [19]:
playing_times = playing_times.reset_index()
playing_times.columns = ['player_id', 'playing_time']
playing_times

Unnamed: 0,player_id,playing_time
0,3309,5908.0
1,3476,5908.0
2,3682,5908.0
3,7936,5908.0
4,8200,2462.1
5,8287,5908.0
6,14812,5908.0
7,14943,5908.0
8,25381,5908.0
9,25393,5908.0


##### (6) 선수별 통계에 출전 시간 추가

In [20]:
player_stats = pd.merge(player_stats, playing_times)
player_stats

Unnamed: 0,team_id,team_name,player_id,player_name,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes,pass_accuracy,playing_time
0,4418,France,3309,R. Varane,0,0,0,0,0,0,0,0,0,0,0,0,25,19,0.76,5908.0
1,4418,France,3682,A. Griezmann,1,0,0,2,2,0,2,0,0,0,0,0,24,19,0.79,5908.0
2,4418,France,7936,P. Pogba,1,0,0,2,1,1,1,0,2,0,0,0,31,29,0.94,5908.0
3,4418,France,8200,S. N'Zonzi,0,0,0,0,0,0,0,0,0,0,0,0,14,13,0.93,2462.1
4,4418,France,25381,H. Lloris,0,0,0,0,0,0,0,0,0,0,0,0,18,12,0.67,5908.0
5,4418,France,25397,S. Umtiti,0,0,0,0,0,0,0,0,1,0,0,0,21,18,0.86,5908.0
6,4418,France,25437,B. Matuidi,0,0,0,0,0,0,0,0,1,0,0,0,22,19,0.86,4525.9
7,4418,France,26010,O. Giroud,0,0,0,1,0,0,1,0,3,0,0,0,21,14,0.67,5005.9
8,4418,France,28115,N. Fekir,0,0,0,1,1,0,1,0,1,0,0,0,2,1,0.5,902.1
9,4418,France,31528,N. Kanté,0,0,0,0,0,0,0,0,3,0,1,0,13,10,0.77,3445.9


### 대회 전체 선수별 기록 집계

##### (1) 경기 내 선수별 기록 집계 함수 구현

In [21]:
def generate_player_stats(match_id):
    # Data loading
    match_events = pd.read_pickle(f'data/refined_events/World_Cup/{match_id}.pkl')
    match_events = match_events[match_events['period'] != 'P']

    # Goal stats
    goal_records = match_events[match_events['tags'].apply(lambda x: 'Goal' in x)]
    goals = goal_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    goals.name = 'goals'

    own_goal_records = match_events[match_events['tags'].apply(lambda x: 'Own goal' in x)]
    own_goals = own_goal_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    own_goals.name = 'own_goals'

    assist_records = match_events[match_events['tags'].apply(lambda x: 'Assist' in x)]
    assists = assist_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    assists.name = 'assists'

    goal_stats_list = [goals, assists, own_goals]
    goal_stats = pd.concat(goal_stats_list, axis=1).fillna(0).astype(int)

    # Shot stats
    shot_records = match_events[
        (match_events['event_type'] == 'Shot') |
        (match_events['sub_event_type'].isin(['Free kick shot', 'Penalty']))
    ]
    shots = shot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    shots.name = 'total_shots'

    acc_shot_records = shot_records[shot_records['tags'].apply(lambda x: 'Accurate' in x)]
    acc_shots = acc_shot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    acc_shots.name = 'shots_on_target'

    rshot_records = shot_records[shot_records['tags'].apply(lambda x: 'Right foot' in x)]
    rshots = rshot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    rshots.name = 'rfoot_shots'

    lshot_records = shot_records[shot_records['tags'].apply(lambda x: 'Left foot' in x)]
    lshots = lshot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    lshots.name = 'lfoot_shots'

    hshot_records = shot_records[shot_records['tags'].apply(lambda x: 'Head/body' in x)]
    hshots = hshot_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    hshots.name = 'header_shots'

    shot_stats_list = [shots, acc_shots, rshots, lshots, hshots]
    shot_stats = pd.concat(shot_stats_list, axis=1).fillna(0).astype(int)

    # Foul stats
    foul_records = match_events[match_events['event_type'] == 'Foul']
    fouls = foul_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    fouls.name = 'fouls'

    offside_records = match_events[match_events['event_type'] == 'Offside']
    offsides = offside_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    offsides.name = 'offsides'

    yellow_records = foul_records[foul_records['tags'].apply(lambda x: 'Yellow card' in x)]
    yellows = yellow_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    yellows.name = 'yellow_cards'

    red_records = foul_records[foul_records['tags'].apply(lambda x: 'Red card' in x)]
    reds = red_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    reds.name = 'red_cards'

    foul_stats = pd.concat([fouls, offsides, yellows, reds], axis=1).fillna(0).astype(int)

    # Pass stats
    pass_records = match_events[
        (match_events['event_type'] == 'Pass') |
        (match_events['sub_event_type'].isin(['Free kick', 'Free kick cross', 'corner']))
    ]
    passes = pass_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    passes.name = 'total_passes'

    acc_pass_records = pass_records[pass_records['tags'].apply(lambda x: 'Accurate' in x)]
    acc_passes = acc_pass_records.groupby(['team_id', 'team_name', 'player_id', 'player_name'])['event_id'].count()
    acc_passes.name = 'acc_passes'

    pass_stats = pd.concat([passes, acc_passes], axis=1).fillna(0).astype(int)
    pass_stats['pass_accuracy'] = (pass_stats['acc_passes'] / pass_stats['total_passes']).round(2)

    # Playing time
    player_change_records = match_events[
        (match_events['event_type'] == 'Substitution') |
        (match_events['tags'].apply(lambda x: 'Red card' in x))
    ]
    in_players = player_change_records[player_change_records['sub_event_type'] == 'Player in']['player_id'].tolist()
    player_ids = [p for p in match_events['player_id'].unique() if not p in in_players]

    period_durations = match_events.groupby('period')['time'].max()
    phase_record_list = []
    phase = 1

    for period in period_durations.index:
        change_times = player_change_records[player_change_records['period'] == period]['time'].unique().tolist()
        change_times.append(period_durations[period])
        if 0 not in change_times:
            change_times = [0] + change_times

        for i in range(len(change_times[:-1])):
            moment_records = player_change_records[
                (player_change_records['period'] == period) &
                (player_change_records['time'] == change_times[i])
            ]

            for _, record in moment_records.iterrows():
                if record['sub_event_type'] == 'Player out' or record['event_type'] == 'Foul':
                    player_ids.remove(record['player_id'])
                else:
                    player_ids.append(record['player_id'])

            phase_record = {
                'phase': phase,
                'period': period,
                'start_time': change_times[i],
                'end_time': change_times[i+1],
                'duration': change_times[i+1] - change_times[i],
                'player_ids': player_ids.copy()
            }
            phase += 1

            phase_record_list.append(phase_record)

    phase_records = pd.DataFrame(phase_record_list).set_index('phase')

    player_ids = np.sort(match_events['player_id'].unique())
    for player_id in player_ids:
        phase_records[player_id] = 0

    for phase in phase_records.index:
        for player_id in phase_records.at[phase, 'player_ids']:
            phase_records.at[phase, player_id] = 1

    phase_records = phase_records[np.concatenate([phase_records.columns[:4], player_ids])]

    playing_times = pd.Series(index=player_ids, dtype='float')
    for player_id in player_ids:
        playing_times[player_id] = phase_records[phase_records[player_id] == 1]['duration'].sum().round(1)
    playing_times = playing_times.reset_index()
    playing_times.columns = ['player_id', 'playing_time']

    # Concatenation
    player_stats = pd.concat([goal_stats, shot_stats, foul_stats, pass_stats], axis=1, sort=True).fillna(0)
    for col in player_stats.columns:
        if col != 'pass_accuracy':
            player_stats[col] = player_stats[col].astype(int)

    player_stats = pd.merge(player_stats.reset_index(), playing_times)
    player_stats['match_id'] = match_id

    cols = player_stats.columns.tolist()
    cols = ['match_id'] + cols[:4] + ['playing_time'] + cols[4:-2]
    return player_stats[cols]

In [22]:
generate_player_stats(match_id)

Unnamed: 0,match_id,team_id,team_name,player_id,player_name,playing_time,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes,pass_accuracy
0,2058017,4418,France,3309,R. Varane,5908.0,0,0,0,0,0,0,0,0,0,0,0,0,25,19,0.76
1,2058017,4418,France,3682,A. Griezmann,5908.0,1,0,0,2,2,0,2,0,0,0,0,0,24,19,0.79
2,2058017,4418,France,7936,P. Pogba,5908.0,1,0,0,2,1,1,1,0,2,0,0,0,31,29,0.94
3,2058017,4418,France,8200,S. N'Zonzi,2462.1,0,0,0,0,0,0,0,0,0,0,0,0,14,13,0.93
4,2058017,4418,France,25381,H. Lloris,5908.0,0,0,0,0,0,0,0,0,0,0,0,0,18,12,0.67
5,2058017,4418,France,25397,S. Umtiti,5908.0,0,0,0,0,0,0,0,0,1,0,0,0,21,18,0.86
6,2058017,4418,France,25437,B. Matuidi,4525.9,0,0,0,0,0,0,0,0,1,0,0,0,22,19,0.86
7,2058017,4418,France,26010,O. Giroud,5005.9,0,0,0,1,0,0,1,0,3,0,0,0,21,14,0.67
8,2058017,4418,France,28115,N. Fekir,902.1,0,0,0,1,1,0,1,0,1,0,0,0,2,1,0.5
9,2058017,4418,France,31528,N. Kanté,3445.9,0,0,0,0,0,0,0,0,3,0,1,0,13,10,0.77


##### (2) 대회 전체 경기별 선수 기록 집계

- 경기 정보 불러오기

In [23]:
dataset_name = 'World_Cup'
match_df = pd.read_csv(f'data/refined_events/{dataset_name}/matches.csv', index_col=0, encoding='utf-8-sig')
match_df

Unnamed: 0_level_0,gameweek,datetime,venue,team1_id,team1_name,team1_goals,team2_id,team2_name,team2_goals,duration
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2057954,1,2018-06-14 15:00:00,Olimpiyskiy stadion Luzhniki,14358,Russia,5,16521,Saudi Arabia,0,Regular
2057955,1,2018-06-15 12:00:00,Stadion Central'nyj,16129,Egypt,0,15670,Uruguay,1,Regular
2057961,1,2018-06-15 15:00:00,Stadion Krestovskyi,16216,Morocco,0,10840,Iran,1,Regular
2057960,1,2018-06-15 18:00:00,Olimpiyskiy Stadion Fisht,9905,Portugal,3,1598,Spain,3,Regular
2057966,1,2018-06-16 10:00:00,Kazan' Arena,4418,France,2,8493,Australia,1,Regular
...,...,...,...,...,...,...,...,...,...,...
2058012,0,2018-07-07 18:00:00,Olimpiyskiy Stadion Fisht,14358,Russia,2,9598,Croatia,2,Penalties
2058014,0,2018-07-10 18:00:00,Stadion Krestovskyi,4418,France,1,5629,Belgium,0,Regular
2058015,0,2018-07-11 18:00:00,Olimpiyskiy stadion Luzhniki,9598,Croatia,2,2413,England,1,ExtraTime
2058016,0,2018-07-14 14:00:00,Stadion Krestovskyi,5629,Belgium,2,2413,England,0,Regular


- 경기별로 선수 기록 집계 함수 호출

In [24]:
stats_list = []

for match_id in tqdm(match_df.index):
    match_player_stats = generate_player_stats(match_id)
    stats_list.append(match_player_stats)

player_stats = pd.concat(stats_list, ignore_index=True)
player_stats

  0%|          | 0/64 [00:00<?, ?it/s]

100%|██████████| 64/64 [00:02<00:00, 26.55it/s]


Unnamed: 0,match_id,team_id,team_name,player_id,player_name,playing_time,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes,pass_accuracy
0,2057954,14358,Russia,4513,D. Cheryshev,4362.5,2,0,0,3,3,0,2,1,4,0,0,0,22,12,0.55
1,2057954,14358,Russia,41123,Mário Fernandes,5802.5,0,0,0,0,0,0,0,0,0,1,0,0,27,22,0.81
2,2057954,14358,Russia,101576,I. Akinfeev,5802.5,0,0,0,0,0,0,0,0,0,0,0,0,18,16,0.89
3,2057954,14358,Russia,101583,S. Ignashevich,5802.5,0,0,0,0,0,0,0,0,0,0,0,0,30,27,0.90
4,2057954,14358,Russia,101590,A. Dzagoev,1440.0,0,0,0,0,0,0,0,0,0,0,0,0,8,7,0.88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1777,2058017,9598,Croatia,69616,A. Rebić,4405.9,0,0,0,3,1,1,2,0,1,0,0,0,14,8,0.57
1778,2058017,9598,Croatia,69968,M. Brozović,5908.0,0,0,0,0,0,0,0,0,2,0,0,0,93,82,0.88
1779,2058017,9598,Croatia,105361,I. Strinić,5005.9,0,0,0,0,0,0,0,0,1,0,0,0,25,17,0.68
1780,2058017,9598,Croatia,135747,D. Subašić,5908.0,0,0,0,0,0,0,0,0,0,0,0,0,13,13,1.00


##### (3) 대회 전체 선수 기록 누적 집계

In [25]:
grouped = player_stats.groupby(['team_id', 'team_name', 'player_id', 'player_name'])

player_stats_accum = grouped[player_stats.columns[5:-1]].sum()
player_stats_accum['pass_accuracy'] = (player_stats_accum['acc_passes'] / player_stats_accum['total_passes']).round(2)
player_stats_accum['matches'] = grouped['match_id'].count()

player_stats_accum = player_stats_accum[['matches'] + player_stats.columns[5:-1].tolist()].reset_index()
player_stats_accum

Unnamed: 0,team_id,team_name,player_id,player_name,matches,playing_time,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes
0,1598,Spain,3269,Jordi Alba,4,25183.1,0,0,0,2,1,1,1,0,2,1,0,0,380,351
1,1598,Spain,3304,Nacho,2,10021.9,1,0,0,1,1,1,0,0,3,0,0,0,92,86
2,1598,Spain,3306,Sergio Ramos,4,25183.1,0,0,0,4,0,3,0,1,4,0,0,0,488,468
3,1598,Spain,3341,Piqué,4,25183.1,0,0,0,7,2,2,1,4,3,0,1,0,342,324
4,1598,Spain,3345,Thiago Alcântara,2,6065.9,0,0,0,2,0,1,1,0,1,0,0,0,113,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,19314,Senegal,133897,K. N'Diaye,3,17284.2,0,0,0,0,0,0,0,0,0,0,0,0,52,34
598,19314,Senegal,207164,B. N'Diaye,1,4857.4,0,0,0,1,0,1,0,0,1,0,0,0,23,15
599,19314,Senegal,256480,B. Keita,1,4922.1,0,0,0,1,1,1,0,0,1,0,0,0,13,12
600,19314,Senegal,375348,M. Wagué,3,12722.1,1,0,0,1,1,1,0,0,1,0,0,0,78,63


##### (4) 분야별 Top 10 추출

- 득점 순위 Top 10

In [26]:
player_stats_accum.sort_values('goals', ascending=False, ignore_index=True)[:10]

Unnamed: 0,team_id,team_name,player_id,player_name,matches,playing_time,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes
0,2413,England,8717,H. Kane,6,37123.8,6,0,0,14,6,11,0,3,4,4,0,0,119,96
1,4418,France,3682,A. Griezmann,7,35630.8,4,2,0,19,10,6,13,0,7,2,1,0,233,183
2,9905,Portugal,3322,Cristiano Ronaldo,4,23556.8,4,0,0,21,7,19,1,1,4,1,2,0,102,85
3,14358,Russia,4513,D. Cheryshev,5,19009.8,4,0,1,10,6,3,6,1,7,0,0,0,87,53
4,5629,Belgium,7905,R. Lukaku,6,29859.1,4,1,0,13,5,3,5,5,10,2,0,0,87,68
5,4418,France,353833,K. Mbappé,7,33782.3,4,0,0,8,7,6,1,1,3,1,2,0,161,123
6,9598,Croatia,14943,M. Mandžukić,6,38647.8,3,1,1,12,5,6,2,4,13,3,2,0,156,112
7,1598,Spain,4338,Diego Costa,4,19467.0,3,0,0,8,5,4,2,2,3,1,0,0,61,54
8,14358,Russia,101669,A. Dzyuba,5,20801.0,3,0,0,8,4,3,1,4,14,3,0,0,67,54
9,9598,Croatia,14812,I. Perišić,7,40194.9,3,1,0,23,4,9,10,4,5,3,0,0,185,122


- 득점 1위 해리 케인 경기별 기록

In [27]:
player_stats[player_stats['player_name'] == 'H. Kane']

Unnamed: 0,match_id,team_id,team_name,player_id,player_name,playing_time,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes,pass_accuracy
369,2057991,2413,England,8717,H. Kane,5821.9,2,0,0,3,2,2,0,1,1,1,0,0,18,13,0.72
814,2057993,2413,England,8717,H. Kane,3902.3,3,0,0,3,3,3,0,0,0,0,0,0,12,10,0.83
1534,2058009,2413,England,8717,H. Kane,7996.6,1,0,0,3,1,2,0,1,1,1,0,0,32,23,0.72
1619,2058013,2413,England,8717,H. Kane,5776.5,0,0,0,1,0,1,0,0,1,0,0,0,16,14,0.88
1703,2058015,2413,England,8717,H. Kane,7937.1,0,0,0,2,0,1,0,1,1,2,0,0,22,18,0.82
1732,2058016,2413,England,8717,H. Kane,5689.4,0,0,0,2,0,2,0,0,0,0,0,0,19,18,0.95


- 득점 순위 Top 10 - 동률시 출전 시간이 적을수록 우위

In [28]:
player_stats_accum.sort_values(['goals', 'playing_time'], ascending=[False, True], ignore_index=True)[:10]

Unnamed: 0,team_id,team_name,player_id,player_name,matches,playing_time,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes
0,2413,England,8717,H. Kane,6,37123.8,6,0,0,14,6,11,0,3,4,4,0,0,119,96
1,14358,Russia,4513,D. Cheryshev,5,19009.8,4,0,1,10,6,3,6,1,7,0,0,0,87,53
2,9905,Portugal,3322,Cristiano Ronaldo,4,23556.8,4,0,0,21,7,19,1,1,4,1,2,0,102,85
3,5629,Belgium,7905,R. Lukaku,6,29859.1,4,1,0,13,5,3,5,5,10,2,0,0,87,68
4,4418,France,353833,K. Mbappé,7,33782.3,4,0,0,8,7,6,1,1,3,1,2,0,161,123
5,4418,France,3682,A. Griezmann,7,35630.8,4,2,0,19,10,6,13,0,7,2,1,0,233,183
6,1598,Spain,4338,Diego Costa,4,19467.0,3,0,0,8,5,4,2,2,3,1,0,0,61,54
7,12430,Colombia,256634,Y. Mina,3,19775.6,3,0,0,3,3,0,0,3,6,0,0,0,157,143
8,14358,Russia,101669,A. Dzyuba,5,20801.0,3,0,0,8,4,3,1,4,14,3,0,0,67,54
9,15670,Uruguay,21174,E. Cavani,4,21765.4,3,0,0,16,6,9,5,2,4,0,0,0,81,67


- 패스 횟수 Top 10

In [29]:
player_stats_accum.sort_values('total_passes', ascending=False, ignore_index=True)[:10]

Unnamed: 0,team_id,team_name,player_id,player_name,matches,playing_time,goals,assists,own_goals,total_shots,shots_on_target,rfoot_shots,lfoot_shots,header_shots,fouls,offsides,yellow_cards,red_cards,total_passes,acc_passes
0,9598,Croatia,8287,L. Modrić,7,44436.9,2,1,0,10,3,9,1,0,13,0,0,0,489,420
1,1598,Spain,3306,Sergio Ramos,4,25183.1,0,0,0,4,0,3,0,1,4,0,0,0,488,468
2,2413,England,9380,J. Stones,7,41772.2,2,0,0,4,4,0,0,4,4,0,1,0,472,447
3,1598,Spain,3563,Isco,4,25183.1,1,0,0,9,3,7,1,1,1,3,0,0,454,408
4,5629,Belgium,36,T. Alderweireld,6,34540.2,0,1,0,5,1,4,1,0,3,0,2,0,448,411
5,9598,Croatia,3476,I. Rakitić,7,41633.0,1,0,0,16,5,11,4,1,9,1,1,0,416,350
6,2413,England,8653,H. Maguire,7,41950.5,1,1,0,10,3,0,1,9,9,0,2,0,408,379
7,1598,Spain,3269,Jordi Alba,4,25183.1,0,0,0,2,1,1,1,0,2,1,0,0,380,351
8,2413,England,8277,K. Walker,5,32029.9,0,0,0,0,0,0,0,0,3,0,2,0,375,341
9,9598,Croatia,25393,D. Lovren,7,42413.0,0,0,0,3,0,2,0,1,7,0,1,0,367,322


- 90분당 패스 횟수 Top 10

In [30]:
valid_stats = player_stats_accum[player_stats_accum['playing_time'] >= 5400].copy()
valid_stats['passes_per_90min'] = (valid_stats['total_passes'] / valid_stats['playing_time'] * 5400).round(1)

cols = valid_stats.columns[:6].tolist() + ['passes_per_90min']
valid_stats.sort_values('passes_per_90min', ascending=False, ignore_index=True)[cols][:10]

Unnamed: 0,team_id,team_name,player_id,player_name,matches,playing_time,passes_per_90min
0,1598,Spain,3306,Sergio Ramos,4,25183.1,104.6
1,1598,Spain,3345,Thiago Alcântara,2,6065.9,100.6
2,1598,Spain,3443,Koke,3,14884.4,98.7
3,1598,Spain,3563,Isco,4,25183.1,97.4
4,3148,Germany,14723,T. Kroos,3,17733.9,95.6
5,4418,France,26196,D. Sidibé,1,5692.6,91.1
6,16521,Saudi Arabia,139393,Abdullah Otayf,3,15732.8,90.3
7,3148,Germany,14716,J. Boateng,2,11593.9,83.8
8,1598,Spain,3353,Iniesta,4,18097.7,83.2
9,1598,Spain,3269,Jordi Alba,4,25183.1,81.5
