### 강의에서 사용된 파이썬 주요 기능

- 데이터 집계 기초
  - pandas.Series.value_counts: https://pandas.pydata.org/docs/reference/api/pandas.Series.value_counts.html
  - pandas.DataFrame.groupby: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html
  - pandas.DataFrame.pivot_table: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.pivot_table.html

- 이벤트 성공 여부 집계
  - pandas.Series.apply: https://pandas.pydata.org/docs/reference/api/pandas.Series.apply.html
  - Lambda functions: https://wikidocs.net/22804

### 경기 데이터 불러오기

In [None]:
import pandas as pd

In [None]:
# Identifier of the match to analyse; it selects which pickle file is read.
match_id = 2058017
events_path = f'data/refined_events/World_Cup/{match_id}.pkl'

# NOTE: unpickling can execute arbitrary code, so only load files that come
# from a trusted source.
match_events = pd.read_pickle(events_path)
match_events

### 데이터 집계 기초

##### (1) Series.value_counts 함수를 활용한 항목별 횟수 집계

- 팀별 슈팅 횟수

In [None]:
# A row counts as a shot when its event_type is 'Shot' or when its
# sub_event_type is 'Free kick shot' or 'Penalty'.
is_shot_event = match_events['event_type'] == 'Shot'
is_shot_sub_event = match_events['sub_event_type'].isin(['Free kick shot', 'Penalty'])
shot_records = match_events[is_shot_event | is_shot_sub_event]

# Number of shot rows per team, largest count first.
shot_records['team_name'].value_counts()

- 선수별 패스 횟수

In [None]:
# A row counts as a pass when its event_type is 'Pass' or when its
# sub_event_type is a free kick, free-kick cross or corner.
# Sub-event labels are folded to lower case before matching so that a casing
# difference between the data and these literals (e.g. 'Corner' vs 'corner')
# cannot silently drop rows; Series.isin compares strings case-sensitively.
pass_sub_event_types = ['free kick', 'free kick cross', 'corner']
pass_records = match_events[
    (match_events['event_type'] == 'Pass') |
    (match_events['sub_event_type'].str.lower().isin(pass_sub_event_types))
]

# Number of pass rows per player, largest count first.
pass_records['player_name'].value_counts()

##### (2) DataFrame.groupby 함수를 활용한 항목별 연산

- 전후반 경기 시간

In [None]:
# Largest 'time' value recorded within each period.
time_by_period = match_events.groupby('period')['time']
time_by_period.max()

- 팀별 패스 발생 및 종료 위치

In [None]:
# Per-team mean of the pass start and end coordinates.
location_columns = ['start_x', 'start_y', 'end_x', 'end_y']
pass_records.groupby('team_name')[location_columns].mean()

- 각 이벤트 유형의 팀별 발생 횟수

In [None]:
# Count the non-null event_id values for every (team_name, event_type) pair.
group_keys = ['team_name', 'event_type']
match_events.groupby(group_keys)['event_id'].count()

##### (3) DataFrame.pivot_table 함수를 활용한 다차원 집계

- 각 이벤트 유형의 팀별 발생 횟수

In [None]:
# Event counts as a 2-D table: one row per event_type, one column per
# team_name. No fill_value is given, so combinations that never occur are
# shown as NaN.
match_events.pivot_table(
    index='event_type',
    columns='team_name',
    values='event_id',
    aggfunc='count',
)

In [None]:
# Event counts per event_type (rows) and team_name (columns); combinations
# that never occur are filled with 0 instead of NaN.
counts = match_events.pivot_table(
    values='event_id',
    index='event_type',
    columns='team_name',
    aggfunc='count',
    fill_value=0,
)

# Order the event types by how often they occur for one team.
# NOTE(review): assumes 'France' is one of the teams in the loaded match;
# the column name must change if match_id does.
counts.sort_values(by='France', ascending=False)

### 이벤트 성공 여부 집계

In [None]:
# A row counts as a pass when its event_type is 'Pass' or when its
# sub_event_type is a free kick, free-kick cross or corner.
# Sub-event labels are folded to lower case before matching so that a casing
# difference between the data and these literals (e.g. 'Corner' vs 'corner')
# cannot silently drop rows; Series.isin compares strings case-sensitively.
pass_sub_event_types = ['free kick', 'free kick cross', 'corner']
pass_records = match_events[
    (match_events['event_type'] == 'Pass') |
    (match_events['sub_event_type'].str.lower().isin(pass_sub_event_types))
]

# Preview the first rows of the filtered pass records.
pass_records.head()

##### (1) 반복문 기반 성공 여부 판단

In [None]:
# Walk over the pass rows one by one and remember the index label of every
# row whose tags contain 'Accurate'.
success_idx = []
for row_label in pass_records.index:
    if 'Accurate' not in pass_records.at[row_label, 'tags']:
        continue
    success_idx.append(row_label)

# Keep only the rows collected above.
acc_pass_records = pass_records.loc[success_idx]
acc_pass_records

##### (2) Series.apply 함수 기반 성공 여부 판단

In [None]:
def is_accurate(tags):
    """Return True when 'Accurate' is contained in *tags*, otherwise False.

    The check is a plain ``in`` membership test, so *tags* may be any
    container that supports it.
    """
    has_accurate_tag = 'Accurate' in tags
    return has_accurate_tag

In [None]:
# Apply is_accurate to the tags of every pass row; the result is a boolean
# Series aligned with pass_records.
pass_tags = pass_records['tags']
pass_tags.apply(is_accurate)

In [None]:
# Boolean mask marking the pass rows whose tags contain 'Accurate'.
accurate_mask = pass_records['tags'].apply(is_accurate)

# Keep only the rows selected by the mask.
acc_pass_records = pass_records[accurate_mask]
acc_pass_records

##### (3) 람다 표현식(lambda expression) 기반 성공 여부 판단

In [None]:
# Same filter written with an inline lambda instead of a named function.
accurate_mask = pass_records['tags'].apply(lambda x: 'Accurate' in x)
acc_pass_records = pass_records[accurate_mask]
acc_pass_records

### 경기 통계 정리

##### (1) 패스 성공률 산출

In [None]:
# Per-team number of all passes and of accurate passes, each named after the
# column it becomes in the combined table.
team_of_each_pass = pass_records['team_name']
team_of_each_accurate_pass = acc_pass_records['team_name']
total_pass_counts = team_of_each_pass.value_counts().rename('total_passes')
acc_pass_counts = team_of_each_accurate_pass.value_counts().rename('acc_passes')

# Place the two Series side by side, aligned on team name.
pass_counts = pd.concat([total_pass_counts, acc_pass_counts], axis=1)
pass_counts

In [None]:
# Share of accurate passes among all passes, per team (a ratio, not a percentage).
pass_counts['pass_accuracy'] = pass_counts['acc_passes'].div(pass_counts['total_passes'])
pass_counts

##### (2) 유효 슈팅 횟수 집계

In [None]:
# A row counts as a shot when its event_type is 'Shot' or when its
# sub_event_type is 'Free kick shot' or 'Penalty'.
shot_records = match_events[
    (match_events['event_type'] == 'Shot') |
    (match_events['sub_event_type'].isin(['Free kick shot', 'Penalty']))
]

# Shots whose tags contain 'Accurate' are the ones counted as on target.
acc_shot_records = shot_records[shot_records['tags'].apply(lambda x: 'Accurate' in x)]

total_shot_counts = shot_records['team_name'].value_counts().rename('total_shots')
acc_shot_counts = acc_shot_records['team_name'].value_counts().rename('shots_on_target')

# concat aligns the two Series on team name; a team that is missing from one
# of them (e.g. no shot on target at all) would get NaN there. For a count
# that means zero, so fill it in and restore the integer dtype.
shot_counts = (
    pd.concat([total_shot_counts, acc_shot_counts], axis=1)
    .fillna(0)
    .astype('int64')
)
shot_counts

##### (3) 득점 집계

- 득점 기록 필터링

In [None]:
# Rows whose tags contain 'Goal'.
has_goal_tag = match_events['tags'].apply(lambda x: 'Goal' in x)
match_events[has_goal_tag]

- 자책골 기록 필터링

In [None]:
# Rows whose tags contain 'Own goal'.
has_own_goal_tag = match_events['tags'].apply(lambda x: 'Own goal' in x)
match_events[has_own_goal_tag]

- 자책골 포함 양팀 득점 집계

In [None]:
# Distinct team names, in order of first appearance in the event table.
team_names = match_events['team_name'].unique()

# Start every team at zero goals. Building the dict from team_names avoids
# recomputing unique() and does not depend on there being exactly two teams.
goals = dict.fromkeys(team_names, 0)
goals

In [None]:
# Start from a zeroed tally so that re-running this cell does not add the
# same goals on top of the result of a previous run.
goals = dict.fromkeys(team_names, 0)

# Every row tagged 'Goal' adds one goal for the team recorded on that row.
# NOTE(review): this counts every event carrying the tag, whatever its
# event_type; confirm that only the scoring action is tagged 'Goal' in this
# data set.
goal_records = match_events[match_events['tags'].apply(lambda x: 'Goal' in x)]
for scoring_team in goal_records['team_name']:
    goals[scoring_team] += 1

# A row tagged 'Own goal' adds one goal for the other team, i.e. the first
# team name that differs from the one recorded on the row.
own_goal_records = match_events[match_events['tags'].apply(lambda x: 'Own goal' in x)]
for own_goal_team in own_goal_records['team_name']:
    opponent_name = [team for team in team_names if team != own_goal_team][0]
    goals[opponent_name] += 1

goals

##### (4) 경기 통계 정리

In [None]:
# Display the counts table (event_type rows, team_name columns) for reference.
counts

In [None]:
# Pick the 'Foul' and 'Offside' rows of the counts table and transpose, so
# that teams become the rows and the two event types become the columns.
foul_counts = counts.loc[['Foul', 'Offside']].T

# Rename the columns to the labels used in the final statistics table.
foul_counts.columns = ['fouls', 'offsides']
foul_counts

In [None]:
# Join the pass, shot and foul tables column-wise, aligned on team name.
stat_tables = [pass_counts, shot_counts, foul_counts]
match_stats = pd.concat(stat_tables, axis=1)
match_stats

In [None]:
# Turn the goals dict into a Series indexed by its keys (the team names).
pd.Series(goals)

In [None]:
# Add the goal tally as a column; the Series is aligned to match_stats by
# team name.
match_stats['goals'] = pd.Series(goals)

# Column order of the final table: goals, shots, fouls/offsides, then passes.
column_order = [
    'goals', 'total_shots', 'shots_on_target',
    'fouls', 'offsides',
    'total_passes', 'acc_passes', 'pass_accuracy',
]
match_stats = match_stats[column_order]
match_stats