### 강의에서 소개된 파이썬 주요 기능
- numpy.ndarray.flatten: https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flatten.html
- matplotlib.pyplot.text: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.text.html

### 위치 추적 데이터 준비

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from src.plot_utils import draw_pitch

##### (1) 홈팀 데이터 불러오기 

In [None]:
# Id of the sample game to load; it is interpolated into the data path below.
match_id = 1
home_file = f'data_metrica/Sample_Game_{match_id}/Sample_Game_{match_id}_RawTrackingData_Home_Team.csv'
# The first three CSV rows are read together as a three-level column header.
home_traces = pd.read_csv(home_file, header=[0, 1, 2])
# Last expression of the cell: shows the raw table as the cell output.
home_traces

##### (2) 홈팀 데이터 열 이름 정리

In [None]:
# Third header level of every second column between the three leading columns
# and the two trailing ones. Presumably labels of the form 'Player<N>': the
# text after the first six characters is parsed as the player number below.
players = home_traces.columns.get_level_values(2)[3:-2:2].tolist()

# One row per player holding its x and y column names, e.g. 'H<nn>_x' / 'H<nn>_y'
# with the number zero-padded to two digits.
trace_cols = np.array([
    [f'H{int(p[6:]):02d}_x', f'H{int(p[6:]):02d}_y']
    for p in players
])
trace_cols

In [None]:
# flatten() returns a 1-D copy of the 2-D name array in row-major order,
# so each player's x and y names stay next to each other.
trace_cols.flatten()

In [None]:
# Flat, ordered list of per-player column names: x then y for each player.
trace_cols = [
    name
    for p in players
    for name in (f'H{int(p[6:]):02d}_x', f'H{int(p[6:]):02d}_y')
]
# Replace the three-level header with flat names; the last two columns are the ball.
home_traces.columns = ['period', 'frame', 'time', *trace_cols, 'ball_x', 'ball_y']

# Index by frame and store every column as float, then restore period as int.
home_traces = home_traces.set_index('frame').astype(float)
home_traces = home_traces.astype({'period': int})
home_traces

##### (3) 원정팀 데이터 불러오기 및 열 이름 정리

In [None]:
# Load the away-team tracking file; its first three rows form a three-level header.
away_file = f'data_metrica/Sample_Game_{match_id}/Sample_Game_{match_id}_RawTrackingData_Away_Team.csv'
away_traces = pd.read_csv(away_file, header=[0, 1, 2])

# Third header level of every second column between the three leading columns
# and the two trailing (ball) columns; the text after the first six characters
# is parsed as the player number.
players = away_traces.columns.get_level_values(2)[3:-2:2].tolist()
trace_cols = [
    name
    for p in players
    for name in (f'A{int(p[6:]):02d}_x', f'A{int(p[6:]):02d}_y')
]
away_traces.columns = ['period', 'frame', 'time', *trace_cols, 'ball_x', 'ball_y']

# Index by frame and store every column as float, then restore period as int.
away_traces = away_traces.set_index('frame').astype(float)
away_traces = away_traces.astype({'period': int})
away_traces

##### (4) 양팀 데이터 결합

In [None]:
# Column layout of the combined table: period, time and the home players come
# from the home table; the away players and the ball come from the away table
# (the ball columns are the last two columns of each table).
cols = home_traces.columns[:-2].tolist() + away_traces.columns[2:].tolist()

# Align the two tables explicitly on their shared 'frame' index. A key-less
# merge would instead match rows on every common column, including float
# time/ball values that may be NaN, and would discard the frame index.
traces = home_traces.iloc[:, :-2].join(away_traces.iloc[:, 2:])[cols]
traces.index = traces.index.astype(int)

x_cols = [c for c in traces.columns if c.endswith('_x')]
y_cols = [c for c in traces.columns if c.endswith('_y')]

# Mirror every coordinate of period 2 (value -> 1 - value), presumably so both
# halves share one playing direction; this relies on the raw values being
# normalised to [0, 1] -- TODO confirm against the data description.
second_half = traces['period'] == 2
traces.loc[second_half, x_cols + y_cols] = 1 - traces.loc[second_half, x_cols + y_cols]

# Scale the coordinates by 104 (x) and 68 (y), presumably the pitch size in
# metres used by draw_pitch -- TODO confirm.
traces[x_cols] *= 104
traces[y_cols] *= 68

traces

### 이벤트 데이터 결합

##### (1) 이벤트 데이터 불러오기

In [None]:
# Event log of the same game selected by match_id; the first CSV row is the header.
event_file = f'data_metrica/Sample_Game_{match_id}/Sample_Game_{match_id}_RawEventsData.csv'
events = pd.read_csv(event_file, header=0)
# Last expression of the cell: shows the event table as the cell output.
events

##### (2) Subtype NaN 값 채우기

In [None]:
# Distinct (Type, Subtype) combinations, sorted and re-indexed from 0; only the last 40 rows are shown.
events[['Type', 'Subtype']].drop_duplicates().sort_values(['Type', 'Subtype'], ignore_index=True)[-40:]

In [None]:
# Events without a Subtype fall back to their Type, so every row carries a Subtype label.
events['Subtype'] = events['Subtype'].fillna(events['Type'])

##### (3) 위치 추적 및 이벤트 데이터 결합

In [None]:
# Tag every tracking frame covered by an event with that event's player, type
# and subtype. Events are applied in table order, so a later event overwrites
# an earlier one on any frame they share.
event_fields = zip(
    events['Start Frame'],
    events['End Frame'],
    events['From'],
    events['Type'],
    events['Subtype'],
)
for start_frame, end_frame, player, event_type, event_subtype in event_fields:
    # .loc slices by label and includes both ends, so stopping at end_frame - 1
    # keeps the event's own end frame out of its span.
    covered = slice(start_frame, end_frame - 1)
    traces.loc[covered, 'event_player'] = player
    traces.loc[covered, 'event_type'] = event_type
    traces.loc[covered, 'event_subtype'] = event_subtype

traces

##### (4) 결합 데이터 시점별 시각화

In [None]:
# Draw one frame of the combined data: all players, the ball, the match clock
# and the event (if any) attached to that frame.
frame = 1000
data = traces.loc[frame]

fig, ax = draw_pitch(pitch='white', line='black')

# Columns starting with 'H' are drawn in red, those starting with 'A' in blue.
for team, color in (('H', 'r'), ('A', 'b')):
    x_cols = [c for c in data.keys() if c.startswith(team) and c.endswith('_x')]
    y_cols = [c for c in data.keys() if c.startswith(team) and c.endswith('_y')]
    ax.scatter(data[x_cols], data[y_cols], s=100, c=color, alpha=0.7)

    for x, y in zip(x_cols, y_cols):
        # No label for a player whose position is missing in this frame.
        if np.isnan(data[x]) or np.isnan(data[y]):
            continue
        # Characters 1-2 of the column name are the two-digit player number.
        ax.text(data[x] + 0.5, data[y] + 0.5, int(x[1:3]), fontsize=13, color=color)

ax.scatter(data['ball_x'], data['ball_y'], s=80, color='w', edgecolors='k')

# Clock as MM:SS.ss, built from the 'time' value split into whole minutes and remainder.
minutes, seconds = divmod(data['time'], 60)
time_text = f"{int(minutes):02d}:{seconds:05.2f}"
# Event caption stays empty when the frame has no event attached.
event_text = '' if pd.isnull(data['event_subtype']) else f"{data['event_subtype']} by {data['event_player']}"

# Clock right-aligned and caption left-aligned around x = 52, near the top edge.
ax.text(51, 67, time_text, fontsize=15, ha='right', va='top')
ax.text(53, 67, event_text, fontsize=15, ha='left', va='top')

plt.show()

##### (5) 결합 데이터 저장

In [None]:
# Write the combined table into the folder of the game selected by match_id,
# like every other path in this notebook, so that loading a different game
# does not store its result under Sample_Game_1.
traces.to_csv(f'data_metrica/Sample_Game_{match_id}/Sample_Game_{match_id}_IntegratedData.csv')