### 데이터 준비

In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from src.plot_utils import draw_pitch

##### (1) 가공 데이터 불러오기

In [None]:
# Select the match and load its integrated tracking table.
# The first CSV column becomes the DataFrame index.
match_id = 1
game_name = f'Sample_Game_{match_id}'
file = f'data_metrica/{game_name}/{game_name}_IntegratedData.csv'
traces = pd.read_csv(file, header=0, index_col=0)
traces

##### (2) 가공 데이터 형태 변환

In [None]:
# Convert the wide table (one column group per player) into a long table
# (one row per player per frame).

# Player prefixes: every '*_x' column that does not belong to the ball.
players = [c[:-2] for c in traces.columns if c.endswith('_x') and not c.startswith('ball')]
player_cols = ['period', 'time', 'x', 'y', 'vx', 'vy', 'speed']
trace_list = []

for p in players:
    # 'period' and 'time' are shared; the remaining columns carry the player prefix.
    source_cols = ['period', 'time'] + [f'{p}_{suffix}' for suffix in player_cols[2:]]
    player_trace = traces[source_cols].copy()
    player_trace.columns = player_cols
    # Prefix starting with 'H' -> Home, anything else -> Away;
    # the characters after the first one are parsed as the numeric player id.
    player_trace['team'] = 'Home' if p[0] == 'H' else 'Away'
    player_trace['player_id'] = int(p[1:])
    trace_list.append(player_trace)

# Stack all players, turn the index into a regular column, and put the
# identifying columns first. NOTE(review): assumes the index is named 'frame'.
ordered_cols = ['team', 'player_id', 'frame'] + player_cols
traces = pd.concat(trace_list).reset_index()
traces = traces[ordered_cols]
traces

In [None]:
# Per-row distance is speed multiplied by 0.04; it must be computed BEFORE the
# speed column is rescaled on the next line.
# NOTE(review): presumably speed is in m/s and frames are 0.04 s apart (25 Hz),
# which would make 'distance' metres and the rescaled 'speed' km/h - confirm.
traces['distance'] = traces['speed'] * 0.04
traces['speed'] = traces['speed'] * 3.6

# Derive the output path from match_id (as the input path does) so that
# processing another match never overwrites the reshaped file of game 1.
game_name = f'Sample_Game_{match_id}'
traces.to_csv(f'data_metrica/{game_name}/{game_name}_IntegratedData_Reshaped.csv')

# Preview an arbitrary slice of rows.
traces[34700:34720]

### 활동량 지표 집계

##### (1) 뛴 거리

In [None]:
# Total distance per player: sum of the per-row 'distance' values
# (missing values are skipped by the sum).
distances = traces['distance'].groupby(traces['player_id']).sum()
distances

##### (2) 출전 시간

In [None]:
# Playing time per player: number of rows with a recorded x position,
# multiplied by 0.04 per row. Players without any recorded position are absent.
tracked = traces.dropna(subset=['x'])
frame_counts = tracked.groupby('player_id')['x'].count()
durations = (frame_counts * 0.04).to_frame('duration')
durations

##### (3) 1분당 뛴 거리

In [None]:
# Put duration and distance side by side (aligned on player_id) and derive
# the distance covered per 60 units of duration.
stats = pd.concat([durations, distances], axis=1)
stats['dist_1min'] = stats['distance'].div(stats['duration']).mul(60)
stats

##### (4) 속도 구간별 뛴 거리

In [None]:
# Edges of the speed zones. Intervals are left-closed (right=False), so a
# speed equal to an edge belongs to the zone that starts there, and speeds at
# or above the last edge (or missing) are assigned to no zone.
speed_bins = [0, 7, 15, 20, 25, 50]

# Zone labels 1..N are derived from the edges so that editing speed_bins can
# never leave the label count out of sync (pd.cut raises on a mismatch).
zone_labels = np.arange(1, len(speed_bins))
speed_cats = pd.cut(traces['speed'], speed_bins, right=False, labels=zone_labels)

# Sum of distance per player (rows) and speed zone (columns).
# observed=False is passed explicitly: the grouper is categorical and the
# implicit pandas default for it is deprecated, so this pins the behaviour
# the notebook was written against.
distances_by_speed = traces.pivot_table(
    'distance', index='player_id', columns=speed_cats, aggfunc='sum', observed=False
)
distances_by_speed

##### (5) 추가 지표 산출 및 연결

In [None]:
# Rename the zone columns to 'zone<N>_dist' and append them to the summary.
# NOTE: this renames distances_by_speed in place, so the cell is meant to be
# run once per fresh distances_by_speed.
distances_by_speed.columns = [f'zone{i}_dist' for i in distances_by_speed.columns]
stats = pd.concat([stats, distances_by_speed], axis=1)

# Further per-player aggregates: (summary column, source column, aggregation).
grouped = traces.groupby('player_id')
summary_specs = [
    ('max_speed', 'speed', 'max'),
    ('mean_x', 'x', 'mean'),
    ('mean_y', 'y', 'mean'),
    ('team', 'team', 'first'),
]
for stat_col, trace_col, agg_name in summary_specs:
    stats[stat_col] = grouped[trace_col].agg(agg_name)

# Index by (team, player_id) and round numeric values to two decimals.
stats = stats.reset_index().set_index(['team', 'player_id']).round(2)
stats

### 활동량 지표 시각화

##### (1) 선수별 지표값 막대그래프 시각화

In [None]:
# Bar chart of one summary metric per player, coloured by team.
col_name = 'distance'

plt.figure(figsize=(15, 8))
plt.rcParams.update({'font.size': 15})

team_colors = {'Home': 'r', 'Away': 'b'}
for team, color in team_colors.items():
    # Selecting the first index level leaves a frame indexed by player_id.
    team_stats = stats.loc[team]
    plt.bar(team_stats.index, team_stats[col_name], color=color, label=team)

plt.xlabel('player_id')
plt.ylabel(col_name)
# One tick per player so every bar is labelled with its id.
plt.xticks(stats.index.get_level_values('player_id'))
plt.grid(axis='y', color='k', linestyle='--')
plt.legend()
plt.show()

##### (2) 속도 구간별 뛴 거리 막대그래프 시각화

In [None]:
# Show the 'jet' colormap as the cell output (the zone colours in the next
# cell are sampled from it).
plt.cm.jet

In [None]:
# Stacked bar chart: distance per player, split by speed zone.
plt.figure(figsize=(15, 8))
plt.title('Distance by Speed Zone')

player_ids = stats.reset_index()['player_id']
n_zones = len(distances_by_speed.columns)
# One colour per zone, sampled from 0.9 down to 0.1 of the 'jet' colormap, so
# the first bar drawn (the last zone column) gets the 0.9 end.
colors = plt.cm.jet(np.linspace(0.9, 0.1, n_zones))

# Zones are drawn from the last column to the first, each on top of the
# running total of the ones already drawn.
bottom = np.zeros(len(stats))
for i, zone_dist in enumerate(distances_by_speed.columns[::-1]):
    # A missing value means no distance in that zone; treating it as 0 keeps a
    # single NaN from propagating into `bottom` and hiding every bar stacked
    # above it for that player.
    heights = stats[zone_dist].fillna(0).to_numpy()
    # The zone number is derived from n_zones rather than a hard-coded 5.
    plt.bar(player_ids, heights, bottom=bottom, color=colors[i], label=f'Zone {n_zones - i}')
    bottom = bottom + heights

plt.grid(axis='y', color='k', linestyle='--')
# NOTE(review): presumably marks the boundary between Home and Away player
# ids - confirm that it falls between ids 14 and 15 for this match.
plt.axvline(14.5, color='k', linestyle='--')

plt.xticks(stats.reset_index()['player_id'])
plt.ylim(0, 12000)
plt.xlabel('player_id')
plt.ylabel('distance')

# Reverse the legend entries so they read Zone 1 ... Zone N.
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1])

plt.show()

##### (3) 선수별 평균 위치 및 지표값 산점도 시각화

In [None]:
# Scatter plot of each player's mean position on the pitch, with the marker
# area scaled by the chosen metric.
col_name = 'dist_1min'

min_size = 600
max_size = 1200

# The metric is mapped linearly so that the 9th-smallest value gets min_size
# and the largest gets max_size; smaller values fall below min_size.
# NOTE(review): the reference index 8 is a magic number (and needs at least
# nine players) - confirm the intent.
values_sorted = stats[col_name].sort_values()
min_values = values_sorted.iloc[8]
max_values = values_sorted.iloc[-1]
normalized = (stats[col_name] - min_values) / (max_values - min_values)
sizes = min_size + normalized * (max_size - min_size)

draw_pitch('white', 'black', size_x=15, size_y=10)

team_colors = {'Home': 'r', 'Away': 'b'}
for team, color in team_colors.items():
    team_stats = stats.loc[team]
    x, y = team_stats['mean_x'], team_stats['mean_y']
    # Negative marker areas are clipped to zero.
    team_sizes = sizes[team].clip(lower=0)
    plt.scatter(x, y, c=color, s=team_sizes)
    # Write each player's id at the centre of its marker.
    for p in team_stats.index:
        plt.text(x[p], y[p], p, color='w', ha='center', va='center', fontsize=15)

plt.show()