## Metrica Data Processing

In [1]:
import os

wd = os.path.normpath(os.getcwd() + '/..')
os.chdir(wd)
os.getcwd()

'/workspace/ballradar'

In [2]:
%load_ext autoreload
%autoreload 2

import json
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd
import torch
from matplotlib import animation
from tqdm import tqdm

from dataset import SoccerDataset
from datatools.metrica_helper import MetricaHelper
from datatools.trace_helper import TraceHelper
from models import load_model

### Parsing Metrica Sample Game 3 Data

In [2]:
tree = ET.parse("data/metrica_traces/Sample_Game_3/Sample_Game_3_metadata.xml")
root = tree.getroot()
root[0].tag, root[1].tag

('Metadata', 'DataFormatSpecifications')

In [3]:
player_records = []

for player in root.iter("Player"):
    team_code = player.get("teamId")[-1]
    squad_num = int(player.findtext("ShirtNumber"))
    player_code = f"{team_code}{squad_num:02d}"

    for param in player.iter("ProviderParameter"):
        if param.findtext("Name") == "position_type":
            position = param.findtext("Value")

    player_records.append([squad_num, player_code, position])

player_records = pd.DataFrame(player_records, columns=["squad_num", "code", "position"]).set_index("squad_num")
player_records

Unnamed: 0_level_0,code,position
squad_num,Unnamed: 1_level_1,Unnamed: 2_level_1
11,A11,Goalkeeper
1,A01,Right Back
2,A02,Right Center Back (4)
3,A03,Left Center Back (4)
4,A04,Left Back
5,A05,"Attacking Right Midfielder (4,5)"
6,A06,Defensive Right Center Midfielder (5)
7,A07,Attacking Center Midfielder
8,A08,Defensive Left Center Midfielder (5)
9,A09,"Attacking Left Midfielder (4,5)"


In [4]:
phase_records = []

for i, data_spec in enumerate(root[1]):
    start_frame = int(data_spec.get("startFrame"))
    end_frame = int(data_spec.get("endFrame"))
    session = 1 if i == 0 else 2

    player_codes = []
    gk_codes = []

    for player_xy in data_spec[1]:
        squad_num = int(player_xy[0].get("playerChannelId")[6:-2])
        player_code = player_records.at[squad_num, "code"]
        player_codes.append(player_code)

        position = player_records.at[squad_num, "position"]
        if position == "Goalkeeper":
            gk_codes.append(player_code)
    
    player_codes = player_codes[10:11] + player_codes[:10] + player_codes[-1:] + player_codes[11:-1]
    phase_records.append([i + 1, session, start_frame, end_frame, player_codes, gk_codes])

header = ["phase", "session", "start_frame", "end_frame", "player_codes", "gk_codes"]
phase_records = pd.DataFrame(phase_records, columns=header).set_index("phase")
phase_records

Unnamed: 0_level_0,session,start_frame,end_frame,player_codes,gk_codes
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,1,69661,"[A11, A01, A02, A03, A04, A05, A06, A07, A08, ...","[A11, B28]"
2,2,69662,89697,"[A11, A01, A02, A12, A04, A05, A06, A07, A08, ...","[A11, B28]"
3,2,89698,93452,"[A11, A01, A02, A12, A04, A05, A06, A07, A08, ...","[A11, B28]"
4,2,93453,93835,"[A11, A01, A02, A12, A04, A05, A06, A07, A08, ...","[A11, B28]"
5,2,93836,94657,"[A11, A01, A02, A12, A04, A05, A06, A07, A08, ...","[A11, B28]"
6,2,94658,98472,"[A11, A01, A02, A12, A04, A05, A06, A07, A08, ...","[A11, B28]"
7,2,98473,102811,"[A11, A01, A02, A12, A04, A15, A06, A07, A08, ...","[A11, B28]"
8,2,102812,110298,"[A11, A01, A02, A12, A04, A15, A06, A07, A16, ...","[A11, B28]"
9,2,110299,120212,"[A11, A01, A02, A12, A04, A15, A06, A07, A16, ...",[A11]
10,2,120213,129831,"[A11, A01, A02, A12, A04, A15, A06, A07, A16, ...",[A11]


In [16]:
time_cols = ["frame", "session", "time"]
xy_cols = np.array([[f"{p}_x", f"{p}_y"] for p in player_records["code"].tolist() + ["ball"]]).flatten().tolist()

traces_txt = pd.read_csv("data/metrica_traces/Sample_Game_3/Sample_Game_3_tracking.txt", sep=";", header=None)
traces = pd.DataFrame(index=traces_txt.index, columns=time_cols + xy_cols)

for phase in tqdm(phase_records.index):
    i0 = phase_records.at[phase, "start_frame"] - 1
    i1 = phase_records.at[phase, "end_frame"] - 1
    player_codes = phase_records.at[phase, "player_codes"]

    phase_traces = traces_txt.loc[i0:i1]
    phase_traces.columns = player_codes
    leftmost = phase_traces[player_codes[0]].str.split(":", expand=True)
    leftmost.columns = ["frame", player_codes[0]]
    rightmost = phase_traces[player_codes[-1]].str.split(":", expand=True)
    rightmost.columns = [player_codes[-1], "ball"]
    phase_traces = pd.concat([leftmost, phase_traces[player_codes[1:-1]], rightmost], axis=1)

    traces.loc[phase_traces.index, "frame"] = phase_traces["frame"].astype(int)
    traces.loc[phase_traces.index, "session"] = phase_records.at[phase, "session"]

    for p in phase_traces.columns[1:]:
        xy = phase_traces[p].str.split(",", expand=True).astype(float).values
        traces.loc[phase_traces.index, [f"{p}_x", f"{p}_y"]] = xy

traces["time"] = (traces["frame"] * 0.04).astype(float).round(2)
traces

100%|██████████| 11/11 [00:09<00:00,  1.13it/s]


Unnamed: 0,frame,session,time,A11_x,A11_y,A01_x,A01_y,A02_x,A02_y,A03_x,...,B32_x,B32_y,B33_x,B33_y,B34_x,B34_y,B35_x,B35_y,ball_x,ball_y
0,1,1,0.04,0.84722,0.52855,0.65268,0.24792,0.66525,0.46562,0.68103,...,,,,,,,,,,
1,2,1,0.08,0.84722,0.52855,0.65231,0.24513,0.66482,0.46548,0.68095,...,,,,,,,,,,
2,3,1,0.12,0.84722,0.52855,0.65197,0.24387,0.66467,0.46537,0.68078,...,,,,,,,,,,
3,4,1,0.16,0.84722,0.52855,0.65166,0.24288,0.6646,0.46488,0.68063,...,,,,,,,,,,
4,5,1,0.20,0.84722,0.52855,0.65141,0.24251,0.66452,0.46469,0.68052,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143756,143757,2,5750.28,0.11993,0.51783,0.47808,0.45408,,,,...,0.80792,0.27106,0.73396,0.8533,0.90315,0.5375,0.50199,0.55081,,
143757,143758,2,5750.32,0.11993,0.51783,0.47786,0.45521,,,,...,0.80712,0.27184,0.73251,0.85289,0.90301,0.53788,0.50164,0.55178,,
143758,143759,2,5750.36,0.11993,0.51783,0.47743,0.45709,,,,...,0.80582,0.27242,0.73086,0.85218,0.90264,0.53799,0.50099,0.55329,,
143759,143760,2,5750.40,0.11993,0.51783,0.47669,0.45947,,,,...,0.80444,0.2726,0.72892,0.85192,0.90204,0.53782,0.50003,0.55502,,


In [53]:
traces.to_csv(f"data/metrica_traces/Sample_Game_3/Sample_Game_3_RawTrackingData.csv", index=False)

### Processing Metrica Data

In [2]:
match_id = 2

event_file = f"data/metrica_traces/Sample_Game_{match_id}/Sample_Game_{match_id}_RawEventsData.csv"
events = pd.read_csv(event_file)

if match_id <= 2:
    team1_file = f"data/metrica_traces/Sample_Game_{match_id}/Sample_Game_{match_id}_RawTrackingData_Home_Team.csv"
    team2_file = f"data/metrica_traces/Sample_Game_{match_id}/Sample_Game_{match_id}_RawTrackingData_Away_Team.csv"
    team1_traces = pd.read_csv(team1_file, header=[0, 1, 2])
    team2_traces = pd.read_csv(team2_file, header=[0, 1, 2])
    helper = MetricaHelper(team1_traces, team2_traces, events=events)
else:
    trace_file = f"data/metrica_traces/Sample_Game_{match_id}/Sample_Game_{match_id}_RawTrackingData.csv"
    traces = pd.read_csv(trace_file, index_col=0)
    helper = MetricaHelper(traces_from_txt=traces, events=events)

helper.traces

Unnamed: 0_level_0,session,time,A11_x,A11_y,A01_x,A01_y,A02_x,A02_y,A03_x,A03_y,...,B22_x,B22_y,B23_x,B23_y,B24_x,B24_y,B26_x,B26_y,ball_x,ball_y
frame,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,0.04,101.8170,36.29736,69.99696,20.59560,73.17216,30.81816,75.47256,40.03632,...,53.88012,-1.62936,54.19116,36.1044,53.47836,26.08848,,,,
2,1,0.08,101.8170,36.29736,69.99696,20.59560,73.17216,30.81816,75.47256,40.03632,...,53.88012,-1.62936,54.19116,36.1044,53.47836,26.08848,,,,
3,1,0.12,101.8170,36.29736,69.99696,20.59560,73.17216,30.81816,75.47256,40.03632,...,53.88012,-1.62936,54.19116,36.1044,53.47836,26.08848,,,,
4,1,0.16,101.8170,36.29736,69.99696,20.59560,73.17216,30.81816,75.47256,40.03632,...,53.88012,-1.62936,54.19116,36.1044,53.47836,26.08848,,,,
5,1,0.20,101.8170,36.29736,69.99696,20.59560,73.17216,30.81816,75.47256,40.03632,...,53.88012,-1.62936,54.19116,36.1044,53.47836,26.08848,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141152,2,5646.08,6.7878,36.89496,41.57892,11.05632,24.36264,29.77560,29.31336,23.19048,...,,,29.00340,44.5284,39.70080,8.99712,48.56328,29.93184,,
141153,2,5646.12,6.7878,36.89496,41.57892,11.05632,24.36264,29.77560,29.31336,23.19048,...,,,29.00340,44.5284,39.70080,8.99712,48.58164,29.91600,,
141154,2,5646.16,6.7878,36.89496,41.57892,11.05632,24.36264,29.77560,29.31336,23.19048,...,,,29.00340,44.5284,39.70080,8.99712,48.59676,29.86920,,
141155,2,5646.20,6.7878,36.89496,41.57892,11.05632,24.36264,29.77560,29.31336,23.19048,...,,,29.00340,44.5284,39.70080,8.99712,48.61836,29.89080,,


In [25]:
helper.generate_phase_records()
helper.phase_records

Unnamed: 0_level_0,session,start_time,end_time,player_codes
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,0.0,2717.6,"[A11, A01, A02, A03, A04, A05, A06, A07, A08, ..."
2,2,2717.7,3519.3,"[A11, A01, A02, A03, A04, A05, A06, A07, A08, ..."
3,2,3519.4,3828.0,"[A11, A01, A02, A03, A04, A05, A06, A07, A08, ..."
4,2,3828.1,4244.3,"[A11, A01, A02, A03, A05, A06, A07, A08, A09, ..."
5,2,4244.4,5029.4,"[A11, A01, A02, A03, A05, A06, A07, A09, A10, ..."
6,2,5029.5,5646.1,"[A11, A01, A02, A03, A05, A06, A07, A09, A12, ..."


In [26]:
helper.downsample_to_10fps()
helper.split_into_episodes()
helper.calc_running_features(remove_outliers=True, smoothing=True)
helper.estimate_team_poss()
helper.traces

Combining tracking and event data: 100%|██████████| 1935/1935 [00:00<00:00, 4236.33it/s]
Calculating running features: 100%|██████████| 26/26 [00:01<00:00, 22.02it/s]
Estimating player possessions: 100%|██████████| 1935/1935 [00:00<00:00, 4029.26it/s]


Unnamed: 0,frame,session,time,phase,episode,team_poss,player_poss,event_player,event_type,A11_x,...,B24_speed,B24_accel,B26_x,B26_y,B26_vx,B26_vy,B26_speed,B26_accel,ball_x,ball_y
0,1,1,0.1,1,0,B,,,,101.817000,...,0.0,-0.008270,,,,,,,54.07803,36.27549
1,2,1,0.2,1,1,B,,,,101.817000,...,0.0,-0.002514,,,,,,,54.07803,36.27549
2,3,1,0.3,1,1,B,,,,101.817000,...,0.0,0.002084,,,,,,,54.07803,36.27549
3,4,1,0.4,1,1,B,,,,101.817000,...,0.0,0.005524,,,,,,,54.07803,36.27549
4,5,1,0.5,1,1,B,,,,101.924136,...,0.0,0.007807,,,,,,,54.07803,36.27549
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56456,56457,2,5645.7,6,0,B,,,,6.787800,...,0.0,0.000000,48.245112,30.098016,0.928366,-0.665732,1.142394,-0.783774,44.23464,-2.80152
56457,56458,2,5645.8,6,0,B,,,,6.787800,...,0.0,0.000000,48.335940,30.048912,0.876206,-0.597517,1.060549,-0.838108,44.23464,-2.80152
56458,56459,2,5645.9,6,0,B,,,,6.787800,...,0.0,0.000000,48.415104,30.026664,0.811121,-0.508040,0.957091,-0.849351,44.23464,-2.80152
56459,56460,2,5646.0,6,0,B,,,,6.787800,...,0.0,0.000000,48.487032,29.992392,0.733112,-0.397300,0.833846,-0.817501,44.23464,-2.80152


In [27]:
helper.traces.to_csv(f"data/metrica_traces/match{match_id}.csv", index=False)
helper.events.to_csv(f"data/metrica_events/match{match_id}.csv", index=False)

### Visualization for Metrica Data

##### Animating Trajectories

In [15]:
traces = helper.traces[helper.traces["session"] == 1].reset_index(drop=True)[0:9000]
annot_cols = ["team_poss", "player_poss"]
anim = TraceHelper.plot_scene(traces, show_episodes=True, show_events=True, annot_cols=annot_cols, play_speed=1)

t0 = traces["time"].iloc[0]
t1 = traces["time"].iloc[-1]
t0_str = f"{int(t0 // 60):02d}.{int(t0 % 60):02d}"
t1_str = f"{int(t1 // 60):02d}.{int(t1 % 60):02d}"
path = f"animations/metrica_match{match_id}_{t0_str}-{t1_str}.mp4"

writer = animation.FFMpegWriter(fps=10)
anim.save(path, writer=writer)

##### Animating Feature Plots

In [None]:
session = 1
traces = helper.traces[helper.traces["session"] == session]
anim = TraceHelper.plot_speeds_and_accels(traces, helper.team1_players)
writer = animation.FFMpegWriter(fps=5)

smoothing = True
if smoothing:
    path = f"animations/feature_plots/metrica_match{match_id}_s{session}_smooth.mp4"
else:
    path = f"animations/feature_plots/metrica_match{match_id}_s{session}_noisy.mp4"
    
anim.save(path, writer=writer)

### Validation-Test Split of Metrica Sample Game 3 Data

In [5]:
traces = pd.read_csv("data/metrica_traces/match3.csv", header=0)
events = pd.read_csv("data/metrica_events/match3.csv", header=0)
events

Unnamed: 0,team,type,subtype,session,start_frame,start_time,end_frame,end_time,from,to,start_x,start_y,end_x,end_y,phase
0,Away,SET PIECE,KICK OFF,1,145,14.5,145,14.5,A10,,,,,,1
1,Away,PASS,PASS,1,145,14.5,151,15.1,A10,A07,0.50125,0.48725,0.49864,0.48705,1
2,Away,CARRY,CARRY,1,151,15.1,154,15.4,A07,,0.49864,0.48705,0.49700,0.48500,1
3,Away,PASS,PASS,1,154,15.4,171,17.1,A07,A08,0.49700,0.48500,0.63373,0.63449,1
4,Away,CARRY,CARRY,1,171,17.1,186,18.6,A08,,0.63373,0.63449,0.66986,0.59707,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3615,Home,PASS,PASS,2,57363,5736.3,57388,5738.8,B33,B20,0.73416,0.40874,0.71353,0.85950,11
3616,Home,PASS,PASS,2,57388,5738.8,57413,5741.3,B20,B28,0.71353,0.85950,0.88776,0.51189,11
3617,Home,CARRY,CARRY,2,57388,5738.8,57388,5738.8,B20,,0.71353,0.85950,0.71353,0.85950,11
3618,Home,CARRY,CARRY,2,57413,5741.3,57422,5742.2,B28,,0.88776,0.51189,0.89225,0.50456,11


In [4]:
traces_valid = traces[traces["session"] == 1]
traces_test = traces[traces["session"] == 2]
traces_valid.to_csv("data/metrica_traces/match3_valid.csv", index=False)
traces_test.to_csv("data/metrica_traces/match3_test.csv", index=False)

In [6]:
events_valid = events[events["session"] == 1]
events_test = events[events["session"] == 2]
events_valid = events_valid.to_csv("data/metrica_events/match3_valid.csv", index=False)
events_test = events_test.to_csv("data/metrica_events/match3_test.csv", index=False)

### Testing SoccerDataset-GK Based on Metrica Data

In [2]:
dir = "data/metrica_traces"
train_files = ["match1.csv", "match2.csv", "match3_valid.csv"]
test_files = ["match3_test.csv"]
train_paths = [f"{dir}/{f}" for f in train_files]
test_paths = [f"{dir}/{f}" for f in test_files]
train_paths, test_paths

(['data/metrica_traces/match1.csv',
  'data/metrica_traces/match2.csv',
  'data/metrica_traces/match3_train.csv'],
 ['data/metrica_traces/match3_test.csv'])

In [3]:
dataset = SoccerDataset(test_paths, target_type="gk", train=False, flip_pitch=True)
len(dataset)

100%|██████████| 1/1 [00:00<00:00,  1.11it/s]


18058

In [4]:
idx = 3
input_tensor, target_tensor = dataset[idx]
input_tensor.shape, target_tensor.shape

(torch.Size([100, 120]), torch.Size([100, 4]))

In [84]:
team1_cols = [f"A{i:02d}{s}" for i in np.arange(2, 12) for s in dataset.feature_types]
team2_cols = [f"B{i:02d}{s}" for i in np.arange(2, 12) for s in dataset.feature_types]
cols = team1_cols + team2_cols + ["A01_x", "A01_y", "B01_x", "B01_y"]

traces = pd.DataFrame(np.concatenate([input_tensor, target_tensor], axis=1), columns=cols)
traces["time"] = (np.arange(dataset.ws) + 1) * 0.1
traces.head()

Unnamed: 0,A02_x,A02_y,A02_vx,A02_vy,A02_speed,A02_accel,A03_x,A03_y,A03_vx,A03_vy,...,B11_y,B11_vx,B11_vy,B11_speed,B11_accel,A01_x,A01_y,B01_x,B01_y,time
0,34.246262,53.158463,0.100086,0.53176,0.541097,-0.607757,29.916107,40.836529,-0.240814,-0.018525,...,23.075279,1.847583,-2.813526,3.365931,-2.41673,10.95336,36.316078,102.429901,37.893242,0.1
1,34.252201,53.201305,0.08996,0.467949,0.476518,-0.552715,29.893751,40.833286,-0.221777,-0.052731,...,22.809383,1.673692,-2.633325,3.1202,-2.531195,10.95336,36.316078,102.4272,37.898422,0.2
2,34.254467,53.237953,0.086634,0.411714,0.42073,-0.489186,29.869452,40.813919,-0.19707,-0.077733,...,22.566816,1.485687,-2.444541,2.860603,-2.568901,10.95336,36.316078,102.4245,37.90332,0.3
3,34.259762,53.275177,0.092324,0.364347,0.375862,-0.451224,29.845367,40.794552,-0.165385,-0.08639,...,22.343256,1.288811,-2.258538,2.60039,-2.536515,10.95336,36.316078,102.421799,37.908718,0.4
4,34.270237,53.309734,0.102419,0.320903,0.336851,-0.491181,29.829708,40.786415,-0.130089,-0.082978,...,22.145687,1.089297,-2.076845,2.345177,-2.461154,10.95336,36.316078,102.41964,37.91412,0.5


In [85]:
anim = TraceHelper.plot_scene(traces)
writer = animation.FFMpegWriter(fps=10)
path = f"animations/sample_metrica_gk_{idx}.mp4"
anim.save(path, writer=writer)

### Testing SoccerDataset-GK-Macro Based on Metrica Data

In [3]:
dir = "data/metrica_traces"
train_files = ["match1.csv", "match2.csv", "match3_train.csv"]
test_files = ["match3_test.csv"]
train_paths = [f"{dir}/{f}" for f in train_files]
test_paths = [f"{dir}/{f}" for f in test_files]
train_paths, test_paths

(['data/metrica_traces/match1.csv',
  'data/metrica_traces/match2.csv',
  'data/metrica_traces/match3_train.csv'],
 ['data/metrica_traces/match3_test.csv'])

In [17]:
dataset = SoccerDataset(test_paths, target_type="gk", macro_type="team_poss", train=False, flip_pitch=True)
dataset.input_data.shape, dataset.macro_data.shape, dataset.target_data.shape

100%|██████████| 1/1 [00:00<00:00,  1.19it/s]


(torch.Size([18058, 100, 120]),
 torch.Size([18058, 100]),
 torch.Size([18058, 100, 4]))

In [18]:
idx = 3
input_tensor, macro_tensor, target_tensor = dataset[idx]
input_tensor.shape, macro_tensor.shape, target_tensor.shape

(torch.Size([100, 120]), torch.Size([100]), torch.Size([100, 4]))

### Processing GPS-Event Data from a Single Match

##### (1) Model loading

In [2]:
trial = 207
with open(f"saved/{trial:03d}/params.json", "r") as f:
    params = json.load(f)

device = "cuda:0"
model = load_model(params["model"], params).to(device)
state_dict = torch.load(
    f"saved/{trial}/model/{params['model']}_state_dict_best.pt",
    map_location=lambda storage, _: storage,
)
model.load_state_dict(state_dict)

<All keys matched successfully>

##### (2) Data preprocessing

In [9]:
match_id = "22213-22216"
traces = pd.read_csv(f"data/gps_event_traces/{match_id}.csv", header=0, encoding="utf-8-sig")
helper = GPSEventHelper(traces)
helper.split_into_episodes()
helper.calc_running_features(remove_outliers=True, smoothing=True)

Preprocessing: 100%|██████████| 27/27 [00:01<00:00, 23.32it/s]


##### (3) GK prediction and feature calculation

In [10]:
team1_gk, team2_gk = helper.predict_gk_traces(model)
helper.calc_single_player_running_features(team1_gk)
helper.calc_single_player_running_features(team2_gk)
helper.traces

Phase (1, 1) : 100%|██████████| 17/17 [00:02<00:00,  6.46it/s]
Phase (1, 2) : 100%|██████████| 30/30 [00:04<00:00,  6.15it/s]
Phase (2, 3) : 100%|██████████| 25/25 [00:04<00:00,  6.08it/s]
Phase (3, 3) : 100%|██████████| 2/2 [00:00<00:00,  2.72it/s]


Unnamed: 0,frame,session,time,phase,episode,team_poss,event_player,event_types,A02_x,A02_y,...,B99_speed,B99_accel,B01_x,B01_y,B01_vx,B01_vy,B01_speed,B01_accel,ball_x,ball_y
0,1,1,0.1,"(1, 1)",1,A,,,29.815190,-2.084963,...,0.737121,-1.582206,96.422394,34.983829,0.0,0.0,0.0,-0.020367,51.830496,35.253859
1,2,1,0.2,"(1, 1)",1,A,,,29.825521,-2.074379,...,0.576186,-0.929833,96.422394,34.983829,0.0,0.0,0.0,-0.011090,51.830496,35.253859
2,3,1,0.3,"(1, 1)",1,A,,,29.835852,-2.063795,...,0.468255,-0.385885,96.422394,34.983829,0.0,0.0,0.0,0.005711,51.830496,35.253859
3,4,1,0.4,"(1, 1)",1,A,,,29.866845,-2.042628,...,0.426626,0.049638,96.422394,34.983829,0.0,0.0,0.0,0.030036,51.830496,35.253859
4,5,1,0.5,"(1, 1)",1,A,,,29.887507,-2.021461,...,0.446587,0.376738,96.422394,34.983829,0.0,0.0,0.0,0.061884,51.830496,35.253859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57430,57431,2,2891.2,"(4, -4)",90,A,,,,,...,1.785385,-1.216739,,,,,,,81.295372,29.492550
57431,57432,2,2891.3,"(4, -4)",90,A,,,,,...,1.661598,-1.104679,,,,,,,80.011514,29.874521
57432,57433,2,2891.4,"(4, -4)",90,A,,,,,...,1.535480,-0.971611,,,,,,,78.727655,30.256491
57433,57434,2,2891.5,"(4, -4)",90,A,,,,,...,1.405504,-0.817536,,,,,,,77.443797,30.638462


##### (4) GT ball reconstruction and macro-intent estimation

In [11]:
helper.estimate_team_poss()
helper.estimate_turnover_times()
helper.estimate_ball_traces()
helper.estimate_macro_intents()
helper.traces

Unnamed: 0,frame,session,time,phase,episode,team_poss,event_player,event_types,A02_x,A02_y,...,B01_x,B01_y,B01_vx,B01_vy,B01_speed,B01_accel,ball_x,ball_y,intent_x,intent_y
0,1,1,0.1,"(1, 1)",1,A,,,29.815190,-2.084963,...,96.422394,34.983829,0.0,0.0,0.0,-0.020367,51.830496,35.253859,51.830496,35.253859
1,2,1,0.2,"(1, 1)",1,A,,,29.825521,-2.074379,...,96.422394,34.983829,0.0,0.0,0.0,-0.011090,51.830496,35.253859,51.830496,35.253859
2,3,1,0.3,"(1, 1)",1,A,,,29.835852,-2.063795,...,96.422394,34.983829,0.0,0.0,0.0,0.005711,51.830496,35.253859,51.830496,35.253859
3,4,1,0.4,"(1, 1)",1,A,,,29.866845,-2.042628,...,96.422394,34.983829,0.0,0.0,0.0,0.030036,51.830496,35.253859,51.830496,35.253859
4,5,1,0.5,"(1, 1)",1,A,,,29.887507,-2.021461,...,96.422394,34.983829,0.0,0.0,0.0,0.061884,51.830496,35.253859,51.830496,35.253859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57430,57431,2,2891.2,"(4, -4)",90,A,,,,,...,,,,,,,81.295372,29.492550,76.159939,31.020432
57431,57432,2,2891.3,"(4, -4)",90,A,,,,,...,,,,,,,80.011514,29.874521,76.159939,31.020432
57432,57433,2,2891.4,"(4, -4)",90,A,,,,,...,,,,,,,78.727655,30.256491,76.159939,31.020432
57433,57434,2,2891.5,"(4, -4)",90,A,,,,,...,,,,,,,77.443797,30.638462,76.159939,31.020432


##### (5) Visualization

In [31]:
traces = helper.traces[helper.traces["session"] == 1].reset_index(drop=True)[0:300]
anim = TraceHelper.plot_scene(traces, show_episodes=True, show_events=True, fps=10, play_speed=2)

t0 = traces["time"].iloc[0]
t1 = traces["time"].iloc[-1]
t0_str = f"{int(t0 // 60):02d}.{int(t0 % 60):02d}"
t1_str = f"{int(t1 // 60):02d}.{int(t1 % 60):02d}"
path = f"animations/gps_{match_id}_{t0_str}-{t1_str}.mp4"

writer = animation.FFMpegWriter(fps=10)
anim.save(path, writer=writer)

In [9]:
session = 1
traces = helper.traces[helper.traces["session"] == session]
anim = TraceHelper.plot_speeds_and_accels(traces, helper.team1_players)

path = f"animations/feature_plots/gps_{match_id}_s{session}.mp4"
writer = animation.FFMpegWriter(fps=5)
anim.save(path, writer=writer)

### Processing GPS-Event Data in the Entire Dataset

In [None]:
trial = 206
with open(f"saved/{trial:03d}/params.json", "r") as f:
    params = json.load(f)

device = "cuda:0"
model = load_model(params["model"], params).to(device)
state_dict = torch.load(
    f"saved/{trial}/model/{params['model']}_state_dict_best.pt",
    map_location=lambda storage, _: storage,
)
model.load_state_dict(state_dict)

<All keys matched successfully>

In [None]:
dir = "data/gps_event_traces"
match_ids = [f.split(".")[0] for f in os.listdir(dir) if f.endswith(".csv")]
match_ids.sort()

if not os.path.exists("data/gps_event_traces_gk_pred"):
    os.mkdir("data/gps_event_traces_gk_pred")

for i, match_id in enumerate(match_ids):
    print()
    print(f"[{i}] {match_id}")
    traces = pd.read_csv(f"{dir}/{match_id}.csv", header=0, encoding="utf-8-sig")

    # data preprocessing
    helper = GPSEventHelper(traces)
    helper.split_into_episodes()
    helper.calc_running_features(remove_outliers=True, smoothing=True)

    # GK prediction and feature calculation
    team1_gk, team2_gk = helper.predict_gk_traces(model)
    helper.calc_single_player_running_features(team1_gk)
    helper.calc_single_player_running_features(team2_gk)

    # GT ball reconstruction
    helper.estimate_team_poss()
    helper.estimate_turnover_times()
    helper.estimate_ball_traces()

    helper.traces.to_csv(f"data/gps_event_traces_gk_pred/{match_id}.csv", index=False)

In [20]:
dir = "data/gps_event_traces_gk_pred"
match_ids = [f.split(".")[0] for f in os.listdir(dir) if f.endswith(".csv")]
match_ids.sort()

for match_id in tqdm(match_ids):
    traces = pd.read_csv(f"{dir}/{match_id}.csv", header=0, encoding="utf-8-sig")
    helper = GPSEventHelper(traces)
    helper.estimate_ball_traces()
    helper.traces.to_csv(f"data/gps_event_traces_gk_pred/{match_id}.csv", index=False)

100%|██████████| 43/43 [07:20<00:00, 10.23s/it]


### Testing SoccerDataset-Ball Based on GPS-Event Data

In [2]:
data_paths = ["data/gps_event_traces_gk_pred/13213-13216.csv"]
dataset = SoccerDataset(data_paths[-1:], target_type="ball", train=False, flip_pitch=False)
len(dataset)

100%|██████████| 1/1 [00:01<00:00,  1.30s/it]


15436

In [3]:
input_tensor, target_tensor = dataset[3000]
input_tensor.shape, target_tensor.shape

(torch.Size([100, 132]), torch.Size([100, 2]))

In [4]:
if dataset.target_type == "gk":
    team1_cols = [f"A{i:02d}{x}" for i in np.arange(2, 12) for x in dataset.feature_types]
    team2_cols = [f"B{i:02d}{x}" for i in np.arange(2, 12) for x in dataset.feature_types]
    cols = team1_cols + team2_cols + ["A01_x", "A01_y", "B01_x", "B01_y"]
else:
    team1_cols = [f"A{i:02d}{x}" for i in np.roll(np.arange(1, 12), -1) for x in dataset.feature_types]
    team2_cols = [f"B{i:02d}{x}" for i in np.roll(np.arange(1, 12), -1) for x in dataset.feature_types]
    cols = team1_cols + team2_cols + [f"{dataset.target_type}_x", f"{dataset.target_type}_y"]
    
traces = pd.DataFrame(np.concatenate([input_tensor, target_tensor], axis=1), columns=cols)
traces["time"] = (np.arange(dataset.ws) + 1) * 0.1
traces.head()

Unnamed: 0,A02_x,A02_y,A02_vx,A02_vy,A02_speed,A02_accel,A03_x,A03_y,A03_vx,A03_vy,...,B11_accel,B01_x,B01_y,B01_vx,B01_vy,B01_speed,B01_accel,ball_x,ball_y,time
0,18.988108,26.463755,0.290285,-0.949361,0.99275,0.028325,24.115307,6.202611,0.250527,-0.691177,...,-0.308376,94.16143,33.575695,0.00044,-0.139796,0.139796,0.091413,19.676529,-2.809545,0.1
1,19.018932,26.377308,0.355911,-0.93047,0.996216,0.037953,24.135859,6.12697,0.254838,-0.674049,...,-0.314114,94.160675,33.562454,0.03226,-0.141117,0.144758,0.19756,19.676529,-2.809545,0.2
2,19.060032,26.280054,0.382975,-0.919135,0.99573,-0.00709,24.166683,6.07294,0.288609,-0.643319,...,-0.300616,94.159927,33.549213,0.072392,-0.137019,0.154968,0.317425,19.676529,-2.809545,0.3
3,19.111406,26.193607,0.392556,-0.915357,0.995981,-0.020173,24.197508,6.008104,0.372677,-0.597475,...,-0.178906,94.181511,33.531952,0.12296,-0.12886,0.178113,0.365068,19.676529,-2.809545,0.4
4,19.142233,26.096354,0.367407,-0.919135,0.989847,-0.048129,24.248882,5.954075,0.484288,-0.544075,...,-0.032469,94.203491,33.519939,0.18681,-0.119021,0.221504,0.386723,19.676529,-2.809545,0.5


In [5]:
anim = TraceHelper.plot_scene(traces)
writer = animation.FFMpegWriter(fps=10)
path = f"animations/sample_{dataset.target_type}.mp4"
anim.save(path, writer=writer)

### Testing Player Possession Estimation for GPS-Event Data

In [3]:
match_id = "22213-22216"
traces = pd.read_csv(f"data/gps_event_traces_gk_pred/{match_id}.csv", header=0, encoding="utf-8-sig")
helper = GPSEventHelper(traces)
helper.estimate_ball_traces()
helper.traces

Unnamed: 0,frame,session,time,phase,episode,team_poss,event_player,event_types,A02_x,A02_y,...,B01_x,B01_y,B01_vx,B01_vy,B01_speed,B01_accel,ball_x,ball_y,intent_x,intent_y
0,1,1,0.1,"(1, 1)",1,A,,,29.815190,-2.084963,...,96.422394,34.983829,0.0,0.0,0.0,-0.020367,51.830496,35.253859,51.830496,35.253859
1,2,1,0.2,"(1, 1)",1,A,,,29.825521,-2.074379,...,96.422394,34.983829,0.0,0.0,0.0,-0.011090,51.830496,35.253859,51.830496,35.253859
2,3,1,0.3,"(1, 1)",1,A,,,29.835852,-2.063795,...,96.422394,34.983829,0.0,0.0,0.0,0.005711,51.830496,35.253859,51.830496,35.253859
3,4,1,0.4,"(1, 1)",1,A,,,29.866845,-2.042628,...,96.422394,34.983829,0.0,0.0,0.0,0.030036,51.830496,35.253859,51.830496,35.253859
4,5,1,0.5,"(1, 1)",1,A,,,29.887507,-2.021461,...,96.422394,34.983829,0.0,0.0,0.0,0.061884,51.830496,35.253859,51.830496,35.253859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57430,57431,2,2891.2,"(4, -4)",90,A,,,,,...,,,,,,,81.295372,29.492550,76.159939,31.020432
57431,57432,2,2891.3,"(4, -4)",90,A,,,,,...,,,,,,,80.011514,29.874521,76.159939,31.020432
57432,57433,2,2891.4,"(4, -4)",90,A,,,,,...,,,,,,,78.727655,30.256491,76.159939,31.020432
57433,57434,2,2891.5,"(4, -4)",90,A,,,,,...,,,,,,,77.443797,30.638462,76.159939,31.020432


In [48]:
helper.traces[helper.traces["episode"] == 6][37:]["event_player"].fillna(method="bfill")

3427      A06
3428      A06
3429      A06
3430      A06
3431      A06
        ...  
3523    OUT-T
3524    OUT-T
3525    OUT-T
3526    OUT-T
3527    OUT-T
Name: event_player, Length: 101, dtype: object

In [5]:
match_traces = helper.traces
feature_types = ["_x", "_y", "_vx", "_vy", "_speed", "_accel"]
player_cols = [c for c in match_traces.columns if c[0] in ["A", "B"] and c[3:] in feature_types]

phase = "(1, 1)"
episode = 1

phase_traces = match_traces[match_traces["phase"] == phase]

team1_gk, team2_gk = SoccerDataset.detect_goalkeepers(phase_traces, 54)
team1_code, team2_code = team1_gk[0], team2_gk[0]

input_cols = [c for c in phase_traces[player_cols].dropna(axis=1).columns]
team1_cols = [c for c in input_cols if c.startswith(team1_code)]
team2_cols = [c for c in input_cols if c.startswith(team2_code)]

In [17]:
n_features = 6
input_players = [c[:3] for c in input_cols[::n_features]] + ["OUT-B", "OUT-T", "OUT-L", "OUT-R"]
poss_dict = dict(zip(input_players, np.arange(len(input_players))))
poss_dict["GOAL-L"] = len(input_players) - 2
poss_dict["GOAL-R"] = len(input_players) - 1

episode_traces = phase_traces[phase_traces["episode"] == episode]
player_poss = episode_traces["event_player"].fillna(method="bfill").fillna(method="ffill")
player_poss = player_poss.apply(lambda x: poss_dict[x])
player_poss

0       4
1       4
2       4
3       4
4       4
       ..
502    21
503    21
504    21
505    21
506    21
Name: event_player, Length: 507, dtype: int64

In [16]:
traces = helper.traces[helper.traces["session"] == 1].reset_index(drop=True)[0:9000]
anim = TraceHelper.plot_scene(
    traces, 
    show_episodes=True,
    show_events=True, 
    annot_cols=["team_poss", "event_player"],
    play_speed=1
)

t0 = traces["time"].iloc[0]
t1 = traces["time"].iloc[-1]
t0_str = f"{int(t0 // 60):02d}.{int(t0 % 60):02d}"
t1_str = f"{int(t1 // 60):02d}.{int(t1 % 60):02d}"
path = f"animations/gps_{match_id}_{t0_str}-{t1_str}.mp4"

writer = animation.FFMpegWriter(fps=10)
anim.save(path, writer=writer)

### Testing SoccerDataset-PlayerPoss Based on GPS-Event Data

In [27]:
data_paths = ["data/gps_event_traces_gk_pred/22213-22216.csv"]
dataset = SoccerDataset(
    data_paths[-1:],
    macro_type="player_poss", 
    target_type="ball",
    train=False,
    flip_pitch=True,
)
len(dataset)

100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


14331

In [28]:
team1_cols = [f"A{i:02d}{x}" for i in np.arange(1, 12) for x in dataset.feature_types]
team2_cols = [f"B{i:02d}{x}" for i in np.arange(1, 12) for x in dataset.feature_types]
cols = team1_cols + team2_cols

idx = 5
input_tensor, macro_tensor, target_tensor = dataset[idx]
print(input_tensor.shape, macro_tensor.shape, target_tensor.shape)
    
traces = pd.DataFrame(input_tensor[:, :len(cols)], columns=cols)
traces["player_poss"] = macro_tensor + 1
traces["transition"] = target_tensor
traces["time"] = (np.arange(dataset.ws) + 1) * 0.1
traces.head()

torch.Size([100, 156]) torch.Size([100]) torch.Size([100])


Unnamed: 0,A01_x,A01_y,A01_vx,A01_vy,A01_speed,A01_accel,A02_x,A02_y,A02_vx,A02_vy,...,B10_accel,B11_x,B11_y,B11_vx,B11_vy,B11_speed,B11_accel,player_poss,transition,time
0,37.749378,36.343967,-0.485002,-0.280502,0.560275,-0.826422,51.396595,24.373953,-0.087657,0.098435,...,-0.254012,98.74485,34.414425,-0.0,0.0,0.0,0.165618,15,1,0.1
1,37.708054,36.3228,-0.415647,-0.139387,0.438396,-0.581729,51.375931,24.384537,-0.236481,0.07031,...,-0.186984,98.74485,34.414425,0.030816,-0.032642,0.04489,0.308081,15,1,0.2
2,37.687393,36.333382,-0.283921,0.041199,0.286895,-0.216265,51.33461,24.395121,-0.349423,0.053535,...,-0.069208,98.74485,34.414425,0.013313,-0.020912,0.02479,0.392909,15,1,0.3
3,37.666729,36.354549,-0.126428,0.205504,0.24128,0.258294,51.293285,24.395121,-0.438043,0.029358,...,0.097264,98.74485,34.414425,-0.0331,0.031712,0.04584,0.392743,15,1,0.4
4,37.677063,36.396885,0.034918,0.349085,0.350827,0.767155,51.241631,24.395121,-0.445027,-0.012088,...,0.212714,98.74485,34.414425,-0.088466,0.097341,0.131535,0.303785,15,0,0.5


In [21]:
match_id = "22213-22216"
match_traces = pd.read_csv(f"data/gps_event_traces_gk_pred/{match_id}.csv", header=0, encoding="utf-8-sig")
traces[["ball_x", "ball_y"]] = match_traces[["ball_x", "ball_y"]].values[idx : idx + 100]
traces

Unnamed: 0,A01_x,A01_y,A01_vx,A01_vy,A01_speed,A01_accel,A02_x,A02_y,A02_vx,A02_vy,...,B11_y,B11_vx,B11_vy,B11_speed,B11_accel,player_poss,transition,time,ball_x,ball_y
0,29.918501,-1.989710,0.230942,0.248677,0.339373,-0.126347,16.405586,38.831104,-1.035024,-0.084619,...,34.983829,0.000000,0.000000,0.000000,0.101256,4,1,0.1,51.830496,35.253859
1,29.939161,-1.968543,0.202044,0.261752,0.330660,-0.077083,16.302277,38.831104,-1.011183,-0.031085,...,34.983829,-0.011294,-0.026448,0.028758,0.181697,4,1,0.2,51.830496,35.253859
2,29.949493,-1.936793,0.118963,0.261012,0.286844,0.091133,16.209297,38.831104,-0.969281,0.027631,...,34.983829,0.002049,-0.017294,0.017415,0.259467,4,1,0.3,51.830496,35.253859
3,29.949493,-1.915625,-0.006261,0.261505,0.261580,0.337255,16.116320,38.841686,-0.920396,0.064143,...,34.983829,0.021436,0.024873,0.032836,0.316185,4,1,0.4,51.830496,35.253859
4,29.939161,-1.883875,-0.117759,0.278281,0.302171,0.536487,16.033670,38.852272,-0.869343,0.095968,...,34.983829,0.033903,0.077662,0.084740,0.273347,4,0,0.5,51.830496,35.253859
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,32.294624,-3.323240,2.838971,-0.371782,2.863211,0.186764,13.998469,43.191532,1.283304,-0.785996,...,35.263172,0.685983,-0.084833,0.691209,1.768910,8,1,9.6,14.805761,39.943890
96,32.573559,-3.354990,2.892191,-0.315040,2.909299,-0.001661,14.112110,43.106865,1.185052,-0.904661,...,35.251503,0.917478,-0.090795,0.921959,1.646537,8,1,9.7,15.216049,38.787258
97,32.862827,-3.386741,2.878224,-0.273100,2.891151,-0.343185,14.225751,42.990444,1.076685,-1.085494,...,35.234978,1.051126,-0.170425,1.064853,1.081704,8,1,9.8,15.626336,37.630625
98,33.152096,-3.407908,2.858477,-0.226967,2.867473,-0.780317,14.318729,42.863441,0.986379,-1.266327,...,35.209831,1.075821,-0.309945,1.119579,0.445929,8,1,9.9,16.036623,36.473993


In [22]:
anim = TraceHelper.plot_scene(traces, annot_cols=["player_poss", "transition"], fps=10, play_speed=1)
path = f"animations/sample_player_poss_{idx}.mp4"
writer = animation.FFMpegWriter(fps=10)
anim.save(path, writer=writer)