In [1]:
import os
# Make the project root the working directory so that the `src.*` import and the relative
# `data/...` paths used by the cells below resolve.
# The move to the parent folder is guarded: an unconditional chdir('..') climbs one more
# level every time the cell is re-run. The presence of `src/` is used as the marker for
# "already at the root" — this assumes the notebook's own folder has no `src/`
# subdirectory; pick another marker if it does.
wd = os.getcwd()
if not os.path.isdir(os.path.join(wd, 'src')):
    wd = os.path.normpath(wd + '/..')
    os.chdir(wd)
os.getcwd()

'/home/hyunsung/Workspace/footballcpd'

In [7]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from tqdm import tqdm
from datetime import datetime, timedelta
from joblib import Parallel, delayed
from src.myconstants import *

# Widen the pandas display limits so the record tables shown below stay readable.
display_options = {
    'display.width': 250,
    'display.max_rows': 100,
    'display.max_columns': 20,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Merging formation records

In [3]:
# Every entry of data/form_avg is read as a pickled DataFrame, in lexicographic
# file-name order, and the frames are stacked into a single table.
form_periods_files = sorted(os.listdir('data/form_avg'))
form_periods_list = [pd.read_pickle(f'data/form_avg/{file}') for file in form_periods_files]

# Renumber the rows from 0 and persist the merged table for the cells below.
form_periods = pd.concat(form_periods_list, ignore_index=True)
form_periods.to_pickle('data/form_periods.pkl')
form_periods

Unnamed: 0,activity_id,session,form_period,start_dt,end_dt,duration,coords,edge_mat
0,12864,1,1,2020-01-01 14:00:00,2020-01-01 14:47:00,2820.0,"[[-719.0, 1625.0], [-1342.0, 660.0], [948.0, 1...","[[0.0, 0.977, 0.955, 0.2, 0.023, 0.828, 0.257,..."
1,12868,1,1,2020-01-01 14:00:00,2020-01-01 14:47:00,2820.0,"[[-833.0, -1647.0], [14.0, 181.0], [-369.0, -1...","[[0.0, 0.338, 0.684, 0.994, 0.165, 0.042, 0.15..."
2,12868,2,2,2020-01-01 15:02:00,2020-01-01 15:50:00,2880.0,"[[-933.0, -1939.0], [-106.0, 618.0], [-231.0, ...","[[0.0, 0.164, 0.86, 0.993, 0.131, 0.036, 0.237..."
3,12870,1,1,2020-01-01 13:30:00,2020-01-01 14:19:00,2940.0,"[[-1408.0, 43.0], [460.0, -1087.0], [-285.0, 5...","[[0.0, 0.22, 0.871, 0.098, 0.367, 0.962, 0.328..."
4,12870,2,2,2020-01-01 14:33:00,2020-01-01 15:23:00,3000.0,"[[-1282.0, -147.0], [290.0, -888.0], [-209.0, ...","[[0.0, 0.416, 0.886, 0.054, 0.192, 0.998, 0.27..."
...,...,...,...,...,...,...,...,...
861,9064,2,3,2020-01-01 16:32:00,2020-01-01 16:51:00,1140.0,"[[-1589.0, 486.0], [-1684.0, -710.0], [-34.0, ...","[[0.0, 0.984, 0.515, 0.04, 0.009, 0.025, 0.813..."
862,9281,1,1,2020-01-01 15:00:00,2020-01-01 15:47:00,2820.0,"[[-1478.0, 113.0], [-1176.0, -1051.0], [1666.0...","[[0.0, 0.982, 0.015, 0.022, 0.016, 0.191, 0.98..."
863,9281,2,2,2020-01-01 16:03:00,2020-01-01 16:51:00,2880.0,"[[-1270.0, -89.0], [-1128.0, -1051.0], [1465.0...","[[0.0, 0.94, 0.052, 0.025, 0.047, 0.184, 0.974..."
864,9285,1,1,2020-01-01 14:01:00,2020-01-01 14:48:00,2820.0,"[[-674.0, 1815.0], [-1457.0, 614.0], [-1564.0,...","[[0.0, 0.992, 0.237, 0.295, 0.148, 0.748, 0.20..."


### Generating and merging role-assignment records

In [8]:
def generate_role_records(activity_id, freq=1):
    """Build and save the role-assignment records of one activity.

    Reads ``data/fgp_avg/{activity_id}.csv``, collapses it to one row per
    (player, role period), attaches the coordinates of the player's base role taken
    from the matching formation period, and writes the result to
    ``data/role_avg/{activity_id}.csv``.

    Relies on the module-level ``form_periods`` DataFrame (columns LABEL_ACTIVITY_ID,
    LABEL_FORM_PERIOD and LABEL_COORDS are used); it must be defined before this
    function is called.

    Args:
        activity_id: Identifier used to locate the input CSV and to name the output CSV.
        freq: Number of seconds each input row accounts for. It scales the row count
            into the duration and is subtracted from the first timestamp to obtain
            the period start. Presumably the sampling interval of the input file —
            confirm against how that file is produced.

    Returns:
        The relative path of the CSV file that was written.
    """
    fgp_path = f'data/fgp_avg/{activity_id}.csv'
    fgp_df = pd.read_csv(fgp_path, header=0, encoding='utf-8-sig')
    # Vectorized parse of the whole column instead of one strptime call per row.
    fgp_df[LABEL_DATETIME] = pd.to_datetime(fgp_df[LABEL_DATETIME], format='%Y-%m-%d %H:%M:%S')

    grouped = fgp_df.groupby([LABEL_PLAYER_ID, LABEL_ROLE_PERIOD], as_index=False)
    cols = HEADER_ROLE_RECORDS[1:4] + [LABEL_DATETIME, LABEL_SQUAD_NUM, LABEL_PLAYER_NAME, LABEL_BASE_ROLE]
    # One row per (player, role period), carrying the first non-null value of each column.
    role_records = grouped[cols].first()
    role_records[LABEL_ACTIVITY_ID] = activity_id

    # The group-wise frames share role_records' 0..n-1 index (same groupby, as_index=False),
    # so these assignments line up row by row.
    first_dt = grouped[LABEL_DATETIME].first()[LABEL_DATETIME]
    last_dt = grouped[LABEL_DATETIME].last()[LABEL_DATETIME]
    n_samples = grouped[LABEL_GAMETIME].count()[LABEL_GAMETIME]
    # The start is placed one step before the first timestamp — presumably because each
    # row is stamped at the end of the interval it covers.
    role_records[LABEL_START_DT] = first_dt - timedelta(seconds=freq)
    role_records[LABEL_END_DT] = last_dt
    role_records[LABEL_DURATION] = n_samples * freq

    # Explicit join keys (the original relied on pandas inferring the shared columns).
    # This is an inner join: role periods without a matching formation period are dropped.
    form_coords = form_periods[[LABEL_ACTIVITY_ID, LABEL_FORM_PERIOD, LABEL_COORDS]]
    role_records = pd.merge(role_records, form_coords, on=[LABEL_ACTIVITY_ID, LABEL_FORM_PERIOD])

    # Base roles are 1-based while the rows of the coordinate array are 0-based.
    # A single pass over the two needed columns replaces two row-wise DataFrame.apply calls.
    role_xy = [
        coords[base_role - 1]
        for coords, base_role in zip(role_records[LABEL_COORDS], role_records[LABEL_BASE_ROLE])
    ]
    role_records[LABEL_X] = [xy[0] for xy in role_xy]
    role_records[LABEL_Y] = [xy[1] for xy in role_xy]

    role_records = role_records[HEADER_ROLE_RECORDS].sort_values(
        [LABEL_SQUAD_NUM, LABEL_ROLE_PERIOD], ignore_index=True)
    target_path = f'data/role_avg/{activity_id}.csv'
    role_records.to_csv(target_path, index=False, encoding='utf-8-sig')
    return target_path

In [10]:
# Inputs: the merged formation periods (read as a global by generate_role_records)
# and the activity index.
form_periods = pd.read_pickle('data/form_periods.pkl')
activity_records = pd.read_csv('data/activity_records.csv', header=0, encoding='utf-8-sig')

# Keep only the activities whose LABEL_STATS_SAVED flag equals 1.
stats_saved = activity_records[LABEL_STATS_SAVED] == 1
activity_ids = activity_records.loc[stats_saved, LABEL_ACTIVITY_ID]
role_records_list = []

# One job per activity, spread over 45 workers; the result is the list of returned paths.
jobs = (delayed(generate_role_records)(activity_id) for activity_id in tqdm(activity_ids))
target_paths = Parallel(n_jobs=45)(jobs)

# Spot-check the first generated file.
pd.read_csv(target_paths[0], header=0)

100%|██████████| 457/457 [00:09<00:00, 48.60it/s]


Unnamed: 0,activity_id,session,player_period,form_period,role_period,start_dt,end_dt,duration,player_id,squad_num,player_name,base_role,x,y
0,1879,1,2,1,2,2020-01-01 16:35:00,2020-01-01 16:47:00,720,1248,7,P07,3,1042.0,1812.0
1,1879,2,3,2,3,2020-01-01 17:02:00,2020-01-01 17:33:00,1860,1248,7,P07,3,674.0,1848.0
2,1879,2,4,2,4,2020-01-01 17:33:00,2020-01-01 17:44:00,660,1248,7,P07,3,674.0,1848.0
3,1879,2,4,2,5,2020-01-01 17:44:00,2020-01-01 17:51:00,420,1248,7,P07,3,674.0,1848.0
4,1879,2,5,2,6,2020-01-01 17:51:00,2020-01-01 17:53:00,120,1248,7,P07,9,-1318.0,546.0
5,1879,1,1,1,1,2020-01-01 16:00:00,2020-01-01 16:35:00,2100,1252,9,P09,1,1710.0,-60.0
6,1879,1,2,1,2,2020-01-01 16:35:00,2020-01-01 16:47:00,720,1252,9,P09,1,1710.0,-60.0
7,1879,2,3,2,3,2020-01-01 17:02:00,2020-01-01 17:33:00,1860,1252,9,P09,1,1701.0,24.0
8,1879,2,4,2,4,2020-01-01 17:33:00,2020-01-01 17:44:00,660,1252,9,P09,1,1701.0,24.0
9,1879,2,4,2,5,2020-01-01 17:44:00,2020-01-01 17:51:00,420,1252,9,P09,1,1701.0,24.0


In [11]:
# Directory listing kept from the original cell; nothing in the visible cells reads it —
# the frames below are loaded by activity id instead.
role_records_files = sorted(os.listdir('data/role_avg'))

# Load each activity's role records in the order of activity_ids, then stack them
# into one table with a fresh 0..n-1 index.
role_records_list = [
    pd.read_csv(f'data/role_avg/{activity_id}.csv', header=0, encoding='utf-8-sig')
    for activity_id in tqdm(activity_ids)
]
role_records = pd.concat(role_records_list, ignore_index=True)
role_records

100%|██████████| 457/457 [00:00<00:00, 471.13it/s]


Unnamed: 0,activity_id,session,player_period,form_period,role_period,start_dt,end_dt,duration,player_id,squad_num,player_name,base_role,x,y
0,1879,1,2,1,2,2020-01-01 16:35:00,2020-01-01 16:47:00,720,1248,7,P07,3,1042.0,1812.0
1,1879,2,3,2,3,2020-01-01 17:02:00,2020-01-01 17:33:00,1860,1248,7,P07,3,674.0,1848.0
2,1879,2,4,2,4,2020-01-01 17:33:00,2020-01-01 17:44:00,660,1248,7,P07,3,674.0,1848.0
3,1879,2,4,2,5,2020-01-01 17:44:00,2020-01-01 17:51:00,420,1248,7,P07,3,674.0,1848.0
4,1879,2,5,2,6,2020-01-01 17:51:00,2020-01-01 17:53:00,120,1248,7,P07,9,-1318.0,546.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21182,22875,1,2,1,2,2020-01-01 15:30:00,2020-01-01 15:41:00,660,11817,62,P62,10,-778.0,-1710.0
21183,22875,1,2,1,3,2020-01-01 15:41:00,2020-01-01 15:48:00,420,11817,62,P62,10,-778.0,-1710.0
21184,22875,2,3,2,4,2020-01-01 16:02:00,2020-01-01 16:20:00,1080,11817,62,P62,10,-1052.0,-1476.0
21185,22875,2,4,2,5,2020-01-01 16:20:00,2020-01-01 16:51:20,1880,11817,62,P62,10,-1052.0,-1476.0


In [12]:
# Persist the merged role-assignment records as a single CSV (no index column;
# 'utf-8-sig' writes UTF-8 with a leading byte-order mark).
role_records.to_csv('data/role_records.csv', index=False, encoding='utf-8-sig')