In [1]:
import csv
from math import ceil
from operator import mul
from pathlib import Path
from typing import List

import numpy as np
import pandas as pd
import torch
import torch.utils.data as data

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class SoccerNet(data.Dataset):

    def __init__(self, data_dir: Path, matches: List[Path], window_size_sec=20, frame_rate=8, **kwargs):
        self.path = data_dir

        videos_csv_path = self.path.joinpath(f'videos_v2.csv')
        self.videos = SoccerNet.read_videos(videos_csv_path)

        classes_csv_path = self.path.joinpath(f'classes_v2.csv')
        self.classes = SoccerNet.read_classes(classes_csv_path)
        self.num_classes = len(self.classes)

        self.annotations = SoccerNet.read_annotations(self.path.joinpath(f'annotations_v2.csv'))
        self.half_matches = [(mp, h) for mp, h in self.annotations.index.unique()]

        self.window_size_sec = window_size_sec
        self.frame_rate = frame_rate
        self.frames_per_window = self.window_size_sec * self.frame_rate

        # CAMBIAR AQUI
        self.matches = matches
        self.labels = []
        # self._load_labels(matches[0])

        emotion_features_path = matches[0].joinpath('face_extraction_results.npy')
        self.emotion_features = np.load(emotion_features_path, allow_pickle=True)

    @staticmethod
    def read_classes(classes_csv_path):
        with open(classes_csv_path, mode='r') as csv_file:
            csv_reader = csv.reader(csv_file)
            next(csv_reader, None)
            return {i: r[0] for i, r in enumerate(csv_reader)}

    @staticmethod
    def read_videos(videos_csv_path):
        return pd.read_csv(videos_csv_path,
                           usecols=['match_path',
                                    'match_date',
                                    'visiting_team',
                                    'home_team',
                                    'score',
                                    'first_half_duration_sec',
                                    'second_half_duration_sec'],
                           dtype={'match_date': str,
                                  'visiting_team': str,
                                  'home_team': str,
                                  'score': str},
                           converters={'match_path': Path,
                                       'first_half_duration_sec': lambda d: int(float(d)),
                                       'second_half_duration_sec': lambda d: int(float(d))
                                       })

    @staticmethod
    def read_annotations(annotations_csv_path):
        to_secs = lambda t: sum(map(mul, [60, 1], map(int, t.split(':'))))
        return pd.read_csv(annotations_csv_path,
                           usecols=['match_path',
                                    'half',
                                    'game_time',
                                    'label',
                                    'position',
                                    'team',
                                    'visibility'],
                           dtype={'label': int,
                                  'position': int,
                                  'team': int
                                  },
                           converters={'match_path': Path,
                                       'half': lambda h: int(h) - 1,
                                       'game_time': to_secs,
                                       'visibility': lambda v: 1 if int(v) else -1},
                           index_col=['match_path',
                                      'half'])

    '''
    def _load_labels(self, match_path, half, num_batches):
        labels = np.zeros((num_batches, self.num_classes))
        for r in self.annotations.loc[(match_path, half)].itertuples():
            index = r.game_time // self.window_size_sec
            if index < num_batches:
                labels[index, r.label] = 1
        self.labels.append(labels)
    '''

    def _load_labels(self, match_path):
        half = 0
        for r in self.annotations.loc[(match_path, half)].itertuples():
            print(r)

    def __getitem__(self, index):
        pass

    def __len__(self):
        pass



    # print(dataset.classes)
    # print(dataset.emotion_features)
    # dataset getitem returns: (list of 160 vectors per action, action name)


In [59]:
dataset.annotations

Unnamed: 0_level_0,Unnamed: 1_level_0,game_time,label,position,team,visibility
match_path,half,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
england_epl/2014-2015/2015-02-21 - 18-00 Crystal Palace 1 - 2 Arsenal,0,0,2,0,0,1
england_epl/2014-2015/2015-02-21 - 18-00 Crystal Palace 1 - 2 Arsenal,0,19,9,19192,2,1
england_epl/2014-2015/2015-02-21 - 18-00 Crystal Palace 1 - 2 Arsenal,0,33,10,33487,1,-1
england_epl/2014-2015/2015-02-21 - 18-00 Crystal Palace 1 - 2 Arsenal,0,36,9,36795,2,1
england_epl/2014-2015/2015-02-21 - 18-00 Crystal Palace 1 - 2 Arsenal,0,47,10,47415,0,1
...,...,...,...,...,...,...
spain_laliga/2016-2017/2017-05-21 - 21-00 Malaga 0 - 2 Real Madrid,1,2592,7,2592470,0,1
spain_laliga/2016-2017/2017-05-21 - 21-00 Malaga 0 - 2 Real Madrid,1,2613,8,2613744,1,-1
spain_laliga/2016-2017/2017-05-21 - 21-00 Malaga 0 - 2 Real Madrid,1,2645,9,2645735,2,1
spain_laliga/2016-2017/2017-05-21 - 21-00 Malaga 0 - 2 Real Madrid,1,2657,10,2657242,1,1


In [58]:
data_path = Path('../../../../mnt/DATA/datasets/soccernet')
videos_path = Path('../videos_processed_extraction.txt')

with videos_path.open() as f:
    videos = [Path(line).parent for line in f.readlines()][:1]

dataset = SoccerNet(data_path, videos)
dataset.annotations[(Path('england_epl/2014-2015/2015-02-21 - 18-00 Crystal Palace 1 - 2 Arsenal'),0)]


KeyError: (PosixPath('england_epl/2014-2015/2015-02-21 - 18-00 Crystal Palace 1 - 2 Arsenal'), 0)

In [51]:
detections_path = '../../../../mnt/DATA/datasets/soccernet/germany_bundesliga/2014-2015/2015-05-02 - 16-30 Hoffenheim 1 - 1 Dortmund/face_extraction_results.npy'
df = np.load(detections_path, allow_pickle=True)


In [53]:
df

array([[(0, '00106', 0, 9324, 0.9995306730270386),
        array([0.        , 0.49555725, 0.        , 0.        , 0.        ,
               0.        , 0.24611108, 0.        , 0.01044159, 0.        ,
               0.        , 0.00147872, 0.        , 0.        , 0.01558053,
               0.21686314, 0.        , 0.5447881 , 0.        , 0.        ,
               0.        , 0.        , 0.23414885, 0.        , 0.        ,
               0.00655145, 0.        , 0.        , 0.        , 0.02011331,
               0.35444343, 0.        , 0.        , 0.01424181, 0.        ,
               0.00081222, 0.16539061, 0.        , 0.03470436, 0.        ,
               0.04886378, 0.        , 0.        , 0.23656957, 0.        ,
               0.        , 0.13711649, 0.        , 0.        , 0.        ,
               0.0775089 , 0.        , 0.        , 0.01688929, 0.        ,
               0.        , 0.08088451, 0.        , 0.        , 0.        ,
               0.        , 0.        , 0.07949558