### Download Dataset

In [7]:
import SoccerNet

In [8]:
from SoccerNet.Downloader import SoccerNetDownloader
# Fetch the tracking-task archives for every split into the current directory.
tracking_splits = ["train", "test", "challenge"]
mySoccerNetDownloader = SoccerNetDownloader(LocalDirectory="./")
mySoccerNetDownloader.downloadDataTask(task="tracking", split=tracking_splits)

  from .autonotebook import tqdm as notebook_tqdm
Downloading ./tracking/train.zip...: : 9.58GiB [09:52, 16.2MiB/s]                           
Downloading ./tracking/test.zip...: : 8.71GiB [09:13, 15.7MiB/s]                           
Downloading ./tracking/challenge.zip...: : 11.0GiB [11:50, 15.5MiB/s]                           


In [9]:
%%bash
# Extract each downloaded split archive into the shared tracking folder.
# NOTE(review): later cells read from ./dataset/images/<split>; confirm the
# extracted folders are moved/renamed to that layout before running them.
for split in train test challenge; do
    unzip -q "tracking/${split}.zip" -d ./Dataset/tracking
done

In [None]:
import os
import numpy as np
import glob
import json
import cv2
from tqdm import tqdm

### Generate labels for dataset

#### Output: one txt file per image, with one line per bounding box containing the following fields:
    
    class id (0), track ID (tid), normalized x coordinate of the bounding box center, normalized y coordinate of the bounding box center, normalized width of the bounding box, normalized height of the bounding box.
    
    (normalization made relative to the frame width/height)

In [3]:
def mkdirs(d):
    """Create directory ``d`` (including missing parents) if it does not exist.

    Calling it on an already existing directory is a no-op.

    Raises:
        FileExistsError: if ``d`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    # and reports a path that is occupied by a regular file instead of ignoring it.
    os.makedirs(d, exist_ok=True)

seq_roots = ['./dataset/images/train', './dataset/images/test']
label_roots = ['./dataset/labels_with_ids/train', './dataset/labels_with_ids/test']


def _read_frame_size(seqinfo_path):
    """Return ``(imWidth, imHeight)`` as ints parsed from a seqinfo.ini file.

    Every ``key=value`` line is read, so the result does not depend on the
    order in which the fields appear in the file.
    """
    fields = {}
    with open(seqinfo_path) as seqinfo_file:
        for line in seqinfo_file:
            key, sep, value = line.partition('=')
            if sep:
                fields[key.strip()] = value.strip()
    return int(fields['imWidth']), int(fields['imHeight'])


for seq_root, label_root in zip(seq_roots, label_roots):
    mkdirs(label_root)
    # Sorted so the global track IDs are reproducible between runs; entries that
    # are not directories (e.g. .DS_Store) are not sequences and are skipped.
    seqs = sorted(s for s in os.listdir(seq_root)
                  if os.path.isdir(os.path.join(seq_root, s)))

    tid_curr = 0       # last global track ID handed out within this split
    global_tids = {}   # (sequence name, per-sequence track ID) -> global track ID
    output = {}        # label file path -> list of label lines for that frame

    for seq in tqdm(seqs, desc=f"Processing sequences in {seq_root}"):
        seq_width, seq_height = _read_frame_size(
            os.path.join(seq_root, seq, 'seqinfo.ini'))

        gt_txt = os.path.join(seq_root, seq, 'gt', 'gt.txt')
        # ndmin=2 keeps a single-row file iterable row by row.
        gt = np.loadtxt(gt_txt, dtype=np.float64, delimiter=',', ndmin=2)

        seq_label_root = os.path.join(label_root, seq, 'img1')
        mkdirs(seq_label_root)

        # Frame ID, Track ID, left coor X, top coor Y, width W, height H, Score, Unused, Unused, Unused
        for fid, tid, x, y, w, h, score, label, _, _ in tqdm(gt, desc=f"Processing GT for {seq}", leave=False):
            if score == 0 or not label == -1.0:
                continue

            fid = int(fid)
            tid = int(tid)
            # Assign one global ID per (sequence, track) pair. Keying on the pair
            # keeps IDs stable even if gt.txt is not grouped by track, and keeps
            # tracks of different sequences apart when their raw IDs coincide.
            track_key = (seq, tid)
            if track_key not in global_tids:
                tid_curr += 1
                global_tids[track_key] = tid_curr

            # bounding box center coordinates
            center_x = x + w / 2
            center_y = y + h / 2
            label_fpath = os.path.join(seq_label_root, '{:06d}.txt'.format(fid))
            # Fields: class id (0), global track ID,
            # box center x / frame width, box center y / frame height,
            # box width / frame width, box height / frame height.
            label_str = '0 {:d} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format(
                global_tids[track_key], center_x / seq_width, center_y / seq_height,
                w / seq_width, h / seq_height)
            output.setdefault(label_fpath, []).append(label_str)

    for label_fpath, lines in tqdm(output.items(), desc="Writing labels"):
        with open(label_fpath, 'w') as f:
            f.writelines(lines)

Processing sequences in ./dataset/images/train: 100%|██████████| 57/57 [00:05<00:00, 10.39it/s]
Writing labels: 100%|██████████| 42750/42750 [03:39<00:00, 194.79it/s]
Processing sequences in ./dataset/images/test: 100%|██████████| 49/49 [00:04<00:00, 10.19it/s]
Writing labels: 100%|██████████| 36750/36750 [03:22<00:00, 181.78it/s]


### Generate images list for train/test
#### Output: a txt file containing the paths to all images in the train/test dataset


In [6]:
def gen_image_list(dataPath, datType, image_list_root='./dataset/image_lists'):
    """Write a text file listing every ``.jpg`` frame of one dataset split.

    Frames are collected from ``{dataPath}/images/{datType}/<sequence>/img1/``
    with sequences and frames both in sorted order, and written one path per
    line (no trailing newline) to ``<image_list_root>/<name>.<datType>``, where
    ``<name>`` is the last path component of ``dataPath``.

    Args:
        dataPath: Root folder of the dataset (the one containing ``images``).
        datType: Split name, e.g. ``'train'`` or ``'test'``.
        image_list_root: Folder that receives the list file; created if missing.
    """
    # makedirs also creates missing parent folders and tolerates an existing one.
    os.makedirs(image_list_root, exist_ok=True)

    inputPath = f'{dataPath}/images/{datType}'
    allImageList = []
    for pathSingle in sorted(glob.glob(inputPath + '/*')):
        allImageList.extend(
            sorted(glob.glob(os.path.join(pathSingle, 'img1', '*.jpg'))))

    # Use only the final component of dataPath for the file name: joining a
    # nested or absolute dataPath would otherwise place the list file outside
    # image_list_root (os.path.join discards everything before an absolute part).
    dataset_name = os.path.basename(os.path.normpath(dataPath))
    image_list_fname = os.path.join(image_list_root, f'{dataset_name}.{datType}')
    with open(image_list_fname, 'w') as image_list_file:
        image_list_file.write('\n'.join(allImageList))

# Build one image list per split of the dataset.
for split_name in ('train', 'test'):
    gen_image_list('dataset', split_name)

### Converts a dataset with video frames and ground truth annotations into COCO-style JSON format.
#### output: JSON files

    images: Metadata about the images in the dataset.

    annotations: Bounding box annotations.

    videos: Metadata about the video sequences.

    categories: Information about the object categories (in this case, pedestrians).


In [7]:
DATA_PATH = 'dataset/images'
OUT_PATH = 'dataset/annotations'
SPLITS = ['train', 'test']

os.makedirs(OUT_PATH, exist_ok=True)

for split in SPLITS:
    data_path = os.path.join(DATA_PATH, split)
    out_path = os.path.join(OUT_PATH, '{}.json'.format(split))
    out = {'images': [], 'annotations': [], 'videos': [],
            'categories': [{'id': 1, 'name': 'pedestrian'}]}
    seqs = os.listdir(data_path)
    image_cnt = 0   # number of images emitted for the sequences processed so far
    ann_cnt = 0     # running annotation ID
    video_cnt = 0   # running video (sequence) ID
    tid_curr = 0    # last global track ID handed out within this split
    tid_last = -1   # raw track ID of the most recently processed annotation
    for seq in sorted(seqs):
        if '.DS_Store' in seq:
            continue
        video_cnt += 1  # video sequence number.
        out['videos'].append({'id': video_cnt, 'file_name': seq})
        seq_path = os.path.join(data_path, seq)
        img_path = os.path.join(seq_path, 'img1')
        ann_path = os.path.join(seq_path, 'gt/gt.txt')
        images = os.listdir(img_path)
        num_images = len([image for image in images if image.endswith('.jpg')])
        # Zero-based range of frames that are exported (here: the whole sequence).
        image_range = [0, num_images - 1]

        for i in range(num_images):
            file_name = '{}/img1/{:06d}.jpg'.format(seq, i + 1)
            img = cv2.imread(os.path.join(data_path, file_name))
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                raise FileNotFoundError('cannot read image {}'.format(
                    os.path.join(data_path, file_name)))
            height, width = img.shape[:2]
            image_info = {'file_name': file_name,  # image name.
                            'id': image_cnt + i + 1,  # image number in the entire training set.
                            'frame_id': i + 1 - image_range[0],  # image number in the video sequence, starting from 1.
                            'prev_image_id': image_cnt + i if i > 0 else -1,  # image number in the entire training set.
                            'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1,
                            'video_id': video_cnt,
                            'height': height, 'width': width}
            out['images'].append(image_info)
        print('{}: {} images'.format(seq, num_images))
        # ndmin=2 keeps column indexing valid for a single-row gt file.
        anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',', ndmin=2)
        print('{} ann images'.format(int(anns[:, 0].max())))
        # Raw track IDs restart in every sequence, so map each one to an ID that
        # is unique across the split. Without this, every annotation would carry
        # the initial value of tid_curr (0) as its track_id.
        seq_track_ids = {}  # raw track ID in this sequence -> global track ID
        for i in range(anns.shape[0]):
            frame_id = int(anns[i][0])
            # Drop annotations that refer to frames outside the exported range.
            if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]:
                continue
            track_id = int(anns[i][1])
            if track_id not in seq_track_ids:
                tid_curr += 1
                seq_track_ids[track_id] = tid_curr
            tid_last = track_id
            ann_cnt += 1
            category_id = 1
            ann = {'id': ann_cnt,
                    'category_id': category_id,
                    'image_id': image_cnt + frame_id,
                    'track_id': seq_track_ids[track_id],
                    'bbox': anns[i][2:6].tolist(),
                    'conf': float(anns[i][6]),
                    'iscrowd': 0,
                    'area': float(anns[i][4] * anns[i][5])}
            out['annotations'].append(ann)
        image_cnt += num_images
        print(tid_curr, tid_last)
    print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations'])))
    # The context manager guarantees the JSON is flushed and the file closed.
    with open(out_path, 'w') as out_file:
        json.dump(out, out_file)

SNMOT-060: 750 images
750 ann images
26 26
SNMOT-061: 750 images
750 ann images
53 27
SNMOT-062: 750 images
750 ann images
77 24
SNMOT-063: 750 images
750 ann images
102 25
SNMOT-064: 750 images
750 ann images
126 24
SNMOT-065: 750 images
750 ann images
152 26
SNMOT-066: 750 images
750 ann images
177 25
SNMOT-067: 750 images
750 ann images
203 26
SNMOT-068: 750 images
750 ann images
227 24
SNMOT-069: 750 images
750 ann images
251 24
SNMOT-070: 750 images
750 ann images
276 25
SNMOT-071: 750 images
750 ann images
298 22
SNMOT-072: 750 images
750 ann images
322 24
SNMOT-073: 750 images
750 ann images
347 25
SNMOT-074: 750 images
750 ann images
372 25
SNMOT-075: 750 images
750 ann images
396 24
SNMOT-076: 750 images
750 ann images
425 29
SNMOT-077: 750 images
750 ann images
449 24
SNMOT-097: 750 images
750 ann images
471 22
SNMOT-098: 750 images
750 ann images
497 26
SNMOT-099: 750 images
750 ann images
522 25
SNMOT-100: 750 images
750 ann images
548 26
SNMOT-101: 750 images
750 ann image

### Filter det files
#### Output: one txt file per sequence containing only the useful fields of its det file

In [9]:
seq_roots = ['./dataset/images/train', './dataset/images/test']
det_roots = ['./dataset/det_files/train', './dataset/det_files/test']

for seq_root, det_root in zip(seq_roots, det_roots):
    mkdirs(det_root)
    # Only directories are sequences; stray files such as .DS_Store are skipped.
    seqs = sorted(s for s in os.listdir(seq_root)
                  if os.path.isdir(os.path.join(seq_root, s)))
    # The description is built once, before the loop starts, so it must not
    # reference the loop variable `seq`; label the bar with the split root.
    for seq in tqdm(seqs, desc=f"Processing det for {seq_root}", leave=False):
        src_det_txt = os.path.join(seq_root, seq, 'det', 'det.txt')
        # ndmin=2 keeps the column selection below valid for a single-row file.
        src_det = np.loadtxt(src_det_txt, dtype=np.float64, delimiter=',', ndmin=2)
        #only extract 7 items per row: [frame_id],[x0],[y0],[w],[h],[score],[class_id]
        dst_det = src_det[:, [0,2,3,4,5,6,7]]
        dst_det_txt = os.path.join(det_root, f'{seq}.txt')
        # NOTE(review): fmt='%d' writes every field as an integer, so fractional
        # coordinates or scores would be truncated -- confirm this is intended.
        np.savetxt(dst_det_txt, dst_det, fmt='%d', delimiter=',')

                                                                             

#### zip ground truth files

In [10]:
import zipfile

def zipdir(path, ziph):
    """Add every file under ``path`` whose name ends in ``gt.txt`` or ``seqinfo.ini`` to ``ziph``.

    Entries are stored under names relative to the parent directory of
    ``path``, so the last folder of ``path`` is kept as the archive's top level.
    """
    wanted_suffixes = ('gt.txt', 'seqinfo.ini')
    archive_base = os.path.join(path, '..')
    for current_dir, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            if not filename.endswith(wanted_suffixes):
                continue
            full_path = os.path.join(current_dir, filename)
            ziph.write(full_path, os.path.relpath(full_path, archive_base))

# Bundle the test split's ground-truth and sequence-info files into gt.zip.
with zipfile.ZipFile('gt.zip', mode='w', compression=zipfile.ZIP_DEFLATED) as gt_archive:
    zipdir('./dataset/images/test', gt_archive)
