In [17]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
!pwd

/content


download necessary libraries

In [25]:
!pip install -r /content/drive/MyDrive/tennis_deep_learning/TrackNet/requirements.txt



In [26]:
import torch
if torch.cuda.is_available():
  print('available')
else:
  print('not available')


not available


# model training

## model.py

In [20]:
import torch
import torch.nn as nn

class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, pad=1, stride=1, bias=True):
        super().__init__()
        self.block = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=pad, bias=bias),
            nn.ReLU(),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, x):
        return self.block(x)

class TrackNet(nn.Module):
    def __init__(self, out_channels=256):
        super().__init__()
        self.out_channels = out_channels

        self.conv1 = ConvBlock(in_channels=9, out_channels=64)
        self.conv2 = ConvBlock(in_channels=64, out_channels=64)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = ConvBlock(in_channels=64, out_channels=128)
        self.conv4 = ConvBlock(in_channels=128, out_channels=128)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv5 = ConvBlock(in_channels=128, out_channels=256)
        self.conv6 = ConvBlock(in_channels=256, out_channels=256)
        self.conv7 = ConvBlock(in_channels=256, out_channels=256)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv8 = ConvBlock(in_channels=256, out_channels=512)
        self.conv9 = ConvBlock(in_channels=512, out_channels=512)
        self.conv10 = ConvBlock(in_channels=512, out_channels=512)
        self.ups1 = nn.Upsample(scale_factor=2)
        self.conv11 = ConvBlock(in_channels=512, out_channels=256)
        self.conv12 = ConvBlock(in_channels=256, out_channels=256)
        self.conv13 = ConvBlock(in_channels=256, out_channels=256)
        self.ups2 = nn.Upsample(scale_factor=2)
        self.conv14 = ConvBlock(in_channels=256, out_channels=128)
        self.conv15 = ConvBlock(in_channels=128, out_channels=128)
        self.ups3 = nn.Upsample(scale_factor=2)
        self.conv16 = ConvBlock(in_channels=128, out_channels=64)
        self.conv17 = ConvBlock(in_channels=64, out_channels=64)
        self.conv18 = ConvBlock(in_channels=64, out_channels=self.out_channels)

        self.softmax = nn.Softmax(dim=1)
        self._init_weights()

    def forward(self, x, testing=False):
        batch_size = x.size(0)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.pool1(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.pool2(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.pool3(x)
        x = self.conv8(x)
        x = self.conv9(x)
        x = self.conv10(x)
        x = self.ups1(x)
        x = self.conv11(x)
        x = self.conv12(x)
        x = self.conv13(x)
        x = self.ups2(x)
        x = self.conv14(x)
        x = self.conv15(x)
        x = self.ups3(x)
        x = self.conv16(x)
        x = self.conv17(x)
        x = self.conv18(x)
        # x = self.softmax(x)
        out = x.reshape(batch_size, self.out_channels, -1)
        if testing:
            out = self.softmax(out)
        return out

    def _init_weights(self):
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.uniform_(module.weight, -0.05, 0.05)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)


## TrackNet_utils.py

In [21]:
import os
import pandas as pd
import numpy as np
import cv2


def create_gt_labels(root_dir=r'c:/kyoto/TennisML/TrackNet/Dataset', path_csv_output=r'c:/kyoto/TennisML/TrackNet/Dataset', train_propotion=0.7):
    # Merge label.csv files in each end of clips to one csv file

    df = pd.DataFrame()

    for game_id in range(1, 11):
        path_game = os.path.join(root_dir, f"game{game_id}")
        clips = os.listdir(path_game)
        for clip in clips:
            labels = pd.read_csv(os.path.join(path_game, clip, 'Label.csv'))
            labels['path_now'] = labels['file name'].apply(lambda k: os.path.join(f"game{game_id}", clip, k))

            # labels_gt = labels.iloc[2:].copy()      # remove the first two rows
            labels_gt = labels.iloc[:].copy()
            labels_gt.loc[:, 'path_prev'] = labels['path_now'].shift(1)
            labels_gt.loc[:, 'path_prevprev'] = labels['path_now'].shift(2)
            labels_gt.loc[:, 'path_gt'] = labels['path_now']
            labels_gt = labels_gt[2:]
            df = pd.concat([df, labels_gt], ignore_index=True)

    df = df.loc[:, ['path_now', 'path_prev', 'path_prevprev', 'path_gt', 'x-coordinate', 'y-coordinate', 'status', 'visibility']]
    num_train = int(len(df.index)*train_propotion)
    df_train = df.loc[:num_train]
    df_val = df.loc[num_train:]
    df_train.to_csv(os.path.join(path_csv_output, 'labels_train.csv'), index=False)
    df_val.to_csv(os.path.join(path_csv_output, 'labels_val.csv'), index=False)
    # print(df_train)

# def create_gt_labels_2(path_input='c:/kyoto/TennisML/TrackNet/Dataset', path_output='c:/kyoto/TennisML/TrackNet/Dataset', train_rate=0.7):
#     df = pd.DataFrame()
#     for game_id in range(1,11):
#         game = 'game{}'.format(game_id)
#         clips = os.listdir(os.path.join(path_input, game))
#         for clip in clips:
#             labels = pd.read_csv(os.path.join(path_input, game, clip, 'Label.csv'))
#             labels['gt_path'] = 'gts/' + game + '/' + clip + '/' + labels['file name']
#             labels['path1'] = 'images/' + game + '/' + clip + '/' + labels['file name']
#             labels_target = labels[2:]
#             labels_target.loc[:, 'path2'] = list(labels['path1'][1:-1])
#             labels_target.loc[:, 'path3'] = list(labels['path1'][:-2])
#             df = pd.concat([df, labels_target], ignore_index=True)
#     df = df.reset_index(drop=True)
#     df = df[['path1', 'path2', 'path3', 'gt_path', 'x-coordinate', 'y-coordinate', 'status', 'visibility']]
#     # df = df.sample(frac=1)
#     num_train = int(df.shape[0]*train_rate)
#     df_train = df[:num_train]
#     df_test = df[num_train:]
#     df_train.to_csv(os.path.join(path_output, 'labels_train.csv'), index=False)
#     df_test.to_csv(os.path.join(path_output, 'labels_val.csv'), index=False)


def gaussian_kernel(size, sigma):
    '''
    sigma(variance) is equivalent to the average radius of a tennis ball (about 5 pixels)
    '''
    x, y = np.mgrid[-size:size+1, -size:size+1]
    gk = np.exp( -(x**2 + y**2) / (float(2*sigma)) )
    return gk

def generate_gaussian_kernel_array(size, sigma):
    gaussian_kernel_array = gaussian_kernel(size, sigma)
    gaussian_kernel_array = gaussian_kernel_array * (255 / gaussian_kernel_array[ int(len(gaussian_kernel_array)/2) ][ int(len(gaussian_kernel_array)/2) ])
    gaussian_kernel_array = gaussian_kernel_array.astype(int)
    return gaussian_kernel_array


def create_gt_images(size, sigma, width, height, root_dir='c:/kyoto/TennisML/TrackNet/Dataset', path_output='c:/kyoto/TennisML/TrackNet/Dataset/gts'):
    for game_id in range(1, 11):
        path_game = os.path.join(root_dir, f"game{game_id}")
        clips = os.listdir(path_game)

        path_output_game = os.path.join(path_output, f"game{game_id}")
        if not os.path.exists(path_output_game):
            os.makedirs(path_output_game)

        for clip in clips:
            path_output_clip = os.path.join(path_output_game, clip)
            if not os.path.exists(path_output_clip):
                os.makedirs(path_output_clip)

            labels = pd.read_csv(os.path.join(path_game, clip, 'Label.csv'))
            for idx in range(len(labels.index)):
                file_name, visibility, x, y, _ = labels.loc[idx, :]
                heatmap = np.zeros((width, height, 3), dtype=np.uint8)
                if visibility != 0:
                    x, y = int(x), int(y)
                    for i in range(-size, size+1):
                        for j in range(-size, size+1):
                            if x+i >= 0 and x+i < width and y+j >= 0 and y+j < height:
                                gaussian_kernel_array = generate_gaussian_kernel_array(size, sigma)
                                temp = gaussian_kernel_array[size+i][size+j]
                                if temp > 0:
                                    heatmap[x+i][y+j] = (temp, temp, temp)
                cv2.imwrite(os.path.join(path_output_clip, file_name), heatmap)


def postprocess(feature_map, scale=2, shape=(360, 640), threshold=127, min_radius=2, max_radius=7):
    feature_map = np.array(feature_map)
    feature_map = (feature_map*255).astype(np.uint8)
    feature_map = feature_map.reshape(shape)

    _, heatmap = cv2.threshold(feature_map, threshold, 255, cv2.THRESH_BINARY)
    circles = cv2.HoughCircles(heatmap, cv2.HOUGH_GRADIENT, dp=1, minDist=1, param1=50, param2=2, minRadius=min_radius, maxRadius=max_radius)

    x, y = None, None
    if circles is not None and circles.shape[1] > 0:
        best_circle = circles[0][0]
        x = best_circle[0] * scale
        y = best_circle[0] * scale
    return x, y



## train.py

In [22]:
import math
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import time
# from TrackNet_utils import postprocess
from scipy.spatial import distance


def train(model, train_loader, loss_fn, optimizer, device, epoch, max_iters):
    # loss_fn = nn.CrossEntropyLoss()
    start_time = time.time()
    losses = []
    # Set the model to training mode
    model.train()

    for iter_id, batch in enumerate(train_loader):
        optimizer.zero_grad()
        print('batch[0] : ', batch[0].size(), 'batch[1] : ', batch[1].size())
        outputs = model(batch[0].float().to(device))
        ground_truth = torch.tensor(batch[1], dtype=torch.long, device=device)
        print('outputs : ', outputs.size(), 'ground_truth : ', ground_truth.size())
        outputs = outputs.squeeze(0)
        ground_truth = ground_truth.squeeze(0)
        print('outputs : ', outputs.size(), 'ground_truth : ', ground_truth.size())
        loss = loss_fn(outputs, ground_truth)

        loss.backward()
        optimizer.step()
        # optimizer.zero_grad()

        end_time = time.time()
        duration = end_time - start_time

        losses.append(loss.item())

        print(f'epoch:{epoch}, iteration:{iter_id}/{max_iters}, loss:{round(loss, 5)}, time:{duration}')

        if iter_id >= max_iters - 1:
            break

    return np.mean(losses)


# Test the accuracy, precision, recall and f1-measure of the prediction
def validate(model, val_loader, device, epoch, min_dist=5):
    '''
    tp: True Positive
    fp: False Positive
    tn: True Negative
    fn: False Negative
    '''
    losses = []
    tp = [0, 0, 0, 0]
    fp = [0, 0, 0, 0]
    tn = [0, 0, 0, 0]
    fn = [0, 0, 0, 0]

    criterion = nn.CrossEntropyLoss()
    model.eval()

    for iter_id, batch in enumerate(val_loader):
        with torch.no_grad():
            inputs, gt_heatmap, x_coords, y_coords, visibilities = batch
            out = model(batch[0].float().to(device))
            # gt = torch.tensor(batch[1], dtype=torch.long, device=device)
            gt_heatmap_flat = gt_heatmap.squeeze(1).long().to(device).view(gt_heatmap.size(0), -1)
            loss = criterion(out, gt_heatmap_flat)
            losses.append(loss.item())

            output = out.argmax(dim=1).detach().cpu().numpy().reshape(out.size(0), 640, 360)
            for i in range(len(output)):
                x_pred, y_pred = postprocess(output[i])
                # x_gt = batch[2][i]
                # y_gt = batch[3][i]
                # vis = batch[4][i]
                x_gt = x_coords[i]
                y_gt = y_coords[i]
                vis = visibilities[i]

                if x_pred is not None and y_pred is not None:
                    if vis != 0:
                        if not math.isnan(x_pred) and not math.isnan(y_pred) and not math.isnan(x_gt) and not math.isnan(y_gt):
                            dist = distance.euclidean((x_pred, y_pred), (x_gt, y_gt))
                            if dist < min_dist:
                                tp[int(vis)] += 1
                            else:
                                fp[int(vis)] += 1
                        else:
                            fp[int(vis)] += 1
                    else:
                        fp[int(vis)] += 1
                elif vis != 0:
                    fn[int(vis)] += 1
                else:
                    tn[int(vis)] += 1


    eps = 1e-15

    total_tp = sum(tp)
    total_fp = sum(fp)
    total_tn = sum(tn)
    total_fn = sum(fn)

    precision = sum(tp) / (total_tp + total_fp + eps)
    recall = total_tp / (total_tp + total_fn + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)

    print(f"precision : {precision}")
    print(f"recall : {recall}")
    print(f"f1 : {f1}")

    return np.mean(losses), precision, recall, f1


## dataset.py

In [23]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from torchvision import transforms
import cv2
import math
from PIL import Image


class TrackNetDataset(Dataset):
    def __init__(self, mode, path_dataset, input_height=360, input_width=640):
        self.path_dataset = path_dataset
        self.data = pd.read_csv(os.path.join(self.path_dataset, f"labels_{mode}.csv"))
        self.HEIGHT = input_height
        self.WIDTH = input_width
        print(f"mode = {mode}, samples = {self.data.shape[0]}")

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        path_now, path_prev, path_prevprev, path_gt, x, y, status, visibility = self.data.loc[idx, :]

        path_now = os.path.join(self.path_dataset, path_now)
        path_prev = os.path.join(self.path_dataset, path_prev)
        path_prevprev = os.path.join(self.path_dataset, path_prevprev)
        path_gt = os.path.join(self.path_dataset, 'gts', path_gt)

        if math.isnan(x) or math.isnan(y):
            x = -1
            y = -1

        transform = transforms.Compose([
            transforms.Resize((self.HEIGHT, self.WIDTH)),
            transforms.ToTensor()
        ])

        def pil_to_tensor_rgb(path):
            img = Image.open(path).convert("RGB")
            img = transform(img)
            return img

        gt_img = Image.open(path_gt).convert("RGB")
        gt_img = transform(gt_img)
        gt_img = gt_img[0, :, :]
        gt_img = gt_img.reshape(self.HEIGHT * self.WIDTH)
        gt_img = (gt_img * 255).byte()

        img_now = pil_to_tensor_rgb(path_now)
        img_prev = pil_to_tensor_rgb(path_prev)
        img_prevprev = pil_to_tensor_rgb(path_prevprev)

        imgs = torch.cat((img_now, img_prev, img_prevprev), dim=0)
        return imgs, gt_img, x, y, visibility


## main.py

In [24]:
# from model import TrackNet
import torch
import torch.nn as nn
# from dataset import TrackNetDataset
import os
# from train import train, validate
from tensorboardX import SummaryWriter
# import argparse
# import torch_xla
# import torch_xla.core.xla_model as xm

if __name__ == '__main__':

    # path_dataset = 'C:/kyoto/TennisML/TrackNet/Dataset'
    path_dataset = '/content/drive/MyDrive/tennis_deep_learning/TrackNet/Dataset'
    batch_size = 2
    exp_id = '1'
    num_epochs = 500
    lr = 1.0
    val_intervals = 5
    steps_per_epoch = 200



    train_dataset = TrackNetDataset(mode='train', path_dataset=path_dataset)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=1,
        pin_memory=True
    )

    val_dataset = TrackNetDataset(mode='val', path_dataset=path_dataset)
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=1,
        pin_memory=True
    )

    model = TrackNet()
    device = 'cuda'
    # device = xm.xla_device()
    model = model.to(device)

    exps_path = './exps/{}'.format(exp_id)
    tb_path = os.path.join(exps_path, 'plots')
    if not os.path.exists(tb_path):
        os.makedirs(tb_path)
    log_writer = SummaryWriter(tb_path)
    model_last_path = os.path.join(exps_path, 'model_last.pt')
    model_best_path = os.path.join(exps_path, 'model_best.pt')

    optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
    val_best_metric = 0

    for epoch in range(num_epochs):
        print(epoch)
        loss_fn = nn.CrossEntropyLoss()
        train_loss = train(model, train_loader, loss_fn, optimizer, device, epoch, steps_per_epoch)
        print(f"train_loss = {train_loss}")
        log_writer.add_scalar('Train/training_loss', train_loss, epoch)
        log_writer.add_scalar('Train/lr', optimizer.param_groups[0]['lr'], epoch)

        if (epoch > 0) and (epoch % val_intervals == 0):
            val_loss, precision, recall, f1 = validate(model, val_loader, device, epoch)
            print(f"val_loss = {val_loss}")
            log_writer.add_scalar('Val/loss', val_loss, epoch)
            log_writer.add_scalar('Val/precision', precision, epoch)
            log_writer.add_scalar('Val/recall', recall, epoch)
            log_writer.add_scalar('Val/f1', f1, epoch)
            if f1 > val_best_metric:
                val_best_metric = f1
                torch.save(model.state_dict(), model_best_path)
            torch.save(model.state_dict(), model_last_path)


mode = train, samples = 13752
mode = val, samples = 5894


RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx