In [1]:
%load_ext autoreload
%autoreload 2

# Load the CSV Data

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("dataframe_transformer.csv")

In [4]:
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,gameTime,label,position,team,visibility,url_local
0,82,82,1 - 01:07,BALL PLAYER BLOCK,67280,away,visible,england_efl/2019-2020/2019-10-01 - Middlesbrou...
1,204,204,1 - 02:46,CROSS,166520,home,visible,england_efl/2019-2020/2019-10-01 - Middlesbrou...
2,232,232,1 - 03:18,BALL PLAYER BLOCK,198800,away,visible,england_efl/2019-2020/2019-10-01 - Middlesbrou...
3,458,458,1 - 06:58,BALL PLAYER BLOCK,418320,home,visible,england_efl/2019-2020/2019-10-01 - Middlesbrou...
4,459,459,1 - 06:58,SHOT,418080,away,visible,england_efl/2019-2020/2019-10-01 - Middlesbrou...


## Separate Name of Each Game Per Row

In [5]:
df["match_name"] = df.url_local.str.split("/", expand=True)[2]

## Make Label's Categorical -> Number

In [6]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()

In [7]:
encoded_labels = label_encoder.fit_transform(df.label)
df["encoded_label"] =  encoded_labels

In [8]:
map_stoi = dict(zip(df.label, encoded_labels))
map_itos = {v: k for k, v in map_stoi.items()}

In [9]:
map_stoi

{'BALL PLAYER BLOCK': 0,
 'CROSS': 1,
 'SHOT': 11,
 'PLAYER SUCCESSFUL TACKLE': 10,
 'GOAL': 4,
 'FREE KICK': 3,
 'THROW IN': 12,
 'OUT': 8,
 'HEADER': 5,
 'HIGH PASS': 6,
 'DRIVE': 2,
 'PASS': 9,
 'NO_EVENT': 7}

# Upload Image Data


In [10]:
!pip install kaggle



In [10]:
from google.colab import userdata
KAGGLE_USERNAME = userdata.get('KAGGLE_USERNAME')
KAGGLE_KEY = userdata.get('KAGGLE_KEY')

In [24]:
import os
os.environ["KAGGLE_CONFIG_DIR"] = "/content"

In [25]:
!kaggle datasets download -d radroof22/soccernet-ball-action-spotting

Downloading soccernet-ball-action-spotting.zip to /content
100% 2.66G/2.66G [02:17<00:00, 21.5MB/s]
100% 2.66G/2.66G [02:17<00:00, 20.9MB/s]


In [None]:
!unzip soccernet-ball-action-spotting.zip -d ./data

# Data Loader

In [10]:
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
import glob
import torch
import numpy as np
from PIL import Image

## Loading of Images from Folder

In [19]:
class SoccerNetDataset(Dataset):
  def __init__(self, root_dir, df, transform=None):
    self.root_dir = root_dir
    self.transform = transform
    self.datapoint_folder = list(df.match_name.str.cat(df.position.astype(str), sep="/"))
    self.class_numeric = list(df.encoded_label)
    self.number_of_classes = max(self.class_numeric) + 1

  def __len__(self):
    return len(self.class_numeric)

  def __getitem__(self, idx):
    folder_path, encoded_class = self.datapoint_folder[idx], self.class_numeric[idx]

    # get the multiple images for the datapoint
    png_files = glob.glob(self.root_dir + folder_path + "/*.png")

    input_frames = np.array([np.array(Image.open(p).convert("RGB")).reshape((3, 224, 398)) / 255 for p in png_files])

    return input_frames, torch.eye(self.number_of_classes)[encoded_class]


## Weighted Sampling of Images

In [20]:
num_classes = df.encoded_label.max() + 1
class_weights = torch.ones(num_classes) / num_classes
equal_sampler = WeightedRandomSampler(class_weights, len(df))
soccernet_dataset = SoccerNetDataset("./data/transformer_data/transformer_data/", df)


In [21]:
BATCH_SIZE = 32
SAMPLER = equal_sampler
dataloader = DataLoader(soccernet_dataset, batch_size = BATCH_SIZE, sampler=SAMPLER)

# Training Loops

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [23]:
from model_torch import Model
import torch.nn as nn
from google.colab import files


EPOCHS = 100
MODEL_NAME = "cnn_single_frame_equal"

model = Model(input_shape=(3, 224, 398)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [24]:
for e in range(EPOCHS):
  model.train()
  running_loss = 0.0
  for input_batches, targets in dataloader:
    optimizer.zero_grad()
    input_batches, targets = input_batches.to(device), targets.to(device)

    single_image = input_batches[:, 3, :, :, :].reshape((-1, 3, 224, 398))

    outputs = model(single_image.float())
    loss = criterion(outputs, targets)

    loss.backward()
    optimizer.step()

    running_loss += loss.item() * single_image.size(0)

  epoch_loss = running_loss / len(soccernet_dataset)

  if e % 10 == 0:
    model_weights_filename = "./" + MODEL_NAME + f"_{e}.pth"
    torch.save(model.state_dict(), model_weights_filename)
    files.download(model_weights_filename)
    print(f'Epoch [{e+1}/{EPOCHS}], Loss: {epoch_loss:.4f}')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Epoch [1/100], Loss: 1.9030


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Epoch [11/100], Loss: 1.6898


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Epoch [21/100], Loss: 1.6893


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Epoch [31/100], Loss: 1.6891


KeyboardInterrupt: 

In [None]:
for input_batches, labels in dataloader:
  print(input_batches.shape)
  print(labels.shape)

In [None]:
import torch
import gc
torch.cuda.empty_cache()
gc.collect()

In [20]:
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5).softmax(dim=1)
output = loss(input, target)
print(input.shape)
print(target.shape)
print(input)
print(target)
output.backward()

torch.Size([3, 5])
torch.Size([3, 5])
tensor([[-0.3674,  1.3614, -1.7220,  0.8209, -0.6634],
        [ 0.0427, -0.3253,  0.0067,  0.8346, -1.0921],
        [-0.9194, -0.5925, -0.3478, -1.6055,  0.8943]], requires_grad=True)
tensor([[0.0669, 0.1722, 0.3738, 0.3465, 0.0405],
        [0.0078, 0.0575, 0.0611, 0.0127, 0.8609],
        [0.2897, 0.2269, 0.0546, 0.4052, 0.0237]])


In [None]:
# 1.689 Loss after 3 epochs