In [108]:
import os
import cv2
import torch
import numpy as np
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import Dataset

# from roboflow import Roboflow
# rf = Roboflow(api_key="qNiuOYka1Vr6TV3WENNV")
# project = rf.workspace("the-university-of-manchester-8ax5n").project("football-player-detection-demo")
# dataset = project.version(1).download("yolov5")

In [109]:
class PlayerTrackingModel(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(PlayerTrackingModel, self).__init__()
        # Define your model architecture here
        self.conv1 = nn.Conv2d(input_channels, 64, 3, padding=1)
        self.fc1 = nn.Linear(64 * 224 * 224, num_classes)  # Adjust the input size as needed

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        return x


In [110]:
class CustomObjectDetectionDataset(Dataset):
    def __init__(self, data_dir, split='train', transform=None):
        self.data_dir = data_dir
        self.image_dir = os.path.join(self.data_dir, 'images')
        self.label_dir = os.path.join(self.data_dir, 'labels')

        self.image_files = sorted([file for file in os.listdir(self.image_dir) if file.endswith('.jpg')])
        self.transform = transform
        self.class_ids = set()  # Create a set to store unique class IDs

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_name = self.image_files[idx]
        image_path = os.path.join(self.image_dir, image_name)
        label_path = os.path.join(self.label_dir, image_name.replace('.jpg', '.txt'))

        image = cv2.imread(image_path)
        labels = self.load_labels(label_path)

        if self.transform is not None:
            image = self.transform(image)

        return image, labels

    def load_labels(self, label_path):
        labels = []
        with open(label_path, 'r') as file:
            lines = file.read().splitlines()
            for line in lines:
                values = line.split()
                if len(values) == 5:
                    class_id = int(values[0])  # Extract the class ID as an integer
                    x_min, y_min, width, height = map(float, values[1:])
                    x_max = x_min + width
                    y_max = y_min + height
                    labels.append([x_min, y_min, x_max, y_max, class_id])
                    self.class_ids.add(class_id)  # Add the class ID to the set
        return torch.tensor(labels, dtype=torch.float32)


    def get_num_classes(self):
        return len(self.class_ids)

In [111]:
def preprocess_frame(frame, target_size=(224, 224)):
    # Resize the frame to the target size
    frame = cv2.resize(frame, target_size)
    
    # Normalize the pixel values to the range [0, 1]
    frame = frame.astype(np.float32) / 255.0
    
    return frame

In [112]:
# Define your model
model = PlayerTrackingModel(input_channels=3, num_classes=6)  # Update input_channels and num_classes
# model = YOLO(num_classes=5, pretrained=True)  # Use the appropriate number of classes (80 for COCO)
print(model)

optimizer = optim.SGD(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

PlayerTrackingModel(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=3211264, out_features=6, bias=True)
)


In [113]:
# Specify the directory structure
base_data_dir = 'C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1'  # Update to the actual path
train_data_dir = 'C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/train'
valid_data_dir = 'C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/valid'
test_data_dir = 'C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/test'

print(train_data_dir)
print(valid_data_dir)
print(test_data_dir)

C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/train
C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/valid
C:/Users/User/Desktop/UoM 3rd Year Project/Code/Football-Player-Detection-Demo-1/test


In [114]:
# Define data loaders for training, validation, and test sets
train_dataset = CustomObjectDetectionDataset(train_data_dir)
print("Number of Classes:", train_dataset.get_num_classes())

valid_dataset = CustomObjectDetectionDataset(valid_data_dir)
test_dataset = CustomObjectDetectionDataset(test_data_dir)

print(train_dataset)
print(valid_dataset)
print(test_dataset)

Number of Classes: 0
<__main__.CustomObjectDetectionDataset object at 0x0000028C1C0004D0>
<__main__.CustomObjectDetectionDataset object at 0x0000028C2144C190>
<__main__.CustomObjectDetectionDataset object at 0x0000028C1D8EAB90>


In [115]:
# Training and validation loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for frame, annotations in train_dataset:
        optimizer.zero_grad()
        frame = preprocess_frame(frame)
        frame_tensor = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float()
        annotations = annotations.float() 
        with torch.no_grad():
            expected_output = model(frame_tensor)
        output = model(frame_tensor)
        loss = criterion(output, expected_output)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    # Validation loop
    model.eval()
    with torch.no_grad():
        valid_loss = 0.0
        for frame, annotations in valid_dataset:
            frame = preprocess_frame(frame)
            frame_tensor = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float()
            annotations = annotations.float() 
            output = model(frame_tensor)
            loss = criterion(output, annotations)
            valid_loss += loss.item()
    
    print(f'Epoch {epoch + 1}, Train Loss: {running_loss / len(train_dataset)}, Validation Loss: {valid_loss / len(valid_dataset)}')


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (6) must match the size of tensor b (5) at non-singleton dimension 1

In [None]:
# Testing loop
model.eval()
with torch.no_grad():
    test_loss = 0.0
    for frame, annotations in test_dataset:
        frame = preprocess_frame(frame)
        frame_tensor = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float()
        annotations = annotations.float() 
        output = model(frame_tensor)
        loss = criterion(output, annotations)
        test_loss += loss.item()
    print(f'Test Loss: {test_loss / len(test_dataset)}')

Test Loss: 1.3685657183329265


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
# Save the trained model
torch.save(model.state_dict(), 'your_trained_model_weights.pth')

In [None]:
# Import necessary libraries
import cv2
import torch
import numpy as np

# Load your trained model and define preprocessing function
# (Assuming you have a PlayerTrackingModel and preprocess_frame function)
model = PlayerTrackingModel(input_channels=3, num_classes=5)
model.load_state_dict(torch.load('your_trained_model_weights.pth'))
model.eval()

def preprocess_frame(frame, target_size=(224, 224)):
    # Resize the frame to the target size
    frame = cv2.resize(frame, target_size)
    
    # Normalize the pixel values to the range [0, 1]
    frame = frame.astype(np.float32) / 255.0
    
    return frame

# Define a function for object detection and tracking on a single frame
def detect_and_track(frame):
    frame = preprocess_frame(frame)
    frame_tensor = torch.from_numpy(frame).permute(2, 0, 1).unsqueeze(0).float()
    with torch.no_grad():
        output = model(frame_tensor)
    
    # Assuming the model's output is a list of tensors, where each tensor represents detections for one class
    # Modify this part according to your model's output format
    all_boxes = output  # This should be a list of tensors
    confidence_threshold = 0.5  # You can adjust the confidence threshold
    
    # Initialize lists to store bounding boxes and class IDs
    all_bounding_boxes = []
    all_class_ids = []
    
    for boxes in all_boxes:
        # Extract bounding box coordinates
        bounding_boxes = boxes[..., :4]
        
        # Extract class probabilities
        class_probs = boxes[..., 4:]
        
        # Filter boxes based on confidence threshold
        mask = class_probs.max(-1).values > confidence_threshold
        filtered_boxes = bounding_boxes[mask]
        filtered_class_probs = class_probs[mask]
        
        all_bounding_boxes.append(filtered_boxes)
        all_class_ids.append(filtered_class_probs.argmax(-1).int())
    
    # Draw bounding boxes and labels on the frame
    for boxes, class_ids in zip(all_bounding_boxes, all_class_ids):
        for i in range(boxes.shape[0]):
            x_min, y_min, x_max, y_max = boxes[i]
            class_id = class_ids[i].item()
            
            # Draw the bounding box
            cv2.rectangle(frame, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), 2)
            
            # Add label with the class name (replace this with your actual class labels)
            label = f"Class {class_id}"
            cv2.putText(frame, label, (int(x_min), int(y_min) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    
    return frame  # Modified frame with bounding boxes and labels

# Open the input video file
input_video_path = 'videos/input.mp4'
cap = cv2.VideoCapture(input_video_path)

# Get video properties
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
output_video_path = 'output_video.mp4'

# Define the codec and create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can change the codec as needed
out = cv2.VideoWriter(output_video_path, fourcc, 30, (frame_width, frame_height))

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Process the frame for object detection and tracking
    modified_frame = detect_and_track(frame)

    # Write the modified frame to the output video
    out.write(modified_frame)

# Release the VideoCapture and VideoWriter objects
cap.release()
out.release()


In [None]:
# !pip install roboflow

# from roboflow import Roboflow
# rf = Roboflow(api_key="qNiuOYka1Vr6TV3WENNV")
# project = rf.workspace("the-university-of-manchester-8ax5n").project("football-player-detection-demo")
# dataset = project.version(1).download("yolov5")

# curl -L "https://app.roboflow.com/ds/XCxReYRR3H?key=K8XiEXrZz5" &gt; roboflow.zip; unzip roboflow.zip; rm roboflow.zip

# https://app.roboflow.com/ds/XCxReYRR3H?key=K8XiEXrZz5