<a href="https://colab.research.google.com/github/khshohelrana/Thesis_Purpose_Model/blob/main/yolo_fromScartch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import DataLoader

# Function to load and preprocess videos
def load_videos(video_dir, frame_size=(416, 416), max_frames=70, fps=35):
    """Load every video under ``video_dir`` as a fixed-length grayscale frame stack.

    ``video_dir`` must contain one sub-directory per class; the sub-directory
    name becomes the label of every video stored inside it. Entries of
    ``video_dir`` that are not directories are ignored, and files that OpenCV
    cannot open are skipped with a warning instead of being turned into
    all-zero samples.

    Args:
        video_dir: Root directory holding one folder per class.
        frame_size: Target ``(width, height)`` handed to ``cv2.resize``.
        max_frames: Number of frames kept per video. Shorter videos are
            zero-padded at the end.
        fps: Target sampling rate. A video recorded at a higher rate is
            subsampled by keeping every ``frame_rate // fps``-th frame.

    Returns:
        ``(videos, labels)``. When at least one video was loaded, ``videos``
        is a ``uint8`` array of shape ``(num_videos, max_frames, height,
        width)`` and ``labels`` holds the class-folder name of each video.
        Both arrays are empty when no video was found.
    """
    import warnings

    width, height = frame_size
    video_data = []
    labels = []

    # Sorted traversal keeps the sample order (and therefore any seeded
    # train/test split) reproducible across file systems.
    for label in sorted(os.listdir(video_dir)):
        label_path = os.path.join(video_dir, label)
        if not os.path.isdir(label_path):
            continue

        for video_file in sorted(os.listdir(label_path)):
            video_path = os.path.join(label_path, video_file)
            frames = []
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    warnings.warn(f"Skipping unreadable video file: {video_path}")
                    continue

                frame_rate = cap.get(cv2.CAP_PROP_FPS)  # Original frame rate of video
                frame_interval = int(frame_rate // fps) if frame_rate > fps else 1

                # Count decoded frames ourselves; CAP_PROP_POS_FRAMES is not
                # reported reliably by every capture backend.
                frames_read = 0
                while len(frames) < max_frames:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frames_read += 1
                    if frames_read % frame_interval == 0:
                        frame = cv2.resize(frame, frame_size)
                        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
            finally:
                cap.release()

            # cv2.resize returns (height, width) arrays, so the zero padding
            # must use that order too or non-square sizes produce ragged data.
            while len(frames) < max_frames:
                frames.append(np.zeros((height, width), dtype=np.uint8))

            video_data.append(np.array(frames))
            labels.append(label)

    return np.array(video_data), np.array(labels)

# Load videos
video_dir = '/path/to/spin-ball'  # Replace with your dataset path
if not os.path.isdir(video_dir):
    # Fail early with an actionable message instead of a bare listdir error.
    raise FileNotFoundError(
        f"Dataset directory not found: {video_dir!r}. "
        "Point `video_dir` at the folder that holds one sub-folder per class."
    )
X, y = load_videos(video_dir, frame_size=(416, 416), max_frames=70, fps=35)
if len(X) == 0:
    raise ValueError(f"No videos were found under {video_dir!r}")
if X.ndim != 4:
    raise ValueError(
        f"Expected videos shaped (samples, frames, height, width), got {X.shape}"
    )

# Preprocess labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Normalize pixel values to [0, 1]. float32 is what the model consumes and
# needs half the memory of the float64 array that `X / 255.0` would create.
X = X.astype(np.float32)
X /= 255.0

# X is already laid out as [samples, frames, height, width]; the frame axis
# serves as the channel axis for PyTorch, so no reshape is required.

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Create a custom YOLO loss
class YoloLoss(nn.Module):
    """Classification plus bounding-box regression loss.

    Predictions carry ``num_classes`` class logits followed by four box
    values ``[x_center, y_center, width, height]`` on their last axis, i.e.
    they have shape ``(..., num_classes + 4)``.

    Targets come in one of two forms:

    * ``(..., 5)`` with ``[class_id, x_center, y_center, width, height]`` on
      the last axis: cross-entropy on the class plus MSE on the box.
    * ``(...)`` integer class ids only (one fewer dimension than the
      predictions): only the cross-entropy term is computed, because there is
      no box annotation to regress against.
    """

    # Trailing prediction channels that describe the bounding box.
    NUM_BOX_VALUES = 4

    def __init__(self):
        super().__init__()

    def forward(self, predictions, targets):
        """Return the scalar loss for ``predictions`` against ``targets``.

        Raises:
            ValueError: If ``predictions`` has no room for class logits or
                ``targets`` matches neither supported layout.
        """
        num_classes = predictions.shape[-1] - self.NUM_BOX_VALUES
        if num_classes < 1:
            raise ValueError(
                "predictions must have at least one class logit followed by "
                f"{self.NUM_BOX_VALUES} box values, got last dimension "
                f"{predictions.shape[-1]}"
            )

        class_logits = predictions[..., :num_classes]
        pred_boxes = predictions[..., num_classes:]

        if targets.dim() == predictions.dim() - 1:
            # Class-only supervision: no box annotation available.
            target_classes = targets
            target_boxes = None
        elif (
            targets.dim() == predictions.dim()
            and targets.shape[-1] == 1 + self.NUM_BOX_VALUES
        ):
            target_classes = targets[..., 0]
            target_boxes = targets[..., 1:]
        else:
            raise ValueError(
                f"Unsupported targets shape {tuple(targets.shape)} for "
                f"predictions shape {tuple(predictions.shape)}"
            )

        # Cross-entropy needs (N, C) logits and (N,) integer class indices;
        # flattening makes any number of leading batch/box axes work.
        class_loss = F.cross_entropy(
            class_logits.reshape(-1, num_classes),
            target_classes.reshape(-1).long(),
        )
        if target_boxes is None:
            return class_loss

        # Box loss (MSE for bbox coordinates)
        box_loss = F.mse_loss(pred_boxes, target_boxes)
        return class_loss + box_loss

# Define the YOLO model from scratch
class YOLOFromScratch(nn.Module):
    """Three-stage CNN that emits class logits and one bounding box per input.

    The output has shape ``(batch, num_classes + 4)``: the first
    ``num_classes`` values are class logits and the last four are the
    bounding-box values.

    Args:
        num_classes: Number of target classes.
        in_channels: Channels of the input tensor (for stacked video frames
            this is the number of frames). Defaults to 1.
        input_size: ``(height, width)`` of the input. Defaults to
            ``(416, 416)``.
    """

    def __init__(self, num_classes, in_channels=1, input_size=(416, 416)):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # The padded 3x3 convolutions keep the spatial size; each of the three
        # 2x2 max-pools halves it (rounding down), i.e. a total factor of 8.
        height, width = input_size
        flat_features = 256 * (height // 8) * (width // 8)

        # NOTE: at 416x416 this layer alone holds ~709M weights.
        self.fc1 = nn.Linear(flat_features, 1024)
        self.fc2 = nn.Linear(1024, num_classes + 4)  # num_classes + 4 (bounding box)

    def forward(self, x):
        """Map ``(batch, in_channels, height, width)`` to ``(batch, num_classes + 4)``."""
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.pool3(F.relu(self.conv3(x)))

        x = torch.flatten(x, 1)  # Flatten for fully connected layers
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Initialize model, optimizer, and loss function
# Size the classifier from the encoder, not from the training split: a class
# that only landed in the test split still owns an encoded label id.
num_classes = len(label_encoder.classes_)
model = YOLOFromScratch(
    num_classes=num_classes,
    in_channels=X_train.shape[1],        # frames are fed as channels
    input_size=tuple(X_train.shape[2:]),  # (height, width)
)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = YoloLoss()

# Convert data to PyTorch tensors. The arrays are already shaped
# (samples, frames, height, width), so no extra channel axis is added:
# Conv2d expects 4-D input.
X_train_torch = torch.as_tensor(X_train, dtype=torch.float32)
y_train_torch = torch.as_tensor(y_train, dtype=torch.long)

X_test_torch = torch.as_tensor(X_test, dtype=torch.float32)
y_test_torch = torch.as_tensor(y_test, dtype=torch.long)

# Create DataLoader for training and validation sets
train_dataset = torch.utils.data.TensorDataset(X_train_torch, y_train_torch)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

test_dataset = torch.utils.data.TensorDataset(X_test_torch, y_test_torch)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    # One full pass over the training set with gradient updates per batch.
    model.train()
    batch_losses = []
    for batch_inputs, batch_labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())

    # Report the mean per-batch loss for this epoch.
    running_loss = sum(batch_losses)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

# Evaluation
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # Only the first `num_classes` outputs are class logits; the last four
        # are bounding-box values and must not take part in the argmax.
        preds = outputs[:, :num_classes].argmax(dim=1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

# Pass the full label set explicitly so the matrix rows/columns and the
# report line up with `label_encoder.classes_` even when a class is missing
# from the test split or is never predicted.
class_ids = np.arange(len(label_encoder.classes_))

# Confusion Matrix
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Classification Report
print("Classification Report:\n")
print(classification_report(all_labels, all_preds, labels=class_ids, target_names=label_encoder.classes_))


FileNotFoundError: [Errno 2] No such file or directory: '/path/to/spin-ball'