In [15]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Build torchvision's Faster R-CNN (ResNet-50 backbone with FPN), requesting the pretrained weights.
model = fasterrcnn_resnet50_fpn(pretrained=True)
# Switch to evaluation mode; as the cell's last expression, the returned module is echoed as the cell output.
model.eval()


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [16]:
import cv2

# Path of the input clip, relative to the notebook's working directory.
video_path = 'downloaded_video.mp4'
cap = cv2.VideoCapture(video_path)

# Only reports the failure; nothing is raised and `cap` stays bound either way.
if not cap.isOpened():
    print("Error: Could not open video.")


In [None]:
from torchvision.transforms import functional as F
import cv2
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Choose the compute device: CUDA when torch reports it as available, CPU otherwise
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the pretrained detector, move its parameters to that device and
# put it in evaluation mode (both calls operate on the module in place)
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.to(device)
model.eval()

# Define a function to detect objects in a single frame
def detect_objects(frame, model, threshold=0.7):
    """Run the detector on one video frame and keep the confident detections.

    Args:
        frame: Image array in OpenCV's BGR channel order, as returned by
            ``cv2.VideoCapture.read()``.
        model: Detection model invoked as ``model([tensor])`` that returns, per
            image, a dict with ``'boxes'``, ``'labels'`` and ``'scores'`` tensors.
        threshold: Detections whose score is not strictly greater than this
            value are discarded. Defaults to 0.7.

    Returns:
        A list of ``(box, label, score)`` tuples, where ``box`` is a NumPy array
        ``[x1, y1, x2, y2]``, ``label`` is an int class ID and ``score`` a float.
        The list is empty when nothing passes the threshold.
    """
    # OpenCV decodes frames as BGR, whereas the torchvision detector normalises
    # its input with RGB ImageNet statistics, so swap the channel order first.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_tensor = F.to_tensor(rgb_frame).to(device)

    # Inference only: avoid building an autograd graph for every frame.
    with torch.no_grad():
        detections = model([frame_tensor])[0]

    # Filter all detections at once instead of calling .item() per element.
    keep = detections['scores'] > threshold
    boxes = detections['boxes'][keep].cpu().numpy()
    labels = detections['labels'][keep].tolist()
    scores = detections['scores'][keep].tolist()
    return list(zip(boxes, labels, scores))

# Load video
video_path = 'downloaded_video.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook exit() shuts the kernel down,
    # whereas an exception just stops this cell and reports why.
    raise RuntimeError("Error: Could not open video.")

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Keep the frame rate as a float (e.g. 29.97) so the output plays at the source speed;
# fall back to 30 fps when the container does not report a usable value.
fps = cap.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    fps = 30.0
output_path = 'tracked_output_task1.mp4'

# Define VideoWriter to save the output video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Classes to draw, keyed by the class ID returned by the detector:
# class ID -> (display name, BGR colour). 1 = person (blue), 37 = sports ball (green).
tracked_classes = {
    1: ("Person", (255, 0, 0)),
    37: ("Ball", (0, 255, 0)),
}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Detect objects in the frame
        detected_objects = detect_objects(frame, model)

        # Draw bounding boxes and labels on the frame
        for box, label, score in detected_objects:
            if label not in tracked_classes:
                continue
            label_name, color = tracked_classes[label]
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)  # Draw box
            # Keep the caption inside the image when the box touches the top edge.
            text_y = max(y1 - 10, 15)
            cv2.putText(frame, f"{label_name}: {score:.2f}", (x1, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Write the processed frame to the output video
        out.write(frame)

        # Display the frame (optional)
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if detection or drawing fails part-way, so the
    # output file is finalised and the display window is closed.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"Video saved to {output_path}")


Video saved to tracked_output_task1_fixed.mp4


In [None]:
from filterpy.kalman import KalmanFilter
import numpy as np

# Define a Kalman filter for each object
def create_kalman_filter(initial_position=(0.0, 0.0), dt=1.0):
    """Build a constant-velocity Kalman filter for one tracked 2-D point.

    The state vector is ``[x, y, vx, vy]`` and the measurement is ``[x, y]``.

    Args:
        initial_position: ``(x, y)`` used as the starting position estimate.
            Defaults to the origin, which matches the previous behaviour.
        dt: Time step applied by one ``predict()`` call; the position advances
            by ``dt * velocity``. Defaults to 1 (one step per call).

    Returns:
        A configured ``filterpy.kalman.KalmanFilter`` with zero initial velocity.
    """
    kf = KalmanFilter(dim_x=4, dim_z=2)

    # Use float arrays throughout: an integer state vector would silently
    # truncate any fractional position written into it in place.
    start_x, start_y = initial_position
    kf.x = np.array([start_x, start_y, 0.0, 0.0], dtype=float)  # Initial state (position and velocity)
    kf.P *= 1000.  # Initial uncertainty
    kf.F = np.array([[1.0, 0.0, dt, 0.0],  # State transition matrix
                     [0.0, 1.0, 0.0, dt],
                     [0.0, 0.0, 1.0, 0.0],
                     [0.0, 0.0, 0.0, 1.0]])
    kf.H = np.array([[1.0, 0.0, 0.0, 0.0],  # Measurement matrix: only position is observed
                     [0.0, 1.0, 0.0, 0.0]])
    kf.R *= 10  # Measurement noise
    kf.Q = np.eye(4) * 0.1  # Process noise
    return kf

# Store Kalman filters for each object: maps an object ID string to the filter
# returned by create_kalman_filter(). Starts empty and is filled by the tracking loop.
kalman_filters = {}


In [7]:
# Largest distance (in pixels) between a detection centre and a filter's one-step
# prediction for the two to be treated as the same object. Tune for the footage.
MAX_MATCH_DISTANCE = 100.0


def _closest_filter_id(label, center, filters, taken_ids, max_distance=MAX_MATCH_DISTANCE):
    """Return the ID of the nearest unclaimed filter of class ``label``, or None.

    Filter IDs have the form ``"<label>-<index>"``. Distance is measured from
    ``center`` to the position each filter would predict for the next step.
    """
    prefix = f"{label}-"
    best_id, best_distance = None, max_distance
    for candidate_id, candidate in filters.items():
        if candidate_id in taken_ids or not candidate_id.startswith(prefix):
            continue
        # Non-mutating one-step prediction: F @ x, without touching the filter.
        expected = np.dot(candidate.F, candidate.x)
        distance = float(np.hypot(expected[0] - center[0], expected[1] - center[1]))
        if distance <= best_distance:
            best_id, best_distance = candidate_id, distance
    return best_id


# The capture used by the detection cell is released at the end of that cell,
# so open the video again instead of looping over a closed handle.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise RuntimeError("Error: Could not open video.")

# Start from an empty registry so re-running this cell is repeatable.
kalman_filters.clear()
next_object_index = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Detect objects in the frame
        detected_objects = detect_objects(frame, model)  # Returns boxes, labels, scores

        # Each filter may be assigned to at most one detection per frame.
        claimed_ids = set()

        # Process detected objects and apply Kalman Filter
        for box, label, score in detected_objects:
            if label not in (1, 37):  # 1: person, 37: sports ball (COCO class IDs)
                continue

            x1, y1, x2, y2 = map(int, box)
            center_x = (x1 + x2) / 2
            center_y = (y1 + y2) / 2

            # Associate the detection with an existing filter by proximity. Keying
            # on the exact centre would create a new filter for almost every
            # detection, so nothing would ever be tracked across frames.
            obj_id = _closest_filter_id(label, (center_x, center_y), kalman_filters, claimed_ids)
            if obj_id is None:
                obj_id = f"{label}-{next_object_index}"
                next_object_index += 1
                new_filter = create_kalman_filter()
                # Start at the detection rather than the origin; otherwise the first
                # update attributes the whole jump to velocity.
                new_filter.x = np.array([center_x, center_y, 0.0, 0.0])
                kalman_filters[obj_id] = new_filter
            claimed_ids.add(obj_id)

            kf = kalman_filters[obj_id]

            # Predict the next state (position and velocity)
            kf.predict()

            # Update the Kalman filter with the detected position
            kf.update(np.array([center_x, center_y]))

            # Get the filtered position
            predicted_x, predicted_y = kf.x[:2]

            # Draw bounding box and filtered position
            color = (255, 0, 0) if label == 1 else (0, 255, 255)  # Blue for person, Yellow for ball
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)  # Draw box
            # Show the full "<label>-<index>" ID so individual objects can be told apart.
            cv2.putText(frame, f"ID: {obj_id} - Score: {score:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Draw the filtered position of the object
            cv2.circle(frame, (int(predicted_x), int(predicted_y)), 5, (0, 255, 0), -1)

        # Display the frame
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()


In [13]:
import cv2
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from deep_sort_realtime.deepsort_tracker import DeepSort

# Load Faster R-CNN model (on the GPU when CUDA is available) in evaluation mode
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = fasterrcnn_resnet50_fpn(pretrained=True).to(device).eval()

# Load video
video_path = 'downloaded_video.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook exit() shuts the kernel down,
    # whereas an exception just stops this cell and reports why.
    raise RuntimeError("Error: Could not open video.")

# Get the original video properties
# Keep the frame rate as a float (e.g. 29.97) so the output plays at the source speed;
# fall back to 30 fps when the container does not report a usable value.
fps = cap.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    fps = 30.0
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize video writer to save the output
output_path = 'tracked_output_task2.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

# Initialize Deep SORT tracker
deepsort = DeepSort(max_age=5)

# Object detection function
def detect_objects(frame, model, threshold=0.7):
    """Detect objects in a single frame and return those above ``threshold``.

    Args:
        frame: Image array in OpenCV's BGR channel order, as returned by
            ``cv2.VideoCapture.read()``.
        model: Detection model invoked as ``model([tensor])`` that returns, per
            image, a dict with ``'boxes'``, ``'labels'`` and ``'scores'`` tensors.
        threshold: Minimum score (exclusive) a detection needs to be kept.
            Defaults to 0.7.

    Returns:
        A list of ``(box, label, score)`` tuples: ``box`` is a NumPy array
        ``[x1, y1, x2, y2]``, ``label`` an int class ID, ``score`` a float.
    """
    # cv2 yields BGR frames; the torchvision detector expects RGB input.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_tensor = F.to_tensor(rgb_frame).to(device)

    # No gradients are needed for inference, so skip autograd tracking.
    with torch.no_grad():
        detections = model([frame_tensor])[0]

    # One boolean mask replaces the per-element .item() calls.
    confident = detections['scores'] > threshold
    boxes = detections['boxes'][confident].cpu().numpy()
    labels = detections['labels'][confident].tolist()
    scores = detections['scores'][confident].tolist()
    return list(zip(boxes, labels, scores))

# Class IDs as returned by the torchvision COCO detector.
# Sports ball is 37 (the ID the earlier cells use); 33 is a different class.
PERSON_CLASS_ID = 1
SPORTS_BALL_CLASS_ID = 37

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Detect objects on the original frame
        detected_objects = detect_objects(frame, model)

        # Prepare bounding boxes for Deep SORT
        bboxes = []
        for box, label, score in detected_objects:
            if label not in (PERSON_CLASS_ID, SPORTS_BALL_CLASS_ID):
                continue
            x1, y1, x2, y2 = map(int, box)  # Ensure integers
            width, height = x2 - x1, y2 - y1
            if width <= 0 or height <= 0:
                continue  # Degenerate box after integer rounding
            # update_tracks() expects [left, top, width, height], not corner coordinates.
            bboxes.append(([x1, y1, width, height], score))

        # Update Deep SORT tracker
        outputs = deepsort.update_tracks(bboxes, frame=frame)

        # Draw bounding boxes and track IDs on the frame
        for track in outputs:
            if not track.is_confirmed():
                continue
            track_id = track.track_id
            x1, y1, x2, y2 = map(int, track.to_ltrb())
            cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(frame, f"Id ID: {track_id}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

        # Write the frame to the output video
        out.write(frame)

        # Display the frame
        cv2.imshow('Tracked Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if a frame fails, so the output file is finalised.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

print(f"Video saved to {output_path}")


Video saved to tracked_output_task2.mp4


# Hungarian Algorithm:

1. **Subtract the row minimum:** For each row of the matrix, find the smallest element and subtract it from all elements in the row.  
2. **Subtract the column minimum:** For each column, find the smallest element and subtract it from all elements in the column.  
3. **Cover all zeros with a minimum number of horizontal and vertical lines:** Find the smallest number of lines required to cover all zeros in the matrix.  
4. **Test for optimality:** If the minimum number of lines equals the size of the matrix, an optimal assignment can be made.  
5. **Adjust the matrix:** If not optimal, subtract the smallest uncovered value from all uncovered elements, and add it to all elements covered by two lines.  
6. **Repeat the process:** Continue this process until an optimal assignment is found.

**Mathematical Representation:**

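Let $C$ be the $n \times n$ cost matrix and $X$ the binary assignment matrix, where $X_{ij} = 1$ if row $i$ is assigned to column $j$ and $X_{ij} = 0$ otherwise. The goal is to minimise $\sum_{i,j} C_{ij} X_{ij}$ with exactly one assignment in every row and every column.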

1. Subtract the minimum of each row from all elements in the row:

$$
C'_{ij} = C_{ij} - \min_{k} C_{ik}
$$
2. Subtract the minimum of each column from all elements in the column:

$$
C''_{ij} = C'_{ij} - \min_{k} C'_{kj}
$$

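3. Cover the zeros with the minimum number of lines, adjusting the matrix as in step 5 above until as many lines are needed as the matrix has rows. The optimal assignment is then read off by selecting zeros so that each row and each column contains exactly one selected zero; these positions are where $X_{ij} = 1$.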



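# Kalman Filter Equations

Notation: the subscript is the time step being estimated and the superscript in parentheses is the last time step whose measurement has been used. So $\hat{x}_k^{(k-1)}$ is the predicted (prior) state at step $k$, and $\hat{x}_k^{(k)}$ is the updated (posterior) state after incorporating $z_k$.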

**State Prediction:**
$$
\hat{x}_k^{(k-1)} = F \hat{x}_{k-1}^{(k-1)} + B u_k
$$

**Covariance Prediction:**
$$
P_k^{(k-1)} = F P_{k-1}^{(k-1)} F^T + Q
$$

**Innovation (Measurement Residual):**
$$
y_k = z_k - H \hat{x}_k^{(k-1)}
$$

**Kalman Gain:**
$$
K_k = P_k^{(k-1)} H^T \left( H P_k^{(k-1)} H^T + R \right)^{-1}
$$

**State Update:**
$$
\hat{x}_k^{(k)} = \hat{x}_k^{(k-1)} + K_k y_k
$$

**Covariance Update:**
$$
P_k^{(k)} = \left( I - K_k H \right) P_k^{(k-1)}
$$
