<a href="https://colab.research.google.com/github/tracyuniverse/SkyScroll-Website/blob/main/Gesture_Recongition_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, models, datasets
from PIL import Image
from sklearn.model_selection import train_test_split
!git clone https://github.com/tarashakhurana/SkyScroll

Cloning into 'SkyScroll'...
remote: Enumerating objects: 204, done.[K
remote: Counting objects: 100% (94/94), done.[K
remote: Compressing objects: 100% (46/46), done.[K
remote: Total 204 (delta 66), reused 48 (delta 48), pack-reused 110 (from 1)[K
Receiving objects: 100% (204/204), 3.06 MiB | 9.45 MiB/s, done.
Resolving deltas: 100% (76/76), done.


In [3]:
!rm -rf SkyScroll/.git

In [4]:
# code to delete folders
import shutil
import os

output_base = '/content/GEI_Output'

# Delete the folder if it exists
if os.path.exists(output_base):
    shutil.rmtree(output_base)
    print(f"Deleted existing folder: {output_base}")
else:
    print(f"Folder does not exist: {output_base}")

Folder does not exist: /content/GEI_Output


In [5]:
def generate_gei_weighted(video_path, threshold=0.1,  max_brightness=0.85, min_brightness=0.2, decay_power=3.0 ):
    cap = cv2.VideoCapture(video_path)

    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}")
        return None

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))

    result_image = np.zeros((frame_height, frame_width), dtype=np.float32)
    has_written = np.zeros_like(result_image, dtype=bool)

    frame_idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = gray.astype(np.float32) / 255.0

        # Detect new hand presence
        mask = (gray > threshold) & (~has_written)

        # Sharper brightness decay (exponential-like)
        progress = frame_idx / max(total_frames - 1, 1)
        decay = (1.0 - progress) ** decay_power  # faster dropoff
        brightness = min_brightness + (max_brightness - min_brightness) * decay

        result_image[mask] = brightness
        has_written[mask] = True

        frame_idx += 1

    cap.release()

    # Display image
    result_display = (result_image * 255).astype(np.uint8)

    # plt.imshow(result_display, cmap='gray')
    # # plt.title("Fast Fading Motion Trail (Early Emphasis, Sharp Drop)")
    # plt.axis('off')
    # plt.show()

    return result_display


# Example usage:
generate_gei_weighted('/content/SkyScroll/zoom_in_1.mp4')


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [6]:
#Generate and process all files and sort them under Processed_videos

# Paths
input_folder = '/content/SkyScroll'
output_folder = '/content/GEI_Output'
os.makedirs(output_folder, exist_ok=True)

def generate_gei(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = gray.astype(np.float32)
        frames.append(gray)

    cap.release()
    if not frames:
        return None

    frames_np = np.stack(frames, axis=0)
    gei = np.mean(frames_np, axis=0)
    gei = cv2.normalize(gei, None, 0, 255, cv2.NORM_MINMAX)
    return gei.astype(np.uint8)

for filename in os.listdir(input_folder):
    file_path = os.path.join(input_folder, filename)

    if os.path.isfile(file_path) and filename.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')):
        parts = filename.split('_')
        class_label = '_'.join(parts[:2]) if len(parts) >= 2 else parts[0]

        class_folder = os.path.join(output_folder, class_label)
        os.makedirs(class_folder, exist_ok=True)

        gei_image = generate_gei_weighted(file_path)
        if gei_image is not None:
            base_name = os.path.splitext(filename)[0]
            output_filename = f"processed_{base_name}.png"
            output_path = os.path.join(class_folder, output_filename)
            cv2.imwrite(output_path, gei_image)
            # print(f"✅ Saved GEI: {output_path}")
        # else:
            # print(f"⚠️ Failed to generate GEI for {filename}")

In [7]:
# TRAINING CODE


#train_data = datasets.ImageFolder(root=train_dir, transform=transform)
#train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

#val_data = datasets.ImageFolder(val_dir, transform=transform)
#val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

In [8]:

all_image_paths = []
for root, _, files in os.walk('/content/GEI_Output/'):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            all_image_paths.append(os.path.join(root, file))

# Check if we have enough images
if len(all_image_paths) < 10:
    print(f"Warning: Only found {len(all_image_paths)} images. Need at least 10 for the split.")
else:
    # Perform the split
    # We use stratify=None here as we are splitting file paths, not necessarily ensuring class balance in the split
    train_paths, val_paths = train_test_split(all_image_paths, test_size=0.3, random_state=42)

    print(f"Total images found: {len(all_image_paths)}")
    print(f"Training images: {len(train_paths)}")
    print(f"Validation images: {len(val_paths)}")

    # To use these paths with DataLoader, you would typically:
    # 1. Create new temporary directories for train and validation data.
    # 2. Copy the respective files into those directories, maintaining the class structure.
    # 3. Point datasets.ImageFolder to these new train and validation directories.

    # Example of creating new directories and copying (Simplified):
    train_dir = '/content/GEI_Output_Split/train'
    val_dir = '/content/GEI_Output_Split/val'

    # Clean up previous splits if they exist
    if os.path.exists(train_dir):
        shutil.rmtree(train_dir)
    if os.path.exists(val_dir):
        shutil.rmtree(val_dir)

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    def copy_files(file_list, destination_base):
        for file_path in file_list:
            # Determine the original class label from the path
            relative_path = os.path.relpath(file_path, '/content/GEI_Output/')
            class_label = relative_path.split(os.sep)[0]
            dest_class_dir = os.path.join(destination_base, class_label)
            os.makedirs(dest_class_dir, exist_ok=True)
            shutil.copy(file_path, dest_class_dir)

    print("Copying training files...")
    copy_files(train_paths, train_dir)
    print("Copying validation files...")
    copy_files(val_paths, val_dir)
    print("File copying complete.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    transform = transforms.Compose([
      transforms.Resize((150, 150)),
      transforms.ToTensor(),
      transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # Now, create ImageFolder datasets using the new split directories
    train_data_split = datasets.ImageFolder(root=train_dir, transform=transform)
    val_data_split = datasets.ImageFolder(root=val_dir, transform=transform)

    # Create DataLoaders from the split datasets
    train_loader = DataLoader(train_data_split, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_data_split, batch_size=32, shuffle=False)

    print(f"Train DataLoader has {len(train_loader.dataset)} images.")
    print(f"Validation DataLoader has {len(val_loader.dataset)} images.")

    # You can now use train_loader and val_loader for training and validation


Total images found: 60
Training images: 42
Validation images: 18
Copying training files...
Copying validation files...
File copying complete.
Train DataLoader has 42 images.
Validation DataLoader has 18 images.


In [9]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # Changed the output size of the final linear layer to 3 for 3 classes
        self.fc1 = nn.Linear(32 * 75 * 75, 512)
        self.fc2 = nn.Linear(512, 6) # Output 6 classes

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = x.view(-1, 32 * 75 * 75)
        x = torch.relu(self.fc1(x))
        # Removed sigmoid activation for multi-class output with CrossEntropyLoss
        x = self.fc2(x)
        return x

In [10]:
#model = CNN()
num_classes = len(train_data_split.classes)
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)
# Changed criterion to CrossEntropyLoss for multi-class classification
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 101MB/s]


In [11]:
epochs = 17 # Define the number of epochs

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in val_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        print("Unique label values:", torch.unique(labels))
        print("Label range:", labels.min(), labels.max())

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(val_loader.dataset)
    print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}')

print('Finished Training')

Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 1/17, Loss: 2.1725
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 2/17, Loss: 0.1971
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 3/17, Loss: 0.0338
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 4/17, Loss: 0.0044
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 5/17, Loss: 0.0026
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 6/17, Loss: 0.0018
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 7/17, Loss: 0.0013
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 8/17, Loss: 0.0010
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tensor(4)
Epoch 9/17, Loss: 0.0007
Unique label values: tensor([0, 1, 2, 3, 4])
Label range: tensor(0) tenso

In [12]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the model on the validation data: {accuracy:.2f}%')

Accuracy of the model on the validation data: 94.44%


# Task
Generate GEIs for all video files in the "SkyScroll" directory and save them in separate folders for each class within that directory.

## Identify video files

### Subtask:
Identify all video files within the `SkyScroll` directory.


**Reasoning**:
Identify all video files within the `SkyScroll` directory by listing files and checking for video extensions.

