In [1]:
import os
import numpy as np
import torch
import subprocess
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tqdm import tqdm
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import shutil
import requests
from tqdm import tqdm
import zipfile

In [2]:
# Device Agnostic 
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [3]:
#Setting up FFMPEG PATH
FFMPEG_PATH = shutil.which("ffmpeg")

if FFMPEG_PATH is None:
    print("Error: ffmpeg not found")
else:
    print(f"ffmpeg located at: {FFMPEG_PATH}")


ffmpeg located at: /opt/homebrew/bin/ffmpeg


In [4]:
# Enter the URL of dataset and the folder name
url = "https://drive.usercontent.google.com/download?id=1yR1GONn37dwKg8jU-5Zi_rCDNmdC2k4L&export=download&authuser=0&confirm=t&uuid=7a4b81a1-ada3-4540-9c3d-fec60be47fa1&at=APZUnTU3AzOQ5iOSa7qQ6b-gRJGQ%3A1714239992461"
zip_path = "./cricketshot.zip"

In [5]:
def download_file_with_progress(url, save_path):
    if not os.path.exists(save_path):
        print("[INFO] File doesn't exist, downloading...")

        response = requests.get(url, stream=True)
        total_size = int(response.headers.get('content-length', 0))
        chunk_size = 1024  # Adjust as needed

        progress_bar = tqdm(total=total_size, unit='B', unit_scale=True)

        with open(save_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                file.write(chunk)
                progress_bar.update(len(chunk))

        progress_bar.close()

        if total_size != 0 and progress_bar.n != total_size:
            print("[INFO] Error downloading the file. Please check the URL.")
        else:
            print(f"[INFO] The file has been downloaded and saved as {save_path}")
    else:
        print(f"[INFO] File {save_path} exists.")


download_file_with_progress(url, zip_path)

[INFO] File doesn't exist, downloading...


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.41G/1.41G [03:28<00:00, 6.73MB/s]

[INFO] The file has been downloaded and saved as ./cricketshot.zip





In [7]:
# Unzip the folder
current_path = os.getcwd()
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(current_path)

print("[INFO]: Downloaded folder have been unzipped!")
os.chdir("./cricketshot")
current_path = os.getcwd()
print("Current working directory:", current_path)

[INFO]: Downloaded folder have been unzipped!
Current working directory: /Users/rohitkumar/Documents/fanplayiot_final/cricketshot


In [8]:
# Base path where your class folders are located
base_path = "./CricShot10dataset"

# Output directory to save frames
output_dir = "./frames"
os.makedirs(output_dir, exist_ok=True)

In [9]:
# Iterate over each class folder
for class_folder in os.listdir(base_path):
    class_folder_path = os.path.join(base_path, class_folder)
    
    # Ensure it's a directory
    if os.path.isdir(class_folder_path):
        # Create an output folder for each class
        class_output_dir = os.path.join(output_dir, class_folder)
        os.makedirs(class_output_dir, exist_ok=True)

        # Iterate over each video file in the class folder
        for video_file in os.listdir(class_folder_path):
            video_path = os.path.join(class_folder_path, video_file)
            video_name = os.path.splitext(video_file)[0]
            output_pattern = os.path.join(class_output_dir, f"{video_name}_%04d.jpg")
            
            # Command to extract frames at 1 fps
            ffmpeg_command = f"{FFMPEG_PATH} -i {video_path} -vf fps=1 {output_pattern} -loglevel quiet"
            
            # Execute the command
            try:
                subprocess.run(ffmpeg_command, shell=True, check=True)
                print(f"Processed {video_file} in {class_folder}")
            except subprocess.CalledProcessError:
                print(f"Failed to process video: {video_file} in {class_folder}")


Processed lofted_0198.avi in lofted
Processed lofted_0173.avi in lofted
Processed lofted_0167.avi in lofted
Processed lofted_0007.avi in lofted
Processed lofted_0013.avi in lofted
Processed lofted_0012.avi in lofted
Processed lofted_0006.avi in lofted
Processed lofted_0166.avi in lofted
Processed lofted_0172.avi in lofted
Processed lofted_0164.avi in lofted
Processed lofted_0170.avi in lofted
Processed lofted_0010.avi in lofted
Processed lofted_0004.avi in lofted
Processed lofted_0038.avi in lofted
Processed lofted_0039.avi in lofted
Processed lofted_0005.avi in lofted
Processed lofted_0011.avi in lofted
Processed lofted_0171.avi in lofted
Processed lofted_0165.avi in lofted
Processed lofted_0161.avi in lofted
Processed lofted_0175.avi in lofted
Processed lofted_0029.avi in lofted
Processed lofted_0015.avi in lofted
Processed lofted_0001.avi in lofted
Processed lofted_0014.avi in lofted
Processed lofted_0028.avi in lofted
Processed lofted_0174.avi in lofted
Processed lofted_0148.avi in

In [10]:
# frames embeddings creation 

# Path to the model and processor directories
saved_model_path = './model'
saved_processor_path = './processor'

# Load the CLIP model and processor
processor = CLIPProcessor.from_pretrained(saved_processor_path)
clip_model = CLIPModel.from_pretrained(saved_model_path)
device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model.to(device)

def batch_process_images(image_paths, batch_size, processor, model, device):
    embeddings = []
    for i in range(0, len(image_paths), batch_size):
        batch_paths = image_paths[i:i + batch_size]
        batch_images = [Image.open(path).convert("RGB") for path in batch_paths]
        tokens = processor(
            text=None,
            images=batch_images,
            return_tensors="pt"
        ).to(device)
        batch_embeddings = model.get_image_features(**tokens)
        batch_embeddings = batch_embeddings.detach().cpu().numpy()
        embeddings.append(batch_embeddings)
    return np.concatenate(embeddings, axis=0)

# Path where the extracted frames are stored
main_folder = './frames'
output_folder = './frames_embeddings'
os.makedirs(output_folder, exist_ok=True)

subfolders = [f.path for f in os.scandir(main_folder) if f.is_dir()]
total_subfolders = len(subfolders)
processed_subfolders = 0

for subfolder in subfolders:
    subfolder_name = os.path.basename(subfolder)
    output_subfolder = os.path.join(output_folder, subfolder_name)
    os.makedirs(output_subfolder, exist_ok=True)
    image_files = [f for f in os.listdir(subfolder) if f.endswith(('.png', '.jpg', '.jpeg'))]
    
    if image_files:
        image_paths = [os.path.join(subfolder, f) for f in image_files]
        embeddings = batch_process_images(image_paths, batch_size=100, processor=processor, model=clip_model, device=device)

        # Save each embedding with a filename that reflects its original image
        for i, emb in enumerate(embeddings):
            original_file_name = image_files[i].rsplit('.', 1)[0]  # Remove extension
            output_path = os.path.join(output_subfolder, f'{original_file_name}_embedding.npy')
            np.save(output_path, emb)

    processed_subfolders += 1
    print(f"Processed {processed_subfolders}/{total_subfolders} subfolders.")

print("All subfolders processed.")


Processed 1/4 subfolders.
Processed 2/4 subfolders.
Processed 3/4 subfolders.
Processed 4/4 subfolders.
All subfolders processed.


In [11]:
# Load embeddings and their labels
def load_embeddings_and_labels(embeddings_folder):
    embeddings = []
    labels = []
    label_mapping = {}  # To convert class names to numerical labels
    current_label = 0

    for class_folder in sorted(os.listdir(embeddings_folder)):
        class_path = os.path.join(embeddings_folder, class_folder)
        if os.path.isdir(class_path):
            if class_folder not in label_mapping:
                label_mapping[class_folder] = current_label
                current_label += 1
            for emb_file in sorted(os.listdir(class_path)):
                if emb_file.endswith('_embedding.npy'):
                    emb_path = os.path.join(class_path, emb_file)
                    embeddings.append(np.load(emb_path))
                    labels.append(label_mapping[class_folder])

    embeddings = torch.tensor(embeddings, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.long)
    return embeddings, labels, label_mapping

In [12]:
# Define the LSTM neural network
class LSTMNetwork(nn.Module):
    def __init__(self, input_size=512, hidden_size=256, num_classes=4):
        super(LSTMNetwork, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=1, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])  # Use the output of the last time step
        return x

In [13]:
# Load data
embeddings_folder = './frames_embeddings'
os.makedirs(embeddings_folder, exist_ok=True)

embeddings, labels, class_label_mapping = load_embeddings_and_labels(embeddings_folder)

# Split data
dataset = TensorDataset(embeddings.unsqueeze(1), labels)  # Add an extra dimension for LSTM sequence length
train_size = int(0.7 * len(dataset))
val_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Model
model = LSTMNetwork(input_size=512, hidden_size=256, num_classes=len(class_label_mapping)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

  embeddings = torch.tensor(embeddings, dtype=torch.float32)


In [14]:
# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for data, target in tqdm(train_loader, desc=f'Epoch {epoch + 1}'):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch + 1}, Training Loss: {total_loss / len(train_loader)}")

    # Validation
    model.eval()
    total_correct = total = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            total_correct += (predicted == target).sum().item()
            total += target.size(0)

    val_accuracy = total_correct / total
    print(f"Validation Accuracy: {val_accuracy:.2f}")

# Test
model.eval()
total_correct = total = 0
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        _, predicted = torch.max(output, 1)
        total_correct += (predicted == target).sum().item()
        total += target.size(0)

test_accuracy = total_correct / total
print(f"Test Accuracy: {test_accuracy:.2f}")

#confusion matrix
all_labels = []
all_preds = []
for data, target in test_loader:
    data, target = data.to(device), target.to(device)
    output = model(data)
    _, predicted = torch.max(output, 1)
    all_labels.extend(target.tolist())
    all_preds.extend(predicted.tolist())

conf_mat = confusion_matrix(all_labels, all_preds)
print("Confusion Matrix:")
print(conf_mat)

from sklearn.metrics import classification_report
report = classification_report(all_labels, all_preds, target_names=list(class_label_mapping.keys()))
print("Classification Report:")
print(report)

Epoch 1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 52.04it/s]


Epoch 1, Training Loss: 1.148882295936346
Validation Accuracy: 0.67


Epoch 2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 76.01it/s]


Epoch 2, Training Loss: 0.7554335594177246
Validation Accuracy: 0.67


Epoch 3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 60.59it/s]


Epoch 3, Training Loss: 0.6354547329246998
Validation Accuracy: 0.70


Epoch 4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 34.80it/s]


Epoch 4, Training Loss: 0.5643118508160114
Validation Accuracy: 0.71


Epoch 5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 39.93it/s]


Epoch 5, Training Loss: 0.5163781829178333
Validation Accuracy: 0.74


Epoch 6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 30.89it/s]


Epoch 6, Training Loss: 0.44454110972583294
Validation Accuracy: 0.75


Epoch 7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 73.43it/s]


Epoch 7, Training Loss: 0.39107582066208124
Validation Accuracy: 0.78


Epoch 8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 68.62it/s]


Epoch 8, Training Loss: 0.39299500547349453
Validation Accuracy: 0.75


Epoch 9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 75.72it/s]


Epoch 9, Training Loss: 0.31345116812735796
Validation Accuracy: 0.80


Epoch 10: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 76.32it/s]


Epoch 10, Training Loss: 0.28965521790087223
Validation Accuracy: 0.75


Epoch 11: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 70.01it/s]


Epoch 11, Training Loss: 0.2615962466225028
Validation Accuracy: 0.79


Epoch 12: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 55.62it/s]


Epoch 12, Training Loss: 0.24786584917455912
Validation Accuracy: 0.79


Epoch 13: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 43.52it/s]


Epoch 13, Training Loss: 0.2222776673734188
Validation Accuracy: 0.78


Epoch 14: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 42.66it/s]


Epoch 14, Training Loss: 0.19434516178444028
Validation Accuracy: 0.75


Epoch 15: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 69.95it/s]


Epoch 15, Training Loss: 0.20066893054172397
Validation Accuracy: 0.77


Epoch 16: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 65.78it/s]


Epoch 16, Training Loss: 0.17452391795814037
Validation Accuracy: 0.79


Epoch 17: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 74.69it/s]


Epoch 17, Training Loss: 0.179461270570755
Validation Accuracy: 0.79


Epoch 18: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 31.03it/s]


Epoch 18, Training Loss: 0.15086807357147336
Validation Accuracy: 0.77


Epoch 19: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 53.80it/s]


Epoch 19, Training Loss: 0.13932127389125526
Validation Accuracy: 0.78


Epoch 20: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 74.31it/s]

Epoch 20, Training Loss: 0.1372630875557661
Validation Accuracy: 0.78
Test Accuracy: 0.87
Confusion Matrix:
[[58  1  0  0]
 [ 0 79  1  7]
 [ 0  2  7  3]
 [ 2 11  1 38]]
Classification Report:
              precision    recall  f1-score   support

     defense       0.97      0.98      0.97        59
      lofted       0.85      0.91      0.88        87
  square_cut       0.78      0.58      0.67        12
       sweep       0.79      0.73      0.76        52

    accuracy                           0.87       210
   macro avg       0.85      0.80      0.82       210
weighted avg       0.86      0.87      0.86       210






In [15]:
# Save model
filepath = "./cricket.pt"
torch.save(model.state_dict(), filepath)

In [16]:
# idx_to_class = {v:k for k,v in class_label_mapping.items()}
# FFMPEG_PATH = "/opt/homebrew/bin/ffmpeg"
# input_file = "/Users/rohitkumar/Documents/fanplayiot/demo/defense_0001.avi"
# frames_dir = "/Users/rohitkumar/Documents/fanplayiot/demo/frames/"
# os.makedirs(frames_dir, exist_ok=True)

# video_name = os.path.splitext(input_file.split("/")[-1])[-2]
# output_pattern = os.path.join(frames_dir, f"{video_name}_%04d.jpg")
# ffmpeg_command = f"{FFMPEG_PATH} -i {input_file} -vf fps=1 {output_pattern} -loglevel quiet"

# try:
#     subprocess.run(ffmpeg_command, shell=True, check=True)
#     print(f"Processed {input_file}.")
# except subprocess.CalledProcessError:
#     print(f"Failed to process video: {input_file}")

# image_paths = []
# for root, dirs, files in os.walk(frames_dir):
#     for file in files:
#         if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
#             image_paths.append(os.path.join(root, file))
            
# inference_images = [Image.open(path).convert("RGB") for path in image_paths]
# tokens = processor(text=None, images=inference_images, return_tensors="pt").to(device)
# inference_embeddings = clip_model.get_image_features(**tokens)
# with torch.no_grad():
#     output = model(inference_embeddings.unsqueeze(0))
#     idx = output.argmax()
#     print(idx_to_class[idx.item()])

# try:
#     # Attempt to delete the folder and its contents
#     shutil.rmtree(frames_dir)
#     print(f"Folder '{frames_dir}' and its contents have been deleted.")
# except Exception as e:
#     print(f"Error while deleting folder '{frames_dir}': {e}")