# Getting the CSV Files Without Any Empty Directory 

In [None]:
import pandas as pd

data = pd.read_csv('./20bn-jester-v1/annotations/jester-v1-validation.csv', delimiter=';', header=None)


classes_to_select = ['Swiping Left', 'Swiping Right', 'Swiping Down', 'Swiping Up', 'Doing other things']  
class_column = 1 
filtered_data = data[data[class_column].isin(classes_to_select)]


print(filtered_data)


print(filtered_data.shape)
filtered_data.to_csv('5_class_validation_data.csv',header=False,index=False,sep=';')

In [None]:
data = pd.read_csv("./CVND---Gesture-Recognition/20bn-jester-v1/annotations/5_class_training_data.csv")
print(data.shape)
print(data.head())

In [None]:
import os

root_folder = "./CVND---Gesture-Recognition/data2/20bn-jester-v1"

def check_empty_folders(root):
    empty_folders = []
    for dirpath, dirnames, filenames in os.walk(root):
        for dirname in dirnames:
            folder_path = os.path.join(dirpath, dirname)
            if not os.listdir(folder_path):
                print(f"The folder '{folder_path}' is empty.")
                empty_folders.append(dirname) 
            else:
               
                continue
    return empty_folders

empty_folders_list = check_empty_folders(root_folder)
print("Empty folders:", empty_folders_list)


In [None]:
import pickle

pickle_file = "empty_folders_list.pickle"
with open(pickle_file, 'wb') as f:
    pickle.dump(empty_folders_list, f)

print(f"Empty folders list saved to '{pickle_file}'.")

In [None]:
import pickle

pickle_file_path = "./CVND---Gesture-Recognition/empty_folders_list.pickle"
with open(pickle_file_path, 'rb') as f:
    empty_folders_list = pickle.load(f)
print("Empty folders list:", empty_folders_list)

In [None]:
empty_folders_list

In [None]:
csv_path = './20bn-jester-v1/annotations/5_class_training_data.csv'
df = pd.read_csv(csv_path, header=None,delimiter=';')
filtered_df = df[~df[0].isin(empty_folders_list)]
filtered_csv_path = '5_class_training_data_filtered.csv'
filtered_df.to_csv(filtered_csv_path, index=False, header=False,sep=';')

In [None]:
csv_path = './20bn-jester-v1/annotations/5_class_validation_data.csv'
df = pd.read_csv(csv_path, header=None,delimiter=';')
filtered_df = df[~df[0].isin(empty_folders_list)]
filtered_csv_path = '5_class_valid_data_filtered.csv'
filtered_df.to_csv(filtered_csv_path, index=False, header=False,sep=';')

In [None]:
import pandas as pd
import pickle

with open('your_empty_folders_list_path.pickle', 'rb') as file:
    empty_folders_list = pickle.load(file)
df = pd.read_csv('./20bn-jester-v1/annotations/5classvalid10000.csv', header=None, sep=';')
df[0] = df[0].astype(str) 
filtered_df = df[~df[0].isin(empty_folders_list)]
filtered_df.to_csv('filtered_val_data.csv', index=False, header=False, sep=';')


# Inferencing video on trained model:

In [7]:
!python3 test.py

In [8]:
import torch
import cv2
from torchvision.transforms import Compose, CenterCrop, ToTensor, Normalize
from model import GestureDetection
from PIL import Image
import os

image_folder = './CVND---Gesture-Recognition/data2/20bn-jester-v1/60971'

video_path = './output_video.mp4'

frame_width = 1920  
frame_height = 1080 
fps = 30  

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(video_path, fourcc, fps, (frame_width, frame_height))


files = [os.path.join(image_folder, f) for f in os.listdir(image_folder) if os.path.isfile(os.path.join(image_folder, f))]
files.sort()


for filename in files:
    img = cv2.imread(filename)
    if img is None:
        continue  
    img = cv2.resize(img, (frame_width, frame_height))  
    out.write(img) 

out.release()
cv2.destroyAllWindows()

print(f'Video saved at {video_path}')

label_mapping_path = './20bn-jester-v1/annotations/jester-v1-labels-quick-testing copy.csv'


def load_label_mapping(label_mapping_path):
    label_mapping = {}
    with open(label_mapping_path, 'r') as f:
        for i, line in enumerate(f):
            label = line.strip()
            label_mapping[i] = label
    return label_mapping

label_mapping = load_label_mapping(label_mapping_path)


def preprocess_frames(frames, transform):
    processed_frames = [transform(Image.fromarray(frame)) for frame in frames]
    frame_stack = torch.stack(processed_frames, dim=1)
    return frame_stack.unsqueeze(0)


model = GestureDetection(num_classes=7)

checkpoint = torch.load('./trainings/jpeg_model/7_classes/v17/checkpoint.pth.tar', map_location='cpu')
state_dict = checkpoint['state_dict'] if 'state_dict' in checkpoint else checkpoint


new_state_dict = {}
for k, v in state_dict.items():
    name = k[7:] if k.startswith('module.') else k 
    new_state_dict[name] = v

model.load_state_dict(new_state_dict, strict=False)
model.eval()


transform = Compose([
    CenterCrop(84),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def load_label_mapping(label_mapping_path):
    label_mapping = {}
    with open(label_mapping_path, 'r') as f:
        for i, line in enumerate(f):
            label = line.strip()
            label_mapping[i] = label
    return label_mapping

label_mapping = load_label_mapping('./20bn-jester-v1/annotations/jester-v1-labels-quick-testing copy.csv')

video_path = './output_video.mp4'
cap = cv2.VideoCapture(video_path)
frame_sequence = []
sequence_length = 16

fourcc = cv2.VideoWriter_fourcc(*'mp4v') 
out_video_path = './output_predictions_video.mp4'
out = cv2.VideoWriter(out_video_path, fourcc, cap.get(cv2.CAP_PROP_FPS), (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    if len(frame_sequence) < sequence_length:
        frame_sequence.append(frame_rgb)
        continue
    else:
        frame_sequence.pop(0)
        frame_sequence.append(frame_rgb)


    frame_processed = preprocess_frames(frame_sequence, transform)
    with torch.no_grad():
        outputs = model(frame_processed)
    _, predicted = torch.max(outputs.data, 1)
    predicted_gesture = label_mapping[predicted.item()]

    cv2.putText(frame, predicted_gesture, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    cv2.imshow('Video - Gesture Recognition', frame)
    out.write(frame) 

    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
print("Video with predictions saved to:", out_video_path)


OpenCV: FFMPEG: tag 0x44495658/'XVID' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Video saved at /Users/atharvamusale/Downloads/DL_Project/output_video.mp4




Video with predictions saved to: /Users/atharvamusale/Downloads/DL_Project/output_predictions_video.mp4
