In [1]:
#Most of the data engineering is not there, these are just some of the main functions used
#import the necessary libraries.
#utilizes mediapipe so it needs to be installed in case it is not. Requires a somewhat updated version of python (3.7 to 3.10)
import cv2
import os
import numpy as np
import mediapipe as mp

import csv

In [2]:
#function to draw landmarks on the images
#takes in input the rgb image and the detected hand landmarks, and outputs the modified image

from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

MARGIN = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

def draw_landmarks_on_image(rgb_image, detection_result):
  hand_landmarks_list = detection_result.hand_landmarks
  handedness_list = detection_result.handedness
  annotated_image = np.copy(rgb_image)

  # Loop through the detected hands to visualize.
  for idx in range(len(hand_landmarks_list)):
    hand_landmarks = hand_landmarks_list[idx]
    handedness = handedness_list[idx]

    # Draw the hand landmarks.
    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    hand_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
    ])
    solutions.drawing_utils.draw_landmarks(
      annotated_image,
      hand_landmarks_proto,
      solutions.hands.HAND_CONNECTIONS,
      solutions.drawing_styles.get_default_hand_landmarks_style(),
      solutions.drawing_styles.get_default_hand_connections_style())

    # Get the top left corner of the detected hand's bounding box.
    height, width, _ = annotated_image.shape
    x_coordinates = [landmark.x for landmark in hand_landmarks]
    y_coordinates = [landmark.y for landmark in hand_landmarks]
    text_x = int(min(x_coordinates) * width)
    text_y = int(min(y_coordinates) * height) - MARGIN

    # Draw handedness (left or right hand) on the image.
    cv2.putText(annotated_image, f"{handedness[0].category_name}",
                (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

  return annotated_image

In [3]:
#Need to have the mediapipe library installed
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

In [4]:
#downloads the mediapipe hand landmarker detector i will be using
import requests

url = "https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task"
response = requests.get(url)

with open("hand_landmarker.task", "wb") as f:
    f.write(response.content)

print("File downloaded successfully.")

File downloaded successfully.


In [5]:
#Creates an Hand Landmarker object
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

In [6]:
#function to process a video and extract data from it. The video will be processed by the mediapipe hand landmarker 
#the data extracted will be the positions of the hand landmarks, and will be wrote in a csv file. The data format is the following:
# [0 (right hand) or 1 (left hand), x_1,...,x_n, y_1,...,y_n, z_1...,z_n, y].
# where x_k, y_k, z_k are the coordinates of the k-th hand landmark detected, and y is the label i assigned to every gesture i am interested in recognizing.
#More info in the readme


#Inputs: path of the video, and the name of the output file it will write the dataset on
def video_to_data(video_path,filename):
# Verify the video path exists
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"The video path {video_path} does not exist.")

# Load the video
    #the file names of the videos specify the label: it's the last number before .mp4
    label = video_path[-5]
    cap = cv2.VideoCapture(video_path)
    mani=0
    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Applico il detector di mediapipe e ottengo i risultati
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
        detection_result = detector.detect(image)

        hand_landmarks_list = detection_result.hand_landmarks
        handedness_list = detection_result.handedness
        #extracts the data and places it into lists
        for idx in range(len(hand_landmarks_list)):
            hand_landmarks = hand_landmarks_list[idx]
            handedness = handedness_list[idx][0].index
            #the following command ignores the left hand
            #if handedness == 1:
                #break
            x_coordinates = [landmark.x for landmark in hand_landmarks]
            y_coordinates = [landmark.y for landmark in hand_landmarks]
            z_coordinates = [landmark.z for landmark in hand_landmarks]
            mani+=1
            
            #writes the obtained lists on the csv file
            with open(filename, mode='a', newline='') as file:
                writer = csv.writer(file)
                for idx in range(len(hand_landmarks_list)):
                # Write handedness, vector and label in a row
                    writer.writerow([handedness] + x_coordinates + y_coordinates + z_coordinates + [label])
            

            
        
        frame_count += 1

    cap.release()
    cv2.destroyAllWindows()
    print("finito")
    
    #Check the results
    #print(mani)
    #print(frame_count)


In [8]:
#Repeats the last function on multiple files in a folder, to create a database
def videos_to_data(folder_path, output_csv):
    video_files = [f for f in os.listdir(folder_path) if f.endswith('.mp4')]  # List of all .mp4 videos
    
    for video_file in video_files:
        video_path = os.path.join(folder_path, video_file)
        video_to_data(video_path, output_csv)

In [27]:
#Used this to generate the database
videos_to_data('D:/progetto_video/video_ultimi', 'D:\progetto_video\dataset\dataset_tutto2.csv')

  videos_to_data('D:/progetto_video/video_ultimi', 'D:\progetto_video\dataset\dataset_tutto2.csv')


finito
finito
finito


In [18]:
#creates the list of all the labels to check the dataset distribution
import csv
with open('D:\progetto_video\dataset\dataset_nuovo_nolabel.csv', 'r') as file:
    reader = csv.reader(file)
    lista = []
    for row in reader:
        # Each row is a list of values (strings)
        lista.append(row[-1])


  with open('D:\progetto_video\dataset\dataset_nuovo_nolabel.csv', 'r') as file:


In [21]:
#counts the distribution of each label in the dataset
import numpy as np
lista = [int(i) for i in lista]
lista = np.array(lista)
labels = []
for i in range(1,8):
    labels.append(np.sum(lista==i))

In [22]:
print(labels)

[2248, 2278, 2780, 2776, 2652, 2989, 2034]


In [9]:
#various code used to edit the dataset, as removing one label and each of its data and changing one label to another.
#keeping this just in case

import pandas as pd

# Load the CSV file
df = pd.read_csv('D:\progetto_video\dataset\dataset.csv')

# Define the label you want to remove and the label to change
label_to_remove = '7'
old_label = 8
new_label = 7

# Remove rows where the last column matches the label_to_remove
df_cleaned = df[df.iloc[:, -1] != label_to_remove]

# Replace the old label with the new label in the last column
df_cleaned.iloc[:, -1] = df_cleaned.iloc[:, -1].replace(old_label, new_label)

# Overwrite the original file with the cleaned and updated data
df_cleaned.to_csv('D:\progetto_video\dataset\dataset.csv', index=False)

print("Rows with the label removed and label 8 changed to 7 successfully.")


  df = pd.read_csv('D:\progetto_video\dataset\dataset.csv')
  df_cleaned.to_csv('D:\progetto_video\dataset\dataset.csv', index=False)


Rows with the label removed and label 8 changed to 7 successfully.


In [21]:
#again, removes some label

df = pd.read_csv('D:\progetto_video\dataset\dataset.csv')
df_cleaned = df[df.iloc[:, -1] != label_to_remove]
df_cleaned.to_csv('D:\progetto_video\dataset\dataset.csv', index=False)

  df = pd.read_csv('D:\progetto_video\dataset\dataset.csv')
  df_cleaned.to_csv('D:\progetto_video\dataset\dataset.csv', index=False)


In [26]:
#function to merge two csv files

import csv

def append_csv(file1, file2, output_file):
    # Open the first file for reading
    with open(file1, 'r', newline='') as f1:
        reader1 = list(csv.reader(f1))
        
        # Open the second file for reading
        with open(file2, 'r', newline='') as f2:
            reader2 = list(csv.reader(f2))
            
            # Check if both CSV files have the same number of columns
            if len(reader1[0]) != len(reader2[0]):
                print("Error: The two CSV files have a different number of columns.")
                return
            
            # Append the rows of the second file to the first file
            merged_data = reader1 + reader2
            
            # Write the merged data to a new CSV file
            with open(output_file, 'w', newline='') as out_file:
                writer = csv.writer(out_file)
                writer.writerows(merged_data)
    
    print(f"Files merged successfully into {output_file}")

# Example usage
file1 = 'D:\progetto_video\dataset\dataset_todo.csv'
file2 = 'D:\progetto_video\dataset\dataset_webcam.csv'
output_file = 'D:\progetto_video\dataset\dataset_tutto.csv'
append_csv(file1, file2, output_file)


  file1 = 'D:\progetto_video\dataset\dataset_todo.csv'
  file2 = 'D:\progetto_video\dataset\dataset_webcam.csv'
  output_file = 'D:\progetto_video\dataset\dataset_tutto.csv'


Files merged successfully into D:\progetto_video\dataset\dataset_tutto.csv
