# Preprocessing image data for ASL Training

The following code manages the retrieval, extraction and preprocessing of the images used to train and test the ASL translator. The process uses MediaPipe from Google to overlay a geometric skeleton on images of hands and generate coordinates for the landmarks on each hand. This data is then transformed and saved as a CSV file for later use in the ASL Translator Project.

## Retrieve the files from Kaggle

To retrieve the dataset from Kaggle, we can use curl. The code below demonstrates how we can download the archive directly from Kaggle and unzip it into a data directory in the content folder.

In [1]:
# Download the ASL alphabet dataset archive from Kaggle.
# -L follows HTTP redirects; -o saves the response to /content/archive.zip.
! curl -L -o /content/archive.zip https://www.kaggle.com/api/v1/datasets/download/grassknoted/asl-alphabet

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 1049M  100 1049M    0     0  74.2M      0  0:00:14  0:00:14 --:--:-- 60.3M


In [2]:
# Unzip the downloaded archive into /content/data.
# -q suppresses the per-file listing; -d sets the extraction directory.
! unzip -q /content/archive.zip -d /content/data

## Setup the Environment for processing the image data

In [3]:
# Install MediaPipe, which is imported as `mp` in the next cell.
! pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.1-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.18-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.1/36.1 MB[0m [31m33.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.1-py3-none-any.whl (32 kB)
Installing collected packages: sounddevice, mediapipe
Successfully installed mediapipe-0.10.18 sounddevice-0.5.1


In [4]:
import os

import mediapipe as mp # install mediapipe
import cv2 # install OpenCV library
import matplotlib.pyplot as plt
#from google.colab import drive

import csv
import copy
import argparse
import itertools

In [5]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
# Let's set up some constants we will be using from here on out.
# data_dir: directory tree that is walked for input images.
# csv_path: CSV file that the header row and landmark rows are appended to.
data_dir = '/content/data'
csv_path = '/content/drive/MyDrive/landmarks_v2.csv'

In [7]:
# Import the MediaPipe hands solution and its drawing helpers.
mp_hands = mp.solutions.hands
# NOTE(review): the two drawing helpers below are not referenced by any cell
# visible here; they may only be needed for visual debugging.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Build the hand detector used for every image in the dataset:
#   static_image_mode=True       -> per the MediaPipe docs, run detection on each
#                                   image independently instead of tracking frames
#   max_num_hands=1              -> report at most one hand per image
#   min_detection_confidence=0.35 -> detection confidence threshold
hands = mp_hands.Hands(static_image_mode=True, max_num_hands=1, min_detection_confidence=0.35)

In [8]:
# Define the function to get the landmarks
def calc_landmark_list(image, landmarks):
    """Convert relative landmark coordinates into integer pixel positions.

    Args:
        image: Object whose ``shape`` starts with ``(height, width)``.
        landmarks: Object exposing a ``landmark`` iterable; each item has
            ``x`` and ``y`` attributes that are multiplied by the image
            width and height respectively.

    Returns:
        A list of ``[x, y]`` integer pairs, one per landmark. Each value is
        capped at the last valid pixel index on its axis (``width - 1`` /
        ``height - 1``). The z coordinate is not used in this project.
    """
    height = image.shape[0]
    width = image.shape[1]
    last_col = width - 1
    last_row = height - 1

    return [
        [min(int(point.x * width), last_col), min(int(point.y * height), last_row)]
        for point in landmarks.landmark
    ]

In [9]:
# Preprocessing of landmarks. Basically normalization, min-max
# My min max normalization
def pre_process_landmark(landmark_list):
    """Min-max normalize landmark coordinates into a flat feature vector.

    The x and y axes are scaled independently, each to the range [0, 1]
    over the landmarks passed in, so the result does not depend on where
    the hand sits in the image or how large it appears.

    Args:
        landmark_list: Sequence of ``[x, y]`` coordinate pairs. The input
            is only read, never modified.

    Returns:
        A flat list ``[x0, y0, x1, y1, ...]`` of normalized floats. An
        empty input yields an empty list.
    """
    if len(landmark_list) == 0:
        return []

    xs = [point[0] for point in landmark_list]
    ys = [point[1] for point in landmark_list]

    def _min_max_scale(values):
        """Scale ``values`` to [0, 1]; a zero-width range maps to all 0.0."""
        low, high = min(values), max(values)
        span = high - low
        if span == 0:
            # Every value is identical (e.g. all landmarks share one pixel
            # column); dividing by the span would raise ZeroDivisionError.
            return [0.0 for _ in values]
        return [(value - low) / span for value in values]

    # Interleave the two axes back into x0, y0, x1, y1, ... order.
    return [
        coord
        for pair in zip(_min_max_scale(xs), _min_max_scale(ys))
        for coord in pair
    ]

In [10]:
def logging_csv(label, landmark_list,csv_path):
    """Append one sample to the CSV file at ``csv_path``.

    The row consists of every value in ``landmark_list`` followed by
    ``label`` as the last column. The file is opened in append mode, so
    it is created if missing and existing rows are kept.

    Args:
        label: Value written in the final column.
        landmark_list: Iterable of feature values written first.
        csv_path: Path of the CSV file to append to.
    """
    with open(csv_path, mode='a', newline="") as out_file:
        row = list(landmark_list)
        row.append(label)
        csv.writer(out_file).writerow(row)


## Generate the CSV file

In [11]:
# Column names: "1_X", "1_Y", ..., "21_X", "21_Y" (one X/Y pair per hand
# landmark, 21 landmarks in total) followed by the "Label" column.
header = [f"{index}_{axis}" for index in range(1, 22) for axis in ("X", "Y")]
header.append("Label")

# Write the header only when the CSV does not exist yet or is still empty.
# The file is opened in append mode, so writing unconditionally would insert
# a second header row into the data every time this cell is re-run.
if not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0:
    with open(csv_path, 'a', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)



In [18]:
# ---- now with the good part ----


# for img_path in os.listdir(os.path.join(data_dir, dir_)): # open the data directory
# Walk every file under data_dir, detect a hand in it, and append the
# normalized landmarks plus the label to the CSV.
for root, dirs, files in os.walk(data_dir):
  for img_path in files:
    # The label is the first character of the file name.
    # NOTE(review): multi-letter class names therefore collapse to their
    # first letter - confirm this is what the training code expects.
    label = img_path[0]

    img = cv2.imread(os.path.join(root, img_path))
    if img is None:
      # cv2.imread returns None instead of raising when a file cannot be
      # decoded (non-image or corrupt file); skip it rather than letting
      # cv2.cvtColor crash the whole run.
      continue

    # OpenCV loads images as BGR; convert to RGB before running MediaPipe.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    result = hands.process(img_rgb) # get landmarks
    if not result.multi_hand_landmarks:
      # No hand was detected in this image, so there is nothing to log.
      continue

    for hand_landmarks in result.multi_hand_landmarks:
      landmark_list = calc_landmark_list(img_rgb, hand_landmarks)
      pre_processed_landmark = pre_process_landmark(landmark_list) # preprocess landmarks
      logging_csv(label, pre_processed_landmark, csv_path) # write a csv