In [None]:
# RUN ONCE
# Downloads the ASL alphabet dataset with kagglehub and moves it next to the
# notebook as "debashishsau_dataset", which is the location every later cell reads.

import os
import shutil

import kagglehub

DATASET_DIR = "debashishsau_dataset"

if os.path.exists(DATASET_DIR):
    # shutil.move() into an existing directory would place the download *inside*
    # it (debashishsau_dataset/<download>), breaking the paths used below.
    print("Dataset already present at:", DATASET_DIR)
else:
    # Download latest version
    path = kagglehub.dataset_download("debashishsau/aslamerican-sign-language-aplhabet-dataset")

    print("Path to dataset files:", path)

    # move the dataset to a new location
    shutil.move(path, DATASET_DIR)

In [None]:
# make sure you have CUDA available
import torch

# Whether a usable CUDA device is visible to PyTorch right now
print("CUDA available:", torch.cuda.is_available())
# CUDA toolkit version this PyTorch build was compiled against
print(f"PyTorch CUDA version: {torch.version.cuda}")
# torch.version.cuda is None for CPU-only builds, so report it as a yes/no
print(f"PyTorch built with CUDA: {torch.version.cuda is not None}")

In [None]:
### NOTICE 
# 1. clone https://github.com/guglielmocamporese/hands-segmentation-pytorch 
# 2. run ./scripts/download_model_checkpoint.sh from hands directory in bash. Make sure to have the conda environment activated, with gdown installed.
# if you don't do this, you'll get a bunch of blank images.

# !python hands/main.py --mode predict --data_base_path debashishsau_dataset/ASL_Alphabet_Dataset/asl_alphabet_train/A --model_checkpoint "hands/checkpoint/checkpoint.ckpt" --model_pretrained

### NOW YOU MUST SEGMENT THE DATA USING THE HANDS MODEL AND SAVE THE SEGMENTED DATA TO A NEW LOCATION.
# now loop this for every letter
import os
from tqdm import tqdm

for letter in tqdm(os.listdir("debashishsau_dataset/ASL_Alphabet_Dataset/asl_alphabet_train")):
    path = os.path.join("debashishsau_dataset/ASL_Alphabet_Dataset/asl_alphabet_train/", letter)
    !python hands/main.py --mode predict --data_base_path $path --model_checkpoint "hands/checkpoint/checkpoint.ckpt" --model_pretrained

In [None]:
# Combine each original image with its segmentation mask: pixels that are white in the mask keep their original value,
# and pixels that are black in the mask are set to black in the output image.
# The original image is e.g. xxx.jpg and its mask is xxx-pred.png, in the same folder.

import os
import cv2
import numpy as np

def overlap_images(data_path, segmented_path, output_path):
    """Black out the background of every .jpg under ``data_path`` using its mask.

    For each ``<name>.jpg`` found while walking ``data_path``, the mask
    ``<name>-pred.png`` is read from ``segmented_path``. Every pixel that is
    pure black in the mask is set to black in a copy of the image, and the
    result is written to ``output_path`` under the original file name.

    Args:
        data_path: Directory that is walked for ``.jpg`` images.
        segmented_path: Directory containing the ``-pred.png`` masks.
        output_path: Directory the masked images are written to; created on
            the first write if it does not exist.

    Images whose file or mask cannot be read are reported and skipped.
    """
    for root, dirs, files in os.walk(data_path):
        for file in files:
            if not file.endswith(".jpg"):
                continue

            # splitext removes only the trailing extension; str.replace would
            # also strip ".jpg" appearing elsewhere in the name.
            file_name = os.path.splitext(file)[0]
            img_path = os.path.join(root, file)
            img = cv2.imread(img_path)
            seg = cv2.imread(os.path.join(segmented_path, file_name + "-pred.png"))

            # cv2.imread signals failure by returning None instead of raising.
            if img is None or seg is None:
                print(f"Skipping {img_path}: image or mask could not be read")
                continue

            # Boolean-mask indexing requires the mask to match the image's
            # height and width; nearest-neighbour keeps the mask values unblended.
            if seg.shape[:2] != img.shape[:2]:
                seg = cv2.resize(seg, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_NEAREST)

            # True where the mask is black in every channel
            mask = np.all(seg == 0, axis=-1)

            processed_img = img.copy()
            processed_img[mask] = [0, 0, 0]

            # Created lazily so no empty output folder appears when nothing is written
            os.makedirs(output_path, exist_ok=True)
            cv2.imwrite(os.path.join(output_path, file), processed_img)
                
               
               
# Mask every raw class folder, writing the result to a sibling "<letter>_processed" folder.
data_path = "debashishsau_dataset/ASL_Alphabet_Dataset/asl_alphabet_train/"
for letter in os.listdir(data_path):
    letter_dir = os.path.join(data_path, letter)
    # Skip stray files, and skip "<letter>_processed" folders so a re-run does not
    # produce "<letter>_processed_processed".
    if not os.path.isdir(letter_dir) or letter.endswith("_processed"):
        continue
    # Images and their masks live in the same class folder.
    overlap_images(letter_dir, letter_dir, letter_dir + "_processed")

In [None]:
# move the processed data to a new location, only move folders that end with _processed

import os
import shutil

data_path = "debashishsau_dataset/ASL_Alphabet_Dataset/asl_alphabet_train/"
processed_root = "debashishsau_dataset_processed"

# The destination has to exist before the first move. If it does not,
# shutil.move() renames the first class folder to processed_root itself, so that
# class's images end up loose in the root instead of in their own subfolder.
os.makedirs(processed_root, exist_ok=True)

for letter in os.listdir(data_path):
    if letter.endswith("_processed"):
        # With an existing directory as destination, the folder is moved inside it.
        shutil.move(os.path.join(data_path, letter), processed_root)



In [None]:
# split into training and validation
# Each class folder in data_path is shuffled and its files are moved 80/20 into
# <train_path>/<class>/ and <val_path>/<class>/.
import os
import shutil
import random

data_path = "debashishsau_dataset_processed"
train_path = "debashishsau_dataset_processed_train"
val_path = "debashishsau_dataset_processed_val"

TRAIN_FRACTION = 0.8
# Dedicated seeded generator so the split is reproducible without touching the
# global random state.
split_rng = random.Random(42)

os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)

for letter in sorted(os.listdir(data_path)):
    letter_dir = os.path.join(data_path, letter)
    # Ignore hidden entries and anything that is not a class folder.
    if letter.startswith(".") or not os.path.isdir(letter_dir):
        continue

    # os.listdir order is arbitrary; sort first so the seeded shuffle is stable.
    files = sorted(os.listdir(letter_dir))
    split_rng.shuffle(files)
    split = int(TRAIN_FRACTION * len(files))

    for dest_root, subset in ((train_path, files[:split]), (val_path, files[split:])):
        if not subset:
            # No files for this subset: do not create an empty class folder.
            continue
        dest_dir = os.path.join(dest_root, letter)
        os.makedirs(dest_dir, exist_ok=True)
        for file in subset:
            shutil.move(os.path.join(letter_dir, file), os.path.join(dest_dir, file))

In [None]:
# delete useless images that are more than 90% black. Don't forget to also delete from the val folder by changing the data_dir

import os
import cv2
import numpy as np

data_dir = "debashishsau_dataset_processed_train"
# data_dir = "debashishsau_dataset_processed_val" # then delete from val

# if image is more than 90% black, useless
BLACK_THRESHOLD_PERCENT = 90

useless = 0
total = 0
for letter in os.listdir(data_dir):
    # the "nothing" class is exempt from this filter, so skip the whole folder
    if letter == "nothing_processed":
        continue

    letter_dir = os.path.join(data_dir, letter)
    if not os.path.isdir(letter_dir):
        continue

    for file in os.listdir(letter_dir):
        file_path = os.path.join(letter_dir, file)
        img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread returns None for files it cannot decode
        if img is None:
            print(f"Skipping unreadable file: {file_path}")
            continue
        total += 1

        # Calculate the total number of pixels
        total_pixels = img.size

        # Count the number of black pixels (intensity value of 0)
        black_pixels = np.count_nonzero(img == 0)

        # Calculate the percentage of black pixels
        black_percentage = (black_pixels / total_pixels) * 100

        if black_percentage > BLACK_THRESHOLD_PERCENT:
            useless += 1
            # Deletes the file from disk; comment this line out for a dry run
            os.remove(file_path)

print(f"Useless images removed: {useless}")
print(f"Total images: {total}")
if total:
    print(f"Percentage useless: {useless / total * 100}%")
else:
    # Avoid dividing by zero when the folder holds no readable images
    print("Percentage useless: n/a (no images found)")


In [None]:
# in case you want to check the black percentage of a specific image
import os
import cv2
import numpy as np

# Built with os.path.join so the path works on every OS and contains no
# backslash escape sequences.
specific_file_path = os.path.join("debashishsau_dataset_processed_val", "E_processed", "E (77).jpg")

# test black % of this image
img = cv2.imread(specific_file_path, cv2.IMREAD_GRAYSCALE)
if img is None:
    # cv2.imread returns None when the file is missing or cannot be decoded
    print(f"Could not read image: {specific_file_path}")
else:
    total_pixels = img.size
    black_pixels = np.sum(img == 0)
    black_percentage = (black_pixels / total_pixels) * 100
    print(f"Black percentage of {specific_file_path}: {black_percentage}")


In [None]:
# By this stage your preprocessed data should be ready for training in the folder 
# debashishsau_dataset_processed_train and debashishsau_dataset_processed_val

# transfer learning from resnet18: the 512 backbone features feed one layer of 256 neurons, then 128, and then 29 neurons for the 29 classes
# once your model is done, move the model and plot file to the models and plots dirs respectively so you don't accidentally overwrite them later

import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import time
import os

# Per-channel mean / std handed to Normalize in both pipelines
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop + random horizontal flip as augmentation
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)
])

# Validation pipeline: deterministic resize + center crop
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)
])

data_transforms = {'train': train_transform, 'val': val_transform}

data_dir = "debashishsau_dataset_processed"

# Build dataset, loader and size for each split; the folders are
# "<data_dir>_train" and "<data_dir>_val".
image_datasets = {}
dataloaders = {}
dataset_sizes = {}
for split_name in ['train', 'val']:
    split_dir = data_dir + '_' + split_name
    image_datasets[split_name] = datasets.ImageFolder(split_dir, data_transforms[split_name])
    dataloaders[split_name] = torch.utils.data.DataLoader(
        image_datasets[split_name], batch_size=4, shuffle=True, num_workers=4
    )
    dataset_sizes[split_name] = len(image_datasets[split_name])

# Class labels come from the training split's folder names
class_names = image_datasets['train'].classes

# Use the first GPU when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Per-epoch loss history, appended to by train_model
training_losses = []
validation_losses = []


def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase. The weights of
    the epoch with the highest validation accuracy are kept and restored
    before returning.

    Relies on the module-level ``dataloaders``, ``dataset_sizes`` and
    ``device``, and appends each epoch's loss to the module-level
    ``training_losses`` / ``validation_losses`` lists.

    Args:
        model: Network to train; expected to already be on ``device``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
        scheduler: LR scheduler; stepped once after every training phase.
        num_epochs: Number of train+val epochs to run.

    Returns:
        The same ``model`` object, loaded with the best validation weights.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is a batch mean, so weight it by the
                # batch size; counts are kept as plain Python numbers
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            if phase == 'train':
                training_losses.append(epoch_loss)
            else:
                validation_losses.append(epoch_loss)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # four decimals, matching the per-epoch accuracy printout
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model

# Start from a ResNet18 with pretrained weights.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True`; kept as-is so older installs keep working.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
print(num_ftrs)

# One output per class folder found by ImageFolder (29 for the full dataset)
num_classes = len(class_names)

# 512 -> 256 -> 128 -> num_classes

model_ft.fc = nn.Sequential(
    nn.Linear(num_ftrs, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, num_classes),
)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                          num_epochs=22)



# save the model (the whole module object is saved, not just its state_dict)
torch.save(model_ft, "asl_model_256_128.pth")

# show plot of training and validation loss
import matplotlib.pyplot as plt

#save plot to file
plt.plot(training_losses, label="Training Loss")
plt.plot(validation_losses, label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training vs. validation loss")
plt.legend()
plt.savefig("loss_plot.png")
plt.show()

In [None]:
# segment the test data
# remember test folder is flat, no subfolders
# Download any dataset from the internet and put it in a folder of your choosing.
# make sure to change the name of the folder in the test_dir variable below
# make sure the files are flat in the folder, no subfolders, and that the files are .jpg
# also make sure the first letter of the file is the letter it represents

import os

test_dir = "misc_test"
 
for file in os.listdir(test_dir):
    if file.endswith(".jpg"):
        !python hands/main.py --mode predict --data_base_path $test_dir --model_checkpoint "hands/checkpoint/checkpoint.ckpt" --model_pretrained
            


In [None]:
# do segmentation preprocessing on the test data as well
# again don't forget to change the dir name as needed
# overlap images in test that end in -pred.png with the original images
import os
import cv2
import numpy as np

def overlap_images(data_path, segmented_path, output_path):
    """Black out the background of every .jpg directly inside ``data_path``.

    The test folder is flat, so only the top level of ``data_path`` is
    listed. For each ``<name>.jpg`` the mask ``<name>-pred.png`` is read from
    ``segmented_path``; every pixel that is pure black in the mask is set to
    black in a copy of the image, which is written to ``output_path`` under
    the original file name.

    Args:
        data_path: Flat directory containing the ``.jpg`` images.
        segmented_path: Directory containing the ``-pred.png`` masks.
        output_path: Directory the masked images are written to; created on
            the first write if it does not exist.

    Images whose file or mask cannot be read are reported and skipped.
    """
    # files are flat in test dir
    for file in os.listdir(data_path):
        if not file.endswith(".jpg"):
            continue

        # splitext removes only the trailing extension; str.replace would
        # also strip ".jpg" appearing elsewhere in the name.
        file_name = os.path.splitext(file)[0]
        img_path = os.path.join(data_path, file)
        img = cv2.imread(img_path)
        seg = cv2.imread(os.path.join(segmented_path, file_name + "-pred.png"))

        # cv2.imread signals failure by returning None instead of raising.
        if img is None or seg is None:
            print(f"Skipping {img_path}: image or mask could not be read")
            continue

        # Boolean-mask indexing requires the mask to match the image's
        # height and width; nearest-neighbour keeps the mask values unblended.
        if seg.shape[:2] != img.shape[:2]:
            seg = cv2.resize(seg, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_NEAREST)

        # True where the mask is black in every channel
        mask = np.all(seg == 0, axis=-1)

        processed_img = img.copy()
        processed_img[mask] = [0, 0, 0]

        # Created lazily so no empty output folder appears when nothing is written
        os.makedirs(output_path, exist_ok=True)
        cv2.imwrite(os.path.join(output_path, file), processed_img)
            
# Folder holding the test images and their -pred.png masks (switch as needed)
test_dir = "roboflow_test"
# test_dir = "misc_test"

# Images and masks share one folder; masked copies are written to "test_processed"
overlap_images(
    data_path=test_dir,
    segmented_path=test_dir,
    output_path="test_processed",
)

In [None]:
# remove images that are more than 90% black from the test data
import os
import cv2
import numpy as np

data_dir = "test_processed"

# an image with more than this percentage of pure-black pixels is considered useless
BLACK_THRESHOLD_PERCENT = 90

useless = 0
total = 0

for file in os.listdir(data_dir):
    file_path = os.path.join(data_dir, file)
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None for files it cannot decode
    if img is None:
        print(f"Skipping unreadable file: {file_path}")
        continue
    total += 1

    # Calculate the total number of pixels
    total_pixels = img.size

    # Count the number of black pixels (intensity value of 0)
    black_pixels = np.count_nonzero(img == 0)

    # Calculate the percentage of black pixels
    black_percentage = (black_pixels / total_pixels) * 100

    if black_percentage > BLACK_THRESHOLD_PERCENT:
        useless += 1
        # Deletes the file from disk; comment this line out for a dry run
        os.remove(file_path)

print(f"Useless images removed: {useless}")
print(f"Total images: {total}")
if total:
    print(f"Percentage useless: {useless / total * 100}%")
else:
    # Avoid dividing by zero when the folder holds no readable images
    print("Percentage useless: n/a (no images found)")


In [None]:
# test the images in the test_processed folder
# uncomment the cv2.imshow lines to see the images with the prediction and actual letter
# you can also change that if statement to test a specific letter

import torch
import torchvision.transforms as transforms
from PIL import Image
import os
import cv2
import numpy as np

# Run on the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The checkpoint is a fully pickled model object (saved with torch.save(model)),
# so weights_only must be False on recent PyTorch. Only load files you trust:
# unpickling can execute arbitrary code.
model = torch.load("models/asl_model_256.pth", map_location=device, weights_only=False)
model.to(device)
model.eval()

# Same preprocessing as the validation pipeline used during training
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

data_path = "test_processed"
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']
correct = 0
total = 0

# Inference only: no gradients are needed
with torch.no_grad():
    for file in os.listdir(data_path):
        # The context manager closes the file handle; convert() returns an RGB copy
        with Image.open(os.path.join(data_path, file)) as opened:
            img = opened.convert('RGB')
        batch = transform(img).unsqueeze(0).to(device)
        output = model(batch)
        pred = torch.argmax(output, dim=1).item()

        # get the letter from the file name
        letter = file[0]

        if class_names[pred] == letter:
            correct += 1

        # show image with prediction in image
        # if (letter == 'D'):
        #     img = cv2.imread(os.path.join(data_path, file))
        #     cv2.putText(img, 'pred ' + class_names[pred], (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        #     cv2.putText(img, 'actual ' + letter, (200, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
        #     cv2.imshow('Result', img)
        #     cv2.waitKey(0)
        #     cv2.destroyAllWindows()

        total += 1

if total:
    print(f"Accuracy: {correct / total * 100}%")
else:
    # Avoid dividing by zero when the folder is empty
    print(f"Accuracy: n/a (no images found in {data_path})")
    
    
    # cv2.imshow('Result', cv2.imread(os.path.join(data_path, file)))
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()
    

In [None]:
# test the model
# show webcam feed and predict the sign language letter
# this does not work well since the segmentation model is rather slow

import cv2
import torch
import numpy as np
from torchvision import transforms
from PIL import Image
import subprocess
import os

# Run on the GPU when available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The checkpoint is a fully pickled model object (saved with torch.save(model)),
# so weights_only must be False on recent PyTorch. Only load files you trust:
# unpickling can execute arbitrary code.
model = torch.load("models/asl_model_256.pth", map_location=device, weights_only=False)
model.to(device)
model.eval()

# Same preprocessing as the validation pipeline used during training
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'del', 'nothing', 'space']

# Each frame is persisted here so the segmentation script can read it; the
# script's mask for "<name>.jpg" is "<name>-pred.png" in the same folder.
data_path = "webcam"
frame_path = os.path.join(data_path, "webcam.jpg")
mask_path = os.path.join(data_path, "webcam-pred.png")
os.makedirs(data_path, exist_ok=True)

cap = cv2.VideoCapture(0)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame delivered (camera missing, busy or disconnected)
            print("Could not read a frame from the webcam")
            break

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # persist to webcam folder
        cv2.imwrite(frame_path, frame)

        # Remove the previous mask so a failed run cannot silently reuse it
        if os.path.exists(mask_path):
            os.remove(mask_path)

        result = subprocess.run(
            ["python", "./hands/main.py", "--mode", "predict", "--data_base_path", "webcam", "--model_checkpoint", "hands/checkpoint/checkpoint.ckpt", "--model_pretrained"],  # Command to run the script,
            text=True,               # Ensures the output is captured as a string
            capture_output=True,      # Captures stdout and stderr
        )

        # The mask is an image file written by the script, not its stdout
        seg = cv2.imread(mask_path) if result.returncode == 0 else None

        if seg is None:
            print("Segmentation failed:", result.stderr.strip())
        else:
            # Boolean-mask indexing requires the mask to match the frame's size
            if seg.shape[:2] != frame.shape[:2]:
                seg = cv2.resize(seg, (frame.shape[1], frame.shape[0]), interpolation=cv2.INTER_NEAREST)

            # overlap segmented webcam with original webcam
            mask = np.all(seg == 0, axis=-1)
            processed_img = frame.copy()
            processed_img[mask] = [0, 0, 0]

            # OpenCV frames are BGR; the model was trained on RGB images
            processed_rgb = cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)
            batch = transform(Image.fromarray(processed_rgb)).unsqueeze(0).to(device)

            with torch.no_grad():
                outputs = model(batch)
            label = class_names[torch.argmax(outputs, dim=1).item()]
            print(label)

            cv2.putText(frame, label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

        # Always show the frame so the window exists and 'q' can be caught
        cv2.imshow('Result', frame)
finally:
    # Release the camera and close the window even if an error occurred
    cap.release()
    cv2.destroyAllWindows()



    

In [None]:
# edit this as needed if you inadvertently created masks

import os

delete_dir = "blahblahblah"

# Masks written by the segmentation step are named "<image name>-pred.png"
MASK_SUFFIX = "-pred.png"

for filename in os.listdir(delete_dir):
    # Match the exact mask suffix: a plain substring test for "pred" would also
    # delete unrelated files that merely contain those letters.
    if filename.endswith(MASK_SUFFIX):
        os.remove(os.path.join(delete_dir, filename))
    