In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model
from PIL import Image
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator , img_to_array, load_img
from tensorflow.keras.optimizers import RMSprop
from keras.applications.vgg16 import preprocess_input

In [2]:
#Run this once to install mediapipe (%pip installs into the environment of the running kernel)
#%pip install mediapipe

In [3]:
import cv2
import mediapipe as mp
import math


class HandDetector:
    """
    Finds hands using the mediapipe library. Exports the landmarks
    in pixel format. Adds extra functionalities like finding which
    fingers are up or the distance between two points. Also
    provides bounding box info of the hand found.
    """

    def __init__(self, mode=False, maxHands=2, detectionCon=0.5, minTrackCon=0.5):
        """
        :param mode: In static mode, detection is done on each image: slower
        :param maxHands: Maximum number of hands to detect
        :param detectionCon: Minimum Detection Confidence Threshold
        :param minTrackCon: Minimum Tracking Confidence Threshold
        """
        self.mode = mode
        self.maxHands = maxHands
        self.detectionCon = detectionCon
        self.minTrackCon = minTrackCon

        self.mpHands = mp.solutions.hands
        self.hands = self.mpHands.Hands(static_image_mode=self.mode, max_num_hands=self.maxHands,
                                        min_detection_confidence=self.detectionCon,
                                        min_tracking_confidence=self.minTrackCon)
        self.mpDraw = mp.solutions.drawing_utils
        # Landmark indices treated as finger tips by fingersUp (thumb first).
        self.tipIds = [4, 8, 12, 16, 20]
        self.fingers = []
        self.lmList = []
        # Output of the most recent findHands() call; None until it has run.
        self.results = None

    def findHands(self, img, draw=True, flipType=True):
        """
        Finds hands in a BGR image.
        :param img: Image to find the hands in.
        :param draw: Flag to draw the landmarks on the image. It also
                     selects the return shape (see below).
        :param flipType: When True, the "Left"/"Right" label reported by
                         mediapipe is swapped before it is stored.
        :return: (allHands, img) when draw is True, otherwise allHands only.
                 Each entry of allHands is a dict with the keys
                 "lmList" (list of [x, y, z] in pixels),
                 "bbox" (xmin, ymin, width, height),
                 "center" (cx, cy) and "type" ("Left" or "Right").
        """
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.results = self.hands.process(imgRGB)
        allHands = []
        h, w = img.shape[:2]
        if self.results.multi_hand_landmarks:
            for handType, handLms in zip(self.results.multi_handedness, self.results.multi_hand_landmarks):
                myHand = {}
                ## lmList
                mylmList = []
                xList = []
                yList = []
                for lm in handLms.landmark:
                    # Landmarks are scaled by the image size to get pixels;
                    # z is scaled with the width, like x.
                    px, py, pz = int(lm.x * w), int(lm.y * h), int(lm.z * w)
                    mylmList.append([px, py, pz])
                    xList.append(px)
                    yList.append(py)

                ## bbox
                xmin, xmax = min(xList), max(xList)
                ymin, ymax = min(yList), max(yList)
                boxW, boxH = xmax - xmin, ymax - ymin
                bbox = xmin, ymin, boxW, boxH
                cx, cy = xmin + (boxW // 2), ymin + (boxH // 2)

                myHand["lmList"] = mylmList
                myHand["bbox"] = bbox
                myHand["center"] = (cx, cy)

                label = handType.classification[0].label
                if flipType:
                    myHand["type"] = "Left" if label == "Right" else "Right"
                else:
                    myHand["type"] = label
                allHands.append(myHand)

                ## draw
                if draw:
                    self.mpDraw.draw_landmarks(img, handLms,
                                               self.mpHands.HAND_CONNECTIONS)
        if draw:
            return allHands, img
        else:
            return allHands

    def fingersUp(self, myHand):
        """
        Finds which fingers are open and returns them in a list.
        Considers left and right hands separately.
        :param myHand: One hand dict as returned by findHands
                       (uses the "type" and "lmList" keys).
        :return: List of five 0/1 flags, thumb first. An empty list is
                 returned when the most recent findHands() call found no
                 hand, or when findHands() has not been called yet.
        """
        myHandType = myHand["type"]
        myLmList = myHand["lmList"]
        # Bound up-front so the function always has something to return.
        fingers = []
        results = getattr(self, "results", None)
        if results is None or not results.multi_hand_landmarks:
            return fingers

        # Thumb: compare the x position of the tip with the landmark before it.
        thumbTipX = myLmList[self.tipIds[0]][0]
        thumbJointX = myLmList[self.tipIds[0] - 1][0]
        if myHandType == "Right":
            fingers.append(1 if thumbTipX > thumbJointX else 0)
        else:
            fingers.append(1 if thumbTipX < thumbJointX else 0)

        # 4 Fingers: a finger is up when its tip is above (smaller y than)
        # the landmark two positions before it.
        for fingerIdx in range(1, 5):
            tipY = myLmList[self.tipIds[fingerIdx]][1]
            lowerY = myLmList[self.tipIds[fingerIdx] - 2][1]
            fingers.append(1 if tipY < lowerY else 0)
        return fingers

    def findDistance(self, p1, p2, img=None):
        """
        Find the distance between two points given in pixel coordinates.
        :param p1: Point1 as a sequence starting with (x, y). Extra
                   components are ignored, so the [x, y, z] entries of
                   "lmList" can be passed directly.
        :param p2: Point2, same format as p1.
        :param img: Image to draw on. Nothing is drawn when it is None.
        :return: (length, info) when img is None, otherwise
                 (length, info, img), where info is
                 (x1, y1, x2, y2, cx, cy) and (cx, cy) is the midpoint.
        """

        x1, y1 = p1[:2]
        x2, y2 = p2[:2]
        cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
        length = math.hypot(x2 - x1, y2 - y1)
        info = (x1, y1, x2, y2, cx, cy)
        if img is not None:
            cv2.circle(img, (x1, y1), 15, (255, 0, 255), cv2.FILLED)
            cv2.circle(img, (x2, y2), 15, (255, 0, 255), cv2.FILLED)
            cv2.line(img, (x1, y1), (x2, y2), (255, 0, 255), 3)
            cv2.circle(img, (cx, cy), 15, (255, 0, 255), cv2.FILLED)
            return length, info, img
        else:
            return length, info

# Train Validation Split

In [3]:
#train = ImageDataGenerator(rescale = 1./255
#)
##Normalizing the inputs
#validation = ImageDataGenerator(rescale = 1./255)
#
##Splitting the dataset into train and validation
#train_dataset = train.flow_from_directory('C:/graduationProject2023/Dataset/trainSet',
#                                         target_size = (350,350),
#                                         batch_size = 32,
#                                         class_mode = "categorical")
#validation_dataset = validation.flow_from_directory('C:/graduationProject2023/croppedDataset/validSet',
#                                                   target_size = (350,350),
#                                                   batch_size = 32,
#                                                   class_mode = "categorical")
#train_dataset.class_indices
#dic = list(train_dataset.class_indices)
#dic

Found 900 images belonging to 15 classes.
Found 180 images belonging to 9 classes.


['drink',
 'food',
 'full',
 'have',
 'hello',
 'i',
 'i love you',
 'police',
 'prefer',
 'shirt',
 'telephone',
 'water',
 'wrong',
 'yes',
 'you']

# Model Load

In [4]:
#Loading the model
#model = load_model("C:/graduationProject2023/model/VGG16_Augmented1.h5")

## Real time capture

In [None]:
import cv2
import numpy as np
import time
import math

#Receiving input from live camera feed
cap = cv2.VideoCapture(0)
detector = HandDetector(maxHands=1)
offset = 10
imgSize = 350

#Last label drawn on the frame and the one before it.
#`word` has to stay a string because it is passed to cv2.putText below.
word = ""
oldWord = ""

#Setting the prediction to 5 frames per second
frame_rate = 5
prev = 0

labels = []

#Counter for saving dataset collection
counter = 0
result = 0

try:
    while True:
        #Time since the last prediction, used to throttle to frame_rate
        time_elapsed = time.time() - prev
        #Read exactly one frame per iteration
        success, img = cap.read()
        if not success or img is None:
            print("Could not read a frame from the camera, stopping.")
            break
        imgOutput = img.copy()
        try:
            hands, img = detector.findHands(img)
            #If hands are detected using MediaPipe
            if hands:
                #Since we only have 1 hand, we'll be using hands[0] not hands[1]
                hand = hands[0]
                #Setting the x and y from the bounding box as well as the width and height
                x, y, w, h = hand['bbox']
                #Cropping to the hand. The padded box is clamped to the frame:
                #a negative start index would otherwise wrap around and give
                #an empty or wrong crop when the hand touches the border.
                frameH, frameW = img.shape[:2]
                top, bottom = max(y - offset, 0), min(y + h + offset, frameH)
                left, right = max(x - offset, 0), min(x + w + offset, frameW)
                imgCrop = img[top:bottom, left:right]
                #Skip degenerate boxes (would divide by zero / resize nothing)
                if w > 0 and h > 0 and imgCrop.size > 0:
                    imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255
                    aspectRatio = h / w
                    #Checking if the hand is vertical or horizontal
                    #and filling the empty spaces with white
                    if aspectRatio > 1:
                        k = imgSize / h
                        wCal = math.ceil(k * w)
                        imgResize = cv2.resize(imgCrop, (wCal, imgSize))
                        wGap = math.ceil((imgSize - wCal) / 2)
                        imgWhite[:, wGap:wCal + wGap] = imgResize
                    else:
                        k = imgSize / w
                        hCal = math.ceil(k * h)
                        imgResize = cv2.resize(imgCrop, (imgSize, hCal))
                        hGap = math.ceil((imgSize - hCal) / 2)
                        imgWhite[hGap:hCal + hGap, :] = imgResize
                    #Checking to see if the time between frames has passed
                    if time_elapsed > 1./frame_rate:
                        prev = time.time()
                        #Necessary processing for the image; only needed when a
                        #prediction is made, so it is done inside the throttle
                        modelInput = preprocess_input(
                            np.expand_dims(img_to_array(imgWhite), axis=0))
                        #result = model.predict(modelInput)
                        #word = dic[result.argmax()]
                        oldWord = word
                        #result = result * 100000000
                        #maxVal = (result[0].max()/sum(result[0])) * 100
                        maxVal = 100
                        #Checking to see if the prediction confidence is greater than 99.9999999
                        if(maxVal > 99.9999999):
                            #Putting the result on the box
                            #cv2.putText(imgOutput, dic[result.argmax()],(x,y-20), cv2.FONT_HERSHEY_DUPLEX,2,(255,0,255),2)
                            cv2.imshow("ImageCrop", imgCrop)
                        else:
                            #Putting an empty prediction
                            #cv2.putText(imgOutput, "",(x,y-20), cv2.FONT_HERSHEY_DUPLEX,2,(255,0,255),2)
                            cv2.imshow("ImageCrop", imgCrop)
                    else:
                        cv2.putText(imgOutput, word,(x,y-20), cv2.FONT_HERSHEY_DUPLEX,2,(255,0,255),2)
                    cv2.imshow("imgWhite", imgWhite)
            cv2.imshow("Image", imgOutput)
        except Exception as exc:
            #Best effort: one bad frame should not stop the capture, but the
            #error is reported and Ctrl-C / kernel interrupt is not swallowed
            print(f"Skipping frame: {exc!r}")
        key = cv2.waitKey(1) & 0xFF
        #Saving the image if "S" is pressed
        if key == ord("s"):
            counter+=1
            cv2.imwrite(f"C:/Users/User/Desktop/user_no_imagename{counter}.jpg", imgOutput)
        #Leaving the loop if "Q" or Esc is pressed
        elif key == ord("q") or key == 27:
            break
finally:
    #Always free the camera and close the preview windows
    cap.release()
    cv2.destroyAllWindows()

# Single Image Detector

In [None]:
import os
detector = HandDetector(maxHands=1)
offset = 20
imgSize = 350
path = "C:/graduationProject2023/croppedDataset/trainSet/i love you"
#NOTE(review): savedFolder is not used anywhere in this cell
savedFolder = "C:/graduationProject2023/Dataset/Cropped Images Generated"

for folder in os.listdir(path):
    #Only descend into sub-folders: the imwrite at the bottom of this cell
    #puts a .jpg directly inside `path`, and calling os.listdir() on that
    #file would fail the next time the cell is run.
    if not os.path.isdir(f"{path}/{folder}"):
        continue
    #NOTE(review): counter is reset here but never used below
    counter =0
    for fileName in os.listdir(f"{path}/{folder}"):
        img = cv2.imread(f"{path}/{folder}/{fileName}")
        #cv2.imread returns None for files it cannot decode
        if img is None:
            print(f"Skipping unreadable file: {path}/{folder}/{fileName}")
            continue
        imgOutput = img.copy()
        hands, img = detector.findHands(img)
        if not hands:
            continue
        hand = hands[0]
        x, y, w, h = hand['bbox']
        #Clamp the padded box to the image: a negative start index would
        #wrap around and produce an empty or wrong crop.
        frameH, frameW = img.shape[:2]
        top, bottom = max(y - offset, 0), min(y + h + offset, frameH)
        left, right = max(x - offset, 0), min(x + w + offset, frameW)
        imgCrop = img[top:bottom, left:right]
        #Skip degenerate boxes (would divide by zero / resize nothing)
        if w <= 0 or h <= 0 or imgCrop.size == 0:
            print(f"Skipping degenerate hand box in: {path}/{folder}/{fileName}")
            continue
        imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255
        aspectRatio = h / w
        #Scale the longer side of the hand to imgSize and centre the
        #result on the white square
        if aspectRatio > 1:
            k = imgSize / h
            wCal = math.ceil(k * w)
            imgResize = cv2.resize(imgCrop, (wCal, imgSize))
            wGap = math.ceil((imgSize - wCal) / 2)
            imgWhite[:, wGap:wCal + wGap] = imgResize
        else:
            k = imgSize / w
            hCal = math.ceil(k * h)
            imgResize = cv2.resize(imgCrop, (imgSize, hCal))
            hGap = math.ceil((imgSize - hCal) / 2)
            imgWhite[hGap:hCal + hGap, :] = imgResize
        cv2.rectangle(imgOutput, (x - offset, y - offset-50),
                      (x - offset+90, y - offset-50+50), (255, 0, 255), cv2.FILLED)
        #Labels are stored in labels list
        #cv2.putText(imgOutput, labels[index], (x, y -26), cv2.FONT_HERSHEY_COMPLEX, 1.7, (255, 255, 255), 2)
        cv2.rectangle(imgOutput, (x-offset, y-offset),
                      (x + w+offset, y + h+offset), (255, 0, 255), 4)
        print(imgWhite.shape)
        #Keep the uint8 picture for saving; imgWhite becomes the model input
        imgWhiteCopy = imgWhite.copy()

        imgWhite = img_to_array(imgWhite)
        imgWhite = imgWhite.reshape((1, imgWhite.shape[0], imgWhite.shape[1], imgWhite.shape[2]))
        imgWhite = preprocess_input(imgWhite)
        #NOTE(review): `model` must already be loaded; in the visible notebook
        #the load_model() call of the "Model Load" cell is commented out.
        result = model.predict(imgWhite).argmax()
        #NOTE(review): every iteration writes to the same file, so only the
        #last processed image survives - confirm this is intended.
        cv2.imwrite(f"C:/graduationProject2023/croppedDataset/trainSet/i love you/i love you_11.jpg", imgWhiteCopy)


# Horizontal Flip

In [None]:
import os
import cv2

#Writes a mirrored copy of every image next to the original, one class folder at a time
path = "C:/graduationProject2023/croppedDataset/validSet/"
flippedSuffix = "_flipped.jpg"
for folder in os.listdir(path):
    #Stray files next to the class folders cannot be listed
    if not os.path.isdir(f"{path}/{folder}"):
        continue
    for fileName in os.listdir(f"{path}/{folder}"):
        #Skip the outputs of a previous run, otherwise re-running the cell
        #flips the flipped copies again and keeps multiplying the dataset
        if fileName.endswith(flippedSuffix):
            continue
        img = cv2.imread(f"{path}/{folder}/{fileName}")
        #cv2.imread returns None for files it cannot decode
        if img is None:
            print(f"Skipping unreadable file: {path}/{folder}/{fileName}")
            continue
        #flipCode=1 mirrors the image left-to-right (horizontal flip)
        flipped = cv2.flip(img,1)
        cv2.imwrite(f"{path}/{folder}/{fileName}{flippedSuffix}",flipped)


# Individual Image Examiner

In [None]:
#import cv2
#from cvzone.HandTrackingModule import HandDetector

#Runs the hand detector on one image and shows the result in a window
detector = HandDetector(maxHands =1)
imgPath = "C:/graduationProject2023/Dataset/New_Images/i love you/WhatsApp Image 2022-12-07 at 18.03.04.jpg"
img = cv2.imread(imgPath)
#cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError(f"Could not read image: {imgPath}")
#The image is kept in BGR: findHands() does the BGR->RGB conversion itself
#and cv2.imshow expects BGR, so converting here would swap the channels twice
hands, img = detector.findHands(img)
cv2.imshow("Image",img)
#Wait for any key, then close the window
cv2.waitKey(0)
cv2.destroyAllWindows()

# Automatic Hand Cropper

In [None]:
import cv2
import numpy as np
import os
import math

#Crops the detected hand out of every image of the listed class folders,
#pads it to a white imgSize x imgSize square and saves it. Images without a
#detected hand are written unchanged to the testSet folder.
detector = HandDetector(maxHands=1)
offset = 20
imgSize = 350
#folder = "C:/graduationProject2023/Dataset/TrainSet/bed/"
folderPath = "C:/graduationProject2023/Dataset/ValidationSet"
folderList = ["bed","father","full","police","shirt","water","wrong"]
#Number of cropped images written
counter = 0
for folder in folderList:
    for image in os.listdir(f"{folderPath}/{folder}"):
        img = cv2.imread(f"{folderPath}/{folder}/{image}")
        #cv2.imread returns None for files it cannot decode
        if img is None:
            print(f"Skipping unreadable file: {folderPath}/{folder}/{image}")
            continue
        imgOutput = img.copy()
        hands, img = detector.findHands(img)
        if hands:
            hand = hands[0]
            x, y, w, h = hand['bbox']
            #Clamp the padded box to the image: a negative start index would
            #wrap around and produce an empty or wrong crop.
            frameH, frameW = img.shape[:2]
            top, bottom = max(y - offset, 0), min(y + h + offset, frameH)
            left, right = max(x - offset, 0), min(x + w + offset, frameW)
            imgCrop = img[top:bottom, left:right]
            #Skip degenerate boxes (would divide by zero / resize nothing)
            if w <= 0 or h <= 0 or imgCrop.size == 0:
                print(f"Skipping degenerate hand box in: {folderPath}/{folder}/{image}")
                continue
            imgWhite = np.ones((imgSize, imgSize, 3), np.uint8) * 255
            aspectRatio = h / w
            #Scale the longer side of the hand to imgSize and centre the
            #result on the white square
            if aspectRatio > 1:
                k = imgSize / h
                wCal = math.ceil(k * w)
                imgResize = cv2.resize(imgCrop, (wCal, imgSize))
                wGap = math.ceil((imgSize - wCal) / 2)
                imgWhite[:, wGap:wCal + wGap] = imgResize
            else:
                k = imgSize / w
                hCal = math.ceil(k * h)
                imgResize = cv2.resize(imgCrop, (imgSize, hCal))
                hGap = math.ceil((imgSize - hCal) / 2)
                imgWhite[hGap:hCal + hGap, :] = imgResize
            cv2.rectangle(imgOutput, (x - offset, y - offset-50),
                          (x - offset+90, y - offset-50+50), (255, 0, 255), cv2.FILLED)
            #Labels are stored in labels list

            #cv2.putText(imgOutput, labels[index], (x, y -26), cv2.FONT_HERSHEY_COMPLEX, 1.7, (255, 255, 255), 2)
            cv2.rectangle(imgOutput, (x-offset, y-offset),
                          (x + w+offset, y + h+offset), (255, 0, 255), 4)
            #NOTE(review): the class folder is not part of the output path, so
            #files with the same name in different classes overwrite each other
            #- confirm this is intended.
            counter+=1
            cv2.imwrite(f"C:/graduationProject2023/croppedDataset/validSet/{image}",imgWhite)
        else:
            cv2.imwrite(f"C:/graduationProject2023/croppedDataset/testSet/{image}",img)
print(counter)

# cv2.imshow("Image", imgOutput)
# cv2.waitKey(0)

In [None]:
# Build and print the command line for exporter_main_v2.py.
# The placeholders are filled, in order, with APIMODEL_PATH, MODEL_PATH,
# CUSTOM_MODEL_NAME and CHECKPOINT_PATH (used twice: as the checkpoint
# directory and as the prefix of the output directory).
exportCommandTemplate = (
    "python {}/research/object_detection/exporter_main_v2.py"
    " --input_type=image_tensor"
    " --pipeline_config_path={}/{}/pipeline.config"
    " --trained_checkpoint_dir={}"
    " --output_directory={}export"
)
exportCommand = exportCommandTemplate.format(
    APIMODEL_PATH,
    MODEL_PATH,
    CUSTOM_MODEL_NAME,
    CHECKPOINT_PATH,
    CHECKPOINT_PATH,
)
print(exportCommand)