In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
!pip install -q mediapipe

In [None]:
# Download hand_landmarker.task into the working directory; it is the model
# file later passed to BaseOptions(model_asset_path=...).
!wget -q https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task

In [None]:
# Download a sample hands photo and store it locally as image.jpg.
!wget -q -O image.jpg https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/woman_hands.jpg

import cv2
from google.colab.patches import cv2_imshow  # image display helper shipped with google.colab

# Read the downloaded file with OpenCV and show it in the cell output.
img = cv2.imread("image.jpg")
cv2_imshow(img)

In [None]:
# STEP 1: Import the necessary modules.
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# STEP 2: Create a HandLandmarker object from the downloaded
# hand_landmarker.task file, configured with num_hands=2.
base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
options = vision.HandLandmarkerOptions(base_options=base_options,
                                       num_hands=2)
detector = vision.HandLandmarker.create_from_options(options)

im_name = "image.jpg"
# STEP 3: Load the input image.
image = mp.Image.create_from_file(im_name)

# STEP 4: Detect hand landmarks from the input image.
detection_result = detector.detect(image)

# STEP 5: Process the detection result. In this case, visualize it.
# NOTE(review): draw_landmarks_on_image is not defined in the cells shown here,
# which is presumably why the visualization below is commented out.
# annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
# cv2_imshow(cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

In [None]:
# Flatten the landmark coordinates of every detected hand into two parallel
# lists: one entry per landmark, hands concatenated in detection order.
x = [landmark.x for hand in detection_result.hand_landmarks for landmark in hand]
y = [landmark.y for hand in detection_result.hand_landmarks for landmark in hand]

In [None]:
# Re-read the image so the markers are drawn on a fresh copy.
img = cv2.imread(im_name)

# Scale each landmark by the image width (shape[1]) / height (shape[0]) to get
# pixel positions, then mark it with a filled (thickness -1) green dot of radius 5.
for x_frac, y_frac in zip(x, y):
  x_coor = int(x_frac * img.shape[1])
  y_coor = int(y_frac * img.shape[0])
  cv2.circle(img, (x_coor, y_coor), 5, (0, 255, 0), -1)

cv2_imshow(img)



In [None]:
import json

In [None]:
# step 1: change this based on my code
# Keypoint label (without the "L_" / "R_" side prefix) -> slot in the
# per-hand coordinate arrays. Slots 1, 6, 10, 14 and 18 have no label.
_KEYPOINT_INDEX = {
  'wrist': 0,
  'thumb_base': 2,
  'thumb_mp': 3,
  'thumb_tip': 4,
  'index_base': 5,
  'index_mp': 7,
  'index_tip': 8,
  'middle_base': 9,
  'middle_mp': 11,
  'middle_tip': 12,
  'ring_base': 13,
  'ring_mp': 15,
  'ring_tip': 16,
  'pinkie_base': 17,
  'pinkie_mp': 19,
  'pinkie_tip': 20,
}


def str_to_index(str):
  """Map a keypoint label such as 'index_tip' to its array index.

  Args:
    str: Keypoint label with the hand-side prefix already removed.
      (The parameter name shadows the builtin; it is kept so existing
      keyword callers continue to work.)

  Returns:
    The integer slot (0-20) assigned to that keypoint.

  Raises:
    ValueError: If the label is not one of the known keypoint names.
      Previously an unknown label fell through every branch and failed
      with an UnboundLocalError on the return statement.
  """
  try:
    return _KEYPOINT_INDEX[str]
  except (KeyError, TypeError):
    raise ValueError(
        f"Unknown keypoint label {str!r}; expected one of {sorted(_KEYPOINT_INDEX)}"
    ) from None

In [None]:
def draw_keypoints(im_name, L_x,L_y,R_x,R_y):
  """Show an image with its annotated left/right hand keypoints overlaid.

  Args:
    im_name: Path of the image file to read with cv2.imread.
    L_x, L_y: Left-hand keypoint coordinates, expressed as a percentage
      (0-100) of the image width / height. Negative entries (the -1
      placeholder used for unlabelled keypoints) are skipped.
    R_x, R_y: Same for the right hand.

  Raises:
    FileNotFoundError: If the image cannot be read.

  Left-hand points are drawn green, right-hand points red (OpenCV colours
  are given in BGR order). The annotated image is displayed with cv2_imshow.
  """
  print(im_name)

  img = cv2.imread(im_name)
  if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image file: {im_name!r}")

  height, width = img.shape[:2]

  hands = (
      (L_x, L_y, (0, 255, 0)),
      (R_x, R_y, (0, 0, 255)),
  )
  for xs, ys, colour in hands:
    for x_pct, y_pct in zip(xs, ys):
      # The old `x_coor == -1` guard ran after scaling and only executed
      # `pass`, so placeholder points were never actually skipped.
      if x_pct < 0 or y_pct < 0:
        continue
      x_coor = int(x_pct * width / 100)
      y_coor = int(y_pct * height / 100)
      cv2.circle(img, (x_coor, y_coor), 2, colour, -1)

  cv2_imshow(img)

In [None]:
import numpy as np

In [None]:
class Annotation:
    """Plain record holding the keypoint annotation of one image.

    Every constructor argument is stored unchanged on the instance under
    the attribute of the same name: the image name plus, for each hand
    (prefix ``L_`` / ``R_``), a label array and x / y coordinate arrays.
    """

    def __init__(self, image_name, L_keypoint_label_arr, L_x_arr, L_y_arr,
                 R_keypoint_label_arr, R_x_arr, R_y_arr):
        self.image_name = image_name

        # Left hand.
        self.L_keypoint_label_arr, self.L_x_arr, self.L_y_arr = (
            L_keypoint_label_arr, L_x_arr, L_y_arr)

        # Right hand.
        self.R_keypoint_label_arr, self.R_x_arr, self.R_y_arr = (
            R_keypoint_label_arr, R_x_arr, R_y_arr)

In [None]:
NUM_HAND_POINTS = 21

with open('more_data.json', 'r') as file:
    data = json.load(file)

# Parse the exported annotations into Annotation objects: one object per
# annotation set of every image entry.
coordinates = []  # not populated by this cell; kept in case other cells reference it
annotations = []
for item in data:

    image_name = item["data"]["img"]

    # Drop everything up to and including the first "-" of the stored name.
    # If there is no "-", use the name unchanged (previously the variable was
    # left stale from the prior item, or undefined on the first one).
    cut_index = image_name.find("-")
    if cut_index != -1:
        new_image_name = image_name[cut_index + len("-"):]
    else:
        new_image_name = image_name

    for annotation in item["annotations"]:
        # Fresh buffers for every annotation set so that two sets of the same
        # image never share (and overwrite) the same arrays.
        # -1 / "" mark keypoints that were not labelled.
        L_x_coors = np.ones(NUM_HAND_POINTS)*-1
        L_y_coors = np.ones(NUM_HAND_POINTS)*-1
        L_keypoints = [""]*NUM_HAND_POINTS

        R_x_coors = np.ones(NUM_HAND_POINTS)*-1
        R_y_coors = np.ones(NUM_HAND_POINTS)*-1
        R_keypoints = [""]*NUM_HAND_POINTS

        for point in annotation["result"]:
            keypoint = point["value"]["keypointlabels"][0]

            # Labels look like "L_<name>" / "R_<name>"; anything not starting
            # with "L" is treated as the right hand, as before.
            if keypoint.startswith("L"):
                new_string = keypoint[len("L_"):]
                labels, xs, ys = L_keypoints, L_x_coors, L_y_coors
            else:
                new_string = keypoint[len("R_"):]
                labels, xs, ys = R_keypoints, R_x_coors, R_y_coors

            index = str_to_index(new_string)
            labels[index] = new_string
            xs[index] = point["value"]["x"]
            ys[index] = point["value"]["y"]

        # Pass the label lists as the label arguments (the coordinate arrays
        # were previously passed twice, so the labels were never stored).
        annotations.append(Annotation(new_image_name, L_keypoints, L_x_coors, L_y_coors,
                                      R_keypoints, R_x_coors, R_y_coors))

In [None]:
# Sanity check: print the image name stored on every parsed annotation.
for anno in annotations:
  print(anno.image_name)

In [None]:
# Preview the annotated keypoints on (at most) the first 20 annotations.
for anno in annotations[:20]:
    draw_keypoints(anno.image_name, anno.L_x_arr, anno.L_y_arr, anno.R_x_arr, anno.R_y_arr)

In [None]:
def load_image(image_name, target_size=(240, 176)):
    """Read an image from disk, optionally resize it, and scale it to [0, 1].

    Args:
        image_name: Path of the image file.
        target_size: Size handed to cv2.resize as its ``dsize`` argument, i.e.
            ``(width, height)``; the default yields arrays of shape
            (176, 240, 3). Pass ``None`` to keep the original size.

    Returns:
        A float array with pixel values divided by 255. Channel order is
        whatever cv2.imread produced (BGR for colour images).

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    image = cv2.imread(image_name)
    if image is None:
        # cv2.imread returns None on failure; fail here with a clear message
        # rather than with an obscure error inside resize or the division.
        raise FileNotFoundError(f"Could not read image file: {image_name!r}")

    if target_size is not None:
        image = cv2.resize(image, target_size)

    # Normalize 0-255 pixel values to the 0-1 range.
    return image / 255.0

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the annotations as the test set (fixed seed so the split is repeatable).
train_annotations, test_annotations = train_test_split(annotations, test_size=0.1, random_state=42)

# Split the remaining 90% again: 30% of it becomes the validation set,
# i.e. roughly 63% train / 27% validation / 10% test overall.
train_annotations, val_annotations = train_test_split(train_annotations, test_size=0.3, random_state=42)

# train_annotations, val_annotations and test_annotations now hold the
# Annotation objects used to train, validate and test the model.


In [None]:
def _annotation_targets(annotation):
    """Return the (n_keypoints, 4) target array of one Annotation.

    Columns are, in order: L_x, L_y, R_x, R_y.
    """
    return np.concatenate([annotation.L_x_arr[:, np.newaxis], annotation.L_y_arr[:, np.newaxis],
                           annotation.R_x_arr[:, np.newaxis], annotation.R_y_arr[:, np.newaxis]], axis=-1)


def _build_dataset(annotation_subset):
    """Load images and keypoint targets for a list of Annotation objects.

    Returns:
        (images, targets) as numpy arrays; entry k of each array belongs to
        annotation_subset[k].
    """
    images = [load_image(annotation.image_name) for annotation in annotation_subset]
    targets = [_annotation_targets(annotation) for annotation in annotation_subset]
    return np.array(images), np.array(targets)


# Build the image / target arrays for each split with the same helper instead
# of three copies of the same loop.
x_train, y_train = _build_dataset(train_annotations)
x_val, y_val = _build_dataset(val_annotations)
x_test, y_test = _build_dataset(test_annotations)

In [None]:
# Print the shapes of the image and target arrays: train, validation, test.
for split_array in (x_train, y_train, x_val, y_val, x_test, y_test):
    print(split_array.shape)

In [None]:
# copy old hand pose estimation model
  # x = 11-->18
  # 18 --> 21, 42
# Define a simple convolutional neural network for hand pose estimation
# input
  # 1 image - 176 pixels (height) x 240 pixels (width) x 3 colour channels
# output
  # 21 joints x 4 values per joint (L_x, L_y, R_x, R_y)
def create_hand_pose_model(input_shape=(176, 240, 3), num_joints=21):
  """Build the (uncompiled) CNN used for hand keypoint regression.

  Architecture: five Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with
  64, 128, 256, 512 and 512 filters, then Flatten, Dense(700, relu),
  Dropout(0.5), a linear Dense layer with num_joints * 4 units, and a
  Reshape to (num_joints, 4).

  Args:
    input_shape: Shape of one input image, passed to the first Conv2D layer.
    num_joints: Number of joints; the model outputs 4 values per joint.

  Returns:
    A tf.keras Sequential model.
  """
  layers = tf.keras.layers
  model = tf.keras.models.Sequential()

  # Convolutional feature extractor; only the first layer receives input_shape.
  for stage, filters in enumerate((64, 128, 256, 512, 512)):
    first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
    model.add(layers.Conv2D(filters, (3, 3), activation='relu', **first_layer_kwargs))
    model.add(layers.MaxPooling2D((2, 2)))

  # Fully connected head.
  model.add(layers.Flatten())
  model.add(layers.Dense(700, activation='relu'))
  model.add(layers.Dropout(0.5))

  # Linear output of 4 values per joint, reshaped to (num_joints, 4).
  model.add(layers.Dense(num_joints * 4, activation='linear'))
  model.add(layers.Reshape((num_joints, 4)))

  return model


hand_pose_model = create_hand_pose_model()
# The model regresses continuous keypoint coordinates, so classification
# accuracy carries no meaning here; report mean absolute error next to the
# MSE loss instead.
hand_pose_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# hand_pose_model.summary()

In [None]:
# Train for 50 epochs with shuffling enabled, evaluating on the validation
# split; the returned History object is used for the loss curve.
history = hand_pose_model.fit(
    x_train,
    y_train,
    epochs=50,
    shuffle=True,
    validation_data=(x_val, y_val),
)

In [None]:
# Save the trained model under the path stored in MODEL_TF.
# NOTE(review): the path has no file extension; newer Keras releases appear to
# require a ".keras" or ".h5" suffix in Model.save — confirm against the
# installed TensorFlow/Keras version.
MODEL_TF = "hand_pose_model_tf_my_data"
hand_pose_model.save(MODEL_TF)

In [None]:
# Reload the model that was written to MODEL_TF.
loaded_model = tf.keras.models.load_model(MODEL_TF)

# NOTE(review): the prediction cells in this notebook call hand_pose_model,
# not loaded_model, so within the visible cells this reloaded copy is unused.

In [None]:
# Plot the per-epoch training and validation loss recorded by fit().
for history_key, curve_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    plt.plot(history.history[history_key], label=curve_label)

plt.title('Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
def draw_hand(y_train, color='cyan', x_col=0, y_col=1):
  """Draw a hand skeleton on the current matplotlib axes.

  Each finger is drawn as one polyline wrist -> base -> mp -> tip, using the
  keypoint indices assigned by str_to_index (wrist 0; thumb 2/3/4;
  index 5/7/8; middle 9/11/12; ring 13/15/16; pinkie 17/19/20). The
  previous version connected indices 1..10 in pairs, which matches an
  11-keypoint layout rather than the 21-keypoint arrays used here.

  Args:
    y_train: numpy array of shape (21, >=2) holding one sample's keypoints
      (name kept for backward compatibility; any sample may be passed).
    color: Matplotlib colour of the skeleton lines.
    x_col, y_col: Columns holding the x / y coordinates. The defaults
      (0, 1) select the left hand in the L_x, L_y, R_x, R_y layout;
      pass (2, 3) for the right hand.
  """
  finger_chains = (
      (0, 2, 3, 4),      # thumb
      (0, 5, 7, 8),      # index
      (0, 9, 11, 12),    # middle
      (0, 13, 15, 16),   # ring
      (0, 17, 19, 20),   # pinkie
  )

  for chain in finger_chains:
    joints = list(chain)
    plt.plot(y_train[joints, x_col], y_train[joints, y_col], color=color)


In [None]:
# Predict the keypoints of one training image and overlay them on it.
i = 2

guess = hand_pose_model.predict(x_train[i].reshape(-1,176,240,3))
print(guess.shape)

# Images were read with cv2 (BGR); reverse the channel axis so matplotlib,
# which expects RGB, shows the true colours.
plt.imshow(x_train[i][..., ::-1])

# The training targets are percentages of the image width / height (see
# draw_keypoints), so the predictions are too. Convert them to pixel
# coordinates of the displayed image before plotting.
img_h, img_w = x_train[i].shape[:2]
guess_px = guess[0] * np.array([img_w, img_h, img_w, img_h]) / 100.0

# Columns 0 and 1 are the left-hand x / y coordinates.
draw_hand(guess_px,'orange')
plt.scatter(guess_px[:, 0], guess_px[:, 1])

plt.show()

In [None]:
# Predict the keypoints of one held-out test image and overlay them on it.
i = 2

guess = hand_pose_model.predict(x_test[i].reshape(-1,176,240,3))

# Images were read with cv2 (BGR); reverse the channel axis so matplotlib,
# which expects RGB, shows the true colours.
plt.imshow(x_test[i][..., ::-1])

# The training targets are percentages of the image width / height (see
# draw_keypoints), so the predictions are too. Convert them to pixel
# coordinates of the displayed image before plotting.
img_h, img_w = x_test[i].shape[:2]
guess_px = guess[0] * np.array([img_w, img_h, img_w, img_h]) / 100.0

# Columns 0 and 1 are the left-hand x / y coordinates.
draw_hand(guess_px,'orange')
plt.scatter(guess_px[:, 0], guess_px[:, 1])

plt.show()