In [6]:
import cv2
import mediapipe as mp
import os
import numpy as np
from functools import reduce
from datetime import datetime

In [7]:
locations = (
    (33, 4),
    (468, 3),
    (21, 3),
    (21, 3)
)
keypoints_len = reduce(lambda r, loc: r + loc[0] * loc[1], locations, 0)
keypoints_len

1662

In [8]:
def extract_keypoints(results):
  pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
  face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
  lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
  rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
  return np.concatenate([pose, face, lh, rh])

In [9]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [10]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

def draw_styled_landmarks(image, results):
    # # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [11]:
VIDEO_PATH = "s2s_vids"
# for word in os.listdir(VIDEO_PATH):
#     print(f'{datetime.now().time()} | Start processing {word}')
#     word_path = os.path.join(VIDEO_PATH, word)

#     for i, name in enumerate(os.listdir(word_path)):

#         print(i, name[:-4])

In [12]:
# with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
#     for word in os.listdir(VIDEO_PATH):
#         print(f'{datetime.now().time()} | Start processing {word}')
#         word_path = os.path.join(VIDEO_PATH, word)

#         for i, name in enumerate(os.listdir(word_path)):
#             word_video_path = os.path.join(word_path, name)

#             cap = cv2.VideoCapture(word_video_path)
#             frame_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

#             track = np.empty([frame_len, keypoints_len])

#             for i in range(frame_len):
#                 ret, frame = cap.read()

#                 landmarks = holistic.process(
#                     cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#                 )

#                 track[i] = extract_keypoints(landmarks)
            
#             save_path = os.path.join('tracks_binary', word)
#             os.makedirs(save_path, exist_ok=True)
#             track.astype('float32').tofile(f'tracks_binary/{word}/{name}')

#     cap.release()

In [32]:
words = os.listdir('s2s_vids')

for idx in range(23):
  for word in words[idx:idx+1]:
    adds = len(os.listdir(f'tracks_binary_manual/{word}'))
    print(idx, word, adds)

0 buy 80
1 do 60
2 eat 60
3 fine 80
4 go 60
5 hamburger 60
6 hi_hello 40
7 how 40
8 i_me 40
9 leave 220
10 long 220
11 no 40
12 perfect 40
13 pizza 120
14 same 60
15 since 40
16 thanks_thank_you 40
17 time 40
18 want 100
19 water 40
20 what 40
21 yes 40
22 you 160


In [90]:
words = os.listdir('s2s_vids')

idx = 23
hand = "left"

for word in words[idx:idx+1]:
  try:
    adds = len(os.listdir(f'tracks_binary_manual/{word}'))
    print(adds)
  except:
    adds = 0
    print("new one")
  print(word)
print(hand)

60
yes
left


In [91]:
num_vids = 20
frame_len = 32

cap = cv2.VideoCapture(0)

with mp.solutions.holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
  for word in words[idx:idx+1]:
    
    for video in range(num_vids): 
      track = np.empty([frame_len, keypoints_len])

      for frame_num in range(frame_len):
        ret, frame = cap.read()
        
        # flip horizontal
        image = cv2.flip(frame, 1)

        image, landmarks = mediapipe_detection(image, holistic)
        draw_styled_landmarks(image, landmarks)
        
        if frame_num == 0: 
          cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                      cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
          cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(word, video), (15,12), 
                      cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
          # Show to screen
          cv2.imshow('OpenCV Feed', image)
          cv2.waitKey(500)
        else: 
          cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(word, video), (15,12), 
                      cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
          # Show to screen
          cv2.imshow('OpenCV Feed', image)
        

        track[frame_num] = extract_keypoints(landmarks)
      
        save_path = os.path.join('tracks_binary_manual', word)
        os.makedirs(save_path, exist_ok=True)
        
        if hand == "left":
          track.astype('float32').tofile(f'tracks_binary_manual/{word}/{video+adds}_left')
        else:
          track.astype('float32').tofile(f'tracks_binary_manual/{word}/{video+adds}')
      
        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
          break

  cap.release()
  cv2.destroyAllWindows()
  

## Cam Test

In [35]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

### Rename

In [47]:
words = os.listdir('s2s_vids')

idx = 22

for word in words[idx:idx+1]:
  adds = len(os.listdir(f'tracks_binary_manual/{word}'))
  print(adds)
  print(word)

160
you


In [48]:
words = os.listdir('s2s_vids')

augs = 20
start = 140
end = start + augs

for word in words[idx:idx+1]:
  print(word)
  for i in range(start, end):
    old = f"tracks_binary_manual/{word}/{i}"
    new = f"tracks_binary_manual/{word}/{i}_left"
    os.rename(old, new)

you
