In [2]:
import cv2
import mediapipe as mp
import time
from scipy import spatial
from IPython.core.display import ProgressBar

In [None]:
# Mount Google Drive when running on Colab. Outside Colab the `google.colab`
# package does not exist, so skip the mount instead of failing the cell; this
# keeps "Restart & Run All" working on a local Jupyter kernel.
try:
  from google.colab import drive
except ImportError:
  drive = None  # not on Colab: nothing to mount
else:
  drive.mount('/content/drive')

In [3]:
# initialize mediapipe requirements (module-level objects shared by the functions below)
mpPose = mp.solutions.pose  # pose solution module; also provides POSE_CONNECTIONS for drawing
pose = mpPose.Pose()  # pose estimator created with default options, used by find_pose()
mpDraw = mp.solutions.drawing_utils  # drawing helpers used by plot_image()

In [4]:
# input video path (relative to the notebook's working directory)
# NOTE(review): the result of opening the file is not checked here — if no
# frames get processed, confirm the path with cap.isOpened().
cap = cv2.VideoCapture('../test_data/run1.mp4')

In [5]:
# global dict to store the coordinates (required towards MVP)
# Each key maps to a list of (x, y) tuples; store_coordinates() appends one tuple per key for every stored frame.
dict_coordinates = {'left_hand': [], 'right_hand': [], 'left_leg': [], 'right_leg': [], 'left_hip': [], 'right_hip': []}

In [6]:
# compute landmarks for a frame
def find_pose(img):
  """Run pose estimation on a single frame.

  Args:
    img: BGR frame as returned by ``cap.read()``, or ``None`` when the capture
      produced no frame.

  Returns:
    A tuple ``(img, results, break_signal)``:
      * ``img`` - the input frame, returned unchanged.
      * ``results`` - the object returned by ``pose.process`` (the caller reads
        its ``pose_landmarks`` attribute), or ``[]`` if processing did not get
        that far.
      * ``break_signal`` - ``True`` when the frame could not be processed and
        the caller should stop reading frames.
  """
  # A missing frame (end of video) is signalled explicitly instead of relying
  # on cv2 raising when handed ``None``.
  if img is None:
    return img, [], True

  break_signal = False
  results = []
  try:
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    results = pose.process(imgRGB)
    print('Landmarks:', results.pose_landmarks)
  except Exception as err:
    # ``Exception`` (not a bare except) so that a kernel interrupt
    # (KeyboardInterrupt) still stops the notebook; the reason is reported
    # rather than silently swallowed.
    print('Frame could not be processed:', err)
    break_signal = True

  return img, results, break_signal
  

In [7]:
# retrieve coordinates from lm_list and store the coordinates in the global dict
def store_coordinates(lm_list):
  """Append one frame's tracked body-part positions to ``dict_coordinates``.

  ``lm_list`` is the flat per-frame list in which landmark ``i`` occupies
  index ``2*i`` (x) and ``2*i + 1`` (y). For each tracked part an ``(x, y)``
  tuple is appended to the matching list in the global dict.
  """
  # (dict key, landmark id); the flat x index is twice the landmark id.
  tracked_parts = (
    ('left_hand', 19),   # left_index
    ('right_hand', 20),  # right_index
    ('left_hip', 23),    # left_hip
    ('right_hip', 24),   # right_hip
    ('left_leg', 31),    # left_foot
    ('right_leg', 32),   # right_foot
  )
  for part, landmark_id in tracked_parts:
    x_pos = 2 * landmark_id
    dict_coordinates[part].append((lm_list[x_pos], lm_list[x_pos + 1]))

In [None]:
# compute cosine similarity between two lm lists (kept for reference only; the notebook now imports check_similarity from src.utils.pose_utils)
# def check_similarity(list1, list2):
#   result = 1 - spatial.distance.cosine(list1, list2)
#   return result

In [10]:
import sys
# Add the parent directory to the import path so that the `src` package below
# can be resolved (presumably the notebook sits one level below the project root — verify).
sys.path.append('../')

# check_similarity(prev, lm_list) is used in main() to compare consecutive frames
from src.utils.pose_utils import check_similarity

In [19]:
# plot the image only when the frames are dissimilar
def plot_image(img, results, cx, cy, pTime):
  """Draw the pose, a marker and an FPS read-out on ``img`` and show it.

  Args:
    img: frame to annotate; the cv2 drawing calls write onto it.
    results: pose result whose ``pose_landmarks`` are drawn.
    cx, cy: pixel position of the marker circle.
    pTime: ``time.time()`` timestamp of the previously displayed frame; the
      FPS value is the inverse of the time elapsed since then.

  Returns:
    The timestamp taken for this frame, so the caller can pass it as ``pTime``
    on the next call. (Previously the new timestamp was only assigned to a
    local variable and lost.)
  """
  mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
  cv2.circle(img, (cx, cy), 5, (255, 0, 150), cv2.FILLED)

  cTime = time.time()
  elapsed = cTime - pTime
  # Guard against a zero (or negative) interval, e.g. coarse clock resolution.
  fps = 1 / elapsed if elapsed > 0 else 0

  cv2.putText(img, str(int(fps)), (50, 50), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0), 3)
  cv2.imshow('image', img)
  cv2.waitKey(1)  # lets the GUI event loop refresh the window
  return cTime

In [20]:
def main(cap, similarity_threshold=0.99999):
  """Process every frame of ``cap`` and store coordinates of dissimilar frames.

  For each frame the pose landmarks are converted to pixel coordinates and
  flattened into ``lm_list`` (x, y per landmark). The first frame with
  landmarks is always stored; every later frame is stored (and displayed) only
  when its similarity to the previous frame with landmarks is below
  ``similarity_threshold``.

  Args:
    cap: object with a ``read()`` method returning ``(success, img)``, such as
      a ``cv2.VideoCapture``. The capture is not released here.
    similarity_threshold: frames whose ``check_similarity`` value is strictly
      below this are stored. Defaults to the previously hard-coded 0.99999.
  """
  prev = []
  first_frame_flag = True
  total_frames_count = 0
  stored_frames_count = 0
  # Timestamp of the last displayed frame; start at "now" so the first FPS
  # value measures real elapsed time instead of time since the epoch.
  pTime = time.time()

  try:
    while True:
      print('----------------------')
      print('Processing a new frame')
      success, img = cap.read()

      # no more input frames in the video: stop without running pose estimation
      if not success:
        break

      img, results, main_break_signal = find_pose(img)

      # find_pose signals that the frame could not be processed
      if main_break_signal:
        break

      lm_list = []

      if results.pose_landmarks:
        # frame size is the same for every landmark, so read it once
        h, w = img.shape[:2]

        # add all 66 coordinates to lm_list
        for lm in results.pose_landmarks.landmark:
          cx, cy = int(lm.x * w), int(lm.y * h)
          lm_list.append(cx)
          lm_list.append(cy)
        # NOTE: cx, cy now hold the position of the last landmark; that is
        # where plot_image draws its marker.

        # for the first frame, compute and store the coordinates
        if first_frame_flag:
          store_coordinates(lm_list)
          print("Similarity found for the first frame and coordinates stored")
          first_frame_flag = False
          stored_frames_count += 1
          plot_image(img, results, cx, cy, pTime)
          pTime = time.time()

        # from next frame onwards, first check similarity and then store the coordinates
        else:
          result = check_similarity(prev, lm_list)  # prev = 66 coordinates, lm_list = 66 coordinates
          print('Similarity Value:', result)
          # NOTE(review): the message says "Similarity found", but this branch
          # runs when the frames are *dissimilar* enough to be worth storing.
          if result < similarity_threshold:
            store_coordinates(lm_list)
            print("Similarity found and coordinates stored")
            stored_frames_count += 1
            plot_image(img, results, cx, cy, pTime)
            pTime = time.time()

        prev = lm_list

        print('Prev list: ', prev)
        print('Length of prev list: ', len(prev))
        print('LM list: ', lm_list)
        print('Length of lm_list: ', len(lm_list))

      total_frames_count += 1
  finally:
    # Close the display window opened by plot_image, even on error/interrupt.
    # Skipped when nothing was displayed so no GUI call is made needlessly.
    if stored_frames_count:
      cv2.destroyAllWindows()

  print('---------- Processsing Completed ----------')
  print('Total frames processed: ', total_frames_count)
  print('Total frames stored: ', stored_frames_count)

In [21]:
def get_last_frame_cordinates(cap):
  """Run the pipeline on ``cap`` and return the most recently stored coordinates.

  ``main(cap)`` fills the global ``dict_coordinates``; the last ``(x, y)``
  entry of each tracked part is then collected in the order
  left hand, right hand, left hip, right hip, left leg, right leg.

  Returns:
    A list of six ``(x, y)`` tuples.
  """
  # Run code on input video - cap and store coordinates of all frames in dict_coordinates
  main(cap)

  # Output order differs from the dict's own key order, so spell it out.
  ordered_parts = ('left_hand', 'right_hand', 'left_hip', 'right_hip', 'left_leg', 'right_leg')
  return [dict_coordinates[part][-1] for part in ordered_parts]

In [22]:
# Entry point: run the full pipeline on the video opened in `cap` above.
# The captured output below this cell shows it executes when run in the notebook.
if __name__ == "__main__":
    main(cap)

----------------------
Processing a new frame
Landmarks: landmark {
  x: 0.3832739
  y: 0.356197596
  z: -0.075113453
  visibility: 0.989738047
}
landmark {
  x: 0.380711377
  y: 0.349635273
  z: -0.0711547807
  visibility: 0.981137633
}
landmark {
  x: 0.380109757
  y: 0.348896146
  z: -0.0712005
  visibility: 0.977962255
}
landmark {
  x: 0.379480183
  y: 0.34818846
  z: -0.0712573
  visibility: 0.982801557
}
landmark {
  x: 0.380763143
  y: 0.351932526
  z: -0.0874556154
  visibility: 0.988811672
}
landmark {
  x: 0.380241841
  y: 0.352776796
  z: -0.0874517336
  visibility: 0.987520576
}
landmark {
  x: 0.379693449
  y: 0.353653938
  z: -0.0874286592
  visibility: 0.988484859
}
landmark {
  x: 0.373395383
  y: 0.350268304
  z: -0.0445981845
  visibility: 0.970776677
}
landmark {
  x: 0.373822272
  y: 0.356826514
  z: -0.118309222
  visibility: 0.97672081
}
landmark {
  x: 0.380796373
  y: 0.361558914
  z: -0.0623917505
  visibility: 0.984050214
}
landmark {
  x: 0.38052997
  y: 0.3



Landmarks: landmark {
  x: 0.379488051
  y: 0.360316485
  z: -0.0807285234
  visibility: 0.992469966
}
landmark {
  x: 0.37719211
  y: 0.354516387
  z: -0.0766474903
  visibility: 0.986163
}
landmark {
  x: 0.376668543
  y: 0.354168862
  z: -0.076677151
  visibility: 0.98383069
}
landmark {
  x: 0.376090437
  y: 0.353778332
  z: -0.0767231733
  visibility: 0.987377524
}
landmark {
  x: 0.376897544
  y: 0.355722666
  z: -0.091690734
  visibility: 0.991791546
}
landmark {
  x: 0.376222223
  y: 0.356241584
  z: -0.0916984901
  visibility: 0.990836084
}
landmark {
  x: 0.375498474
  y: 0.356789351
  z: -0.0916963667
  visibility: 0.991532266
}
landmark {
  x: 0.369995803
  y: 0.355931073
  z: -0.0479443856
  visibility: 0.978419542
}
landmark {
  x: 0.369455397
  y: 0.360058963
  z: -0.117534995
  visibility: 0.98295337
}
landmark {
  x: 0.377238929
  y: 0.366358668
  z: -0.0679970905
  visibility: 0.988146424
}
landmark {
  x: 0.376582533
  y: 0.368054241
  z: -0.0882673934
  visibility: 