In [1]:
import tensorflow as tf
from tensorflow.keras.models import load_model

In [2]:
# Load the trained pose classifier from its HDF5 file. This notebook only
# calls model.predict(), so the model is loaded without being compiled.
model = load_model('classification.h5', compile=False)

In [3]:
# Print the layer-by-layer summary (layer types, output shapes, parameter counts).
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_46 (Conv2D)           (None, 222, 222, 16)      448       
_________________________________________________________________
conv2d_47 (Conv2D)           (None, 220, 220, 16)      2320      
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 110, 110, 16)      0         
_________________________________________________________________
conv2d_48 (Conv2D)           (None, 108, 108, 32)      4640      
_________________________________________________________________
conv2d_49 (Conv2D)           (None, 106, 106, 32)      9248      
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 53, 53, 32)        0         
_________________________________________________________________
conv2d_50 (Conv2D)           (None, 51, 51, 64)       

In [4]:
import cv2 as cv
# Keypoint name -> index of that keypoint's heatmap channel in the pose
# network output (poseDetector stores its detections in this same order).
BODY_PARTS = {
    name: index
    for index, name in enumerate((
        "Nose", "Neck",
        "RShoulder", "RElbow", "RWrist",
        "LShoulder", "LElbow", "LWrist",
        "RHip", "RKnee", "RAnkle",
        "LHip", "LKnee", "LAnkle",
        "REye", "LEye", "REar", "LEar",
        "Background",
    ))
}

# Keypoint pairs that are joined by a line when both ends were detected.
POSE_PAIRS = [
    # shoulders and arms
    ["Neck", "RShoulder"],
    ["Neck", "LShoulder"],
    ["RShoulder", "RElbow"],
    ["RElbow", "RWrist"],
    ["LShoulder", "LElbow"],
    ["LElbow", "LWrist"],
    # hips and legs
    ["Neck", "RHip"],
    ["RHip", "RKnee"],
    ["RKnee", "RAnkle"],
    ["Neck", "LHip"],
    ["LHip", "LKnee"],
    ["LKnee", "LAnkle"],
    # head
    ["Neck", "Nose"],
    ["Nose", "REye"],
    ["REye", "REar"],
    ["Nose", "LEye"],
    ["LEye", "LEar"],
]

# Default settings: size (in pixels) every frame is resized to before it is
# fed to the pose network.
width = 368
height = 368

# Pose-estimation network, loaded from a frozen TensorFlow graph.
net = cv.dnn.readNetFromTensorflow("graph_opt.pb")

# A keypoint is kept only if its heatmap peak is above this confidence.
thr = 0.2

def poseDetector(image):
    """Draw the detected body skeleton on ``image`` and return it scaled by 1/255.

    The frame is run through the module-level pose network ``net``. For every
    entry of ``BODY_PARTS`` the peak of its heatmap is taken as the keypoint
    position (rescaled to the frame size) and kept only when the peak value
    exceeds ``thr``. Each pair in ``POSE_PAIRS`` whose two keypoints were both
    kept is drawn as a green line with a (0, 0, 255) dot at each end (red if
    the frame is in BGR channel order).

    Args:
        image: Frame as an array whose ``shape`` starts with (height, width).
            It is drawn on in place.

    Returns:
        ``image / 255.`` -- a new array holding the annotated frame.

    Raises:
        ValueError: If ``image`` is ``None`` or has zero height or width.
        AssertionError: If the network yields a different number of heatmaps
            than ``BODY_PARTS`` has entries, or a pair names an unknown part.
    """
    # A failed capture read hands us None; fail with a clear message instead
    # of an AttributeError on ``image.shape``.
    if image is None:
        raise ValueError("poseDetector() received no image (got None)")

    imageHeight, imageWidth = image.shape[:2]
    if imageHeight == 0 or imageWidth == 0:
        raise ValueError("poseDetector() received an empty image")

    numParts = len(BODY_PARTS)

    blob = cv.dnn.blobFromImage(image, 1.0, (width, height),
                                (127.5, 127.5, 127.5), swapRB=True, crop=False)
    net.setInput(blob)
    out = net.forward()
    # Only the leading channels (one heatmap per body part) are used.
    out = out[:, :numParts, :, :]

    assert numParts == out.shape[1]

    heatmapHeight, heatmapWidth = out.shape[2], out.shape[3]

    points = []
    for i in range(numParts):
        # Heatmap of the i-th body part.
        heatMap = out[0, i, :, :]

        # The global maximum of the heatmap is the most likely position.
        _, conf, _, point = cv.minMaxLoc(heatMap)
        x = (imageWidth * point[0]) / heatmapWidth
        y = (imageHeight * point[1]) / heatmapHeight
        points.append((int(x), int(y)) if conf > thr else None)

    for partFrom, partTo in POSE_PAIRS:
        assert partFrom in BODY_PARTS
        assert partTo in BODY_PARTS

        start = points[BODY_PARTS[partFrom]]
        end = points[BODY_PARTS[partTo]]

        # Draw a limb only when both of its end points were detected.
        if start is not None and end is not None:
            cv.line(image, start, end, (0, 255, 0), 3)
            cv.ellipse(image, start, (3, 3), 0, 0, 360, (0, 0, 255), cv.FILLED)
            cv.ellipse(image, end, (3, 3), 0, 0, 360, (0, 0, 255), cv.FILLED)

    return image / 255.
   

In [5]:
def text(c, output):
    """Write the label of the top-scoring class onto ``output``.

    ``c`` is any object with an ``argmax()`` method (here the classifier's
    prediction). Indices 0-3 select the matching entry of the module-level
    ``labels`` list; any other index falls back to ``labels[4]``. The text is
    drawn in place through ``cv.putText`` and nothing is returned.
    """
    winner = c.argmax()
    label_index = winner if winner in (0, 1, 2, 3) else 4
    cv.putText(output, labels[label_index], (100, 100), 4, 1, 250, 4)

In [6]:
def preprocess(inp, dims=224):
    """Read one frame from ``inp``, annotate it and stamp the predicted label.

    The frame is passed through ``poseDetector``, a ``dims`` x ``dims`` copy
    of the result is classified by the module-level ``model``, and ``text``
    writes the winning label onto the annotated frame.

    Args:
        inp: Capture-like object whose ``read()`` returns ``(ok, frame)``.
        dims: Side length, in pixels, of the square image given to the model.

    Returns:
        The annotated, labelled frame, or ``None`` when ``inp`` delivered no
        frame (for example at the end of a video file). Callers must check
        for ``None`` before displaying the result.
    """
    ret, frame = inp.read()
    # read() signals failure / end of stream through its flag; without this
    # check a missing frame would be passed on to the pose detector.
    if not ret or frame is None:
        return None

    output = poseDetector(frame)

    # Resize to the input size the classifier expects and add a batch axis.
    resized_image = cv.resize(output, (dims, dims), interpolation=cv.INTER_AREA)
    resized_image = tf.expand_dims(resized_image, axis=0)

    c = model.predict(resized_image)
    text(c, output)
    return output

In [9]:
import numpy as np
# Class names; entry i is the label text() draws for class index i.
labels = ['downdog', 'goddess', 'plank', 'tree', 'warrior2']

insf = cv.VideoCapture('Tree Poses - Yoga Technique.mp4')
try:
    # Fail early with a readable message if the video could not be opened.
    if not insf.isOpened():
        raise OSError("Could not open video 'Tree Poses - Yoga Technique.mp4'")

    while True:
        output = preprocess(insf)
        # Never hand an empty result to imshow; stop the playback instead.
        if output is None:
            break

        cv.imshow('output', output)
        # Press 'q' in the preview window to stop early.
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame failed.
    insf.release()
    cv.destroyAllWindows()