## Trainer Code

This notebook takes a trainer video, which serves as the ideal reference for the exercise and for processing.
The trainer video starts directly with the exercise, so the initial position is the 0th frame.
The trainer video shows a side view of the trainer and is not tilted.
The trainer performs only one round (repetition) of the exercise.
The points and angles obtained are stored in CSV files and later compared with the user's frames to count the number of correct reps.

## Import Libraries

In [38]:
import cv2
import os
import tensorflow_hub as hub
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import gdown

# Load the MoveNet single-pose "lightning" model from TF Hub and keep its
# serving signature as the callable used for inference.
pose_model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
movenet = pose_model.signatures['serving_default']
# Accumulators filled by process_frame: one entry per saved key frame.
key_frames_angles = []
key_frames_points = []

## Change the name of input video to whatever the user wants to check on

In [39]:
# input_video: clip opened by the final rendering cell of the user section.
# exercise_name: also used as the directory for key-frame images and CSV files.
input_video = "wrong_pushup.mp4"
exercise_name = "PushUp"

In [40]:
# A trainer frame is saved as a key frame when the summed absolute change of all
# joint angles (degrees) against the current reference angles exceeds this value.
threshold_angle = 45

## Change trainer video to desired exercise video

In [41]:

# Reference (trainer) clip from which key frames are extracted.
trainer_video = "pushup.gif"
# Key-frame images and CSV files are written into a directory named after the exercise.
if not os.path.exists(exercise_name):
    os.makedirs(exercise_name)
    print(f"Directory '{exercise_name}' created.")

### Get angles and key points

In [42]:
### Returns key points by taking the model output for an image
def get_keypoints(outputs):
    """Select the keypoints used by this notebook from a pose-model result.

    Args:
        outputs: mapping whose 'output_0' entry exposes ``.numpy()``; the
            resulting array is indexed as ``[0, 0, keypoint, :]``.

    Returns:
        dict: body-part name -> keypoint row, in a fixed order (nose, then
        left/right shoulder, elbow, wrist, hip, knee, ankle), followed by
        'neck', which is the mean of the two shoulder rows.
    """
    points = outputs['output_0'].numpy()[0, 0, :, :]

    # Row index of every keypoint that is kept; insertion order is the output order.
    row_of = {
        'nose': 0,
        'left_shoulder': 5,
        'right_shoulder': 6,
        'left_elbow': 7,
        'right_elbow': 8,
        'left_wrist': 9,
        'right_wrist': 10,
        'left_hip': 11,
        'right_hip': 12,
        'left_knee': 13,
        'right_knee': 14,
        'left_ankle': 15,
        'right_ankle': 16,
    }
    body_parts = {name: points[row] for name, row in row_of.items()}
    # The model has no neck keypoint; approximate it by the shoulder midpoint.
    body_parts['neck'] = (body_parts['left_shoulder'] + body_parts['right_shoulder']) / 2
    return body_parts

In [43]:
def cosine_angle(v1, v2):
    """Return the angle between two vectors in degrees.

    Args:
        v1, v2: numeric vectors of equal length.

    Returns:
        Angle in degrees in [0, 180]. When either vector has zero length the
        angle is undefined; 0.0 is returned if the two vectors are equal and
        180.0 otherwise.
    """
    numer = np.dot(v1, v2)
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denom == 0:
        return 0.0 if np.array_equal(v1, v2) else 180.0
    # Rounding can push the ratio marginally outside [-1, 1] for (anti)parallel
    # vectors, which would make arccos return NaN; clamp it first.
    cos_theta = np.clip(numer / denom, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

def calculate_angle(point1,point2,point3):
    """Angle in degrees at ``point2`` between the rays to ``point1`` and ``point3``.

    Only the first two components of each point are used.
    """
    vertex = point2[:2]
    return cosine_angle(point1[:2] - vertex, point3[:2] - vertex)

In [44]:
## Get angles from key points.
def get_angles(keypoints):
    """Compute the joint angles (degrees) used to describe a pose.

    Args:
        keypoints: dict of body-part name -> point, as built by get_keypoints.

    Returns:
        dict: angle label -> angle from calculate_angle, in a fixed order.
    """
    # (label, first point, vertex, last point)
    # NOTE(review): the two "Nose-Neck-... Shoulder" entries are measured towards
    # the *elbow*, not the shoulder. Kept as is, because stored trainer angles and
    # user angles must be computed the same way -- confirm which was intended.
    joints = (
        ("Nose-Neck-Right Shoulder", 'nose', 'neck', 'right_elbow'),
        ("Nose-Neck-Left Shoulder", 'nose', 'neck', 'left_elbow'),
        ("Neck-Right Shoulder-Right Elbow", 'neck', 'right_shoulder', 'right_elbow'),
        ("Neck-Left Shoulder-Left Elbow", 'neck', 'left_shoulder', 'left_elbow'),
        ("Right Shoulder-Right Elbow-Right Wrist", 'right_shoulder', 'right_elbow', 'right_wrist'),
        ("Left Shoulder-Left Elbow-Left Wrist", 'left_shoulder', 'left_elbow', 'left_wrist'),
        ("Neck-Right Hip-Right Knee", 'neck', 'right_hip', 'right_knee'),
        ("Neck-Left Hip-Left Knee", 'neck', 'left_hip', 'left_knee'),
        ("Right Hip-Right Knee-Right Ankle", 'right_hip', 'right_knee', 'right_ankle'),
        ("Left Hip-Left Knee-Left Ankle", 'left_hip', 'left_knee', 'left_ankle'),
    )
    return {
        label: calculate_angle(keypoints[first], keypoints[vertex], keypoints[last])
        for label, first, vertex, last in joints
    }

In [45]:
# Takes an image tensor and returns all key points and angles
def get_data_from_model(image):
    """Run the pose model on one image.

    Args:
        image: image tensor accepted by ``tf.image.resize_with_pad``.

    Returns:
        tuple: (keypoints dict from get_keypoints, angles dict from get_angles).
    """
    # Letterbox to 192x192, cast to int32 and add a leading batch axis.
    resized = tf.image.resize_with_pad(image, 192, 192)
    batch = tf.expand_dims(tf.cast(resized, dtype=tf.int32), axis=0)

    keypoints = get_keypoints(movenet(batch))
    return keypoints, get_angles(keypoints)

# Compares the current frame's angles with the reference angles and stores the
# frame as a key frame when they differ significantly
def process_frame(frame, count, angles):
    """Store ``frame`` as a key frame if its pose differs enough from ``angles``.

    Args:
        frame: BGR image as read by OpenCV.
        count: frame counter, used in the saved image name.
        angles: reference angles dict to compare against.

    Returns:
        The new frame's angles if it was stored as a key frame (image written,
        angles and keypoints appended to the module-level lists); otherwise
        ``angles`` unchanged.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    tensor = tf.convert_to_tensor(rgb, dtype=tf.int32)
    keypoints, current = get_data_from_model(tensor)

    # Summed absolute difference over all angle labels.
    total_change = sum(abs(angles[label] - current[label]) for label in angles)
    if not total_change > threshold_angle:
        return angles

    out_path = os.path.join(exercise_name, f"pose{count}.jpg")
    cv2.imwrite(out_path, frame)
    key_frames_angles.append(current)
    key_frames_points.append(keypoints)
    return current

In [46]:
# Open the trainer clip; the first frame only provides the initial reference angles.
video = cv2.VideoCapture(trainer_video)
count = 0
ret, frame = video.read()

if not ret:
    print("Failed to read video")
else:
    # OpenCV delivers BGR; convert to RGB before building the int32 tensor.
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = tf.convert_to_tensor(image,dtype=tf.int32)

    # Reference angles from the first frame (this frame is not itself stored as a key frame).
    _, angles = get_data_from_model(image)
    count += 1

    while True:
        ret, frame = video.read()
        if not ret:
            break
        # process_frame returns the updated reference angles (changed only when a key frame was saved).
        angles = process_frame(frame, count, angles)
        
        count += 1
        # Allow aborting with 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

video.release()
cv2.destroyAllWindows()

In [47]:
import csv
# Output files for the key-frame angles and key-frame points, inside the exercise directory.
csv_file1 = "key_frame_angles.csv"
csv_file2 = "key_frame_points.csv"
csv_file1 = os.path.join(exercise_name,csv_file1)
csv_file2 = os.path.join(exercise_name,csv_file2)
# Extract the headers from the first dictionary (keys of the dictionary).
# NOTE: raises IndexError if no key frame was collected.
headers1 = key_frames_angles[0].keys()
headers2 = key_frames_points[0].keys()

# Write the angles: one row per key frame, one column per angle label.
with open(csv_file1, "w", newline='') as f:
    writer = csv.DictWriter(f, fieldnames=headers1)
    writer.writeheader()
    writer.writerows(key_frames_angles)
# Write the points: one row per key frame, one column per body part. Each cell
# holds the text form of the point, which the user section parses back.
with open(csv_file2, "w", newline='') as f:
    writer = csv.DictWriter(f, fieldnames=headers2)
    writer.writeheader()
    writer.writerows(key_frames_points)

# User Code 

This notebook takes the user's video and processes its frames to compare them with the trainer key frames stored in the CSV files.

# Import Libraries

In [48]:
import cv2
import os
import tensorflow_hub as hub
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import csv

# Load the MoveNet single-pose "lightning" model from TF Hub and keep its
# serving signature as the callable used for inference.
pose_model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
movenet = pose_model.signatures['serving_default']

# Placeholders; both are reassigned below from the trainer CSV files.
key_frames_angles = []
key_frames_points = []


In [49]:
# Map from angle index to angle label. process_frame uses it to turn the index
# returned by l1_norm into the label shown in the 'problem' message.
body_parts = {
    0: "Nose-Neck-Right Shoulder",
    1: "Nose-Neck-Left Shoulder",
    2: "Neck-Right Shoulder-Right Elbow",
    3: "Neck-Left Shoulder-Left Elbow",
    4: "Right Shoulder-Right Elbow-Right Wrist",
    5: "Left Shoulder-Left Elbow-Left Wrist",
    6: "Neck-Right Hip-Right Knee",
    7: "Neck-Left Hip-Left Knee",
    8: "Right Hip-Right Knee-Right Ankle",
    9: "Left Hip-Left Knee-Left Ankle"
}


In [50]:
# Read the trainer key-frame angles: one dict of strings per CSV row.
# NOTE: relies on exercise_name defined in the trainer section.
csv_file = os.path.join(exercise_name,"key_frame_angles.csv")

with open(csv_file, "r") as f:
    reader = csv.DictReader(f)
    data = [dict(row) for row in reader]

# Read the trainer key-frame points the same way.
csv_file = os.path.join(exercise_name,"key_frame_points.csv")

with open(csv_file, "r") as f:
    reader = csv.DictReader(f)
    data2 = [dict(row) for row in reader]


In [51]:
# Angles: one list of floats per trainer key frame, in CSV column order.
key_frames_angles = [[float(d[key]) for key in d] for d in data]
# Points: one list per key frame, still the raw CSV strings at this stage.
key_frames_points = [[d[key] for key in d] for d in data2]

In [52]:
# Parse every stored point string: drop the surrounding brackets and split on
# whitespace, giving frame -> body part -> list of floats.
key_frames_points = [[[float(co) for co in part.strip("[]").split() ] for part in frame] for frame in key_frames_points]

In [53]:
# Number of trainer key frames; process_frame wraps the key-frame index modulo this value.
trainer_frame_count = len(key_frames_angles)

In [54]:
### Returns key points by taking the model output for an image
def get_keypoints(outputs):
    """Select the keypoints used by this notebook from a pose-model result.

    Args:
        outputs: mapping whose 'output_0' entry exposes ``.numpy()``; the
            resulting array is indexed as ``[0, 0, keypoint, :]``.

    Returns:
        dict: body-part name -> keypoint row, in a fixed order (nose, then
        left/right shoulder, elbow, wrist, hip, knee, ankle), followed by
        'neck', which is the mean of the two shoulder rows.
    """
    points = outputs['output_0'].numpy()[0, 0, :, :]

    # Row index of every keypoint that is kept; insertion order is the output order.
    row_of = {
        'nose': 0,
        'left_shoulder': 5,
        'right_shoulder': 6,
        'left_elbow': 7,
        'right_elbow': 8,
        'left_wrist': 9,
        'right_wrist': 10,
        'left_hip': 11,
        'right_hip': 12,
        'left_knee': 13,
        'right_knee': 14,
        'left_ankle': 15,
        'right_ankle': 16,
    }
    body_parts = {name: points[row] for name, row in row_of.items()}
    # The model has no neck keypoint; approximate it by the shoulder midpoint.
    body_parts['neck'] = (body_parts['left_shoulder'] + body_parts['right_shoulder']) / 2
    return body_parts

In [55]:
def cosine_angle(v1, v2):
    """Return the angle between two vectors in degrees.

    Args:
        v1, v2: numeric vectors of equal length.

    Returns:
        Angle in degrees in [0, 180]. When either vector has zero length the
        angle is undefined; 0.0 is returned if the two vectors are equal and
        180.0 otherwise.
    """
    numer = np.dot(v1, v2)
    denom = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denom == 0:
        return 0.0 if np.array_equal(v1, v2) else 180.0
    # Rounding can push the ratio marginally outside [-1, 1] for (anti)parallel
    # vectors, which would make arccos return NaN; clamp it first.
    cos_theta = np.clip(numer / denom, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

def calculate_angle(point1,point2,point3):
    """Angle in degrees at ``point2`` between the rays to ``point1`` and ``point3``.

    Only the first two components of each point are used.
    """
    vertex = point2[:2]
    return cosine_angle(point1[:2] - vertex, point3[:2] - vertex)

In [56]:
## Get angles from key points.
def get_angles(keypoints):
    """Compute the joint angles (degrees) used to describe a pose.

    Args:
        keypoints: dict of body-part name -> point, as built by get_keypoints.

    Returns:
        dict: angle label -> angle from calculate_angle, in a fixed order.
    """
    # (label, first point, vertex, last point)
    # NOTE(review): the two "Nose-Neck-... Shoulder" entries are measured towards
    # the *elbow*, not the shoulder. Kept as is, because stored trainer angles and
    # user angles must be computed the same way -- confirm which was intended.
    joints = (
        ("Nose-Neck-Right Shoulder", 'nose', 'neck', 'right_elbow'),
        ("Nose-Neck-Left Shoulder", 'nose', 'neck', 'left_elbow'),
        ("Neck-Right Shoulder-Right Elbow", 'neck', 'right_shoulder', 'right_elbow'),
        ("Neck-Left Shoulder-Left Elbow", 'neck', 'left_shoulder', 'left_elbow'),
        ("Right Shoulder-Right Elbow-Right Wrist", 'right_shoulder', 'right_elbow', 'right_wrist'),
        ("Left Shoulder-Left Elbow-Left Wrist", 'left_shoulder', 'left_elbow', 'left_wrist'),
        ("Neck-Right Hip-Right Knee", 'neck', 'right_hip', 'right_knee'),
        ("Neck-Left Hip-Left Knee", 'neck', 'left_hip', 'left_knee'),
        ("Right Hip-Right Knee-Right Ankle", 'right_hip', 'right_knee', 'right_ankle'),
        ("Left Hip-Left Knee-Left Ankle", 'left_hip', 'left_knee', 'left_ankle'),
    )
    return {
        label: calculate_angle(keypoints[first], keypoints[vertex], keypoints[last])
        for label, first, vertex, last in joints
    }

In [57]:
def get_data(image):
    """Run the pose model on one image and return the raw keypoint rows.

    Args:
        image: image tensor accepted by ``tf.image.resize_with_pad``.

    Returns:
        The ``[0, 0, :, :]`` slice of the model's 'output_0' as a NumPy array
        (one row per keypoint).
    """
    # Letterbox to 192x192, cast to int32 and add a leading batch axis.
    resized = tf.image.resize_with_pad(image, 192, 192)
    batch = tf.expand_dims(tf.cast(resized, dtype=tf.int32), axis=0)

    result = movenet(batch)
    return result['output_0'].numpy()[0, 0, :, :]

In [58]:

def l1_norm(l1, l2):
    """L1 distance between two equal-length sequences, plus the worst index.

    Args:
        l1, l2: numeric sequences of the same length.

    Returns:
        tuple: (sum of absolute element-wise differences, index of the largest
        absolute difference). For empty input the result is (0.0, 0).

    Raises:
        ValueError: if the sequences differ in length.
    """
    if len(l1) != len(l2):
        raise ValueError("Lists must be of the same length.")
    diffs = np.abs(np.asarray(l1) - np.asarray(l2))
    err = np.sum(diffs)
    # argmax must look at the per-element differences; applied to the scalar
    # sum it would always yield 0.
    ind = np.argmax(diffs) if diffs.size else 0
    return err, ind

def get_angles_from_model(frame):
    """Run the pose model on an OpenCV frame.

    Args:
        frame: BGR image as read by OpenCV.

    Returns:
        tuple: (angles dict from get_angles, keypoints dict from get_keypoints).
    """
    # BGR -> RGB, then to an int32 tensor.
    rgb_tensor = tf.convert_to_tensor(
        cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), dtype=tf.int32
    )

    # Letterbox to 192x192, cast to int32 and add a leading batch axis.
    resized = tf.image.resize_with_pad(rgb_tensor, 192, 192)
    batch = tf.expand_dims(tf.cast(resized, dtype=tf.int32), axis=0)

    keypoints = get_keypoints(movenet(batch))
    return get_angles(keypoints), keypoints

  
def process_frame(frame, frame_id_from_trainer,mapping_frames):
    """Match a user frame to the current or the next trainer key frame.

    Args:
        frame: BGR image as read by OpenCV.
        frame_id_from_trainer: index of the trainer key frame currently matched.
        mapping_frames: list that receives one dict per processed frame with
            the keys 'user', 'trainer' and 'problem'.

    Returns:
        The (possibly advanced, wrapping) trainer key-frame index, or 0 when
        the incoming index equals the number of key frames (nothing is
        appended in that case).
    """
    user_angles, keypoints = get_angles_from_model(frame)
    angle_values = [float(value) for value in user_angles.values()]

    if frame_id_from_trainer == len(key_frames_angles):
        return 0

    dist_here, worst_here = l1_norm(key_frames_angles[frame_id_from_trainer], angle_values)
    following_id = (frame_id_from_trainer + 1) % trainer_frame_count
    dist_following, worst_following = l1_norm(key_frames_angles[following_id], angle_values)

    # Advance only when the next key frame is strictly closer than the current one.
    if dist_following < dist_here:
        frame_id_from_trainer = following_id
        worst = worst_following
    else:
        worst = worst_here
    prob = 'Problem in ' + body_parts[worst] + ' Angle'

    mapping_frames.append({
        'user': keypoints,
        'trainer': key_frames_points[frame_id_from_trainer],
        'problem': prob,
    })
    return frame_id_from_trainer
    
    

In [59]:
# Show how many trainer key frames were loaded.
trainer_frame_count

18

In [60]:
# Per-frame matches (dicts with 'user', 'trainer', 'problem'), filled by process_frame.
mapping_frames = []
# [first, last] inclusive user-frame index ranges over which the matched trainer
# key-frame id stayed the same.
final_frames = []
def main(video_path=None):
    """Match every frame of the user video to a trainer key frame.

    Fills the module-level ``mapping_frames`` and ``final_frames`` lists; both
    are cleared first so that calling ``main`` again does not accumulate
    entries from an earlier run.

    Args:
        video_path: video to analyse. Defaults to the notebook-level
            ``input_video`` so this step and the rendering cell always read
            the same file.
    """
    if video_path is None:
        video_path = input_video

    mapping_frames.clear()
    final_frames.clear()

    video = cv2.VideoCapture(video_path)
    frame_id = 0
    num_frames = 0
    old_frame_index = 0
    old_frame = 0
    try:
        while True:
            ret, frame = video.read()
            if not ret:
                break

            frame_id = process_frame(frame, frame_id, mapping_frames)
            # The matched key frame changed: close the segment that ended on
            # the previous frame and start a new one at this frame.
            if old_frame != frame_id:
                final_frames.append([old_frame_index, num_frames-1])
                old_frame_index = num_frames

            old_frame = frame_id
            print(frame_id,num_frames)

            num_frames += 1

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        # Close the last open segment.
        final_frames.append([old_frame_index,num_frames-1])
    finally:
        # Release the capture even if processing a frame raised.
        video.release()
        cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

0 0
0 1
0 2
0 3
0 4
0 5
0 6
0 7
0 8
0 9
0 10
0 11
0 12
0 13
0 14
0 15
0 16
0 17
0 18
0 19
0 20
0 21
0 22
0 23
0 24
0 25
0 26
0 27
0 28
0 29
0 30
0 31
0 32
0 33
0 34
0 35
0 36
0 37
0 38
0 39
0 40
0 41
1 42
2 43
3 44
4 45
5 46
6 47
7 48
8 49
9 50
9 51
9 52
9 53
9 54
9 55
9 56
9 57
9 58
10 59
10 60
10 61
11 62
11 63
11 64
11 65
12 66
12 67
12 68
12 69
12 70
12 71
12 72
12 73
12 74
12 75
12 76
12 77
12 78
12 79
12 80
12 81
12 82
12 83
12 84
12 85
12 86
12 87
12 88
12 89
12 90
12 91
12 92
12 93
12 94
12 95
12 96
13 97
14 98
15 99
16 100
17 101
17 102
17 103
17 104
17 105
17 106
17 107
17 108
17 109
17 110
17 111
17 112
17 113
17 114
17 115
17 116
17 117
17 118
17 119
17 120
17 121
17 122
17 123
17 124
17 125
17 126
17 127
17 128
17 129
17 130
17 131
17 132
17 133
17 134
17 135
17 136
17 137
17 138
17 139
17 140
17 141
17 142
17 143
17 144
17 145
17 146
17 147
17 148
17 149
17 150
17 151
17 152
17 153
17 154
17 155
17 156
17 157
17 158
17 159
17 160
17 161
17 162
17 163
17 164
17 165
17 166


In [61]:
# Number of per-frame matches collected by main().
len(mapping_frames)

290

In [62]:
# X: trainer key-frame points matched to each processed user frame.
X = np.array([d['trainer'] for d in mapping_frames])
# Y: user keypoint dicts, then flattened to one row per body part in dict order
# so that it lines up with X.
Y = np.array([d['user'] for d in mapping_frames])
Y = np.array([[frame[key] for key in frame] for frame in Y])

In [63]:
# Sanity check: both arrays should have the same shape.
print(X.shape)
print(Y.shape)

(290, 14, 3)
(290, 14, 3)


In [64]:
# Replace the user keypoints inside each segment of final_frames by a linear
# interpolation between the segment's first and last frame.
# NOTE: this cell overwrites Y, so re-running it without rebuilding Y first
# interpolates already-interpolated data.
final_Y = []

for left, right in final_frames:
        frame_left = Y[left]
        frame_right = Y[right]
        # Segment length (both ends inclusive).
        a = right-left+1
        # A single-frame segment is copied unchanged (also avoids dividing by a-1 == 0).
        if(a==1):
            final_Y.append(frame_left)
            continue
        for i in range(a):
            # Weight slides from frame_left (i == 0) to frame_right (i == a-1).
            final_Y.append(((a-i-1)*frame_left + i*frame_right)/(a-1))
Y = np.array(final_Y)

In [65]:
# Peek at one trainer point (first frame, first body part).
X[0][0]

array([0.46218392, 0.8383655 , 0.55697393])

In [66]:
import numpy as np

def convert_to_2d(frame):
    """Flatten a (frames, points, channels) array to rows of two values.

    Keeps only the first two channels of every point and stacks all points of
    all frames into one array of shape (frames * points, 2).
    """
    first_two = frame[:, :, 0:2]
    return np.reshape(first_two, (-1, 2))
# Flattened 2-column views of the trainer (X1) and user (Y1) points for the affine fit.
X1 = convert_to_2d(X)
Y1 = convert_to_2d(Y)

In [67]:
# We will show positions to user based on trainer video. For that affine transformation from trainer to user should be done
import numpy as np
from scipy.optimize import least_squares

def affine_transformation(X, Y):
    """Fit an affine map ``Y ~ X @ A + b`` in the least-squares sense.

    The problem is linear in ``A`` and ``b``, so it is solved in closed form
    with ``np.linalg.lstsq`` rather than with an iterative optimiser.

    Args:
        X: array of shape (n, m), source points (one per row).
        Y: array of shape (n, m), target points (one per row).

    Returns:
        tuple: ``(A_opt, b_opt)`` with ``A_opt`` of shape (m, m) and ``b_opt``
        of shape (m,).

    Raises:
        ValueError: if ``X`` and ``Y`` do not have the same 2-D shape.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if X.ndim != 2 or X.shape != Y.shape:
        raise ValueError(
            f"X and Y must be 2-D arrays of the same shape, got {X.shape} and {Y.shape}"
        )
    n, m = X.shape

    # Append a column of ones so the offset b is estimated as the last row.
    design = np.hstack([X, np.ones((n, 1))])
    solution, *_ = np.linalg.lstsq(design, Y, rcond=None)

    A_opt = solution[:m]
    b_opt = solution[m]
    return A_opt, b_opt

# Fit the trainer -> user affine map on the flattened point sets and show it.
A_opt, b_opt = affine_transformation(X1, Y1)

print("Optimized A:")
print(A_opt)
print("Optimized b:")
print(b_opt)

Optimized A:
[[ 0.57771126 -0.04797943]
 [-0.04076296  0.92072509]]
Optimized b:
[ 0.17769757 -0.02764757]


In [68]:
def draw_lines_user(keypoints,frame,width,height):
    """Draw the user's skeleton on ``frame`` in red.

    Args:
        keypoints: rows of (y, x, confidence); y and x are scaled by
            ``height`` and ``width`` to get pixel positions.
        frame: image drawn on in place.
        width, height: frame size in pixels.

    A segment is drawn only when both of its end points have a confidence
    above 0.3.
    """
    skeleton = (
        (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6),  # Head
        (6, 8), (8, 10), (5, 7), (7, 9),  # Arms
        (6, 12), (12, 14), (14, 16), (5, 11), (11, 13), (13, 15),  # Legs
        (11, 12),
    )

    for start, end in skeleton:
        if not (keypoints[start][2] > 0.3 and keypoints[end][2] > 0.3):
            continue
        # OpenCV points are (x, y); keypoints store (y, x).
        start_px = (int(keypoints[start][1]*width), int(keypoints[start][0]*height))
        end_px = (int(keypoints[end][1]*width), int(keypoints[end][0]*height))
        cv2.line(frame, start_px, end_px, (0, 0, 255), 2)
            

In [69]:
def draw_lines_trainer(points, frame,width,height):
    """Draw the trainer's skeleton on ``frame`` in green.

    Args:
        points: 14 rows of (y, x) pixel positions in the order produced by
            get_keypoints: nose, left/right shoulder, left/right elbow,
            left/right wrist, left/right hip, left/right knee,
            left/right ankle, neck.
        frame: image drawn on in place.
        width, height: unused; kept for signature compatibility.
    """
    # Indices follow the stored point order (neck is the LAST row, index 13).
    NOSE, L_SH, R_SH, L_EL, R_EL, L_WR, R_WR = 0, 1, 2, 3, 4, 5, 6
    L_HIP, R_HIP, L_KNEE, R_KNEE, L_ANK, R_ANK, NECK = 7, 8, 9, 10, 11, 12, 13
    connections = [
        (NOSE, NECK),
        (NECK, R_SH), (NECK, L_SH),
        (R_SH, R_EL), (R_EL, R_WR),
        (L_SH, L_EL), (L_EL, L_WR),
        (NECK, R_HIP), (NECK, L_HIP),
        (R_HIP, R_KNEE), (L_HIP, L_KNEE),
        (R_KNEE, R_ANK), (L_KNEE, L_ANK),
    ]

    for start, end in connections:
        # OpenCV points are (x, y); the rows store (y, x).
        start_px = (int(points[start][1]), int(points[start][0]))
        end_px = (int(points[end][1]), int(points[end][0]))
        cv2.line(frame, start_px, end_px, (0, 255, 0), 2)


In [70]:
# Debug output: trainer points matched to user frames 10 and 20.
print(X[10])
print(X[20])

[[0.46218392 0.8383655  0.55697393]
 [0.4362504  0.70156425 0.73102593]
 [0.44442138 0.7325984  0.79765177]
 [0.56914806 0.65825725 0.6244836 ]
 [0.58961695 0.67819065 0.58298576]
 [0.6766716  0.66571444 0.6589131 ]
 [0.7245861  0.7045313  0.8648901 ]
 [0.48684645 0.48715037 0.7160776 ]
 [0.49075055 0.48461437 0.6765288 ]
 [0.5681819  0.29134303 0.5311298 ]
 [0.5722823  0.29169473 0.850004  ]
 [0.61683524 0.10930017 0.7851962 ]
 [0.62781    0.08928271 0.76107085]
 [0.44033587 0.7170813  0.76433885]]
[[0.46218392 0.8383655  0.55697393]
 [0.4362504  0.70156425 0.73102593]
 [0.44442138 0.7325984  0.79765177]
 [0.56914806 0.65825725 0.6244836 ]
 [0.58961695 0.67819065 0.58298576]
 [0.6766716  0.66571444 0.6589131 ]
 [0.7245861  0.7045313  0.8648901 ]
 [0.48684645 0.48715037 0.7160776 ]
 [0.49075055 0.48461437 0.6765288 ]
 [0.5681819  0.29134303 0.5311298 ]
 [0.5722823  0.29169473 0.850004  ]
 [0.61683524 0.10930017 0.7851962 ]
 [0.62781    0.08928271 0.76107085]
 [0.44033587 0.7170813  0.7

In [72]:
# Render the annotated output video: user skeleton (red), the matched trainer
# pose mapped into the user's frame (green) and the problem text.
video2 = cv2.VideoCapture(input_video)
index = 0
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4
# Write with the source frame rate so playback speed matches the input;
# fall back to 30 fps when the container does not report one.
fps = video2.get(cv2.CAP_PROP_FPS) or 30.0
out = cv2.VideoWriter(
    'output_pushup.mp4',
    fourcc,
    fps,
    (int(video2.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video2.get(cv2.CAP_PROP_FRAME_HEIGHT))),
)
# Only frames that have a trainer match can be annotated.
mapped_frame_count = min(len(X), len(mapping_frames))
try:
    while True:
        ret, frame = video2.read()
        if not ret:
            break
        if index >= mapped_frame_count:
            print(f"No trainer mapping for frame {index}; stopping early.")
            break

        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = tf.convert_to_tensor(image, dtype=tf.int32)

        keypoints = get_data(image)
        height, width, _ = frame.shape
        points = []
        for keypoint in X[index]:
            y, x, confidence = keypoint
            # Map the trainer point into the user's coordinates, then to pixels.
            y_new, x_new = np.matmul(np.array([y, x]), A_opt) + b_opt
            x_new = int(x_new * width)
            y_new = int(y_new * height)
            cv2.circle(frame, (x_new, y_new), 5, (0, 255, 0), -1)
            points.append([y_new, x_new])
        points = np.array(points)
        draw_lines_user(keypoints,frame,width,height)
        draw_lines_trainer(points,frame,width,height)
        cv2.putText(frame, mapping_frames[index]['problem'], (50, 50), cv2.FONT_HERSHEY_SIMPLEX,0.7, (0, 0, 0), 2,)
        index += 1
        out.write(frame)
        cv2.imshow("Frame", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and finalise the output file, even on error.
    video2.release()
    out.release()
    cv2.destroyAllWindows()