# import and install

In [1]:
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib



In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# key points using MP holistic

In [3]:
mp_holistic = mp.solutions.holistic # holistic solution module (Holistic model class and POSE_CONNECTIONS are taken from it below)
mp_drawing = mp.solutions.drawing_utils # drawing utilities used to render landmarks

In [4]:
def mediapipe_detection(image,model):
    """Run ``model`` on a single BGR frame.

    The frame is converted from BGR to RGB before being handed to
    ``model.process`` and converted back afterwards.

    Args:
        image: frame in BGR channel order (as read from ``cv2.VideoCapture``).
        model: object exposing ``process(rgb_image)``.

    Returns:
        ``(image, results)``: a BGR copy of the frame and whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The array is flagged read-only only for the duration of the model call.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [5]:
def draw_landmarks(image, results):
    """Render the pose landmarks of ``results`` and their connections on ``image``.

    Delegates to ``mp_drawing.draw_landmarks`` with the holistic model's
    ``POSE_CONNECTIONS``. Nothing is returned; the caller keeps using the
    ``image`` object it passed in.
    """
    pose_landmarks = results.pose_landmarks
    pose_connections = mp_holistic.POSE_CONNECTIONS
    mp_drawing.draw_landmarks(image, pose_landmarks, pose_connections)

# 3 extract values

In [10]:
# Scratch check: visibility score of the first pose landmark.
# NOTE(review): `results` is only assigned inside the webcam loops further down,
# so this cell raises NameError on a fresh kernel (see the recorded output).
results.pose_landmarks.landmark[0].visibility

NameError: name 'results' is not defined

In [6]:
def extract_keypoints(results, visibility_threshold=0.9, landmark_indices=(0, 11, 12)):
    """Build a fixed-length (x, y, z) feature vector from selected pose landmarks.

    Args:
        results: detection result exposing ``pose_landmarks.landmark``
            (``pose_landmarks`` may be ``None`` when no pose was detected).
        visibility_threshold: a landmark is used only if its ``visibility``
            is strictly greater than this value.
        landmark_indices: positions in the landmark list to extract, in output
            order. NOTE(review): 0/11/12 are presumably nose / left shoulder /
            right shoulder in MediaPipe's pose topology - confirm against the docs.

    Returns:
        1-D float array of length ``3 * len(landmark_indices)``. Landmarks that
        are missing or not visible enough are left as zeros, so the vector
        always has the length the downstream model and augmentation expect
        (previously such frames produced a shorter vector or raised).
    """
    pose = np.zeros((len(landmark_indices), 3))
    pose_landmarks = results.pose_landmarks
    if pose_landmarks is not None:
        landmarks = pose_landmarks.landmark
        for slot, index in enumerate(landmark_indices):
            if index >= len(landmarks):
                continue
            res = landmarks[index]
            if res.visibility > visibility_threshold:
                pose[slot] = (res.x, res.y, res.z)
    return pose.flatten()

In [131]:
def extract_badkeypoints(results, offset=0.2, slight_offset=0.1):
    """Derive five shifted ("bad") variants of a flat keypoint vector.

    The input is treated as consecutive (x, y, z) triplets; z values are never
    changed by variants 3-5.

    Args:
        results: flat sequence of keypoint coordinates (e.g. the output of
            ``extract_keypoints``). It is not modified.
        offset: shift applied by variants 1-4.
        slight_offset: shift applied by variant 5.

    Returns:
        Tuple ``(pose, pose2, pose3, pose4, pose5)`` of 1-D float arrays:
            pose  - every strictly positive value decreased by ``offset``
            pose2 - every strictly positive value increased by ``offset``
            pose3 - x increased and y decreased by ``offset``
            pose4 - x decreased and y increased by ``offset``
            pose5 - x decreased by ``slight_offset``
    """
    base = np.asarray(results, dtype=float).ravel()

    # Variants 1 and 2 leave zero / negative coordinates untouched.
    positive = base > 0
    pose = np.where(positive, base - offset, base)
    pose2 = np.where(positive, base + offset, base)

    # Variants 3-5 work per triplet: index 0::3 is x, 1::3 is y.
    pose3 = base.copy()
    pose3[0::3] += offset
    pose3[1::3] -= offset

    pose4 = base.copy()
    pose4[0::3] -= offset
    pose4[1::3] += offset

    pose5 = base.copy()
    pose5[0::3] -= slight_offset

    return pose, pose2, pose3, pose4, pose5

# setup folder

In [132]:
# Root folder for the exported keypoint data
DATA_PATH = os.path.join("MP_DATA")

# Actions (classes) that we try to detect
actions = np.array(['good','bad1','bad2','bad3','bad4','bad5'])

# Number of sequences (videos) stored per action
no_sequences = 3

# Number of frames in each sequence
sequence_length =10

In [133]:
# Create DATA_PATH/<action>/<sequence>/ for every action and sequence index.
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True keeps the cell re-runnable while still surfacing real
        # failures (permissions, invalid path) instead of swallowing every error.
        os.makedirs(os.path.join(DATA_PATH, action, str(sequence)), exist_ok=True)


# 5 collect keypoints for training

In [135]:
cap = cv2.VideoCapture(0)

# Only the 'good' posture is recorded from the camera; the five 'bad' classes
# are derived from each recorded frame by extract_badkeypoints.
recorded_action = 'good'
bad_actions = ('bad1', 'bad2', 'bad3', 'bad4', 'bad5')
stop_requested = False

try:
    #set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        #Loop through sequences aka videos
        for sequence in range(no_sequences):
            #Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                #Read feed
                ret,frame = cap.read()
                if not ret:
                    # Without a frame there is nothing to process; fail with a clear message.
                    raise RuntimeError('Could not read a frame from the camera')

                #make detection
                image , results  = mediapipe_detection(frame,holistic)
                draw_landmarks(image,results)

                #Apply collection logic
                if frame_num == 0:
                    cv2.putText(image, 'STARTING COLLECTION', (120,200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 4, cv2.LINE_AA)

                # The label names the action actually being recorded rather than
                # whatever value `action` happened to hold from an earlier cell.
                cv2.putText(image, 'Collectiong frames for {} Video Number {}'.format(recorded_action,sequence), (15,12),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 4, cv2.LINE_AA)

                #show to screen
                cv2.imshow('OpenCV Feed',image)
                if frame_num == 0:
                    # Pause on the first frame of each sequence so the subject can get ready.
                    cv2.waitKey(1000)

                #export keypoints for the recorded frame
                keypoints = extract_keypoints(results)
                npy_path=os.path.join(DATA_PATH, recorded_action, str(sequence),str(frame_num))
                np.save(npy_path,keypoints)

                #export the derived 'bad' variants of the same frame
                badkeypoints1,badkeypoints2, badkeypoints3, badkeypoints4,badkeypoints5 = extract_badkeypoints(keypoints)
                bad_variants = (badkeypoints1, badkeypoints2, badkeypoints3, badkeypoints4, badkeypoints5)
                for bad_action, bad_keypoints in zip(bad_actions, bad_variants):
                    npy_path=os.path.join(DATA_PATH, bad_action, str(sequence),str(frame_num))
                    np.save(npy_path,bad_keypoints)

                #breaking
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    stop_requested = True
                    break

            # 'q' stops the whole collection, not only the current sequence.
            if stop_requested:
                break
finally:
    # Release the camera and close the preview window even if something failed.
    cap.release()
    cv2.destroyAllWindows()

In [91]:
keypoints

array([ 0.60382783,  0.53910089, -1.53915215,  0.89506209,  0.86493379,
       -0.55764329,  0.31095186,  0.86760604, -0.60566676])

In [96]:
badkeypoints4

array([ 0.40382783,  0.73910089, -1.53915215,  0.69506209,  1.06493379,
       -0.55764329,  0.11095186,  1.06760604, -0.60566676])

# 6. preprocess data and create labels

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [137]:
# Map every action name to its integer class index (its position in `actions`).
label_map = dict(zip(actions, range(len(actions))))

In [138]:
label_map

{'good': 0, 'bad1': 1, 'bad2': 2, 'bad3': 3, 'bad4': 4, 'bad5': 5}

In [139]:
# Rebuild the dataset from disk: one window (list of per-frame arrays) per
# stored sequence, together with the integer label of its action.
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        sequence_dir = os.path.join(DATA_PATH, action, str(sequence))
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
            window += [res]
        labels.append(label_map[action])
        sequences.append(window)

In [140]:
np.array(sequences).shape
# A warning is raised if the arrays have different lengths.

(18, 10, 9)

In [141]:
# Stack the loaded windows into one array (the shape checks around this cell report (18, 10, 9)).
X = np.array(sequences)

[[array([ 0.61077142,  0.61884946, -1.65579736,  0.85848218,  0.87080282,
         -0.76630515,  0.32945901,  0.87804246, -0.81908739]),
  array([ 0.60810083,  0.6187104 , -1.51928365,  0.85850841,  0.86740339,
         -0.62158251,  0.33106768,  0.87476516, -0.67214793]),
  array([ 0.60458076,  0.61153942, -1.45579123,  0.8586728 ,  0.86573029,
         -0.58174711,  0.33144146,  0.87270707, -0.64277059]),
  array([ 0.6044656 ,  0.61164582, -1.50811946,  0.85871065,  0.86493421,
         -0.63410944,  0.33217633,  0.87190628, -0.69114423]),
  array([ 0.60400045,  0.61180979, -1.53994823,  0.8586669 ,  0.86474258,
         -0.64868516,  0.3324315 ,  0.87105006, -0.70091105]),
  array([ 0.60337842,  0.60794854, -1.55424118,  0.85867643,  0.86474556,
         -0.65757477,  0.33244297,  0.87023646, -0.71300972]),
  array([ 0.60355508,  0.60729259, -1.56336439,  0.85862207,  0.86473012,
         -0.65399331,  0.33244392,  0.87014467, -0.71790183]),
  array([ 0.60489058,  0.60740966, -1.624

In [142]:
X.shape

(18, 10, 9)

In [143]:
# Encode the integer labels with to_categorical (one column per class) and cast to int.
y= to_categorical(labels).astype(int)

In [144]:
# Hold out 5% of the windows for testing. The fixed random_state makes the split,
# and therefore every result computed from it, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.05,random_state=42)

In [145]:
y_test

array([[0, 0, 0, 0, 0, 1]])

# 7 Build and train LSTM Neural Network

In [146]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [147]:
# Training logs are written to the 'Logs' folder through a TensorBoard callback.
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [148]:
# The input shape is derived from the configuration and the loaded data instead
# of being hard-coded, so changing sequence_length or the keypoint set does not
# silently leave the network expecting a stale (10, 9) window.
n_features = X.shape[-1]

model = Sequential([
    # Three stacked LSTM layers; only the last one collapses the time dimension.
    LSTM(64, return_sequences=True, activation='relu', input_shape=(sequence_length, n_features)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    # Dense head ending in one softmax unit per action.
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

In [149]:
res=[0.7,0.2,0.1]

In [150]:
actions[np.argmax(res)]

'good'

In [150]:
#Why was the model built with this architecture? ->
#1. Only a small amount of data will be used,
#2. it has the advantage of training quickly, and
#3. it can produce evaluations quickly in real time.

In [151]:
model.compile(optimizer='Adam',loss='categorical_crossentropy',metrics=['categorical_accuracy'])
#crossentropy -> chosen because it produces output in a form that is convenient to express numerically.

 model.fit(X_train,y_train,epochs=100,callbacks=[tb_callback])

In [152]:
# Train on the training split for 100 epochs, reporting through the TensorBoard callback.
model.fit(X_train,y_train,epochs=100,callbacks=[tb_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x212ce3c69c8>

In [166]:
#Running `tensorboard --logdir=.` from the command line lets you observe the logs and how the LSTM layers are training.

# 8. make predictions

[0.7, 0.2, 0.1]

In [153]:
# Predicted action for the first held-out window. The previous lookup indexed the
# placeholder list `res`, whose first element is a scalar, so it always returned
# actions[0] without consulting the model.
actions[np.argmax(model.predict(X_test)[0])]

'good'

In [154]:
actions[np.argmax(y_test[0])]

'bad5'

# 9. save model

In [155]:
# Save the trained model to 'action.h5' (relative to the working directory).
model.save('action.h5')

In [156]:
# Load weights from 'action.h5' into the current model object.
model.load_weights('action.h5')

# 10. Evaluation using confusion matrix and accuracy

In [157]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [158]:
# NOTE(review): predictions are made on X_train, so the confusion matrix and
# accuracy computed from them describe the training data, not held-out data.
yhat = model.predict(X_train)

In [159]:
# Convert one-hot targets and predicted probabilities to class indices.
# The probabilities are recomputed from the model rather than read from `yhat`,
# so re-running this cell does not try to arg-max an already converted list.
ytrue = np.argmax(y_train, axis=1).tolist()
yhat = np.argmax(model.predict(X_train), axis=1).tolist()

In [160]:
multilabel_confusion_matrix(ytrue,yhat)

array([[[14,  0],
        [ 0,  3]],

       [[14,  0],
        [ 0,  3]],

       [[14,  0],
        [ 0,  3]],

       [[14,  0],
        [ 0,  3]],

       [[14,  0],
        [ 0,  3]],

       [[15,  0],
        [ 0,  2]]], dtype=int64)

In [122]:
accuracy_score(ytrue,yhat)

1.0

# 11. Test in Real Time

In [161]:
colors = [(245,117,16),(117,245,16),(16,117,245),(15,116,15),(255,116,15),(255,255,255)]
def prob_viz(res,actions, input_frame, colors):
    """Draw one horizontal probability bar per class on a copy of the frame.

    Args:
        res: iterable of class probabilities; bar ``num`` is ``int(prob*100)`` pixels wide.
        actions: label drawn on bar ``num`` (indexed as ``actions[num]``).
        input_frame: image to annotate; it is copied, not modified.
        colors: bar colors. The palette is cycled when there are more
            probabilities than colors instead of raising IndexError.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        # Bars are stacked vertically: 40 px apart, 30 px high, starting at y=60.
        top = 60 + num * 40
        bar_color = colors[num % len(colors)]
        cv2.rectangle(output_frame, (0, top), (int(prob*100), top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
    return output_frame

In [125]:
res

[0.7, 0.2, 0.1]

In [162]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
#1 New detection variables

sequence = []      # sliding window of the most recent keypoint vectors, oldest first
sentence = []      # recently displayed action names
predictions=[]     # arg-max class index of every prediction made
threshold = 0.4
res = None         # latest class probabilities; None until a full window exists

# Window length and feature width are taken from the model so the loop always
# feeds it what it was built for.
window_size, n_features = model.input_shape[1:]

cap = cv2.VideoCapture(0)
try:
    #set mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            #Read feed
            ret,frame = cap.read()
            if not ret:
                # No frame available (camera unplugged / stream ended): stop cleanly.
                break

            #make detection
            image , results  = mediapipe_detection(frame,holistic)

            #draw_landmark
            draw_landmarks(image,results)

            #2. prediction logic
            # Frames without a detected pose, or without a complete keypoint
            # vector, are skipped so the window never becomes ragged.
            if results.pose_landmarks is not None:
                keypoints = extract_keypoints(results)
                if len(keypoints) == n_features:
                    # Append and keep the tail so frames stay in chronological
                    # order, matching how the training windows were stored.
                    sequence.append(keypoints)
                    sequence = sequence[-window_size:]

                    if len(sequence) == window_size:
                        res = model.predict(np.expand_dims(sequence, axis=0))[0]
                        predictions.append(np.argmax(res))

            #3. viz logic - when the top probability exceeds the threshold, record
            #   the new state if it changed; otherwise keep the current one
            if res is not None:
                best = np.argmax(res)
                if res[best] > threshold:
                    if len(sentence) == 0 or actions[best] != sentence[-1]:
                        sentence.append(actions[best])

                #viz
                image = prob_viz(res,actions,image,colors)

            if len(sentence)>5:
                sentence = sentence[-5:]

            cv2.rectangle(image, (0,0),(640,40), (245,117,16),-1)
            cv2.putText(image, ' '.join(sentence),(3,30), cv2.FONT_HERSHEY_SIMPLEX,1,
                       (255,255,255),2,cv2.LINE_AA)

            #show to screen
            cv2.imshow('OpenCV Feed',image)
            #breaking
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

In [125]:
np.expand_dims(sequence, axis=0)

array([[[ 0.77106994,  0.55455083, -1.57093263,  0.98291284,
          0.89581984, -0.51946706,  0.42568725,  0.88737643,
         -0.68295014],
        [ 0.77108729,  0.55449498, -1.56668329,  0.98290873,
          0.89579952, -0.51899773,  0.42568731,  0.88765031,
         -0.67366982],
        [ 0.77117032,  0.55454916, -1.56097162,  0.98290884,
          0.89563698, -0.51355469,  0.42567593,  0.88780922,
         -0.67357385],
        [ 0.77136815,  0.55455738, -1.5634712 ,  0.98287535,
          0.89549625, -0.50800848,  0.42566076,  0.88842493,
         -0.67496264],
        [ 0.77194899,  0.55453861, -1.5686568 ,  0.98280782,
          0.89544404, -0.51559126,  0.42564774,  0.88887411,
         -0.67585647],
        [ 0.77332622,  0.55454516, -1.59330094,  0.98279178,
          0.89537585, -0.52319878,  0.42588633,  0.88931233,
         -0.69508219],
        [ 0.77441746,  0.55455977, -1.56486261,  0.9828105 ,
          0.89556354, -0.50235397,  0.42590156,  0.88934797,
        

In [129]:
if( model.predict(np.expand_dims(sequence, axis=0)) 

SyntaxError: invalid syntax (1196282161.py, line 1)

In [127]:
res

array([1.], dtype=float32)

In [111]:
sequence

[array([ 0.50510514,  0.57523799, -1.33585072,  0.8504442 ,  0.84953171,
        -0.5108431 ,  0.31640014,  0.88596261, -0.45764357]),
 array([ 0.28994605,  0.59063625, -0.94232875,  0.70112467,  0.81348842,
        -0.48816568,  0.21541998,  0.90519696, -0.13850515]),
 array([ 0.18531999,  0.62626845, -0.88276702,  0.61158442,  0.78733104,
        -0.49978966,  0.13356446,  0.91602474, -0.10879368]),
 array([ 0.12598976,  0.65510923, -0.63006675,  0.56234843,  0.77665555,
        -0.47775126,  0.08958142,  0.93116921, -0.03752357]),
 array([ 0.07712992,  0.66336399, -0.6016711 ,  0.5293985 ,  0.76835257,
        -0.55867636,  0.06525694,  0.94813412, -0.07443521]),
 array([ 0.02917137,  0.68252313, -0.72592342,  0.494445  ,  0.76066649,
        -0.74803966,  0.04498962,  0.94270498, -0.07351524]),
 array([-0.03962031,  0.73452175, -0.81641519,  0.45952097,  0.7645824 ,
        -0.9177413 , -0.01852623,  0.94218159, -0.11136822]),
 array([-0.05298466,  0.76346779, -0.81368703,  0.43507

In [80]:
x=np.array([3,4])

In [83]:
x.shape

(2,)

In [94]:
y=np.expand_dims(keypoints,axis=0)

In [95]:
y.shape

(1, 9)

In [96]:
y

array([[ 0.55471349,  0.59935188, -1.14001048,  0.85744655,  0.86038339,
        -0.41185656,  0.33047742,  0.87098163, -0.41079435]])