# import and install

In [2]:
!pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib

Collecting tensorflow==2.4.1
  Using cached tensorflow-2.4.1-cp37-cp37m-win_amd64.whl (370.7 MB)
Collecting tensorflow-gpu==2.4.1
  Using cached tensorflow_gpu-2.4.1-cp37-cp37m-win_amd64.whl (370.7 MB)
Collecting sklearn
  Using cached sklearn-0.0.tar.gz (1.1 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting keras-preprocessing~=1.1.2
  Using cached Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
Collecting opt-einsum~=3.3.0
  Using cached opt_einsum-3.3.0-py3-none-any.whl (65 kB)
Collecting tensorboard~=2.4
  Using cached tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
Collecting grpcio~=1.32.0
  Using cached grpcio-1.32.0-cp37-cp37m-win_amd64.whl (2.5 MB)
Collecting h5py~=2.10.0
  Using cached h5py-2.10.0-cp37-cp37m-win_amd64.whl (2.5 MB)
Collecting astunparse~=1.6.3
  Using cached astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting gast==0.3.3
  Using cached gast-0.3.3-py2.py3-none-any.whl (9.7 kB)
Collecting

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

# key points using MP holistic

In [29]:
mp_holistic = mp.solutions.holistic #holistic model
mp_drawing = mp.solutions.drawing_utils #drawing utilites

In [30]:
def mediapipe_detection(image,model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) #BGR 2 RGB
    image.flags.writeable = False                 #image is no longer writeable
    results = model.process(image)                 #make prediction
    image.flags.writeable = True                  #image is writeable
    image = cv2.cvtColor(image,cv2.COLOR_RGB2BGR) #RGB 2 BGR
    return image,results

In [4]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image,results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) #draw pose connections

In [5]:
def draw_styled_landmarks(image,results):
    mp_drawing.draw_landmarks(image,results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,110,10),thickness=1,circule_radius=1),
                             mp_drawing.DrawingSpec(color=(80,256,121),thickness=1, circle_radius=1)) #draw pose by color

In [2]:
cap = cv2.VideoCapture(0)
#set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        #Read feed
        ret,frame = cap.read()

        #make detection
        image , results  = mediapipe_detection(frame,holistic)
        draw_landmarks(image,results)

        #show to screen
        cv2.imshow('OpenCV Feed',image)
        #breaking
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

NameError: name 'cv2' is not defined

In [6]:
len(results.face_landmarks.landmark) #x,y,z x위치 y 위치 z는 카메라와의 거리

NameError: name 'results' is not defined

# 3 extract values

In [31]:
results.pose_landmarks.landmark[0].visibility

0.9971548318862915

In [32]:
pose = np.array([[res.x, res.y,res.z,res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)

In [50]:
def extract_keypoints(results):
    pose =[]
    for res in results.pose_landmarks.landmark:
        test = np.array([res.x, res.y,res.z,res.visibility])
        pose.append(test)
    return pose

In [51]:
pose=extract_keypoints(results)

In [53]:
pose

[array([ 0.53656113,  0.7451185 , -1.49604678,  0.99837661]),
 array([ 0.56787074,  0.68525219, -1.42949486,  0.99658793]),
 array([ 0.5831219 ,  0.68845671, -1.42999613,  0.99712366]),
 array([ 0.59866965,  0.69232386, -1.43029273,  0.99601716]),
 array([ 0.51497608,  0.67834687, -1.43897653,  0.99748147]),
 array([ 0.49484119,  0.6779837 , -1.43881989,  0.99823433]),
 array([ 0.47777629,  0.67916757, -1.43959212,  0.9983272 ]),
 array([ 0.62370616,  0.71793455, -0.93928319,  0.99641418]),
 array([ 0.45044932,  0.70325029, -0.96907932,  0.99902034]),
 array([ 0.56590021,  0.81904727, -1.30181575,  0.99842185]),
 array([ 0.49593005,  0.81701463, -1.31201887,  0.99935794]),
 array([ 0.75011593,  1.00130415, -0.55053008,  0.98427594]),
 array([ 0.28863496,  0.99323213, -0.59569877,  0.99681717]),
 array([ 0.85251075,  1.36948764, -0.56127161,  0.11930533]),
 array([ 0.15906475,  1.37286651, -0.47421733,  0.4663035 ]),
 array([ 0.81544101,  1.65848732, -1.11137509,  0.01474966]),
 array([

# setup folder

In [36]:
# path for exproted data
DATA_PATH = os.path.join("MP_DATA")

#Action that we try to detect
actions = np.array(['good','bad'])

#thirty videos worth of data
no_sequences = 5

#videos are goint to be 30 frames in length
sequence_length =5

In [34]:
#good 
## 0
## 1
## 2
## 29

#bad
#neutral

In [37]:
for action in actions:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass


# 5 collect keypoint for trainging

In [54]:
cap = cv2.VideoCapture(0)
#set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    #Loop through actions
    for action in actions:
        #LOOP through sequneces aka videos
        for sequence in range(no_sequences):
            #Loop through video length aka sequnece length
            for frame_num in range(sequence_length):
                
                #Read feed
                ret,frame = cap.read()

                #make detection
                image , results  = mediapipe_detection(frame,holistic)
                draw_landmarks(image,results)
                
                #Apply collection logic
                if frame_num == 0:
                    cv2.putText(image, 'STARTING COLLECTION', (120,200),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 4, cv2.LINE_AA)
                    
                    cv2.putText(image, 'Collectiong frames for {} Video Number {}'.format(action,sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 4, cv2.LINE_AA)
                    #show to screen
                    cv2.imshow('OpenCV Feed',image)
                    cv2.waitKey(1000)
                else:
                    cv2.putText(image, 'Collectiong frames for {} Video Number {}'.format(action,sequence), (15,12),
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 4,cv2.LINE_AA)
                    
                    #show to screen
                    cv2.imshow('OpenCV Feed',image)
                  
                
                #new export keypoints
                keypoints = extract_keypoints(results)
                npy_path=os.path.join(DATA_PATH, action, str(sequence),str(frame_num))
                np.save(npy_path,keypoints)
                
                #breaking
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
                    
    cap.release()
    cv2.destroyAllWindows()

[array([ 0.55916846,  0.57386929, -1.22183239,  0.99828553]),
 array([ 0.58085144,  0.50923181, -1.14988017,  0.99599141]),
 array([ 0.59723675,  0.51158726, -1.15024006,  0.99582827]),
 array([ 0.6128546 ,  0.51444262, -1.15050447,  0.99462384]),
 array([ 0.52914441,  0.50459737, -1.174793  ,  0.99745184]),
 array([ 0.50673568,  0.50431776, -1.17438376,  0.99802428]),
 array([ 0.48643789,  0.50545496, -1.17491257,  0.99817407]),
 array([ 0.62946695,  0.54131246, -0.66639495,  0.99438745]),
 array([ 0.4540351 ,  0.5351789 , -0.76584423,  0.99873275]),
 array([ 0.583143  ,  0.64348775, -1.03637052,  0.99831235]),
 array([ 0.51802784,  0.64381051, -1.06507671,  0.99931544]),
 array([ 0.74665773,  0.86156017, -0.29812184,  0.99083537]),
 array([ 0.27407104,  0.85421616, -0.48251671,  0.99575716])]

AttributeError: module 'cv2' has no attribute 'destroyAllwindows'

# 6. preprocess data and create labels

In [55]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [56]:
label_map = {label:num for num, label in enumerate(actions)}

In [57]:
label_map

{'good': 0, 'bad': 1}

In [58]:
sequences , labels = [],[]
for action in actions:
    for sequence in range(no_sequences):
        window=[]
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH,action,str(sequence),"{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

In [59]:
np.array(sequences).shape

(10, 5, 33, 4)

In [44]:
res

array([[ 5.36561131e-01,  7.45118499e-01, -1.49604678e+00,
         9.98376608e-01],
       [ 5.67870736e-01,  6.85252190e-01, -1.42949486e+00,
         9.96587932e-01],
       [ 5.83121896e-01,  6.88456714e-01, -1.42999613e+00,
         9.97123659e-01],
       [ 5.98669648e-01,  6.92323864e-01, -1.43029273e+00,
         9.96017158e-01],
       [ 5.14976084e-01,  6.78346872e-01, -1.43897653e+00,
         9.97481465e-01],
       [ 4.94841188e-01,  6.77983701e-01, -1.43881989e+00,
         9.98234332e-01],
       [ 4.77776289e-01,  6.79167569e-01, -1.43959212e+00,
         9.98327196e-01],
       [ 6.23706162e-01,  7.17934549e-01, -9.39283192e-01,
         9.96414185e-01],
       [ 4.50449318e-01,  7.03250289e-01, -9.69079316e-01,
         9.99020338e-01],
       [ 5.65900207e-01,  8.19047272e-01, -1.30181575e+00,
         9.98421848e-01],
       [ 4.95930046e-01,  8.17014635e-01, -1.31201887e+00,
         9.99357939e-01],
       [ 7.50115931e-01,  1.00130415e+00, -5.50530076e-01,
      

In [45]:
window

[array([[ 5.35358429e-01,  7.45127976e-01, -1.47331214e+00,
          9.98210967e-01],
        [ 5.67480445e-01,  6.84872270e-01, -1.40812564e+00,
          9.96309817e-01],
        [ 5.82838893e-01,  6.88144386e-01, -1.40862322e+00,
          9.96847570e-01],
        [ 5.98571658e-01,  6.92052662e-01, -1.40893805e+00,
          9.95696366e-01],
        [ 5.14190733e-01,  6.77870333e-01, -1.41890109e+00,
          9.97293115e-01],
        [ 4.94243532e-01,  6.77543044e-01, -1.41870046e+00,
          9.98083830e-01],
        [ 4.77381766e-01,  6.78757012e-01, -1.41948318e+00,
          9.98204052e-01],
        [ 6.24204278e-01,  7.17799246e-01, -9.18662071e-01,
          9.96033788e-01],
        [ 4.50329006e-01,  7.03283310e-01, -9.51008677e-01,
          9.98920679e-01],
        [ 5.65617025e-01,  8.18906963e-01, -1.27786648e+00,
          9.98231530e-01],
        [ 4.95092094e-01,  8.16922784e-01, -1.28885162e+00,
          9.99282897e-01],
        [ 7.50134110e-01,  1.00094295e+00, 

In [60]:
X = np.array(sequences)

In [61]:
X.shape

(10, 5, 33, 4)

In [62]:
y= to_categorical(labels).astype(int)

In [63]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.05)

In [64]:
y_test.shape

(1, 2)

In [65]:
sequences

[[array([[ 5.34657836e-01,  7.29303241e-01, -1.11519659e+00,
           9.99452531e-01],
         [ 5.55056393e-01,  6.74135327e-01, -1.03532696e+00,
           9.98560369e-01],
         [ 5.68733573e-01,  6.77294016e-01, -1.03553247e+00,
           9.98968005e-01],
         [ 5.83199620e-01,  6.81319654e-01, -1.03562689e+00,
           9.98482645e-01],
         [ 5.07322311e-01,  6.70563340e-01, -1.06116080e+00,
           9.98549521e-01],
         [ 4.88752753e-01,  6.70688391e-01, -1.06047702e+00,
           9.99078631e-01],
         [ 4.70929235e-01,  6.71933055e-01, -1.06109297e+00,
           9.98869956e-01],
         [ 6.00955307e-01,  7.15546966e-01, -5.31163037e-01,
           9.98440683e-01],
         [ 4.33426201e-01,  7.14551210e-01, -6.26835942e-01,
           9.99362648e-01],
         [ 5.64693332e-01,  8.01399589e-01, -9.24991429e-01,
           9.99065816e-01],
         [ 4.91126448e-01,  8.04411471e-01, -9.52795148e-01,
           9.99305367e-01],
         [ 7.40172446

# 7 Build and train LSTM Neural Network

In [66]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [67]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [68]:
model = Sequential()
model.add(LSTM(64,return_sequences=True,activation='relu',input_shape=(30,132)))
model.add(LSTM(128,return_sequences=True,activation='relu'))
model.add(LSTM(64,return_sequences=False,activation='relu'))

model.add(Dense(64,activation='relu'))
model.add(Dense(64,activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [70]:
res=[0.7,0.2,0.1]

In [72]:
actions[np.argmax(res)]

'good'

In [150]:
#왜 이러한 구조로 구성하였나? -> 
#1. 적은 양의 데이터만 사용할 예정이고
#2. 빠르게 학습시킬 수 있다는 장점과
#3. 실시간으로 평가를 빠르게 내려줄 수 있기 때문입니다.

In [73]:
model.compile(optimizer='Adam',loss='categorical_crossentropy',metrics=['categorical_accuracy'])
#crossentropy -> 수치로 표시하기에 유리한 방식으로 출력해주기 때문이다.

In [77]:
model.fit(X_train,y_train,epochs=5,callbacks=[tb_callback])

Epoch 1/5


ValueError: in user code:

    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:754 train_step
        y_pred = self(x, training=True)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:223 assert_input_compatibility
        str(tuple(shape)))

    ValueError: Input 0 of layer sequential_2 is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 5, 33, 4)


In [166]:
#cmd tensorboard --logdir=. 을 통해서 log와 LSTM 레이어가 어떻게 학습되는지 관찰할 수 있다.

# 8. make predictions

In [78]:
actions[np.argmax(res[0])]

'good'

In [79]:
actions[np.argmax(y_test[0])]

'bad'

# 9. save model

In [80]:
model.save('action.h5')

In [81]:
model.load_weights('action.h5')

# 10. Evaluation using confusion matrix and accuracy

In [82]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [85]:
yhat = model.predict(X_train)

ValueError: in user code:

    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:1478 predict_function  *
        return step_function(self, iterator)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:1468 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:1461 run_step  **
        outputs = model.predict_step(data)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\training.py:1434 predict_step
        return self(x, training=False)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    c:\users\82102\appdata\local\programs\python\python37\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:223 assert_input_compatibility
        str(tuple(shape)))

    ValueError: Input 0 of layer sequential_2 is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 5, 33, 4)


In [86]:
ytrue = np.argmax(y_train, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

NameError: name 'yhat' is not defined

In [521]:
multilabel_confusion_matrix(ytrue,yhat)

array([[[29,  0],
        [ 0, 28]],

       [[28,  0],
        [ 0, 29]]], dtype=int64)

In [522]:
accuracy_score(ytrue,yhat)

1.0

# 11. Test in Real Time

In [87]:
colors = [(245,117,16),(117,245,16),(16,117,245)]
def prob_viz(res,actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100),90+num*40),colors[num],-1)
        cv2.putText(output_frame,actions[num],(0,85+num*40),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,255),2,cv2.LINE_AA)
    return output_frame

In [524]:
plt.imshow(prob_viz(res,actions,image,colors))

TypeError: only size-1 arrays can be converted to Python scalars

In [24]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
#1 Net detection variables

sequence = []
sentence = []
predictions=[]
threshold = 0.4

cap = cv2.VideoCapture(0)
#set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        #Read feed
        ret,frame = cap.read()

        #make detection
        image , results  = mediapipe_detection(frame,holistic)
        
        #draw_landmark
        draw_landmarks(image,results)

        #2. prediction logic
        keypoints = extract_keypoints(results)
        sequence.insert(0,keypoints)
        sequence = sequence[:30]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            predictions.append(np.argmax(res))
        
        #3. vizs logic
        if res[np.argmax(res)] > threshold:
            if len(sentence) > 0:
                if actions[np.argmax(res)] != sentence[-1]:
                    sentence.append(actions[np.argmax(res)])
            else:
                sentence.append(actions[np.argmax(res)])
                    
        if len(sentence)>5:
            sentence = sentence[-5:]
            
        #viz
        image = prob_viz(res,actions,image,colors)
        cv2.rectangle(image, (0,0),(640,40), (245,117,16),-1)
        cv2.putText(image, ' '.join(sentence),(3,30), cv2.FONT_HERSHEY_SIMPLEX,1,
                   (255,255,255),2,cv2.LINE_AA)
        
        
        
        #show to screen
        cv2.imshow('OpenCV Feed',image)
        #breaking
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

IndexError: index 34 is out of bounds for axis 0 with size 13

In [631]:
np.unique(predictions[-15:])[0]==np.argmax(res)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [632]:
res

array([0.52529347, 0.47470647], dtype=float32)

In [596]:
np.unique(predictions[-15:])[0] == np.argmax(res)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [591]:
res = model.predict(np.expand_dims(sequence, axis=0))[0]



InvalidArgumentError:  Matrix size-incompatible: In[0]: [1,2004], In[1]: [132,256]
	 [[{{node sequential_12/lstm_36/while/body/_1/sequential_12/lstm_36/while/lstm_cell_36/MatMul}}]] [Op:__inference_predict_function_339149]

Function call stack:
predict_function


In [592]:
predictions

[]

In [608]:
res

array([0.52529347, 0.47470647], dtype=float32)

In [611]:
sequence

[array([ 0.53916377,  0.54401648, -1.58886135, ...,  0.47404408,
         0.03362728,  0.        ])]

In [630]:
len(sequence)

30