# Obtenção da base de dados

## 1. Instalação e importe de bibliotecas úteis

In [None]:
%pip install tensorflow==2.4.1 tensorflow-gpu==2.4.1 opencv-python mediapipe sklearn matplotlib pyrebase

In [1]:
import cv2
import os
import numpy as np
import mediapipe as mp
import pyrebase
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

## 2. Obtenção dos Pontos de Articulação por método Holístico pelo Mediapipe

In [2]:
mp_holistic = mp.solutions.holistic # Modelo holístico
mp_drawing = mp.solutions.drawing_utils # Desenhando o modelo

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Conversão das cores de Azul, Verde, Vermelho para Vermelho, Verde, Azul
    image.flags.writeable = False                  # Imagem não é mais gravável
    results = model.process(image)                 # Faz a predição da imagem
    image.flags.writeable = True                   # Imagem torna-se gravável
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # Conversão das cores de Vermelho, Verde, Azul para Azul, Verde, Vermelho
    return image, results

In [4]:
def draw_landmarks(image, results):
    # Desenhando as conexões da rosto
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                            mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                            mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    # Desenhando as conexões da postura
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=3),
                            mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    # Desenhando as conexões da mão esquerda
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=3),
                            mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))  
    # Desenhando as conexões da mão direita
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                            mp_drawing.DrawingSpec(color=(80,22,10), thickness=1, circle_radius=3),
                            mp_drawing.DrawingSpec(color=(80,44,121), thickness=1, circle_radius=1)) 

In [72]:
cap = cv2.VideoCapture(0)
# Definindo o modelo do Mediapipe
with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Lendo o retorno da chamada
        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)

        # Desenhando landmarks
        draw_landmarks(image, results)

        # Mostrando na tela
        cv2.imshow('OpenCV feed', image)

        # Desligando a tela sem quebrar o código (aperte 'q para fechar')
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

## 3. Obtendo valor dos Pontos de Articulação

In [5]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*3)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    right_hand = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    left_hand = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    return np.concatenate([face, pose, right_hand, left_hand])

## 4. Definindo pastas de coleta de dados

In [6]:
# Sinais a serem detectados
actions = np.array(['OBRIGADO', 'OI', 'DESCULPAR', 'POR-FAVOR', 'ENTENDER'])

# Trinta videos a serem gravados
no_sequences = 20

# Os videos terão dez frames de duração
sequence_length = 22

In [7]:
config = {
    "apiKey": "AIzaSyBmb8AprkbeGvbUtJ7fXbFk305hHq8XHzM",
    "authDomain": "basedadoslibras.firebaseapp.com",
    "projectId": "basedadoslibras",
    "storageBucket": "basedadoslibras.appspot.com",
    "messagingSenderId": "546999484697",
    "appId": "1:546999484697:web:5ee14e4a19c9327e420588",
    "measurementId": "G-62ZQY36012",
    "databaseURL":"https://basedadoslibras-default-rtdb.firebaseio.com/"
}

firebase = pyrebase.initialize_app(config)
database = firebase.database()

## 5. Coletando Pontos de Articulação para teste e treinamento

In [9]:
cap = cv2.VideoCapture(0)
# Definindo o número da bateria
bateria = database.child('next_batery').get().val()
# Definindo o modelo do Mediapipe
with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:

    # Repetição dos sinais
    for action in actions:
        # Repetição da quantidade de videos
        for sequence in range(no_sequences):
            # Repetição dos quadros a serem capturados
            for frame_num in range(sequence_length + 2):
                # Lendo o retorno da chamada
                ret, frame = cap.read()
                image, results = mediapipe_detection(frame, holistic)
                # Desenhando landmarks
                draw_landmarks(image, results)

                # Lógica da sequência de abertura da câmera
                if frame_num == 0 and sequence == 0:
                    cv2.putText(image, 'INICIANDO COLETA DO SINAL {}'.format(action), (10, 100),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)
                    # Abrindo a tela
                    cv2.imshow('Tela OpenCV', image)
                    cv2.waitKey(5000)

                elif frame_num == 1:
                    cv2.putText(image, 'INICIANDO VIDEO NUMERO {}'.format(sequence + 1), (10, 100),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)
                    # Abrindo a tela
                    cv2.imshow('Tela OpenCV', image)
                    cv2.waitKey(0)

                elif frame_num > 1:
                    cv2.putText(image, 'Coletando quadros para {} Video numero {}'.format(action, sequence + 1), (15,12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    
                    # Abrindo a tela
                    cv2.imshow('Tela OpenCV', image)
                    # Salvando dados no Firebase            
                    keypoints = extract_keypoints(results)
                    database.child("Bateria "+str(bateria)).child(str(action)).child(str(sequence)).update({str(frame_num - 2):keypoints.tolist()})
                # Desligando a tela sem quebrar o código (aperte 'q para fechar')
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break             
    cap.release()
    cv2.destroyAllWindows()
    database.child('next_batery').set(bateria + 1)

## 6. Preprocessamento dos dados e criação das ferramentas e rótulos

In [10]:
#array de objetos classificadores
label_map = {label: num for num, label in enumerate(actions)}

# Definindo qual bateria será usada
bateria = [2]
batery = database.get().val()

In [11]:
sequences, labels = [],[]
for no_bateria in bateria:
    if('Bateria '+ str(no_bateria) in batery):
        for action in actions:
            for sequence in batery['Bateria '+ str(no_bateria)][action]:
                window = []
                for frame_num in sequence:
                    res = []
                    for i in frame_num:
                        res.append(float(i))
                    window.append(res)
                sequences.append(window)
                labels.append(label_map[action])
video = len(sequence)

# Processo de Teste

## 7. Separação dos dados para treino/teste

In [22]:
#Definindo dados para treino
X = np.array(sequences)

In [23]:
#Definindo labels para treino
Y = to_categorical(labels).astype(int)

In [24]:
#Dividindo dados para treino e teste
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)

## 8. Construindo e treinando uma Rede Neural LSTM

In [26]:
#Salvando log do TensorBoard
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [33]:
#Configurando o modelo de previsão
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(video,1629)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [35]:
#Treinando o modelo
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.fit(X_train, Y_train, epochs=500, callbacks=[tb_callback])

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x1f1c6256220>

In [24]:
%reload_ext tensorboard

In [25]:
%tensorboard --logdir Logs

## 9. Validando o modelo usando Matriz de Confusão e Acuracidade

In [26]:
#Processo de teste do modelo
yhat = model.predict(X_train)



In [27]:
#Matriz de confusão
ytrue = np.argmax(Y_train, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
multilabel_confusion_matrix(ytrue, yhat)

array([[[70,  0],
        [ 0, 17]],

       [[69,  0],
        [ 0, 18]],

       [[70,  0],
        [ 0, 17]],

       [[69,  0],
        [ 0, 18]],

       [[70,  0],
        [ 0, 17]]], dtype=int64)

In [28]:
#Acurácia do modelo
accuracy_score(ytrue, yhat)

1.0

# Em Progresso

In [None]:
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0,85+num*40),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

In [None]:
# 1. Novas variáveis de detecção
sequence = []
sentence = []
predictions = []
threshold = 0.7

cap = cv2.VideoCapture(0)
# Definindo o modelo do Mediapipe
with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Lendo o retorno da chamada
        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)

        # Desenhando landmarks
        draw_landmarks(image, results)
        
        # 2. lógica de previsão
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]
        
        if len(sequence) == 30:
            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            predictions.append(np.argmax(res))

        # 3. lógica de visualização
            if np.unique(predictions[-10:])[0] == np.argmax(res):
                if res[np.argmax(res)] > threshold:
                    if len(sentence) > 0:
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5:
                sentence = sentence[-5:]

            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Mostrando na tela
        cv2.imshow('OpenCV feed', image)

        # Desligando a tela sem quebrar o código (aperte 'q para fechar')
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()