# Configurações Gerais

## Instalação das dependências necessárias

In [1]:
%pip install -q mediapipe tensorflow matplotlib pandas numpy sklearn

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\madan\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


## Importação de um modelo de reconhecimento das mãos

In [2]:
%wget -q https://storage.googleapis.com/mediapipe-models/gesture_recognizer/gesture_recognizer/float16/1/gesture_recognizer.task

UsageError: Line magic function `%wget` not found.


In [3]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import os
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# UTILS

## Delete folder

In [4]:
import shutil

def delete_folder(folder_path):
    """
    Exclui uma pasta e todo o seu conteúdo.

    :param folder_path: O caminho para a pasta que você deseja excluir.
    """
    try:
        shutil.rmtree(folder_path)
        print("Pasta excluída com sucesso!")
    except Exception as e:
        print(f"Ocorreu um erro ao excluir a pasta: {e}")



## Unzip files

In [5]:
import zipfile
import os

def unzip_file(zip_path, extract_to):
    """
    Extrai um arquivo zip para uma pasta específica.

    :param zip_path: O caminho para o arquivo zip que você deseja extrair.
    :param extract_to: O caminho da pasta onde deseja extrair o conteúdo do arquivo zip.
    """
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)
    print("Arquivo zip extraído com sucesso!")


## Zip Files

In [6]:
import zipfile
import os

def zip_folder(folder_path, output_path):
    """
    Comprime uma pasta para um arquivo zip.

    :param folder_path: O caminho para a pasta que você deseja comprimir.
    :param output_path: O caminho onde o arquivo zip será salvo.
    """
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path, os.path.relpath(file_path, folder_path))




## Desenhar landmarks

In [7]:
def draw_landmarks(image, landmarks, color):

  for landmark in landmarks:
    height, width, _ = image.shape
    cx, cy = int(landmark.x * width), int(landmark.y * height)
    cv2.circle(image, (cx, cy), 5, color, cv2.FILLED)

## Formatação dos keyframes


In [8]:
def format_landmarks(landmarks):
  formatted_landmarks = []
  for hands_landmarks in landmarks:
    hand_landmarks = []
    for landmark in hands_landmarks:
      hand_landmarks.append([landmark.x, landmark.y, landmark.z])
  formatted_landmarks.append(np.concatenate(hand_landmarks))

  return np.concatenate(formatted_landmarks)


# Salvar dados (array)

## Importação dos vídeos do drive

In [None]:
from google.colab import drive
drive.mount('/content/drive/')


Mounted at /content/drive/


In [16]:
%unzip './videos.zip'

UsageError: Line magic function `%unzip` not found.


## Processamento de vídeo

In [9]:
def show_frame(frame, hands_landmarks):
  colors = [(0, 255, 0), (0, 0, 255)]


  for idx, hand_landmark in enumerate(hands_landmarks):
    draw_landmarks(frame, hand_landmark, colors[idx])

  # cv2_imshow(frame)
  cv2.waitKey(0)
  cv2.destroyAllWindows()

In [18]:
def process_video(video_path, label, sequence):
  base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')
  options = vision.GestureRecognizerOptions(base_options=base_options, num_hands=2)
  recognizer = vision.GestureRecognizer.create_from_options(options)

  cap = cv2.VideoCapture(video_path)

  video_frames = []
  num_frames = 30
  video_num_frames = 0

  keyframes = []

  while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

        recognition_result = recognizer.recognize(image)
        npy_path = os.path.join('./np', label, str(sequence), str((video_num_frames)))
        np.save(npy_path, recognition_result.hand_landmarks)
        video_num_frames += 1


        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

  cap.release()
  cv2.destroyAllWindows()
  return keyframes

## Montar base de dados



In [20]:
directory_path = './videos'
directory_files = os.listdir(directory_path)
labels = np.array(directory_files)  
label_map = {label:num for num, label in enumerate(labels)}
label_map


{'acontecer': 0,
 'aluno': 1,
 'amarelo': 2,
 'america': 3,
 'aproveirar': 4,
 'bala': 5,
 'banco': 6,
 'banheiro': 7}

In [24]:
# Recria a pasta np
folder_name = './np'
delete_folder(folder_name)
os.mkdir(folder_name)

Pasta excluída com sucesso!


In [25]:
for label in labels:
    for sequence in range(10): #trocar isso por uma variável que simboliza o número de vídeos
        try:
            os.makedirs(os.path.join(folder_name, label, str(sequence)))
        except:
            pass

In [26]:
# Popula array do numpy
for idx, folder_path in enumerate(labels):
  base_path = directory_path + '/'+folder_path
  files = os.listdir(base_path)
  for file_index in range(len(files)):
    file_name = files[file_index]
    video_frames = process_video(base_path + '/'+ file_name, folder_path, file_index)

In [27]:
folder_to_zip = './np'
output_zip_path = './np.zip'
zip_folder(folder_to_zip, output_zip_path)
print("Pasta comprimida com sucesso!")

Pasta comprimida com sucesso!


In [None]:
delete_folder(folder_to_zip)

Pasta excluída com sucesso!


# Processar dados salvos (array)

## Unzip pasta com os dados salvos


In [4]:
folder_name = './np'


In [None]:
os.mkdir(folder_name)
unzip_file('./np.zip', folder_name)

Arquivo zip extraído com sucesso!


## Carrega os dados

In [36]:
actions = ["acontecer", "aluno", "amarelo", "america", "aproveirar", "bala", "banco"]#os.listdir(folder_name)
label_map = {label:num for num, label in enumerate(actions)}
label_map

{'acontecer': 0,
 'aluno': 1,
 'amarelo': 2,
 'america': 3,
 'aproveirar': 4,
 'bala': 5,
 'banco': 6}

In [37]:
sequences, labels = [], []
for action in actions:
    actionFrames = []
    no_sequences = len(os.listdir(folder_name + '/'+ action))
    for sequence in range(no_sequences):
        window = []
        sequence_length = len(os.listdir(folder_name + '/'+ action + '/' + str(sequence)))
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(folder_name, action, str(sequence), "{}.npy".format(frame_num)), allow_pickle=True)
            window.append(res)
        actionFrames.append(window)
        labels.append(label_map[action])
    sequences.append(actionFrames)


In [None]:
'''
Nesse momento, sequence é um array com uma posição para cada action (label), e dentro disso todos os vídeos
Ou seja, imagine que temos actions = ["oi", "bom dia"]
E cada action tem 2 vídeos


sequences = [
  [
    [
      [frame1],
      [frame2]
    ],  //video 1 label 1
    [
      [frame1],
      [frame2]
    ],  //video 2 label 1
  ],
  [
    [
      [frame1],
      [frame2]
    ],  //video 1 label 2
    [
      [frame1],
      [frame2] label 2
    ],
  ]
]

'''

'\nNesse momento, sequence é um array com uma posição para cada action (label), e dentro disso todos os vídeos\nOu seja, imagine que temos actions = ["oi", "bom dia"]\nE cada action tem 2 vídeos\n\n\nsequences = [ \n  [ \n    [\n      [frame1],\n      [frame2]\n    ],  //video 1 label 1\n    [\n      [frame1],\n      [frame2]\n    ],  //video 2 label 1\n  ], \n  [\n    [\n      [frame1],\n      [frame2]\n    ],  //video 1 label 2\n    [\n      [frame1],\n      [frame2] label 2\n    ],  \n  ]\n]\n\n'

# LSTM

## Trata os dados para deixar homogêneo

In [38]:
### Rever esta lógica, estou definindo que todos os vídeos terão 30 frames
formatted_sequences = []
num_frames = 30

for sequence in sequences:
  videos = []
  for video in sequence:
    frames = []
    for frame in video:
      if(len(frame) > 0 and len(frames) < num_frames):
        frames.append(format_landmarks(frame))
    if(len(frames) < num_frames):
      for i in range (num_frames - len(frames)):
        frames.insert(0, frames[0])
    videos.append(frames[-num_frames:])
  formatted_sequences.append(videos)


In [39]:
formatted_sequences = np.array(formatted_sequences)
actions = np.array(actions)

In [40]:
formatted_sequences.shape

(7, 10, 30, 63)

## Separa os dados de treinamento e validação

In [41]:
#Concatenate sem numpy
videos = []

for sequence_idx in range(len(formatted_sequences)):
    current_video = []
    for video_idx in range(len(formatted_sequences[sequence_idx])):
        videos.append(formatted_sequences[sequence_idx][video_idx])


In [42]:
videos = np.concatenate(formatted_sequences)
# print(labels)
labels = np.array(labels)
y = to_categorical(labels).astype(int)

x_train, x_test, y_train, y_test = train_test_split(videos, y, test_size=0.15)


## Treinamento do modelo

In [45]:
model = Sequential()
model.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(
    x_train.shape[1], #num_frames
   x_train.shape[2]  #num_landmarks em cada frame
)))

model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))
# model.add(LSTM(64))
# model.add(Dense(7, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['categorical_accuracy'])
model.fit(x_train, y_train, epochs=1000, validation_data=(x_test, y_test))

# model.add(LSTM(256, return_sequences=True, activation='relu'))
# model.add(LSTM(128, return_sequences=False, activation='relu'))
# # model.add(LSTM(128, return_sequences=False, activation='relu'))
# model.add(Dense(64, activation='relu'))
# model.add(Dense(32, activation='relu'))
# model.add(Dense(actions.shape[0], activation='softmax'))
# # model.add(LSTM(64))
# # model.add(Dense(7, activation='softmax'))

# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


Epoch 1/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 768ms/step - categorical_accuracy: 0.1312 - loss: 1.9450 - val_categorical_accuracy: 0.1818 - val_loss: 1.9420
Epoch 2/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - categorical_accuracy: 0.1425 - loss: 1.9373 - val_categorical_accuracy: 0.1818 - val_loss: 1.9301
Epoch 3/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step - categorical_accuracy: 0.1216 - loss: 1.9194 - val_categorical_accuracy: 0.1818 - val_loss: 1.8888
Epoch 4/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - categorical_accuracy: 0.1425 - loss: 1.8594 - val_categorical_accuracy: 0.1818 - val_loss: 1.7529
Epoch 5/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - categorical_accuracy: 0.1425 - loss: 1.7151 - val_categorical_accuracy: 0.1818 - val_loss: 1.7750
Epoch 6/1000
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

<keras.src.callbacks.history.History at 0x25dc9ed3df0>

# Análise dos resultados

## Geração da matriz de confusão

Essa matriz serve para analisar o número de verdadeiros/falsos positivos/negativos

In [49]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

yhat = model.predict(x_test)
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()
multilabel_confusion_matrix(ytrue, yhat)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


array([[[ 8,  1],
        [ 0,  2]],

       [[10,  0],
        [ 0,  1]],

       [[ 9,  0],
        [ 0,  2]],

       [[ 9,  0],
        [ 0,  2]],

       [[10,  0],
        [ 1,  0]],

       [[ 9,  1],
        [ 0,  1]],

       [[ 9,  0],
        [ 1,  1]]], dtype=int64)

## Cálculo da precisão do modelo

In [60]:
accuracy_score(ytrue, yhat)

0.8181818181818182

# Salvar o modelo gerado

In [None]:
model.save('libras.h5')

  saving_api.save_model(


# Testando o modelo

In [51]:
def process_test_video(video_path):
  base_options = python.BaseOptions(model_asset_path='gesture_recognizer.task')
  options = vision.GestureRecognizerOptions(base_options=base_options, num_hands=2)
  recognizer = vision.GestureRecognizer.create_from_options(options)

  cap = cv2.VideoCapture(video_path)

  video_frames = []
  num_frames = 30

  while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

        recognition_result = recognizer.recognize(image)
        # show_frame(frame, recognition_result.hand_landmarks)
        if(len(recognition_result.hand_landmarks) > 0 and len(video_frames) < num_frames):
          keypoints = format_landmarks(recognition_result.hand_landmarks)
          video_frames.append(keypoints)


        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

  cap.release()
  cv2.destroyAllWindows()
  return video_frames

In [67]:
# new_video = np.array(process_test_video('videos/amarelo/03AmareloSinalizador07-4.mp4'))
# new_video = np.array(process_test_video('videos/bala/06BalaSinalizador07-5.mp4'))
new_video = np.array(process_test_video('videos/acontecer/01AcontecerSinalizador07-1.mp4'))
# print(np.array([new_video]).shape)

In [68]:
result = model.predict(np.array([new_video]))

category = actions[np.argmax(result[0])]
print(category)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
acontecer
