Uncomment the lines of the following cell to install any library that is missing from your execution environment (leave them commented out if everything is already installed):

In [1]:
#!pip install PySimpleGUI
#!pip install opencv-python
#!pip install tqdm
#!pip install sklearn
#!pip install pandas
#!pip install scikit-learn
#conda install tensorflow
#!pip install tensorflow_hub
#!pip install sounddevice
#!pip install playsound
#!pip install pydub
#!pip install ipython

List of imports

In [1]:
import cv2, os, csv, time, sys, tqdm
import numpy as np
import pandas as pd
# Import TF and TF Hub libraries.
import tensorflow as tf
import tensorflow_hub as hub
# Import Classifiers
from sklearn import datasets
from sklearn.model_selection import train_test_split as tts
from sklearn import svm
from sklearn import metrics
#Warnings
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings('ignore', category=ConvergenceWarning)
#display and time
import time
from IPython.display import display, clear_output
import ipywidgets as widgets
from ipywidgets import Video
from IPython.display import Audio, display
#GUI
import PySimpleGUI as sg
from PIL import Image
import shutil

In [2]:
# Silence every warning category for the rest of the session (broader than the
# ConvergenceWarning-only filter installed right after the imports).
warnings.filterwarnings("ignore")

En la siguiente celda se definen las rutas:
1. Donde se guardan las fotos de cada paciente y clase
2. Donde se guardan y se leen los elementos de salida
3. Donde se guardan las imágenes que se han reajustado
4. El nombre del fichero donde se guardarán las articulaciones extraídas de cada foto
5. La carpeta que contiene el modelo de detección de puntos del cuerpo usado

In [3]:
# Base directory of the project on the local machine (edit to match your setup)
directory = 'C:\\Users\\rache\\Documents\\Rehabilitation_interface\\'
# Load the pose-detection model from a local SavedModel directory and keep its
# default serving signature as the inference callable.
model_path = "C:\\Users\\rache\\Documents\\MusicTFM" #5
model = tf.saved_model.load(model_path)
movenet = model.signatures['serving_default']

# Path to the folder containing the input images
images_folder = directory + 'Data\\' #1
# Same data folder without the trailing separator, and the prefix shared by the
# per-class folders (the class number is appended to it)
path_to_data = directory + 'Data'
path_to_classes = directory + 'Data\\clase'
images_out_folder = directory + 'DataOutputMovenet\\' #2
# Folder where the resized preview images are stored
resized_path = directory + 'resized\\' #3
# Path to the output CSV file
csv_out_path = directory + 'DataOutputMovenet\\fitness_poses_csvs_out_basic_movenet.csv' #4

#### CREACIÓN DE AUDIOS

En la siguiente celda se cargan los sonidos de tambores usados en cada clase, cambiando el path al indicado.

In [4]:
# Drum samples played by the real-time loop (edit the paths to match your machine).
path_audio1 = 'C:\\Users\\rache\\Documents\\MusicTFM\\DataOutputMovenet\\drums\\drum1.wav'
path_audio6 = 'C:\\Users\\rache\\Documents\\MusicTFM\\DataOutputMovenet\\drums\\drum6.wav'

# Wrap each sample in an autoplaying IPython audio player, then render both
# players (first sample first) as the output of this cell.
audio1, audio6 = [
    Audio(wav_path, rate=44100, autoplay=True)
    for wav_path in (path_audio1, path_audio6)
]
display(audio1, audio6)

# A partir de aquí ya no hay líneas que configurar

### Primer widget
- **Definimos el número de clases**
- **Definimos el número de fotos por clase**
- Si se deja vacío se comprueba si las carpetas tienen imágenes.

**OPCIÓN 1**
Quedarse solo con los puntos cuya probabilidad de detección supera 0,5 en al menos una imagen
**OPCIÓN 2**
Decidir a mano cuáles queremos (prefiltrados)

In [5]:
def create_classes_layout():
    """Return the row-by-row element layout of the "Create Classes" window."""
    # Two numeric inputs: number of pose classes and number of photos of each
    classes_row = [sg.Text('Number of classes:'), sg.InputText(default_text='2', size=(10, 1))]
    photos_row = [sg.Text('Number of photos per class:'), sg.InputText(default_text='5', size=(10, 1))]
    rows = [classes_row, photos_row]
    # Action button, status line, camera preview and close button, top to bottom
    rows.append([sg.Button('Create Classes')])
    rows.append([sg.Text(size=(40, 1), key='-OUTPUT-')])
    rows.append([sg.Image(filename='', key='-IMAGE-', size=(30, 15))])
    rows.append([sg.Button('Close window')])
    return rows

def create_classes_window():
    """Build the "Create Classes" window from its layout and return it."""
    return sg.Window('Pose Classifier - Create Classes', create_classes_layout())
def create_classes(window, values, time_between_photos):
    """Capture webcam photos for every pose class and save them under the data folder.

    For each class the user is prompted to hold the pose, then ``num_photos``
    frames are grabbed from the default camera, previewed in the window and
    written to ``<path_to_classes><class number>/photo_<i>.png``. Existing
    folders whose name starts with "clase" are deleted first, but only once the
    inputs are valid and the camera has been opened.

    Args:
        window: the "Create Classes" window; its '-OUTPUT-' and '-IMAGE-'
            elements are updated while capturing.
        values: values read from the window; entry 0 is the number of classes
            and entry 1 the number of photos per class.
        time_between_photos: seconds to wait between two consecutive photos.

    Returns:
        None. When the inputs are not positive integers, or the camera cannot
        be opened or read, a message is shown in '-OUTPUT-' and the function
        returns early. The window is left open for the caller to close.
    """
    # Validate the inputs before touching the camera or the data on disk
    try:
        num_classes = int(values[0])
        num_photos = int(values[1])
    except (KeyError, TypeError, ValueError):
        num_classes = num_photos = 0
    if num_classes < 1 or num_photos < 1:
        window['-OUTPUT-'].update('Invalid number of classes or photos; existing data was left untouched.')
        window.refresh()
        return

    # Initialize the webcam
    camera = cv2.VideoCapture(0)
    try:
        if not camera.isOpened():
            window['-OUTPUT-'].update('La cámara no funciona!')
            window.refresh()
            return

        # Remove all directories that start with "clase"
        for root, dirs, _files in os.walk(path_to_data):
            for stale_dir in [d for d in dirs if d.startswith("clase")]:
                shutil.rmtree(os.path.join(root, stale_dir))
            # Do not let os.walk descend into the folders that were just removed
            dirs[:] = [d for d in dirs if not d.startswith("clase")]

        for clase in range(num_classes):
            # Wait for the user to be ready
            sg.popup('Haz la posición número {}. Pulsa OK cuando estés listo.'.format(clase+1), title='¡Listo!')
            window['-OUTPUT-'].update('Haz la posición número {}. Esperando...'.format(clase+1))
            window.refresh()
            time.sleep(2)
            folder_name = path_to_classes + str(clase)
            os.makedirs(folder_name, exist_ok=True)
            # Loop to take the photos
            for i in range(num_photos):
                window['-OUTPUT-'].update('Foto {} de {}'.format(i+1, num_photos))
                window.refresh()
                # Read a frame and make sure it is valid before using it
                ret, frame = camera.read()
                if not ret:
                    window['-OUTPUT-'].update('La cámara no funciona!')
                    window.refresh()
                    # Abort the whole capture instead of prompting for the next class
                    return
                # Show a quarter-size PNG preview of the frame in the GUI
                resized = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
                imgbytes = cv2.imencode('.png', resized)[1].tobytes()
                window['-IMAGE-'].update(data=imgbytes)
                # Save the full-size frame to a file
                filename = os.path.join(folder_name, 'photo_{}.png'.format(i))
                cv2.imwrite(filename, frame)
                # Wait for the specified time before taking the next photo
                time.sleep(time_between_photos)
    finally:
        # Release the webcam on every exit path, including exceptions
        camera.release()

In [11]:
# FUNCTIONS INCLUDED ON THE SECOND GUI
def class_image_resized():
    """Create a 100x100 thumbnail of each class and return one GUI column per thumbnail.

    Every folder inside ``images_folder`` whose name starts with "clase" and
    that contains ``photo_0.png`` contributes one thumbnail. Thumbnails are
    written to ``resized_path`` (created if missing) as ``clase<i>.png``, where
    ``i`` is the position of the folder once the folders are ordered by their
    numeric suffix.

    Returns:
        list: one ``sg.Column`` per thumbnail, each wrapping an ``sg.Image``.
    """
    def _class_order(folder_name):
        # Numeric suffixes sort numerically (clase2 before clase10); others go last
        suffix = folder_name[len("clase"):]
        if suffix.isdecimal():
            return (0, int(suffix), folder_name)
        return (1, 0, folder_name)

    # os.listdir returns entries in arbitrary order, so sort for stable numbering
    class_folders = sorted(
        (
            name for name in os.listdir(images_folder)
            if name.startswith("clase")
            and os.path.isfile(os.path.join(images_folder, name, "photo_0.png"))
        ),
        key=_class_order,
    )

    # The thumbnail folder may not exist yet on a fresh setup
    os.makedirs(resized_path, exist_ok=True)

    image_columns = []
    for i, folder_name in enumerate(class_folders):
        source_path = os.path.join(images_folder, folder_name, "photo_0.png")
        resized_image_path = os.path.join(resized_path, f"clase{i}.png")
        # Resize the first photo of the class and store it in the thumbnail folder
        with Image.open(source_path) as img:
            img.resize((100, 100)).save(resized_image_path)
        image_columns.append(sg.Column([[sg.Image(filename=resized_image_path)]]))
    return image_columns

def create_training_layout():
    """Return the layout of the training window: class thumbnails on top, controls below."""
    # One checkbox per selectable keypoint group; label and key derive from the same name
    part_names = ('Full body', 'Cara', 'Torso', 'Piernas', 'Puntos fiables')
    thumbnails_row = class_image_resized()
    checkbox_row = [
        sg.Checkbox(f'{part}:', default=False, key=f"-{part}-")
        for part in part_names
    ]
    # Each action button is followed by the status line it reports to
    controls = [
        [sg.Text('The rehabiliation is fixed on ...')],
        checkbox_row,
        [sg.Button('Train Model')],
        [sg.Text(size=(50, 1), key='-1-')],
        [sg.Button('Classify Pose from Camera')],
        [sg.Text(size=(50, 1), key='-2-')],
        [sg.Button('Try Real Time'), sg.Button('STOP', button_color=('white', 'red'))],
        [sg.Text(size=(50, 1), key='-3-')],
        [sg.Button('Close Window')],
    ]
    return [thumbnails_row] + controls
def keypoints_format(list_points):
    """Return the given keypoint numbers as a pandas Index of their string forms."""
    return pd.Index([str(point) for point in list_points])
def keypoints_in_range(list_points):
    """Convert 1-based keypoint labels into a pandas Index of 0-based integers."""
    zero_based = []
    for label in list_points:
        zero_based.append(int(label) - 1)
    return pd.Index(zero_based)

def keypoints_per_part_of_body(values):
    """Keep only the selected keypoints in the features CSV and return their indices.

    The first ticked checkbox wins, in this order: full body, face ("Cara"),
    torso, legs ("Piernas"), reliable points ("Puntos fiables"). Reliable points
    are the keypoints whose probability column ``p<k>`` exceeds 0.5 in at least
    one row. With nothing ticked, all 17 keypoints are kept.

    Side effect: the CSV at ``csv_out_path`` is overwritten so that it only
    holds 'name', 'class' and the x/y columns of the selected keypoints (the
    probability columns are dropped).

    Args:
        values: mapping of checkbox keys ("-Full body-", "-Cara-", "-Torso-",
            "-Piernas-", "-Puntos fiables-") to booleans.

    Returns:
        pandas.Index: 0-based indices of the selected keypoints.
    """
    all_points = list(range(1, 18))
    # Checked in this order; the first ticked checkbox decides the selection
    body_parts = (
        ("-Full body-", all_points),
        ("-Cara-", [1, 2, 3, 4, 5]),
        ("-Torso-", [6, 7, 8, 9, 10, 11, 12, 13]),
        ("-Piernas-", [14, 15, 16, 17]),
    )

    df = pd.read_csv(csv_out_path)

    selected = None
    for key, points in body_parts:
        if values.get(key):
            selected = points
            break

    if selected is not None:
        idx = keypoints_format(selected)
    elif values.get("-Puntos fiables-"):
        # Probability columns are named p<k>; keep keypoints with any p > 0.5
        prob_cols = df.filter(regex=r'^p\d+$')
        reliable = prob_cols.columns[prob_cols.gt(0.5).any(axis=0).to_numpy()]
        idx = keypoints_format([int(name[1:]) for name in reliable])
    else:
        idx = keypoints_format(all_points)

    # Keypoint number of every x<k>/y<k> column (NaN for all other columns)
    numbers = df.columns.str.extract(r'(x|y)(\d+)')[1]
    keep_mask = df.columns.isin(['name', 'class']) | numbers.isin(idx).to_numpy()
    # Overwrite the CSV with the reduced set of columns
    df.loc[:, keep_mask].to_csv(csv_out_path, index=False)
    return keypoints_in_range(idx)
    
def create_training_window():
    """Build the training/classification window from its layout and return it."""
    return sg.Window('Ejemplo de imágenes', create_training_layout())

# Define the function to extract landmarks from an image
def extract_landmarks(image_path):
    """Run the pose model on one image file and return its keypoint tensor.

    Args:
        image_path: path of the image file to analyse.

    Returns:
        The 'output_0' entry of the model output (a [1, 1, 17, 3] tensor
        according to the original note in this notebook).
    """
    # Load the image. decode_image detects the file format (the photos are
    # saved as PNG) and channels=3 always yields a 3-channel image, the same
    # decoding used by the real-time loop.
    raw_bytes = tf.io.read_file(image_path)
    image = tf.io.decode_image(raw_bytes, channels=3, expand_animations=False)
    image = tf.expand_dims(image, axis=0)
    # Resize and pad the image to keep the aspect ratio and fit the expected size.
    image = tf.cast(tf.image.resize_with_pad(image, 192, 192), dtype=tf.int32)

    # Run model inference and keep every keypoint
    outputs = movenet(image)
    return outputs['output_0']
def extract_features_to_train(csv_out_file, images_folder, images_out_foulder):
    """Write the keypoints of every training photo to a CSV and filter its columns.

    Each sub-folder of ``images_folder`` is treated as one pose class. Every
    image in it is passed through ``extract_landmarks`` and written as one row:
    image name, class name, then three values per keypoint under the headers
    x<k>, y<k>, p<k> (k = 1..17). The file is then closed and
    ``keypoints_per_part_of_body`` reduces it to the selected keypoints.

    This function also reads the module-level ``window`` (progress messages on
    element '-1-') and ``values`` (checkbox state passed on to
    ``keypoints_per_part_of_body``).

    Args:
        csv_out_file: writable text file object for the CSV; it is closed here.
        images_folder: folder holding one sub-folder per pose class.
        images_out_foulder: output folder; one sub-folder per class is created in it.

    Returns:
        The index returned by ``keypoints_per_part_of_body``.
    """
    csv_out_writer = csv.writer(csv_out_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    # Folder names are used as pose class names; stray files are ignored
    pose_class_names = sorted(
        n for n in os.listdir(images_folder)
        if not n.startswith('.') and os.path.isdir(os.path.join(images_folder, n))
    )
    # Header row: name, class, then x<k>, y<k>, p<k> for the 17 keypoints
    header = ['name', 'class']
    for k in range(1, 18):
        header += [f'x{k}', f'y{k}', f'p{k}']
    csv_out_writer.writerow(header)

    for pose_class_name in pose_class_names:
        window['-1-'].update(f'Bootstrapping {pose_class_name}')
        # Use the folder passed by the caller rather than a module-level global
        os.makedirs(os.path.join(images_out_foulder, pose_class_name), exist_ok=True)
        class_dir = os.path.join(images_folder, pose_class_name)
        image_names = sorted(n for n in os.listdir(class_dir) if not n.startswith('.'))
        for image_name in tqdm.tqdm(image_names, position=0):
            # Extract the landmarks from the image, one row of 3 values per keypoint
            landmarks = extract_landmarks(os.path.join(class_dir, image_name))
            pose_np = landmarks.numpy().reshape(-1, 3)
            # NOTE(review): MoveNet documents each keypoint as (y, x, score), so the
            # column labelled x may actually hold y. Training and inference use the
            # same order, so classification is unaffected; confirm before relabelling.
            new_row = [image_name, pose_class_name]
            new_row.extend(pose_np.reshape(-1))
            # Save the landmarks to the CSV file
            csv_out_writer.writerow(new_row)

    # Close (and therefore flush) the file before it is re-read and overwritten
    csv_out_file.close()
    return keypoints_per_part_of_body(values)

def classify_from_data():
    """Train an RBF-kernel SVM on the features CSV and predict a held-out split.

    Returns:
        tuple: (fitted classifier, true labels of the test split,
        predicted labels for the test split).
    """
    samples = pd.read_csv(csv_out_path)
    labels = samples["class"]
    # Every column except the label and the image name is a feature
    features = samples.loc[:, ~samples.columns.isin(["class", "name"])]
    # Hold out 25% of the rows with a fixed seed
    X_tr, X_tst, y_tr, y_tst = tts(features, labels, test_size=25/100, random_state=109)
    # Fit the support vector machine and predict the held-out rows
    clf = svm.SVC(kernel='rbf')
    clf.fit(X_tr, y_tr)
    return clf, y_tst, clf.predict(X_tst)

def loop(clf, idx):
    """Classify webcam frames in real time and play a drum sound on each pose change.

    Each frame is JPEG-encoded, decoded as a 3-channel image, resized with
    padding to 192x192 and passed to the module-level ``movenet``. The keypoints
    selected by ``idx`` are gathered and the first two values of each one form
    the feature row given to ``clf.predict``. When the prediction switches to
    'clase0' or 'clase1', the matching audio object is displayed and the status
    line '-3-' is updated. The loop ends when STOP is pressed, the window is
    closed, ``stop_flag`` is set, or a frame cannot be read.

    Besides its arguments, this function uses the module-level ``window``,
    ``movenet``, ``audio1``, ``audio6`` and ``stop_flag``.

    Args:
        clf: fitted classifier exposing ``predict``.
        idx: 0-based indices of the keypoints to keep.

    Returns:
        None.
    """
    global stop_flag
    # Initialize the webcam and check that it opened correctly
    camera = cv2.VideoCapture(0)
    if not camera.isOpened():
        print("Cannot open camera")
        window['-3-'].update("Cannot open camera")
        camera.release()
        # Return instead of exit(): exiting would terminate the notebook kernel session
        return

    # Loop-invariant values
    idx_tensor = tf.constant(idx, dtype=tf.int32)
    sounds = {'clase0': (audio1, "Clase 0."), 'clase1': (audio6, "Clase 1.")}
    last_played = None  # class whose sound was played most recently
    window_closed = False
    try:
        while True:
            # timeout=0 keeps the GUI responsive without blocking the loop
            event, _ = window.read(timeout=0)
            if event == sg.WIN_CLOSED:
                window_closed = True
                stop_flag = True
            elif event == 'STOP':
                stop_flag = True
            if stop_flag:
                break

            # Read a frame from the webcam
            ret, frame = camera.read()
            if not ret:
                print("Cannot read frame from camera")
                window['-3-'].update("Cannot read frame from camera")
                break
            success, encoded_image = cv2.imencode('.jpg', frame)
            if not success:
                print('Error encoding image')
                continue

            image = tf.io.decode_image(encoded_image.tobytes(), channels=3, expand_animations=False)
            image = tf.expand_dims(image, axis=0)
            # Resize and pad the image to keep the aspect ratio and fit the expected size.
            image = tf.cast(tf.image.resize_with_pad(image, 192, 192), dtype=tf.int32)
            # Run model inference and keep only the selected keypoints
            outputs = movenet(image)
            keypoints = tf.gather(outputs['output_0'], idx_tensor, axis=2)

            # One row of 3 values per keypoint; reshape (unlike squeeze) also
            # works when a single keypoint is selected
            pose_np = keypoints.numpy().reshape(-1, 3)
            # Feature row: first two values of every keypoint, keypoint by keypoint
            features = pose_np[:, :2].reshape(1, -1)

            # Classify pose
            predicted = clf.predict(pd.DataFrame(features))[0]
            # Play a sound only when the pose switches between the two known classes
            if predicted in sounds and predicted != last_played:
                clip, label = sounds[predicted]
                start = time.time()
                display(clip)
                print(time.time() - start)
                window['-3-'].update(label)
                last_played = predicted
    finally:
        # Release the webcam on every exit path, including exceptions
        camera.release()
    if not window_closed:
        window['-3-'].update("Loop stopped.")

In [7]:
# Keypoint number (1-based) -> body part name, built from the ordered list of names
dic = dict(enumerate(
    [
        'nose',
        'left_eye', 'right_eye',
        'left_ear', 'right_ear',
        'left_shoulder', 'right_shoulder',
        'left_elbow', 'right_elbow',
        'left_wrist', 'right_wrist',
        'left_hip', 'right_hip',
        'left_knee', 'right_knee',
        'left_ankle', 'right_ankle',
    ],
    start=1,
))
print(dic)

{1: 'nose', 2: 'left_eye', 3: 'right_eye', 4: 'left_ear', 5: 'right_ear', 6: 'left_shoulder', 7: 'right_shoulder', 8: 'left_elbow', 9: 'right_elbow', 10: 'left_wrist', 11: 'right_wrist', 12: 'left_hip', 13: 'right_hip', 14: 'left_knee', 15: 'right_knee', 16: 'left_ankle', 17: 'right_ankle'}


In [12]:
## FIRST WINDOW
# Event loop of the "Create Classes" window: capture the training photos once,
# then close the window.
window = create_classes_window()
while True:
    event, values = window.read()
    if event in (sg.WIN_CLOSED, 'Close window'):
        break
    if event == 'Create Classes':
        window['-OUTPUT-'].update('Creating classes...')
        window.refresh()

        time_between_photos = 1
        create_classes(window, values, time_between_photos)

        window['-OUTPUT-'].update('Classes created')
        # Repaint before sleeping, otherwise the message never becomes visible
        window.refresh()
        time.sleep(2)
        # Leave the loop instead of reading events from a closed window
        break
# Close the window when the event loop is exited
window.close()
# SECOND WINDOW
# Create the training window and process its events: train, evaluate, run in real time.
window = create_training_window()
stop_flag = True
# Set by 'Classify Pose from Camera' and 'Train Model' respectively; both are
# needed before the real-time loop can run
clf = None
idx = None
while True:
    event, values = window.read()
    if event in (sg.WINDOW_CLOSED, 'Close Window'):
        break
    elif event == 'Train Model':
        # Make sure the output folder exists before creating the CSV in it
        csv_out_dir = os.path.dirname(csv_out_path)
        if csv_out_dir:
            os.makedirs(csv_out_dir, exist_ok=True)
        # newline='' is what the csv module expects; without it Windows gets
        # an extra blank line after every row
        with open(csv_out_path, 'w', newline='') as csv_out_file:
            idx = extract_features_to_train(csv_out_file, images_folder, images_out_folder)
        print('idx', idx)
        window['-1-'].update('Model trained')
    elif event == 'Classify Pose from Camera':
        if not os.path.exists(csv_out_path):
            window['-2-'].update("No features file found. Press 'Train Model' first.")
            continue
        # Evaluate prediction accuracy on the held-out split
        clf, y_tst, y_pr = classify_from_data()
        accuracy = metrics.accuracy_score(y_tst, y_pr)
        window['-2-'].update(f'Pose classified from camera. Accuracy: {accuracy}')
        print('SVM ACCURACY', accuracy)
    elif event == 'Try Real Time':
        if clf is None or idx is None:
            window['-3-'].update("Press 'Train Model' and 'Classify Pose from Camera' first.")
            continue
        stop_flag = False
        loop(clf, idx)
        window['-3-'].update('Loop stopped. Tried in Real time')
window.close()

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 38.81it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 37.28it/s]


Unnamed: 0,name,class,x1,y1,x2,y2,x3,y3,x4,y4,x5,y5
0,photo_0.png,clase0,0.759891,0.474201,0.686588,0.538494,0.674697,0.401109,0.64434,0.596814,0.651011,0.317572
1,photo_1.png,clase0,0.568553,0.435101,0.530744,0.488844,0.533503,0.390288,0.5875,0.555028,0.580077,0.322988
2,photo_2.png,clase0,0.588322,0.438443,0.558492,0.480302,0.568032,0.396208,0.590484,0.535203,0.604104,0.329514
3,photo_3.png,clase0,0.572346,0.439593,0.547529,0.481231,0.556921,0.392217,0.556045,0.540907,0.568957,0.343214
4,photo_4.png,clase0,0.59056,0.430482,0.565292,0.465939,0.563185,0.394067,0.559608,0.540068,0.570689,0.34679
5,photo_0.png,clase1,0.572153,0.430966,0.523345,0.481652,0.522124,0.37501,0.563952,0.545129,0.553668,0.30556
6,photo_1.png,clase1,0.584374,0.426809,0.527654,0.480394,0.53106,0.378706,0.568627,0.541829,0.55827,0.305727
7,photo_2.png,clase1,0.57187,0.433931,0.52705,0.487692,0.530816,0.380391,0.571871,0.55236,0.564694,0.312273
8,photo_3.png,clase1,0.575953,0.431406,0.527959,0.485792,0.529055,0.375339,0.575686,0.548222,0.563925,0.304439
9,photo_4.png,clase1,0.569592,0.434701,0.52266,0.485511,0.525107,0.376586,0.572676,0.551702,0.562252,0.308041


idx Int64Index([0, 1, 2, 3, 4], dtype='int64')


Unnamed: 0,x1,y1,x2,y2,x3,y3,x4,y4,x5,y5
0,0.759891,0.474201,0.686588,0.538494,0.674697,0.401109,0.64434,0.596814,0.651011,0.317572
1,0.568553,0.435101,0.530744,0.488844,0.533503,0.390288,0.5875,0.555028,0.580077,0.322988
2,0.588322,0.438443,0.558492,0.480302,0.568032,0.396208,0.590484,0.535203,0.604104,0.329514
3,0.572346,0.439593,0.547529,0.481231,0.556921,0.392217,0.556045,0.540907,0.568957,0.343214
4,0.59056,0.430482,0.565292,0.465939,0.563185,0.394067,0.559608,0.540068,0.570689,0.34679
5,0.572153,0.430966,0.523345,0.481652,0.522124,0.37501,0.563952,0.545129,0.553668,0.30556
6,0.584374,0.426809,0.527654,0.480394,0.53106,0.378706,0.568627,0.541829,0.55827,0.305727
7,0.57187,0.433931,0.52705,0.487692,0.530816,0.380391,0.571871,0.55236,0.564694,0.312273
8,0.575953,0.431406,0.527959,0.485792,0.529055,0.375339,0.575686,0.548222,0.563925,0.304439
9,0.569592,0.434701,0.52266,0.485511,0.525107,0.376586,0.572676,0.551702,0.562252,0.308041


SVM ACCURACY 0.3333333333333333


0.0


0.002157926559448242


0.0040094852447509766


KeyboardInterrupt: 