Za one koji pomažu sa snimanjem dataseta: ispod, u PREFIKS, upišite svoje ime u formatu "ImePrezime_", pa pokrenite sve ćelije poslednjim dugmetom u meniju (Restart the kernel and run all cells).

In [None]:
# Contributor prefix that is prepended to the dataset folder name (see
# DATA_PATH); expected form is "FirstnameLastname_". Empty means plain "MP_Data".
PREFIKS = ""

In [None]:
# Record-folder number used by setup_folders_for_alphabet when it creates the
# per-letter folders; None lets get_next_record_number_for_sign pick the number.
POCETNI_UZORAK_BR = None

In [None]:
# Index of the camera device (the kind of value cv2.VideoCapture expects).
Camera_number = 0

In [None]:
# Number of epochs intended for training a new model.
TRAINING_EPOCHS = 300

In [None]:
# Folder that receives the TensorBoard logs (used to build log_dir below).
LOG_FOLDER_NAME = "Logs2"

# Imports

In [None]:
!pip install tensorflow opencv-python mediapipe scikit-learn matplotlib

In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp
from collections import namedtuple

In [None]:
# Path for exported data, numpy arrays.
# Layout used by the rest of the file: DATA_PATH/<sign>/<record number>/<frame>.npy
DATA_PATH = PREFIKS + 'MP_Data'

# Actions that we try to detect: one label per sub-folder of DATA_PATH.
# The name is always bound (empty array when the folder is missing) so that
# later cells never hit a NameError on a fresh checkout.
if os.path.exists(DATA_PATH):
    sign_labels = np.array(os.listdir(DATA_PATH))
else:
    sign_labels = np.array([], dtype=str)

# Thirty videos worth of data
no_sequences = 30

# Videos are going to be 30 frames in length
sequence_length = 30

# Folder start
start_folder = 30


In [None]:
# os.sys.path

# Setup

In [None]:
def get_next_record_number_for_sign(sign_label):
    """Return the number to use for the next recording folder of ``sign_label``.

    Recording folders live in ``DATA_PATH/<sign_label>/``. Their names are split
    on ``_`` and every purely numeric token counts as a record number (so both
    ``"7"`` and ``"Name_7"`` yield 7).

    Returns:
        The larger of "highest number found + 1" and the number of entries in
        the sign folder; 0 when the folder is missing or empty.
    """
    sign_dir = os.path.join(DATA_PATH, sign_label)
    if not os.path.exists(sign_dir):
        return 0

    folder_names = os.listdir(sign_dir)
    # Flatten all numeric tokens into one list. Entries without any numeric
    # token (stray files, notes, ...) simply contribute nothing instead of
    # producing a ragged structure that breaks the max computation.
    folder_numbers = [
        int(token)
        for name in folder_names
        for token in name.split('_')
        if token.isdigit()
    ]
    next_after_max = max(folder_numbers) + 1 if folder_numbers else 0
    return max(next_after_max, len(folder_names))

In [None]:
# Optional setup of folders

def setup_folders_for_alphabet(path="MP_Data"):
    """Create ``path/<letter>/<record number>`` for every letter of the alphabet.

    The record number is ``POCETNI_UZORAK_BR`` when that is set, otherwise the
    value returned by ``get_next_record_number_for_sign``.
    NOTE(review): that helper inspects ``DATA_PATH``, not ``path`` - the two
    only agree when the function is called with ``DATA_PATH``.

    Folder creation is best-effort: an already existing folder is fine, any
    other OS error is reported and the remaining letters are still processed.
    """
    azbuka = ['a', 'b', 'v', 'g', 'd', 'đ', 'e', 'ž', 'z', 'i', 'j', 'k', 'l', 'lj', 'm', 'n', 'nj', 'o', 'p', 'r', 's', 't', 'ć', 'u', 'f', 'h', 'c', 'č','dž', 'š']
    for slovo in azbuka:
        if POCETNI_UZORAK_BR is not None:
            record_num = POCETNI_UZORAK_BR
        else:
            record_num = get_next_record_number_for_sign(slovo)

        record_dir = os.path.join(path, slovo, str(record_num))
        try:
            os.makedirs(record_dir, exist_ok=True)
        except OSError as error:
            # Keep going with the other letters, but do not hide the problem.
            print("Could not create folder {}: {}".format(record_dir, error))

In [None]:

# First run only: build the per-letter folder tree when the dataset folder
# does not exist yet.
if not os.path.exists(DATA_PATH):
    setup_folders_for_alphabet(DATA_PATH)

# 2. Keypoint functions

In [None]:
# Short aliases for the MediaPipe solution modules used below.
mp_drawing = mp.solutions.drawing_utils          # landmark drawing helpers
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands                    # hand tracking + HAND_CONNECTIONS
mp_pose = mp.solutions.pose                      # pose tracking + POSE_CONNECTIONS

In [None]:
def mediapipe_detection(image, model):
    """Run ``model.process`` on a BGR frame.

    The frame is converted to RGB for the model and marked read-only for the
    duration of the call; afterwards it is converted back to BGR.

    Returns:
        (bgr_image, results) - the round-tripped frame and whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Read-only while the model looks at it, writeable again afterwards.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [None]:
def draw_landmarks(image, results):
    """Draw pose and both hand landmark sets on ``image`` with default styling.

    Face landmarks are not drawn.
    """
    layers = (
        ("pose_landmarks", mp_pose.POSE_CONNECTIONS),
        ("left_hand_landmarks", mp_hands.HAND_CONNECTIONS),
        ("right_hand_landmarks", mp_hands.HAND_CONNECTIONS),
    )
    for attribute_name, connections in layers:
        mp_drawing.draw_landmarks(image, getattr(results, attribute_name), connections)

In [None]:
def draw_styled_landmarks(image, results):
    """Draw pose and hand landmarks on ``image`` using fixed colour schemes.

    The pose is always passed to the drawing helper; each hand is drawn only
    when its landmarks are not None. Face landmarks are not drawn.
    """
    def _spec(color, radius):
        # All specs in this function share thickness 2.
        return mp_drawing.DrawingSpec(color=color, thickness=2, circle_radius=radius)

    # Pose connections
    mp_drawing.draw_landmarks(
        image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
        _spec((160,44,10), 4),
        _spec((160,88,150), 2),
    )

    # (attribute on results, landmark colour, connection colour)
    hand_styles = (
        ("left_hand_landmarks", (121,22,76), (121,44,250)),
        ("right_hand_landmarks", (245,117,66), (245,66,230)),
    )
    for attribute_name, landmark_color, connection_color in hand_styles:
        hand_landmarks = getattr(results, attribute_name)
        if hand_landmarks is None:
            continue
        mp_drawing.draw_landmarks(
            image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
            _spec(landmark_color, 4),
            _spec(connection_color, 2),
        )

In [None]:
def extract_keypoints(results):
    """Flatten pose and hand landmarks into one 1-D feature vector.

    Layout: 33 pose points as (x, y, z, visibility), then 21 left-hand points
    and 21 right-hand points as (x, y, z). A missing (falsy) landmark set is
    replaced by zeros of the same length. Face landmarks are not included.
    """
    def _flatten(landmark_set, fields, point_count):
        if not landmark_set:
            return np.zeros(point_count * len(fields))
        rows = [[getattr(point, field) for field in fields] for point in landmark_set.landmark]
        return np.array(rows).flatten()

    pose = _flatten(results.pose_landmarks, ("x", "y", "z", "visibility"), 33)
    left_hand = _flatten(results.left_hand_landmarks, ("x", "y", "z"), 21)
    right_hand = _flatten(results.right_hand_landmarks, ("x", "y", "z"), 21)
    return np.concatenate([pose, left_hand, right_hand])

# Preprocess Data and Create Labels and Features

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
def create_labels_and_features(_test_size=0.1):
    """Load every recorded sequence under DATA_PATH and split into train/test.

    NOTE: a later cell in this file defines a function with the same name,
    which replaces this one once that cell has run.

    Each sign folder is one class; each of its sub-folders is one sequence of
    ``sequence_length`` frames stored as ``<frame>.npy``.

    Returns:
        X_train, X_test, y_train, y_test as produced by ``train_test_split``
        (labels are one-hot encoded integers).
    """
    sign_labels = np.array(os.listdir(DATA_PATH))
    label_map = {label: index for index, label in enumerate(sign_labels)}

    sequences = []
    labels = []
    for sign in sign_labels:
        sign_dir = os.path.join(DATA_PATH, sign)
        for sequence in os.listdir(sign_dir):
            sequence_dir = os.path.join(sign_dir, str(sequence))
            video = [
                np.load(os.path.join(sequence_dir, "{}.npy".format(frame_num)))
                for frame_num in range(sequence_length)
            ]
            sequences.append(video)
            labels.append(label_map[sign])

    X = np.array(sequences)
    y = to_categorical(labels).astype(int)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=_test_size)
    return X_train, X_test, y_train, y_test

In [None]:
def create_labels_and_features(num_of_labels=-1, num_of_sequences=-1,_test_size=0.1):
    """Load recorded keypoint sequences under DATA_PATH and split into train/test.

    Each sign folder is one class; each of its sub-folders is one sequence of
    ``sequence_length`` frames stored as ``<frame>.npy``.

    Args:
        num_of_labels: use only the first N sign folders (``os.listdir``
            order); -1 means all of them.
        num_of_sequences: consider only the first N recording folders of each
            sign; -1 means all of them.
        _test_size: forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        X_train, X_test, y_train, y_test. The one-hot labels always have one
        column per selected sign, even for signs without a usable recording.

    Raises:
        ValueError: when no complete recording was found at all.
    """
    all_signs = os.listdir(DATA_PATH)
    if num_of_labels == -1:
        num_of_labels = len(all_signs)

    sign_labels = np.array(all_signs)[:num_of_labels]
    label_map = {label:num for num, label in enumerate(sign_labels)}
    sequences, labels = [], []
    for sign in sign_labels:
        sign_dir = os.path.join(DATA_PATH, sign)
        recordings = os.listdir(sign_dir)
        nmbr_of_seq = len(recordings) if num_of_sequences == -1 else num_of_sequences
        for sequence in recordings[:nmbr_of_seq]:
            frame_paths = [
                os.path.join(sign_dir, str(sequence), "{}.npy".format(frame_num))
                for frame_num in range(sequence_length)
            ]
            # Freshly created (still empty) or interrupted recordings lack
            # frames; skip them instead of aborting the whole load.
            if not all(os.path.isfile(frame_path) for frame_path in frame_paths):
                print("Skipping incomplete recording: {}".format(os.path.join(sign_dir, str(sequence))))
                continue
            sequences.append([np.load(frame_path) for frame_path in frame_paths])
            labels.append(label_map[sign])

    if not sequences:
        raise ValueError("No complete recordings found in {}".format(DATA_PATH))

    X = np.array(sequences)
    # Fix the number of classes explicitly so the label width matches the
    # number of selected signs even when the last signs have no recordings.
    y = to_categorical(labels, num_classes=len(sign_labels)).astype(int)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=_test_size)
    return X_train, X_test, y_train, y_test

In [None]:
#os.listdir(os.path.join(DATA_PATH, 'a'))[:85]

In [None]:
#sign_labels = np.array(os.listdir(DATA_PATH))
#label_map = {label:num for num, label in enumerate(sign_labels)}

In [None]:
#X = np.array(sequences)

In [None]:
#y = to_categorical(labels).astype(int)

In [None]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Build and Train LSTM Neural Network

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [None]:
# TensorBoard callback writing into LOG_FOLDER_NAME; passed to model.fit in
# train_new_model.
log_dir = os.path.join(LOG_FOLDER_NAME)
tb_callback = TensorBoard(log_dir=log_dir)

In [None]:
def load_model(sign_labels=None, path='action.keras'):
    """Rebuild the LSTM classifier and load its weights from ``path``.

    Args:
        sign_labels: sequence of class labels; only its length is used (size
            of the softmax layer). ``None`` means "all sign folders currently
            in DATA_PATH", looked up at call time.
        path: file passed to ``model.load_weights``.

    Returns:
        The compiled model with loaded weights.
    """
    if sign_labels is None:
        # Resolve at call time: a default evaluated when the function is
        # defined would go stale as soon as a new sign folder is recorded.
        sign_labels = os.listdir(DATA_PATH)
    number_of_classes = len(sign_labels)

    # Architecture must stay identical to train_new_model, otherwise the
    # stored weights do not fit.
    model = Sequential()
    model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,258)))
    model.add(LSTM(128, return_sequences=True, activation='relu'))
    model.add(LSTM(64, return_sequences=False, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(number_of_classes, activation='softmax'))

    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    model.load_weights(path)
    return model

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

def test_model(model,X_test, y_test,labels):
    """Print evaluation metrics for ``model`` and show its confusion matrix.

    Args:
        model: object with a ``predict`` method returning per-class scores.
        X_test: test inputs passed to ``model.predict``.
        y_test: one-hot encoded true labels.
        labels: class names; index i names class i.

    Prints weighted recall/precision/F1, accuracy and the confusion matrices,
    then displays the confusion-matrix plot.
    """
    ytrue = np.argmax(y_test, axis=1).tolist()
    ypred = np.argmax(model.predict(X_test), axis=1).tolist()

    # Size the matrices by the full class list: a small test split may miss
    # some classes, and a smaller matrix would not match display_labels.
    class_ids = list(range(len(labels)))

    print(len(labels))
    print("recall")
    print(recall_score(ytrue, ypred,average='weighted'))
    print("precision")
    print(precision_score(ytrue, ypred,average='weighted'))
    print("f1")
    print(f1_score(ytrue, ypred,average='weighted'))

    print("Accuracy score:")
    print(accuracy_score(ytrue, ypred) )
    print("multilabel_confusion_matrix")
    print(multilabel_confusion_matrix(ytrue, ypred, labels=class_ids))

    conf_matrix = confusion_matrix(ytrue, ypred, labels=class_ids)
    print("conf matrix")
    print(conf_matrix)
    disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix,
                              display_labels=labels)
    disp.plot()

    plt.show()
    
    
    

In [None]:
#test_model(load_model(sign_labels=np.array(os.listdir(DATA_PATH))[:20], path='let20sekv20epoha200.keras') )

In [None]:
def train_new_model(epoh_num=400,sign_labels=None):
    """Build, train and evaluate a new LSTM classifier.

    Args:
        epoh_num: number of training epochs.
        sign_labels: class labels; ``None`` means "all sign folders currently
            in DATA_PATH", looked up at call time. When a subset is passed it
            is expected to be the first N entries of ``os.listdir(DATA_PATH)``,
            because the data loader selects signs by count.

    Returns:
        The trained model (metrics are printed via ``test_model``).
    """
    if sign_labels is None:
        # Resolve at call time so signs recorded during this session count.
        sign_labels = np.array(os.listdir(DATA_PATH))
    number_of_classes = len(sign_labels)

    model = Sequential()
    model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,258)))
    model.add(LSTM(128, return_sequences=True, activation='relu'))
    model.add(LSTM(64, return_sequences=False, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(number_of_classes, activation='softmax'))

    model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
    # Load exactly as many signs as the output layer has units; otherwise the
    # one-hot labels and the softmax layer disagree in width.
    X_train, X_test, y_train, y_test = create_labels_and_features(num_of_labels=number_of_classes, _test_size=0.2)

    model.fit(X_train, y_train, epochs=epoh_num, callbacks=[tb_callback])
    test_model(model,X_test,y_test,sign_labels)
    return model

In [None]:
#sign_labels = None

In [None]:
def save_model(model, name='action.keras'):
    """Save ``model`` by calling ``model.save(name)``."""
    model.save(name)

In [None]:
#save_model(train_new_model(epoh_num=400),'let30sekvNepoha400.keras')

# Evaluation using Confusion Matrix and Accuracy

In [None]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

In [None]:
#yhat = model.predict(X_test)

In [None]:
#ytrue = np.argmax(y_test, axis=1).tolist()
#yhat = np.argmax(yhat, axis=1).tolist()

In [None]:
#multilabel_confusion_matrix(ytrue, yhat)

In [None]:
#accuracy_score(ytrue, yhat)

# Tkinter GUI 

In [None]:
from tkinter import *
from PIL import Image, ImageTk 

def input_textbox(options):
    """Open a standalone window asking for a sign and return the answer.

    The user can type into the entry field or pick one of ``options`` from a
    drop-down (shown only when ``options`` is non-empty). Typed text wins; the
    drop-down choice is used when the entry is empty.

    Returns:
        The chosen text, or ``""`` when nothing was chosen or the window was
        closed without pressing OK (same convention as input_textbox_window).
    """
    placeholder = "Select an Option"
    # Empty until OK is pressed, so a closed window is not mistaken for a sign.
    text = ""
    master = Tk()
    master.title("Izabrati znak")
    master.geometry('500x500')

    Label(master, text='Unesi znak:').grid(row=0)
    entry = Entry(master)
    entry.grid(row=0, column=1)
    entry.focus_set()

    options_list = list(options)

    value_inside = StringVar(master)
    value_inside.set(placeholder)

    if options_list:
        question_menu = OptionMenu(master, value_inside, *options_list)
        question_menu.grid(row=1)

    def callback():
        nonlocal text
        text = entry.get()
        if text == "" and value_inside.get() != placeholder:
            text = value_inside.get()
        master.destroy()

    OK_button = Button(master, text = "OK", width = 10, command = callback)
    OK_button.grid(row=2)

    master.mainloop()
    print(text)
    return text

def manual_textbox():
    """Show the key-binding manual and block until the window is closed.

    Offers two buttons: one closes the window, the other first trains and
    saves a new model and then closes the window.
    """
    master = Tk()
    master.title("Manual")
    master.geometry('500x500')

    instructions = (
        'Press Q or Esc to exit program',
        'Press R to start recording keypoints',
        'Press Space to start Live sign recognition',
        'Press N to go back to plain live feed',
    )
    for row_index, line in enumerate(instructions):
        Label(master, text=line, anchor=W, width=50).grid(row=row_index)

    def callback():
        master.destroy()

    def callback_train():
        save_model(train_new_model())
        master.destroy()

    close_button = Button(master, text = "Proceed to live feed", command = callback)
    close_button.grid(row=4,column=1)
    train_button = Button(master, text = "Train updated model and proceed", command = callback_train)
    train_button.grid(row=4,column=0)

    mainloop()


    


# Supporting functions for GUI / drawing

In [None]:
from scipy import stats
from random import random

In [None]:
# Replacements for letters with diacritics, in capitalised and lowercase form.
# The dataset folders created in this file use the lowercase letters.
_CV_LABEL_TRANSLITERATION = {
    'Đ': 'Dj', 'đ': 'dj',
    'Ž': 'Zh', 'ž': 'zh',
    'Ć': 'Cj', 'ć': 'cj',
    'Č': 'Ch', 'č': 'ch',
    'Dž': 'Dzh', 'dž': 'dzh',
    'Š': 'Sh', 'š': 'sh',
}


def cv_printable_labels(label):
    """Return a plain-ASCII spelling of ``label`` for on-frame text.

    Labels that are one of the letters with diacritics are replaced by a
    digraph (e.g. ``'Đ'`` -> ``'Dj'``, ``'š'`` -> ``'sh'``); every other label
    is returned unchanged.
    """
    if not isinstance(label, str):
        return label
    return _CV_LABEL_TRANSLITERATION.get(label, label)

In [None]:
def probability_vizualization(res, sign_labels, input_frame):
    """Return a copy of ``input_frame`` with probability bars drawn on it.

    With more than five entries in ``res`` only the five largest are shown
    (in ``np.argpartition`` order); otherwise all entries are shown in index
    order. Bar colours are random, created on the first call (one per entry
    of ``sign_labels``) and cached on the function object.
    """
    palette = probability_vizualization.colors
    if palette is None:
        palette = [(  int(100+random()*155), int(100+random()*155), int(100+random()*155)) for _ in sign_labels]
        probability_vizualization.colors = palette
    output_frame = input_frame.copy()

    # (class index, probability) pairs in the order they are drawn.
    if len(res) > 5:
        top_indexes = np.argpartition(res, -5)[-5:]
        bars = list(zip(top_indexes, res[top_indexes]))
    else:
        bars = list(enumerate(res))

    for row, (label_index, prob) in enumerate(bars):
        top = 60 + row*40
        cv2.rectangle(output_frame, (0, top), (int(prob*100), top + 30), palette[label_index], -1)
        cv2.putText(output_frame, cv_printable_labels(sign_labels[label_index]), (0, top + 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame
probability_vizualization.colors = None

In [None]:
def ResizeWithAspectRatio(image, width=None, height=None, inter=cv2.INTER_AREA):
    """Resize ``image`` to the given width or height, keeping its aspect ratio.

    With neither dimension given the image is returned unchanged. When both
    are given, ``width`` decides and ``height`` is ignored.
    """
    src_height, src_width = image.shape[:2]

    if width is None and height is None:
        return image

    if width is not None:
        scale = width / float(src_width)
        target_size = (width, int(src_height * scale))
    else:
        scale = height / float(src_height)
        target_size = (int(src_width * scale), height)

    return cv2.resize(image, target_size, interpolation=inter)

In [None]:
#cv2.destroyAllWindows()

In [None]:
# Container exposing the three attributes that draw_styled_landmarks and
# extract_keypoints read.
_LandmarksResults = namedtuple("landmarks_results", "pose_landmarks left_hand_landmarks right_hand_landmarks")


def get_frame_draw_and_return_landmarks(cap,hands,pose):
    """Grab one camera frame, detect hands and pose, draw them, return keypoints.

    Args:
        cap: object with a ``read()`` method returning ``(ok, frame)``.
        hands: hand-tracking model passed to ``mediapipe_detection``.
        pose: pose model passed to ``mediapipe_detection``.

    Returns:
        (all_landmarks, image): the flattened keypoint vector from
        ``extract_keypoints`` and the mirrored frame with landmarks drawn.

    Raises:
        RuntimeError: when no frame could be read from ``cap``.
    """
    # Read feed
    ret, frame = cap.read()
    if not ret or frame is None:
        raise RuntimeError("Could not read a frame from the camera")
    image = cv2.flip(frame, 1)

    # Make detections
    image, hand_results = mediapipe_detection(image, hands)
    image, pose_results = mediapipe_detection(image, pose)

    # Sort the detected hands by their handedness label; with several hands
    # of the same label the last one wins.
    hands_by_label = {"Left": None, "Right": None}
    if hand_results.multi_handedness is not None:
        for handedness, hand_landmarks in zip(hand_results.multi_handedness, hand_results.multi_hand_landmarks):
            if handedness is None:
                continue
            label = handedness.classification[0].label
            if label in hands_by_label:
                hands_by_label[label] = hand_landmarks

    landmarks_results = _LandmarksResults(
        pose_landmarks=pose_results.pose_landmarks,
        left_hand_landmarks=hands_by_label["Left"],
        right_hand_landmarks=hands_by_label["Right"],
    )

    # Draw landmarks
    draw_styled_landmarks(image, landmarks_results)

    # Keypoints for recording / recognition
    all_landmarks = extract_keypoints(landmarks_results)

    return all_landmarks, image

In [None]:
def input_textbox_window(master,options):
    """Ask for a sign in a child window of ``master`` and return the answer.

    The user can type into the entry field or pick one of ``options`` from a
    drop-down (shown only when ``options`` is non-empty). Typed text wins; the
    drop-down choice is used when the entry is empty. Blocks until the child
    window is gone.

    Returns:
        The chosen text, or ``""`` when nothing was chosen or the window was
        closed without pressing OK.
    """
    placeholder = "Select an Option"
    result = ""

    dialog = Toplevel(master)
    dialog.title("Izabrati znak")
    dialog.geometry('300x300')

    Label(dialog, text='Unesi znak:').grid(row=0)
    entry = Entry(dialog)
    entry.grid(row=0, column=1)
    entry.focus_set()

    choices = list(options)
    selected = StringVar(dialog)
    selected.set(placeholder)
    if choices:
        OptionMenu(dialog, selected, *choices).grid(row=1)

    def on_ok():
        nonlocal result
        result = entry.get()
        if result == "":
            picked = selected.get()
            if picked != placeholder:
                result = picked
        dialog.destroy()

    Button(dialog, text = "OK", width = 10, command = on_ok).grid(row=2)

    dialog.wait_window()
    return result


# GUI main

In [None]:
from PIL import ImageTk, Image
from enum import Enum

# Seconds between pressing the start key and the first recorded frame
# (compared against time.time() differences in Mode.COUNTDOWN).
CountdownTime = 1.5

class Mode(Enum):
    """Operating modes handled by MainWindow.mode_processing."""
    LIVE = 0               # plain camera feed
    SELECT_SIGN = 1        # ask which sign is going to be recorded
    CAPTURE_MODE = 2       # sign chosen, waiting for the start key
    COUNTDOWN = 3          # countdown before the recording starts
    CAPTURING = 4          # keypoint frames are being saved to disk
    LIVE_RECOGNITION = 5   # model predictions on the live feed

class MainWindow:
    root = None
    text_label_value = None
    label_with_image = None
    cap = None
    mode = Mode.LIVE
    sequence = []
    sentence = []
    predictions = []
    threshold = 0.5
    model = None
    #model = load_model(path='let10sekv60epoha600.keras')
    sign_labels = None
    sign_label = ""
    saved_models_path = 'Saved Models'

    def __init__(self):
        self.main_window_start()

    def __del__(self):
        self.cap.release()
        cv2.destroyAllWindows()
        self.root = None
        self.text_label_value = None
        self.label_with_image = None
        self.cap = None
        self.mode = Mode.LIVE
        self.sequence = []
        self.sentence = []
        self.predictions = []
        self.threshold = 0.5
        self.model = None
        self.sign_labels = None
        self.sign_label = ""
        self.saved_models_path =  os.path.join(DATA_PATH,'Saved Models')
    
    
    def main_window_update_camera(root,label_with_image, image):
        #main_window_start.key = None
        photo_image = ImageTk.PhotoImage(image=Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGBA) ) ) 
        
        label_with_image.photo_image = photo_image
        label_with_image.configure(image=photo_image) 
        return main_window_start.key

    def mode_processing(self,image, all_landmarks):     
        match self.mode:
            case Mode.LIVE:
                
                cv2.putText(image, 'Live Mode', (50,50), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 3, cv2.LINE_AA)
            case Mode.SELECT_SIGN:
                
                cv2.putText(image, 'Select sign', (50,50), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 3, cv2.LINE_AA)
                if os.path.exists(DATA_PATH):
                    self.options = os.listdir(DATA_PATH)
                else:
                    self.options = []
                self.sign_label = input_textbox_window(self.root,self.options)
                print(self.sign_label)
                if self.sign_label != "":
                    self.mode = Mode.CAPTURE_MODE
            case Mode.CAPTURE_MODE:
    
                cv2.putText(image, 'Record frames for sign:{}'.format( cv_printable_labels( self.sign_label)  ), (50,50), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 3, cv2.LINE_AA)
                cv2.putText(image, "Press S to start, R to go back to sign selection", (100,100), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 3, cv2.LINE_AA)
                
                   
            case Mode.COUNTDOWN:
                
                cv2.putText(image, 'Record frames for sign:{}'.format( cv_printable_labels( self.sign_label)  ), (50,50), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 3, cv2.LINE_AA)
                cv2.putText(image, "{}".format(CountdownTime-int(time.time()-self.start_time)), (150,150), 
                               cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255, 0), 4, cv2.LINE_AA)
                if time.time()-self.start_time >= CountdownTime:
                    self.mode = Mode.CAPTURING
                    self.frame_number = 0
                    
            case Mode.CAPTURING:
                if self.frame_number == 0:
                    self.record_num = get_next_record_number_for_sign(self.sign_label)
                    os.makedirs(os.path.join(DATA_PATH, self.sign_label, str(self.record_num)))
                
                cv2.putText(image, 'Recording frame {}/30 for sign {}, video {}'.format(self.frame_number, cv_printable_labels(self.sign_label), self.record_num), (50,50), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 3, cv2.LINE_AA)
                
                #keypoints = extract_keypoints(landmarks_results)
                
                npy_path = os.path.join(DATA_PATH, self.sign_label, str(self.record_num), str(self.frame_number))
                np.save(npy_path, all_landmarks)
                
                self.frame_number+=1
                if self.frame_number>30:
                    self.frame_number = 0
                    self.mode = Mode.CAPTURE_MODE
                    
            case Mode.LIVE_RECOGNITION:
                self.sequence.append(all_landmarks)
                self.sequence = self.sequence[-30:]
                
                if self.model is None:
                    self.model = load_model(self.sign_labels)
                if len(self.sequence) == 30:
                    res = self.model.predict(np.expand_dims(self.sequence, axis=0))[0]
                    self.predictions.append(np.argmax(res))
                    last_10_predicts = np.unique(self.predictions[-10:])
                    
                    if last_10_predicts[0]==np.argmax(res) and last_10_predicts.shape[0] == 1: 
                        if res[np.argmax(res)] > self.threshold: 

                            if self.sign_labels[np.argmax(res)] != 'No_sign':
                                if len(self.sentence) > 0: 
                                    if self.sign_labels[np.argmax(res)] != self.sentence[-1]:
                                        self.sentence.append(self.sign_labels[np.argmax(res)])
                                else:
                                    self.sentence.append(self.sign_labels[np.argmax(res)])
        
                    if len(self.sentence) > 10: 
                        self.sentence = self.sentence[-10:]
                    #probabilities
                    image = probability_vizualization(res, self.sign_labels, image)
        
        #cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        #cv2.putText(image, ' '.join(sentence), (3,30), 
        #       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        self.tkinter_text_value.set(self.sentence)
    
        return image

    
 
    def main_window_start(self):
        if os.path.exists(DATA_PATH):
            self.sign_labels = np.array(os.listdir(DATA_PATH))
            
        
        self.cap = cv2.VideoCapture(0)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1200)
        self.root = Tk()
        self.tkinter_text_value = StringVar(self.root) 

  
        self.root.title("") 
        self.root.geometry('965x680+300+100') 
        self.tkinter_text_value.set("") 
        Label(self.root, text='Press Q or Esc to exit program',anchor=W, width=50).grid(row=0,sticky=W,columnspan=2)
        Label(self.root, text='Press R to start recording keypoints',anchor=W, width=50).grid(row=1,sticky=W,columnspan=2)
        Label(self.root, text='Press Space to start Live sign recognition(Load model first)',anchor=W, width=50).grid(row=2,sticky=W,columnspan=2)
        Label(self.root, text='Press N to go back to plain live feed',anchor=W, width=50).grid(row=3,sticky=W,columnspan=2)
        Label(self.root, text='Press B to delete a letter',anchor=W, width=50).grid(row=4,sticky=W,columnspan=2)
        Label(self.root, textvariable=self.tkinter_text_value, font=("Arial", 16, "bold"), bg='lightblue' ).grid(row=6,columnspan=5)
        
        self.label_with_image = Label(self.root)
        self.label_with_image.grid(row=7,columnspan=5)
    
    
        def callback_close():
            self.cap.release()
            cv2.destroyAllWindows()
            self.root.destroy()
            
    
        def callback_train():
            save_model(train_new_model())

        def clear_sentence():
            self.sentence = []

        def callback_load_model():
            #del self.model
            filename = input_textbox_window(self.root, os.listdir( self.saved_models_path ) )
            self.model = load_model(path = os.path.join(self.saved_models_path,filename) )


        
        train_button =  Button(self.root, text = "Train updated model and save it", command = callback_train)
        train_button.grid(row=5,column=0)
        clear_button = Button(self.root, text = "Clear sentence", command = clear_sentence)
        clear_button.grid(row=5,column=2)
        close_button = Button(self.root, text = "Close", command = callback_close)
        close_button.grid(row=5,column=3)
        load_model_button = Button(self.root, text = "Load model", command = callback_load_model)
        load_model_button.grid(row=5,column=1)

        def keypress_processing(event):
            key=event.char
            if key == 'q': #ord('q'):
                self.cap.release()
                cv2.destroyAllWindows()
                self.root.destroy()
            elif key == 'r': #ord('r'):
                self.mode = Mode.SELECT_SIGN
            elif key == 'n': #ord('n'):
                self.mode = Mode.LIVE
            elif key == ' ': #space
                self.mode = Mode.LIVE_RECOGNITION
            elif key == 's':
                 #start countdown and record
                self.start_time = time.time()
                self.mode = Mode.COUNTDOWN
            elif key == 'd':
                self.sentence = self.sentence[:-1]
                
          

        with mp_hands.Hands() as hands, mp_pose.Pose() as pose:
            def video_stream():
                all_landmarks, frame = get_frame_draw_and_return_landmarks(self.cap,hands,pose)

                frame = self.mode_processing(frame,all_landmarks)
                
                
                cv2image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
                img = Image.fromarray(cv2image)
                imgtk = ImageTk.PhotoImage(image=img,master=self.root)
                self.label_with_image.imgtk = imgtk
                self.label_with_image.configure(image=imgtk)
                self.label_with_image.after(1, video_stream) 
            

            self.root.bind('<Key>', keypress_processing)
            video_stream()
            self.root.mainloop()
        #end with
        self.cap.release()
        cv2.destroyAllWindows()
            
    

# Run main

In [None]:
# Constructing MainWindow runs the whole GUI (its __init__ calls
# main_window_start); the reference is dropped afterwards.
instance = MainWindow()
del instance