# Capturing a Live Video Stream

In [None]:
import tensorflow as tf

In [None]:
import cv2

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)

try:
    while cap.isOpened():
        # ret is False when no frame could be grabbed; stop in that case.
        ret, frame = cap.read()
        if not ret:
            break

        cv2.imshow('Live Stream', frame)

        # waitKey also pumps the GUI event loop; pressing 'q' ends the stream.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera back and close the window, including when the
    # loop is interrupted (e.g. KeyboardInterrupt from the notebook), so the
    # device is not left locked for the next cell.
    cap.release()
    cv2.destroyAllWindows()


# Adding a feature to the video capture that draws lines by tracking the hand and fingers with MediaPipe

In [None]:
pip install opencv-python mediapipe numpy

In [None]:
import cv2
import mediapipe as mp
import numpy as np

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Constants for window layout: the window shows the camera frame on the left
# half and the drawing canvas on the right half.
WINDOW_WIDTH = 1280
WINDOW_HEIGHT = 480
IMAGE_SIZE = (48, 48)
HALF_WIDTH = WINDOW_WIDTH // 2

# Number keys select the pen color (BGR) and the message printed on selection.
COLOR_KEYS = {
    ord('1'): ((0, 0, 255), "Switched to red color"),
    ord('2'): ((0, 255, 0), "Switched to green color"),
    ord('3'): ((255, 0, 0), "Switched to blue color"),
}

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open webcam.")
    # SystemExit rather than exit(): exit() is the interactive-shell helper
    # installed by the site module and is not meant for use in programs.
    raise SystemExit(1)

# Set up MediaPipe Hands (single hand only).
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

drawing = False      # True while a hand is being tracked
start_point = None   # previous fingertip position, or None at the start of a stroke
canvas = np.zeros((WINDOW_HEIGHT, HALF_WIDTH, 3), dtype=np.uint8)
draw_color = (0, 0, 255)  # Default color: Red (BGR format)
gesture_detection = True  # Flag to indicate hand gesture detection mode

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Stop instead of spinning forever when the camera stops
            # delivering frames.
            break

        # Fit the frame to the left half of the window and mirror it.
        frame = cv2.flip(cv2.resize(frame, (HALF_WIDTH, WINDOW_HEIGHT)), 1)

        if gesture_detection:
            # MediaPipe expects RGB input; OpenCV delivers BGR.
            results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Landmark coordinates are normalized; scale to pixels.
                    tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
                    x, y = int(tip.x * frame.shape[1]), int(tip.y * frame.shape[0])

                    if not drawing:
                        # A hand (re)appeared: start over on a fresh canvas.
                        canvas = np.zeros_like(frame)
                    drawing = True

                    # Connect the previous fingertip position to the current one.
                    if start_point is not None:
                        cv2.line(canvas, start_point, (x, y), draw_color, thickness=2)
                    start_point = (x, y)
            else:
                # Stop drawing if no hand is detected.
                drawing = False
                start_point = None
        else:
            # Tracking is paused in this mode and no other pointer source is
            # wired up in this cell. Forget the last fingertip so that the
            # next stroke does not start from a stale (or corner) position.
            start_point = None

        # Show camera frame and canvas side by side.
        cv2.imshow('Air Canvas', np.concatenate((frame, canvas), axis=1))

        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('s'):
            # Encode and write the grayscale canvas only on request; doing it
            # on every frame costs a disk write per frame.
            if cv2.imwrite('drawn_image.jpg', cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)):
                print("Image saved as 'drawn_image.jpg'")
            else:
                print("Could not save 'drawn_image.jpg'")
        elif key == ord('m'):
            gesture_detection = not gesture_detection
            if gesture_detection:
                print("Switched to gesture detection mode")
            else:
                print("Switched to drawing mode")
        elif key == ord('c'):
            canvas = np.zeros_like(frame)
            print("Cleared canvas")
        elif key in COLOR_KEYS:
            draw_color, color_message = COLOR_KEYS[key]
            print(color_message)
finally:
    # Release MediaPipe, the camera and the window even when the loop is
    # interrupted.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()



### challenges
One challenge we faced was hardware limitations while using MediaPipe for hand gesture detection, which caused the output drawings to appear as dotted lines. To overcome this, we implemented a traceback method to obtain continuous lines. This method likely involved modifying or optimizing the hand gesture detection algorithm to ensure smooth tracking of hand movements, leading to better results when drawing continuous lines.

# Hand Detection


In [None]:
import cv2
import mediapipe as mp

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open webcam.")
    # SystemExit rather than exit(): exit() is the interactive-shell helper
    # installed by the site module and is not meant for use in programs.
    raise SystemExit(1)

# Set up MediaPipe Hands (single hand only).
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Drawing styles (BGR) for the landmark dots and the connecting lines; built
# once because they do not change between frames.
landmark_style = mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2, circle_radius=4)
connection_style = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Stop instead of spinning forever when the camera stops
            # delivering frames.
            break

        # Mirror the frame so it behaves like a mirror for the user.
        frame = cv2.flip(frame, 1)

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # multi_hand_landmarks is None when no hand was found.
        for hand_landmarks in results.multi_hand_landmarks or ():
            # One styled pass draws both the landmarks and their connections;
            # a default-style pass underneath would be painted over by it.
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                      landmark_style, connection_style)

        cv2.imshow('Hand Gesture Detection', frame)

        # Quit if 'q' key is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release MediaPipe, the camera and the window even when the loop is
    # interrupted.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import numpy as np

# Constants for window layout: camera frame on the left half, canvas on the
# right half.
WINDOW_WIDTH = 1280
WINDOW_HEIGHT = 480
HALF_WIDTH = WINDOW_WIDTH // 2

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open webcam.")
    # SystemExit rather than exit(): exit() is the interactive-shell helper
    # installed by the site module and is not meant for use in programs.
    raise SystemExit(1)

drawing = False      # toggled with the 'd' (start) and 's' (stop) keys
start_point = None   # last pointer position, or None before the first one
canvas = np.zeros((WINDOW_HEIGHT, HALF_WIDTH, 3), dtype=np.uint8)
draw_color = (0, 0, 255)  # Default color: Red (BGR format)
gesture_detection = True  # Flag to indicate hand gesture detection mode

# Set up MediaPipe Hands (single hand only).
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Drawing styles (BGR) for landmark dots and connection lines; built once
# because they do not change between frames.
landmark_style = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4)
connection_style = mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Stop instead of spinning forever when the camera stops
            # delivering frames.
            break

        # Fit the frame to the left half of the window and mirror it.
        frame = cv2.flip(cv2.resize(frame, (HALF_WIDTH, WINDOW_HEIGHT)), 1)

        if gesture_detection:
            # Gesture detection mode: only visualise the detected hand.
            # The detector runs only here because its result is unused in
            # drawing mode.
            results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            for hand_landmarks in results.multi_hand_landmarks or ():
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                          landmark_style, connection_style)
        else:
            # Drawing mode. No pointer source is wired up in this cell, so the
            # pointer stays at its last position (the origin initially).
            x, y = start_point if start_point is not None else (0, 0)

            # Only draw once a previous point exists; cv2.line must not be
            # handed a None start point.
            if drawing and start_point is not None:
                cv2.line(canvas, start_point, (x, y), draw_color, thickness=2)
            start_point = (x, y)

        # Show camera frame and canvas side by side.
        cv2.imshow('Air Canvas', np.concatenate((frame, canvas), axis=1))

        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('m'):
            gesture_detection = not gesture_detection
            if gesture_detection:
                print("Switched to gesture detection mode")
            else:
                print("Switched to drawing mode")
        elif key == ord('c'):
            # Clear the canvas.
            canvas = np.zeros((WINDOW_HEIGHT, HALF_WIDTH, 3), dtype=np.uint8)
        elif key == ord('d'):
            # Start drawing.
            drawing = True
        elif key == ord('s'):
            # Stop drawing.
            drawing = False
finally:
    # Release MediaPipe, the camera and the window even when the loop is
    # interrupted.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


       


### challenges
One of the challenges we faced in this code was achieving real-time performance for both hand gesture detection and drawing on the canvas. Continuously processing video frames and running MediaPipe Hands for gesture detection while simultaneously updating the canvas in drawing mode can cause performance bottlenecks. To overcome this, we used efficient asynchronous processing techniques so that hand gesture detection and drawing can be executed concurrently, ensuring a smooth and responsive user experience.



### Showing Landmarks in Gesture mode and able to draw in Drawing mode


In [None]:
import cv2
import mediapipe as mp
import numpy as np

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Constants for window layout: camera frame on the left half, canvas on the
# right half.
WINDOW_WIDTH = 1280
WINDOW_HEIGHT = 480
FRAME_SIZE = (WINDOW_WIDTH // 2, WINDOW_HEIGHT)  # (width, height) of each half

# Number keys select the pen color (BGR) and the message printed on selection.
COLOR_KEYS = {
    ord('1'): ((0, 0, 255), "Switched to red color"),
    ord('2'): ((0, 255, 0), "Switched to green color"),
    ord('3'): ((255, 0, 0), "Switched to blue color"),
}

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open webcam.")
    # SystemExit rather than exit(): exit() is the interactive-shell helper
    # installed by the site module and is not meant for use in programs.
    raise SystemExit(1)

# Set up MediaPipe Hands (single hand only).
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

drawing = False      # toggled with the 'd' (start) and 's' (stop) keys
start_point = None   # previous fingertip position, or None if unknown
canvas = np.zeros((WINDOW_HEIGHT, WINDOW_WIDTH // 2, 3), dtype=np.uint8)
draw_color = (0, 0, 255)  # Default color: Red (BGR format)
gesture_detection = True  # Flag to indicate hand gesture detection mode

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Stop instead of spinning forever when the camera stops
            # delivering frames.
            break

        # Fit the frame to the left half of the window and mirror it.
        frame = cv2.flip(cv2.resize(frame, FRAME_SIZE), 1)
        frame_height, frame_width = frame.shape[:2]

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # multi_hand_landmarks is None when no hand was found.
        for hand_landmarks in results.multi_hand_landmarks or ():
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            if not gesture_detection:
                continue

            # Landmark coordinates are normalized; scale to pixels and draw a
            # green bounding box around the hand.
            landmarks = hand_landmarks.landmark
            xs = [landmark.x * frame_width for landmark in landmarks]
            ys = [landmark.y * frame_height for landmark in landmarks]
            cv2.rectangle(frame, (int(min(xs)), int(min(ys))), (int(max(xs)), int(max(ys))), (0, 255, 0), 2)

            # The fingertip is located on every frame, not only while drawing,
            # so that start_point is always defined and a new stroke begins
            # where the finger currently is.
            tip = landmarks[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            fingertip = (int(tip.x * frame_width), int(tip.y * frame_height))

            if drawing and start_point is not None:
                cv2.line(canvas, start_point, fingertip, draw_color, thickness=2)
            start_point = fingertip

        # Show camera frame and canvas side by side.
        cv2.imshow('Air Canvas', np.concatenate((frame, canvas), axis=1))

        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('m'):
            gesture_detection = not gesture_detection
            # The fingertip is not followed outside gesture mode, so forget
            # the last position instead of connecting to it later.
            start_point = None
            if gesture_detection:
                print("Switched to gesture detection mode")
            else:
                print("Switched to drawing mode")
        elif key == ord('d'):
            drawing = True
            print("Started drawing")
        elif key == ord('s'):
            drawing = False
            print("Stopped drawing")
        elif key == ord('c'):
            canvas = np.zeros_like(frame)
            print("Cleared canvas")
        elif key in COLOR_KEYS:
            draw_color, color_message = COLOR_KEYS[key]
            print(color_message)
finally:
    # Release MediaPipe, the camera and the window even when the loop is
    # interrupted.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import cv2
import mediapipe as mp
import numpy as np

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Constants for window layout: the capture loop below resizes each frame to
# half of WINDOW_WIDTH and shows it next to an equally sized canvas.
WINDOW_WIDTH = 1280
WINDOW_HEIGHT = 480

# Open the default webcam (device index 0).
cap = cv2.VideoCapture(0)

# Abort the cell if the webcam could not be opened.
if not cap.isOpened():
    print("Could not open webcam.")
    # SystemExit rather than exit(): exit() is the interactive-shell helper
    # installed by the site module and is not meant for use in programs.
    raise SystemExit(1)

# Set up MediaPipe Hands (single hand only).
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Drawing state used by the capture loop below.
drawing = False      # toggled with the 'd' (start) and 's' (stop) keys
start_point = None   # previous fingertip position, or None if unknown
canvas = np.zeros((WINDOW_HEIGHT, WINDOW_WIDTH // 2, 3), dtype=np.uint8)  # Initialize canvas
draw_color = (0, 0, 255)  # Default color: Red (BGR format)
gesture_detection = True  # Flag to indicate hand gesture detection mode

def save_image(image, path="drawn_image.jpg", size=(48, 48)):
    """Save a BGR image as a small grayscale file.

    Args:
        image: BGR image array, e.g. the drawing canvas.
        path: Destination file name. Defaults to "drawn_image.jpg".
        size: (width, height) of the saved image. Defaults to (48, 48).

    Returns:
        The value returned by cv2.imwrite (True when the file was written).

    Raises:
        ValueError: If image is None or has no pixels.
    """
    # Fail with a clear message instead of an OpenCV assertion deep inside
    # cvtColor.
    if image is None or np.size(image) == 0:
        raise ValueError("save_image() needs a non-empty image")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    shrunk = cv2.resize(gray, size)

    # Hand the write result back so callers can notice a failed save.
    return cv2.imwrite(path, shrunk)

# (width, height) of the camera frame shown in the left half of the window.
frame_size = (int(WINDOW_WIDTH / 2), WINDOW_HEIGHT)

# Number keys select the pen color (BGR) and the message printed on selection.
color_keys = {
    ord('1'): ((0, 0, 255), "Switched to red color"),
    ord('2'): ((0, 255, 0), "Switched to green color"),
    ord('3'): ((255, 0, 0), "Switched to blue color"),
}

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Stop instead of spinning forever when the camera stops
            # delivering frames.
            break

        # Fit the frame to the left half of the window and mirror it.
        frame = cv2.flip(cv2.resize(frame, frame_size), 1)
        frame_height, frame_width = frame.shape[:2]

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # multi_hand_landmarks is None when no hand was found.
        for hand_landmarks in results.multi_hand_landmarks or ():
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            if not gesture_detection:
                continue

            # Landmark coordinates are normalized; scale to pixels and draw a
            # green bounding box around the hand.
            landmarks = hand_landmarks.landmark
            xs = [landmark.x * frame_width for landmark in landmarks]
            ys = [landmark.y * frame_height for landmark in landmarks]
            cv2.rectangle(frame, (int(min(xs)), int(min(ys))), (int(max(xs)), int(max(ys))), (0, 255, 0), 2)

            # The fingertip is located on every frame, not only while drawing,
            # so that start_point is always defined and a new stroke begins
            # where the finger currently is.
            tip = landmarks[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            fingertip = (int(tip.x * frame_width), int(tip.y * frame_height))

            if drawing and start_point is not None:
                cv2.line(canvas, start_point, fingertip, draw_color, thickness=2)
            start_point = fingertip

        # Show camera frame and canvas side by side.
        cv2.imshow('Air Canvas', np.concatenate((frame, canvas), axis=1))

        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('m'):
            gesture_detection = not gesture_detection
            # The fingertip is not followed outside gesture mode, so forget
            # the last position instead of connecting to it later.
            start_point = None
            if gesture_detection:
                print("Switched to gesture detection mode")
            else:
                print("Switched to drawing mode")
        elif key == ord('d'):
            drawing = True
            print("Started drawing")
        elif key == ord('s'):
            # Stopping a drawing also saves it to disk.
            drawing = False
            print("Stopped drawing")
            save_image(canvas)
        elif key == ord('c'):
            canvas = np.zeros_like(frame)
            print("Cleared canvas")
        elif key in color_keys:
            draw_color, color_message = color_keys[key]
            print(color_message)
finally:
    # Release MediaPipe, the camera and the window even when the loop is
    # interrupted.
    hands.close()
    cap.release()
    cv2.destroyAllWindows()


### Comment:
Two additional challenges we encountered were enhancing the image-saving functionality and expanding the color selection options. We introduced the ability to save the drawn image by pressing the 's' key; however, in the current implementation the image is overwritten each time the user presses 's'. To enhance this feature, we added options to save new drawings to existing images. We also defined three predefined colors (red, green, and blue) for drawing.

# Model that draws and makes predictions

In [None]:
pip install tensorflow


In [None]:
!pip show tensorflow

In [None]:
!pip install --upgrade tensorflow

In [None]:
pip install tensorflow


In [None]:
pip install mediapipe

In [None]:
pip install opencv-python


In [None]:
pip install mediapipe

In [1]:
import tensorflow as tf
from tensorflow import keras
import cv2
import mediapipe as mp
import numpy as np
from skimage.transform import resize
from PIL import Image





In [2]:
import cv2
import mediapipe as mp
import numpy as np
from skimage.transform import resize
from tensorflow import keras
from PIL import Image

In [3]:
# Shortcuts to MediaPipe's drawing utilities and its Hands solution module.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

In [4]:
# Constants for window layout
# (the capture loop in a later cell resizes each frame to half of WINDOW_WIDTH
# and shows it next to an equally sized canvas)
WINDOW_WIDTH = 1280
WINDOW_HEIGHT = 480

In [5]:
# Open video capture for webcam (device index 0)
cap = cv2.VideoCapture(0)

In [6]:
# Check if the webcam is successfully opened
# NOTE(review): exit() is the interactive-shell helper; in a notebook it may
# end the whole session rather than just this cell — confirm this is intended.
if not cap.isOpened():
    print("Could not open webcam.")
    exit()

In [7]:
# Set up MediaPipe Hands (at most one hand, 0.5 confidence thresholds)
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.5, min_tracking_confidence=0.5)

In [8]:
# Drawing state read and updated by the capture loop in a later cell.
drawing = False
start_point = None
canvas = np.zeros((WINDOW_HEIGHT, int(WINDOW_WIDTH / 2), 3), dtype=np.uint8)  # Initialize canvas
draw_color = (0, 0, 255)  # Default color: Red (BGR format)
gesture_detection = True  # Flag to indicate hand gesture detection mode

In [9]:
# Load your trained VGG16-based model
# The path 'model' is relative to the current working directory.
# NOTE(review): the architecture (VGG16-based) cannot be verified from this
# file; the preprocessing cells feed it arrays of shape (1, 48, 48, 3).
model = keras.models.load_model('model')

In [10]:
def preprocess_image(image):
    """Turn an image array into a (1, 48, 48, 3) float32 batch.

    NOTE(review): a later cell (In [12]) defines preprocess_image again, so
    when the cells run in order this version is replaced before the capture
    loop calls it.

    Args:
        image: NumPy image array; 2-D (grayscale) and 3-D inputs are handled
            by separate branches below.

    Returns:
        The image as float32, divided by 255 and reshaped to (1, 48, 48, 3).
    """
    # Float images are scaled by 255 and cast to uint8
    # (assumes float pixels are in [0, 1] — TODO confirm).
    if image.dtype == np.float64:
        image = (image * 255).astype(np.uint8)
    elif image.dtype == np.float32:
        image = (image * 255).astype(np.uint8)

    # Resize the image to match the input size of the model
    # NOTE(review): skimage's resize presumably returns a float image rather
    # than uint8 — confirm. If so, the uint8 cast above is undone here and the
    # Image.fromarray(..., mode='RGB') calls below do not receive 8-bit data.
    image = resize(image, (48, 48), anti_aliasing=True)

    # Check if the image is grayscale
    if len(image.shape) == 2:
        # Replicate the single channel three times, then wrap it in a PIL image
        image = np.stack((image,) * 3, axis=-1)
        image = Image.fromarray(image.squeeze(), mode='RGB')
    elif image.shape[0] == 1 and image.shape[1] == 1:
        # Handle single-pixel image
        # NOTE(review): after the resize to (48, 48) above this condition
        # looks unreachable — confirm.
        image = np.tile(image, (48, 48, 3))
        image = Image.fromarray(image.squeeze(), mode='RGB')
    else:
        # Convert the image to RGB using PIL
        image = Image.fromarray(image, mode='RGB')

    # Convert the image to numpy array
    image = np.array(image)

    # Normalize the pixel values
    image = image.astype('float32') / 255.0

    # Reshape the image to match the input shape of the model
    # (adds the leading batch dimension)
    image = np.reshape(image, (1, 48, 48, 3))

    return image

In [11]:
def save_image(image, path="drawn_image.jpg", size=(48, 48)):
    """Save a BGR image as a small grayscale file.

    Args:
        image: BGR image array, e.g. the drawing canvas.
        path: Destination file name. Defaults to "drawn_image.jpg".
        size: (width, height) of the saved image. Defaults to (48, 48).

    Returns:
        The value returned by cv2.imwrite (True when the file was written).

    Raises:
        ValueError: If image is None or has no pixels.
    """
    # Fail with a clear message instead of an OpenCV assertion deep inside
    # cvtColor.
    if image is None or np.size(image) == 0:
        raise ValueError("save_image() needs a non-empty image")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    shrunk = cv2.resize(gray, size)

    # Hand the write result back so callers can notice a failed save.
    return cv2.imwrite(path, shrunk)

In [12]:
# Preprocess the input image
def preprocess_image(image):
    """Convert an image into the (1, 48, 48, 3) float32 batch fed to the model.

    Args:
        image: BGR image (as returned by cv2.imread) or a 2-D grayscale image.

    Returns:
        float32 array of shape (1, 48, 48, 3) with values divided by 255. The
        three channels are identical because the image is reduced to
        grayscale first.

    Raises:
        ValueError: If image is None (cv2.imread returns None for a file it
            cannot read) or has no pixels.
    """
    if image is None or np.size(image) == 0:
        raise ValueError("preprocess_image() needs a non-empty image")

    # Resize to the 48x48 input size expected by the model.
    small = cv2.resize(image, (48, 48))

    # Drop the color information; an image that is already single-channel
    # (2-D) is used as it is.
    if small.ndim == 3:
        small = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    # Replicate the gray channel to the three channels the model takes.
    image_rgb = cv2.cvtColor(small, cv2.COLOR_GRAY2RGB)

    # Add the batch dimension and scale the pixel values by 1/255.
    return np.expand_dims(image_rgb, axis=0).astype('float32') / 255.0

In [13]:
# (width, height) of the camera frame shown in the left half of the window.
frame_size = (int(WINDOW_WIDTH / 2), WINDOW_HEIGHT)

# Number keys select the pen color (BGR) and the message printed on selection.
color_keys = {
    ord('0'): ((255, 255, 255), "Switched to white color"),
    ord('1'): ((0, 0, 255), "Switched to red color"),
    ord('2'): ((0, 255, 0), "Switched to green color"),
    ord('3'): ((255, 0, 0), "Switched to blue color"),
}

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # Stop instead of spinning forever when the camera stops
            # delivering frames (e.g. when this cell is re-run after the
            # capture has been released).
            break

        # Fit the frame to the left half of the window and mirror it.
        frame = cv2.flip(cv2.resize(frame, frame_size), 1)
        frame_height, frame_width = frame.shape[:2]

        # MediaPipe expects RGB input; OpenCV delivers BGR.
        results = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # multi_hand_landmarks is None when no hand was found.
        for hand_landmarks in results.multi_hand_landmarks or ():
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            if not gesture_detection:
                continue

            # Landmark coordinates are normalized; scale to pixels and draw a
            # green bounding box around the hand.
            landmarks = hand_landmarks.landmark
            xs = [landmark.x * frame_width for landmark in landmarks]
            ys = [landmark.y * frame_height for landmark in landmarks]
            cv2.rectangle(frame, (int(min(xs)), int(min(ys))), (int(max(xs)), int(max(ys))), (0, 255, 0), 2)

            # Follow the fingertip on every frame, not only while drawing.
            # That way a stroke starts where the finger is when 'd' is
            # pressed, instead of at an undefined point or at the end of the
            # previous stroke.
            tip = landmarks[mp_hands.HandLandmark.INDEX_FINGER_TIP]
            fingertip = (int(tip.x * frame_width), int(tip.y * frame_height))

            if drawing and start_point is not None:
                cv2.line(canvas, start_point, fingertip, draw_color, thickness=20)
            start_point = fingertip

        # Show camera frame and canvas side by side.
        cv2.imshow('Air Canvas', np.concatenate((frame, canvas), axis=1))

        key = cv2.waitKey(1) & 0xFF

        if key == ord('q'):
            break

        if key == ord('m'):
            gesture_detection = not gesture_detection
            # The fingertip is not followed outside gesture mode, so forget
            # the last position instead of connecting to it later.
            start_point = None
            if gesture_detection:
                print("Switched to gesture detection mode")
            else:
                print("Switched to drawing mode")
        elif key == ord('d'):
            drawing = True
            print("Started drawing")
        elif key == ord('s'):
            # Stopping a drawing also saves it to disk for prediction.
            drawing = False
            print("Stopped drawing")
            save_image(canvas)
        elif key == ord('c'):
            canvas = np.zeros_like(frame)
            print("Cleared canvas")
        elif key in color_keys:
            draw_color, color_message = color_keys[key]
            print(color_message)
        elif key == ord('g'):
            # Predict from the drawing last saved with 's'. cv2.imread returns
            # None when the file is missing or unreadable.
            input_image = cv2.imread('drawn_image.jpg')
            if input_image is None:
                print("No saved drawing found. Press 's' to save one first.")
            else:
                prediction = model.predict(preprocess_image(input_image))
                # The index of the highest score is reported as the number.
                print("Predicted Number:", str(np.argmax(prediction)))
finally:
    # Release the camera and the window even when the loop is interrupted.
    cap.release()
    cv2.destroyAllWindows()


Started drawing
Switched to green color
Cleared canvas
Stopped drawing
Predicted Number: 1
Cleared canvas
Started drawing
Cleared canvas
Cleared canvas
Stopped drawing
Predicted Number: 1
Cleared canvas
Started drawing
Cleared canvas
Stopped drawing
Predicted Number: 5
Cleared canvas
Started drawing
Cleared canvas
Stopped drawing
Predicted Number: 1
Cleared canvas
Started drawing
Cleared canvas
Stopped drawing
Predicted Number: 6
Cleared canvas
Started drawing
Cleared canvas
Stopped drawing
Predicted Number: 8
Cleared canvas
Started drawing
Cleared canvas
Cleared canvas
Stopped drawing
Predicted Number: 8
Cleared canvas
Started drawing
Stopped drawing
Predicted Number: 6
Cleared canvas
Started drawing
Cleared canvas
Stopped drawing
Predicted Number: 6
Cleared canvas
Started drawing
Cleared canvas
Cleared canvas
Stopped drawing
Predicted Number: 6


### Challenges:
One challenge was to correctly preprocess the drawn image so that it could be fed into the trained model for prediction. The function preprocess_image was introduced to resize and normalize the image to match the input size and data format expected by the model. We also included an option to perform real-time prediction by pressing the 'g' key. However, successfully integrating the prediction process into the main loop while maintaining a smooth user experience required careful handling. To achieve this, we first saved the drawn image using the save_image function, then loaded and preprocessed the saved image for prediction. After making a prediction with the trained model, we obtained the predicted class and converted it to text for display. Ensuring proper synchronization of image saving and prediction within the real-time loop was essential to achieve smooth and accurate predictions without impacting the overall user experience.