In [None]:
%pip install opencv-python 

In [1]:
import cv2
import numpy as np

### Testing Live Feed

In [2]:
import cv2

# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)

try:
    # Stream frames until Esc is pressed or the camera stops delivering frames.
    while True:
        ok, frame = cap.read()
        if not ok:
            # read() reported failure, so there is no valid frame to display;
            # stop instead of passing an invalid frame to imshow.
            print("Could not read a frame from the camera; stopping.")
            break
        cv2.imshow('Live', frame)  # Show the frame in a window titled 'Live'
        if cv2.waitKey(1) == 27:  # Poll the keyboard for 1 ms; key code 27 is Esc
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

2025-03-09 13:11:38.101 python[3000:49819] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-09 13:11:38.101 python[3000:49819] +[IMKInputSession subclass]: chose IMKInputSession_Modern


In [3]:
%pip install mediapipe

Note: you may need to restart the kernel to use updated packages.


### Drawing using Index Finger

In [4]:
#import and load the model
import mediapipe as mp

# Keep a handle on MediaPipe's hands solution module (also used later for
# HAND_CONNECTIONS) and build a tracker limited to one hand per frame.
hands = mp.solutions.hands
hand_landmark = hands.Hands(
    max_num_hands=1,
)

I0000 00:00:1741506150.712353   49819 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1741506150.724303   51091 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1741506150.732488   51091 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [5]:
# Camera frame resolution as (rows, columns, channels). Later cells scale the
# landmark x values by frame_shape[1] and y values by frame_shape[0].
frame_shape = (1080, 1920, 3)

In [6]:

prevxy = None  # Fingertip position from the previous frame in which a hand was seen
# Black image with the same shape as a frame; strokes drawn here persist across frames
mask = np.zeros(frame_shape, dtype='uint8')

colour = [123, 34, 90]
thickness = 4

# Access the camera
cap = cv2.VideoCapture(0)
draw = mp.solutions.drawing_utils

try:
    # Read frames from the camera until Esc is pressed or reading fails
    while True:
        ok, frame = cap.read()  # BGR format
        if not ok:
            # No valid frame was returned; stop rather than crash in cv2.flip.
            print("Could not read a frame from the camera; stopping.")
            break
        frame = cv2.flip(frame, 1)  # Flip horizontally (mirror effect)

        # The mask is allocated with frame_shape; bring the frame to the same
        # size so the merge below works for cameras with another resolution.
        if frame.shape[:2] != frame_shape[:2]:
            frame = cv2.resize(frame, (frame_shape[1], frame_shape[0]))

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        op = hand_landmark.process(rgb)  # Hand landmark detection result

        if op.multi_hand_landmarks:
            for all_landmarks in op.multi_hand_landmarks:
                # Plot all detected points of the hand on the frame
                draw.draw_landmarks(frame, all_landmarks, hands.HAND_CONNECTIONS)

                # Landmark 8 scaled by frame width/height gives the pixel position
                x = int(all_landmarks.landmark[8].x * frame_shape[1])
                y = int(all_landmarks.landmark[8].y * frame_shape[0])

                if prevxy is not None:
                    # Connect the previous and current positions on the mask
                    cv2.line(mask, prevxy, (x, y), colour, thickness)
                prevxy = (x, y)

        # Merge frame and mask: wherever the mask is non-zero it replaces the frame
        frame = np.where(mask, mask, frame)

        cv2.imshow('Live', frame)  # Show the frame in a window titled 'Live'
        if cv2.waitKey(1) == 27:  # Poll the keyboard for 1 ms; key code 27 is Esc
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

W0000 00:00:1741506162.695559   51093 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


<H2>Toolbar</H2>

In [7]:
#adding the toolbar to the frame
import cv2
# Location of the toolbar image on disk
TOOLBAR_IMAGE_PATH = "/Users/sachin/Downloads/tool.png"

tools = cv2.imread(TOOLBAR_IMAGE_PATH)
if tools is None:
    # cv2.imread signals a missing/unreadable file by returning None; fail here
    # with a clear message instead of an opaque error inside cv2.resize.
    raise FileNotFoundError(
        "Could not load toolbar image: {}".format(TOOLBAR_IMAGE_PATH)
    )

# Resize the toolbar to 600 px wide by 100 px tall for better visibility
tools = cv2.resize(tools, (600, 100))

# Make sure the toolbar array uses the unsigned 8-bit integer (uint8) data type
tools = tools.astype('uint8')
print(tools.shape)

(100, 600, 3)


In [8]:
# Toolbar placement: a strip 100 rows tall and 600 columns wide, centred
# horizontally on a 1920-column frame.
midCol = 1920 // 2  # Middle column of the frame
max_row = 100  # Toolbar height (exclusive bottom row)
min_col, max_col = midCol - 300, midCol + 300  # Left / right boundaries

In [9]:
#Testing the tool bar in the live feed
import cv2
cap = cv2.VideoCapture(0)

try:
    while True:
        ok, frame = cap.read()
        if not ok:
            # No valid frame was returned; stop rather than crash in cv2.flip.
            print("Could not read a frame from the camera; stopping.")
            break
        frame = cv2.flip(frame, 1)

        # The toolbar columns are computed for a frame_shape-sized image; resize
        # other camera resolutions so the slice below always fits the toolbar.
        if frame.shape[:2] != frame_shape[:2]:
            frame = cv2.resize(frame, (frame_shape[1], frame_shape[0]))

        # Paste the toolbar over the top-centre region of the frame
        frame[0:max_row, min_col:max_col] = tools

        cv2.imshow('Live', frame)

        if cv2.waitKey(10) == 27:  # Poll the keyboard for 10 ms; key code 27 is Esc
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
# One more short key poll after cleanup, kept from the original cell
# (presumably lets the GUI process the window close on some platforms).
cv2.waitKey(1)

-1

<H1>Selection in Toolbar</H1>

In [10]:
#Draw only when the index and middle fingers are close(less than 60 pixels)

#Check if distance between two points is less than 60 pixels
def get_is_clicked(point1, point2, threshold=60):
    """Report whether two points are close enough to count as a "click".

    Args:
        point1: (x, y) coordinates of the first point.
        point2: (x, y) coordinates of the second point.
        threshold: Distance below which the points count as touching.
            Defaults to 60, the value used throughout this notebook.

    Returns:
        True if the Euclidean distance between the points is strictly less
        than ``threshold``, otherwise False.
    """
    (x1, y1) = point1
    (x2, y2) = point2

    # Distance formula: square root of the summed squared coordinate differences
    dis = np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

    # Convert the NumPy comparison result to a plain Python bool
    return bool(dis < threshold)
    
#Determine the tool selected by user
def get_Tool(point, prev_tool):
    """Return the tool under ``point``, or ``prev_tool`` outside the toolbar.

    Args:
        point: (x, y) coordinate to test against the toolbar region.
        prev_tool: Tool name to keep when the point is not on the toolbar.

    Returns:
        One of "line", "rectangle", "draw", "circle" or "erase" when the point
        lies inside the toolbar (min_col < x < max_col and y < max_row);
        otherwise ``prev_tool`` unchanged.
    """
    x, y = point

    # Outside the toolbar strip: the previously selected tool stays active.
    on_toolbar = min_col < x < max_col and y < max_row
    if not on_toolbar:
        return prev_tool

    # Each entry is (right edge offset from min_col, tool name); the first
    # slot whose right edge lies beyond x is the selected tool.
    slots = (
        (120, "line"),
        (240, "rectangle"),
        (360, "draw"),
        (480, "circle"),
        (600, "erase"),
    )
    for right_edge, tool_name in slots:
        if x < right_edge + min_col:
            curr_tool = tool_name
            break
    return curr_tool
    
        

### Drawing the Rectangle

In [11]:
prevxy = None  # Index fingertip position from the previous frame in which a hand was seen
# Black image with the same shape as a frame; shapes drawn here persist across frames
mask = np.zeros(frame_shape, dtype='uint8')

colour = [125, 100, 140]
thickness = 4
curr_tool = 'draw'
start_point = None  # First corner of the rectangle currently being dragged, if any

# Access the camera
cap = cv2.VideoCapture(0)
draw = mp.solutions.drawing_utils

try:
    # Read frames from the camera until Esc is pressed or reading fails
    while True:
        ok, frame = cap.read()  # BGR format
        if not ok:
            # No valid frame was returned; stop rather than crash in cv2.flip.
            print("Could not read a frame from the camera; stopping.")
            break
        frame = cv2.flip(frame, 1)  # Flip horizontally (mirror effect)

        # The mask and toolbar columns assume a frame_shape-sized image; resize
        # other camera resolutions so the merge and toolbar paste below fit.
        if frame.shape[:2] != frame_shape[:2]:
            frame = cv2.resize(frame, (frame_shape[1], frame_shape[0]))

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        op = hand_landmark.process(rgb)  # Hand landmark detection result

        # Check if a hand is in the frame
        if op.multi_hand_landmarks:
            for all_landmarks in op.multi_hand_landmarks:
                # Plot all detected points of the hand on the frame
                draw.draw_landmarks(frame, all_landmarks, hands.HAND_CONNECTIONS)

                # Index finger location (landmark 8) in pixels
                x = int(all_landmarks.landmark[8].x * frame_shape[1])
                y = int(all_landmarks.landmark[8].y * frame_shape[0])

                # Middle finger location (landmark 12) in pixels
                middle_x = int(all_landmarks.landmark[12].x * frame_shape[1])
                middle_y = int(all_landmarks.landmark[12].y * frame_shape[0])

                # A "click" is the two fingertips being close together
                is_clicked = get_is_clicked((x, y), (middle_x, middle_y))
                curr_tool = get_Tool((x, y), curr_tool)

                # Select tool and draw for that
                if curr_tool == 'draw':
                    # Connect previous and current index finger locations
                    if is_clicked and prevxy is not None:
                        cv2.line(mask, prevxy, (x, y), colour, thickness)
                elif curr_tool == 'rectangle':
                    if is_clicked and start_point is None:
                        # Click started: remember the first corner
                        start_point = (x, y)
                    elif is_clicked:
                        # Still clicking: temporary rectangle on this frame only
                        cv2.rectangle(frame, start_point, (x, y), colour, thickness)
                    elif start_point is not None:
                        # Click released: commit the rectangle to the mask
                        cv2.rectangle(mask, start_point, (x, y), colour, thickness)
                        start_point = None

                prevxy = (x, y)

        # Merge frame and mask: wherever the mask is non-zero it replaces the frame
        frame = np.where(mask, mask, frame)

        # Paste the toolbar over the top-centre region of the frame
        frame[0:max_row, min_col:max_col] = tools

        cv2.imshow('Live', frame)  # Show the frame in a window titled 'Live'
        if cv2.waitKey(1) == 27:  # Poll the keyboard for 1 ms; key code 27 is Esc
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

### Initialising all the tools

In [12]:

# Black image with the same shape as a frame; shapes drawn here persist across frames
mask = np.zeros(frame_shape, dtype='uint8')
colour = (125, 100, 140)
thickness = 4
curr_tool = 'draw'
start_point = None  # Anchor point of the line/rectangle/circle being dragged, if any

cap = cv2.VideoCapture(0)
# Defined here as well so this cell does not rely on an earlier cell for `draw`
draw = mp.solutions.drawing_utils
prevxy = None  # Index fingertip position from the previous frame in which a hand was seen

try:
    while True:
        ok, frame = cap.read()
        if not ok:
            # No valid frame was returned; stop rather than crash in cv2.flip.
            print("Could not read a frame from the camera; stopping.")
            break
        frame = cv2.flip(frame, 1)

        # The mask and toolbar columns assume a frame_shape-sized image; resize
        # other camera resolutions so the merge and toolbar paste below fit.
        if frame.shape[:2] != frame_shape[:2]:
            frame = cv2.resize(frame, (frame_shape[1], frame_shape[0]))

        # Preprocess image
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        op = hand_landmark.process(rgb)

        # Check if a hand is in the frame
        if op.multi_hand_landmarks:
            for all_landmarks in op.multi_hand_landmarks:
                draw.draw_landmarks(frame, all_landmarks, hands.HAND_CONNECTIONS)

                # Index finger location (landmark 8) in pixels
                x = int(all_landmarks.landmark[8].x * frame_shape[1])
                y = int(all_landmarks.landmark[8].y * frame_shape[0])

                # Middle finger location (landmark 12) in pixels
                middle_x = int(all_landmarks.landmark[12].x * frame_shape[1])
                middle_y = int(all_landmarks.landmark[12].y * frame_shape[0])

                # A "click" is the two fingertips being close together
                is_clicked = get_is_clicked((x, y), (middle_x, middle_y))
                curr_tool = get_Tool((x, y), curr_tool)

                # Select tool and draw for that
                if curr_tool == 'draw':
                    # Connect previous and current index finger locations
                    if is_clicked and prevxy is not None:
                        cv2.line(mask, prevxy, (x, y), colour, thickness)

                elif curr_tool == 'line':
                    if is_clicked and start_point is None:
                        # Capture the starting point when the fingers touch
                        start_point = (x, y)
                    elif is_clicked:
                        # Show a temporary straight line on this frame only
                        cv2.line(frame, start_point, (x, y), colour, thickness)
                    elif start_point is not None:
                        # Click released: draw the final line on the mask
                        cv2.line(mask, start_point, (x, y), colour, thickness)
                        start_point = None

                elif curr_tool == 'rectangle':
                    if is_clicked and start_point is None:
                        # Click started: remember the first corner
                        start_point = (x, y)
                    elif is_clicked:
                        # Draw a temporary rectangle on this frame only
                        cv2.rectangle(frame, start_point, (x, y), colour, thickness)
                    elif start_point is not None:
                        # Click released: draw the permanent rectangle
                        cv2.rectangle(mask, start_point, (x, y), colour, thickness)
                        start_point = None

                elif curr_tool == 'circle':
                    if is_clicked and start_point is None:
                        # Click started: remember the centre
                        start_point = (x, y)

                    if start_point is not None:
                        # Radius is the distance from the centre to the fingertip
                        rad = int(((start_point[0] - x) ** 2 + (start_point[1] - y) ** 2) ** 0.5)
                        if is_clicked:
                            # Temporary circle on this frame only
                            cv2.circle(frame, start_point, rad, colour, thickness)
                        else:
                            # Click released: draw the permanent circle
                            cv2.circle(mask, start_point, rad, colour, thickness)
                            start_point = None

                elif curr_tool == "erase":
                    if is_clicked:
                        # Reset the mask, removing every permanent drawing
                        mask = np.zeros(frame.shape, dtype='uint8')

                prevxy = (x, y)

        # Merge frame and mask: wherever the mask is non-zero it replaces the frame
        frame = np.where(mask, mask, frame)

        # Paste the toolbar over the top-centre region of the frame
        frame[0:max_row, min_col:max_col] = tools
        cv2.imshow('Live', frame)
        if cv2.waitKey(1) == 27:  # Poll the keyboard for 1 ms; key code 27 is Esc
            break
finally:
    # Always free the camera and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()
# One more short key poll after cleanup, kept from the original cell
# (presumably lets the GUI process the window close on some platforms).
cv2.waitKey(1)

-1

### Adding a Canvas

In [13]:
# Blank drawing surface: every channel of every pixel starts at 255 (white background).
canvas = np.full((1080, 1920, 3), 255, dtype='uint8')

In [14]:
# Variables for drawing
mask = np.zeros(frame_shape, dtype='uint8')
colour = (125, 100, 140)
thickness = 4
curr_tool = 'draw'
start_point = None

cap = cv2.VideoCapture(0)
draw=mp.solutions.drawing_utils
prevxy = None

while True:
    _, frame = cap.read()
    frame = cv2.flip(frame, 1)

    # Preprocess Image
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    op = hand_landmark.process(rgb)

    if op.multi_hand_landmarks:
        for all_landmarks in op.multi_hand_landmarks:
            draw.draw_landmarks(frame, all_landmarks, hands.HAND_CONNECTIONS)

            # index finger location
            x = all_landmarks.landmark[8].x * frame_shape[1]
            y = all_landmarks.landmark[8].y * frame_shape[0]
            x, y = int(x), int(y)
            
            # Middle finger location
            middle_x = all_landmarks.landmark[12].x * frame_shape[1]
            middle_y = all_landmarks.landmark[12].y * frame_shape[0]
            middle_x, middle_y = int(middle_x), int(middle_y)
            
            is_clicked = get_is_clicked((x, y), (middle_x, middle_y))
            
            curr_tool = get_Tool((x, y), curr_tool)

            # Drawing logic
            if curr_tool == 'draw':
                if is_clicked and prevxy is not None:
                    cv2.line(canvas, prevxy, (x, y), colour, thickness)

            elif curr_tool == 'rectangle':
                if is_clicked and start_point is None:
                    start_point = (x, y)
                elif is_clicked:
                    cv2.rectangle(frame, start_point, (x, y), colour, thickness)
                elif not is_clicked and start_point:
                    cv2.rectangle(canvas, start_point, (x, y), colour, thickness)
                    start_point = None

            elif curr_tool == 'circle':
                if is_clicked and start_point is None:
                    start_point = (x, y)
                if start_point:
                    rad = int(np.linalg.norm(np.array(start_point) - np.array([x, y])))
                if is_clicked:
                    cv2.circle(frame, start_point, rad, colour, thickness)
                elif not is_clicked and start_point:
                    cv2.circle(canvas, start_point, rad, colour, thickness)
                    start_point = None

            elif curr_tool == 'line':
                if is_clicked and start_point is None:
                    start_point = (x, y)
                elif is_clicked:
                    cv2.line(frame, start_point, (x, y), colour, thickness)
                elif not is_clicked and start_point:
                    cv2.line(canvas, start_point, (x, y), colour, thickness)
                    start_point = None

            elif curr_tool == "erase":
                canvas[:] = 255  # Clear the canvas

            prevxy = (x, y)

    # Merge canvas and toolbar
    output = canvas.copy()
    output[0:max_row, min_col:max_col] = tools
    cv2.circle(output, (x, y), 5, (0, 0, 255), -1)
    cv2.imshow('Live', output)

    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()


### Semi Transparent Canvas
A semi-transparent background is used instead of a fully solid white canvas so that the user can see both the live feed and their drawings at the same time, which improves the user experience.

In [15]:
# Variables for drawing
canvas = np.ones(frame_shape, dtype='uint8') * 255  # Ensure canvas is visible
colour = (125, 100, 140)
thickness = 4
curr_tool = None
start_point = None

cap = cv2.VideoCapture(0)
draw = mp.solutions.drawing_utils
prevxy = None

while True:
    _, frame = cap.read()
    frame = cv2.flip(frame, 1)

    # Preprocess Image
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    op = hand_landmark.process(rgb)

    # Copy canvas to display permanent drawings
    output = canvas.copy()

    if op.multi_hand_landmarks:
        for all_landmarks in op.multi_hand_landmarks:
            draw.draw_landmarks(frame, all_landmarks, hands.HAND_CONNECTIONS)

            # Index finger location
            x = int(all_landmarks.landmark[8].x * frame_shape[1])
            y = int(all_landmarks.landmark[8].y * frame_shape[0])
            
            # Middle finger location
            middle_x = int(all_landmarks.landmark[12].x * frame_shape[1])
            middle_y = int(all_landmarks.landmark[12].y * frame_shape[0])
            
            is_clicked = get_is_clicked((x, y), (middle_x, middle_y))
            curr_tool = get_Tool((x, y), curr_tool)

            # Drawing logic
            if curr_tool == 'draw':
                if is_clicked and prevxy is not None:
                    cv2.line(canvas, prevxy, (x, y), colour, thickness)
                    cv2.line(output, prevxy, (x, y), colour, thickness)  # Show in real-time

            elif curr_tool == 'rectangle':
                if is_clicked and start_point is None:
                    start_point = (x, y)
                elif is_clicked:
                    cv2.rectangle(output, start_point, (x, y), colour, thickness)  # Live preview
                elif not is_clicked and start_point:
                    cv2.rectangle(canvas, start_point, (x, y), colour, thickness)  # Save to canvas
                    start_point = None

            elif curr_tool == 'circle':
                if is_clicked and start_point is None:
                    start_point = (x, y)
                if start_point:
                    rad = int(np.linalg.norm(np.array(start_point) - np.array([x, y])))
                if is_clicked:
                    cv2.circle(output, start_point, rad, colour, thickness)  # Live preview
                elif not is_clicked and start_point:
                    cv2.circle(canvas, start_point, rad, colour, thickness)  # Save to canvas
                    start_point = None

            elif curr_tool == 'line':
                if is_clicked and start_point is None:
                    start_point = (x, y)
                elif is_clicked:
                    cv2.line(output, start_point, (x, y), colour, thickness)  # Live preview
                elif not is_clicked and start_point:
                    cv2.line(canvas, start_point, (x, y), colour, thickness)  # Save to canvas
                    start_point = None

            elif curr_tool == "erase":
                canvas[:] = 255  # Clear only the canvas (not frame)

            prevxy = (x, y)

    # Merge frame with canvas for visibility
    blended = cv2.addWeighted(frame, 0.4, output, 0.6, 0)  # Blend live view & drawings
    blended[0:max_row, min_col:max_col] = tools  # Keep toolbar intact
    cv2.circle(blended, (x, y), 5, (0, 0, 255), -1)  # Red cursor follows finger

    cv2.imshow('Live', blended)

    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()


In [16]:
# Repeats the cleanup that ends the previous cell: release the camera handle
# and close all OpenCV windows. Presumably a manual safety net for when a
# capture loop above was interrupted before reaching its own cleanup.
cap.release()
cv2.destroyAllWindows()