In [1]:
import cv2
import numpy as np
import mediapipe as mp
import math

# Function to get the index finger tip and thumb tip positions
def get_finger_tips(frame, hand_landmarks):
    if hand_landmarks:
        index_finger_tip = hand_landmarks.landmark[mp.solutions.hands.HandLandmark.INDEX_FINGER_TIP]
        thumb_tip = hand_landmarks.landmark[mp.solutions.hands.HandLandmark.THUMB_TIP]
        height, width, _ = frame.shape
        index_finger_tip_px = (int(index_finger_tip.x * width), int(index_finger_tip.y * height))
        thumb_tip_px = (int(thumb_tip.x * width), int(thumb_tip.y * height))
        return index_finger_tip_px, thumb_tip_px
    else:
        return None, None

# Function to calculate distance between two points
def calculate_distance(point1, point2):
    return math.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)

# Function to perform smoothing
def smooth_line(new_point, prev_points, smoothing_factor=0.5):
    if prev_points is None:
        return new_point
    else:
        smoothed_point = tuple(np.round(smoothing_factor * np.array(new_point) + (1 - smoothing_factor) * np.array(prev_points)).astype(int))
        return smoothed_point

def main():
    # Open camera
    cap = cv2.VideoCapture(0)
    # Initialize MediaPipe Hands
    mp_hands = mp.solutions.hands
    hands = mp_hands.Hands(max_num_hands=1)
    mp_drawing = mp.solutions.drawing_utils

    # Create a blank canvas filled with white color
    canvas = np.ones((480, 640, 3), dtype=np.uint8) * 255

    # Variables for drawing and erasing
    drawing = False
    prev_point = None

    # Create resizable windows
    cv2.namedWindow('Frame', cv2.WINDOW_NORMAL)
    cv2.namedWindow('Canvas', cv2.WINDOW_NORMAL)
    # Set initial window sizes
    cv2.resizeWindow('Frame', 1020, 720)
    cv2.resizeWindow('Canvas', 1020, 720)

    while True:
        ret, frame = cap.read()

        if not ret:
            break

        # Flip the frame horizontally
        frame = cv2.flip(frame, 1)

        # Adjust brightness
        frame = cv2.convertScaleAbs(frame, alpha=0.7, beta=30)  # adjust alpha and beta as needed

        # Convert the BGR image to RGB
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process the frame with MediaPipe Hands
        results = hands.process(rgb_frame)

        # Draw hand landmarks on the frame
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        # Get finger tip positions
        index_finger_tip, thumb_tip = get_finger_tips(frame, results.multi_hand_landmarks[0] if results.multi_hand_landmarks else None)

        if index_finger_tip is not None and thumb_tip is not None:
            distance_threshold = 50  # Adjust threshold as needed
            distance = calculate_distance(index_finger_tip, thumb_tip)
            
            if distance < distance_threshold:
                drawing = False
                # Check if index finger is touching the top part of the frame
                if index_finger_tip[1] < 50:  # Adjust the value as needed
                    # Implement clear, save, quit functionality
                    if index_finger_tip[0] < 213:  # 640/3, 3 sections
                        canvas = np.ones((480, 640, 3), dtype=np.uint8) * 255  # Clear canvas
                        print("Canvas cleared")
                    elif 213 <= index_finger_tip[0] < 426:  # 640/3 * 2
                        print("Attempting to save canvas")
                        saved = cv2.imwrite(r"C:\Users\parth\OneDrive\Desktop\IBM_Work\Canvas.png", canvas)  # Save canvas as image
                        if saved:
                            print("Canvas saved successfully")
                        else:
                            print("Failed to save canvas")
                    elif 426 <= index_finger_tip[0] < 640:
                        print("Quitting program")
                        break  # Quit program
            else:
                drawing = True

        if drawing and index_finger_tip is not None:
            cv2.circle(frame, index_finger_tip, 5, (0, 0, 0), -1)  # Draw in black color
            if prev_point is not None:
                smoothed_point = smooth_line(index_finger_tip, prev_point)
                cv2.line(canvas, prev_point, smoothed_point, (0, 0, 0), 4)  # Draw in black color
                prev_point = smoothed_point
            else:
                prev_point = index_finger_tip
        else:
            prev_point = None
        
        # Display canvas
        cv2.imshow('Canvas', canvas)

        # Overlay drawing onto the frame
        frame_with_drawing = cv2.addWeighted(frame, 0.5, canvas, 0.5, 0)
        
        # Add functionality text
        cv2.putText(frame_with_drawing, "Clear", (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
        cv2.putText(frame_with_drawing, "Save", (220, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.putText(frame_with_drawing, "Quit", (420, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display frame
        cv2.imshow('Frame', frame_with_drawing)

        # Check for key press
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()



Canvas cleared
Canvas cleared
Canvas cleared
Canvas cleared
Canvas cleared
Attempting to save canvas
Canvas saved successfully


In [18]:
from transformers import pipeline
image_path = image_path = r"C:\Users\parth\OneDrive\Desktop\IBM_Work\Graphology\Handwriting_Analysis\images\images\Canvas.png"
# Create the image-to-text pipeline using the TrOCR model
pipe = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
prediction = pipe(image_path)
prediction

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[{'generated_text': '2 2 Vesoma.'}]

In [3]:
image_path = r"C:\Users\parth\OneDrive\Desktop\IBM_Work\Graphology\Handwriting_Analysis\images\images\Canvas.png"
pipe = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
prediction = pipe(image_path)
prediction



[{'generated_text': '<s_cord-v2><s_menu><s_nm> Amari</s_nm><s_price>'}]

In [4]:
import pytesseract
import cv2
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r"C:/Program Files/Tesseract-OCR/tesseract.exe"
img_path = cv2.imread(r"C:\Users\parth\OneDrive\Desktop\IBM_Work\Canvas.png")
img = cv2.cvtColor(img_path, cv2.COLOR_BGR2GRAY)
pytesseract.image_to_string(img_path)

''

In [6]:
import cv2
import numpy as np
from PIL import Image  # For potential deskewing (slant correction)
def get_slant(gray):
  # Calculate moments
  moments = cv2.moments(gray)
  mu02 = moments['mu02']
  mu11 = moments['mu11']
  # Check for division by zero
  if abs(mu02) < 1e-6:
    return 0
  else:
    # Calculate slant angle (radians)
    return np.degrees(np.arctan((2 * mu11) / mu02))


def get_text_block_orientation(gray):
  # Apply thresholding and find contours
  thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
  cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
  
  # Check for detected contours
  if len(cnts) == 0:
    return "Text not detected"
  
  # Get bounding rectangle of all contours
  x, y, w, h = cv2.boundingRect(np.vstack(cnts))
  
  # Calculate aspect ratio
  aspect_ratio = w / float(h)
  
  # Estimate orientation based on aspect ratio (heuristic)
  if aspect_ratio > 1:
    orientation = "Horizontal"
  else:
    orientation = "Vertical"
  
  # Calculate slant angle
  slant_angle = get_slant(gray[y:y+h, x:x+w])  # Apply slant calculation on the text region

  return orientation, slant_angle

img_path = r"C:\Users\parth\OneDrive\Desktop\IBM_Work\Canvas.png"
img = cv2.imread(img_path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Analyze text region
orientation, slant_angle = get_text_block_orientation(gray)

print(f"Text block orientation: {orientation}")
print(f"Slant angle of the entire text region: {slant_angle:.2f} degrees")

Text block orientation: Horizontal
Slant angle of the entire text region: -0.22 degrees
