# In [37]:
import subprocess
import cv2
import numpy as np
import pytesseract
from PIL import Image
import time
import pandas as pd
from PIL import ImageGrab
from pynput.keyboard import Key, Controller

def clear_clipboard():
    """Overwrite the clipboard with empty input.

    Runs ``pbcopy`` through the shell with its stdin redirected from
    ``/dev/null``. The return code of the command is not inspected.
    """
    # The redirection is shell syntax, hence shell=True.
    empty_copy_command = "pbcopy < /dev/null"
    subprocess.run(empty_copy_command, shell=True)

def take_screenshot():
    """Send Cmd+Shift+5, pause one second, then send Enter.

    The keystrokes are synthesized with a pynput ``Controller``.

    NOTE(review): presumably Enter confirms the capture using whatever
    options the screenshot toolbar currently has selected; whether the
    result lands on the clipboard depends on those options -- confirm.
    """
    kb = Controller()

    # Press the chord in order, then release it in reverse order.
    chord = (Key.cmd, Key.shift, '5')
    for key in chord:
        kb.press(key)
    for key in reversed(chord):
        kb.release(key)

    time.sleep(1)  # wait for toolbar to appear

    kb.press(Key.enter)
    kb.release(Key.enter)

def get_clipboard_image_data():
    """Wait two seconds, then return the result of ``ImageGrab.grabclipboard()``.

    The value is passed through unchanged.

    NOTE(review): depending on platform and Pillow version,
    ``grabclipboard()`` may return something other than an image (for
    example ``None``) -- verify against the Pillow documentation.
    """
    settle_seconds = 2  # allow clipboard to update
    time.sleep(settle_seconds)
    return ImageGrab.grabclipboard()

def preprocess_image_for_ocr(pil_img):
    """Turn an image into a black-and-white array for OCR.

    Steps: convert to grayscale, sharpen with a 3x3 kernel, invert when the
    image is dark overall, then binarize with Otsu's threshold.

    Args:
        pil_img: Image convertible with ``np.array``. Three-channel (RGB),
            four-channel (RGBA) and single-channel (2-D) inputs are handled.
            NOTE(review): assumes 8-bit samples -- confirm for other modes.

    Returns:
        A 2-D array whose pixels are either 0 or 255.
    """
    img = np.array(pil_img)

    if img.ndim == 2:
        # Already single-channel (e.g. a grayscale image): there is no
        # channel axis, so indexing shape[2] would raise IndexError.
        gray = img
    else:
        if img.shape[2] == 4:  # Convert RGBA to RGB if needed
            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Sharpen image: the kernel weights sum to 1, so flat regions keep
    # their brightness while edges are emphasized.
    kernel = np.array([[-1, -1, -1],
                       [-1, 9, -1],
                       [-1, -1, -1]])
    sharpened = cv2.filter2D(gray, -1, kernel)

    # A mostly dark image is treated as light text on a dark background
    # and inverted so the text ends up dark on light.
    mean_intensity = np.mean(sharpened)
    if mean_intensity < 127:
        sharpened = cv2.bitwise_not(sharpened)

    # Global binarization; with THRESH_OTSU the threshold value passed (0)
    # is ignored and the threshold is chosen from the image histogram.
    _, binary_img = cv2.threshold(sharpened, 0, 255,
                                  cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary_img

def extract_text_from_image(img):
    """Run digit-only OCR on ``img`` and return the stripped text.

    Args:
        img: Image to read. A falsy value (such as ``None``) means there is
            nothing to process.

    Returns:
        The recognized text with surrounding whitespace removed, or ``None``
        when ``img`` is falsy.
    """
    if not img:
        return None

    ocr_input = preprocess_image_for_ocr(img)
    # The whitelist restricts recognition to the characters 0-9.
    tesseract_args = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789'
    raw_text = pytesseract.image_to_string(ocr_input, config=tesseract_args)
    return raw_text.strip()

def append_text_to_dataframe(text, df=None):
    """Append ``text`` as a new row of an ``extracted_text`` DataFrame.

    Previously a fresh empty DataFrame was created on every call and then
    discarded, so rows could never accumulate. The frame can now be passed
    in and is returned to the caller.

    Args:
        text: Value stored in the ``extracted_text`` column of the new row.
        df: Optional existing DataFrame to extend. When omitted, a new
            one-row DataFrame is created. The input frame is not modified.

    Returns:
        A DataFrame containing the previous rows (if any) followed by the
        new row, with a fresh 0..n-1 index.
    """
    new_row = pd.DataFrame([{"extracted_text": text}])
    if df is None:
        # Nothing to extend: the new row is the whole frame. This also
        # avoids concatenating with an empty DataFrame.
        df = new_row
    else:
        df = pd.concat([df, new_row], ignore_index=True)
    print("Extracted text appended to DataFrame:")
    print(df)
    return df

if __name__ == "__main__":
    # Pipeline: empty the clipboard, trigger a screenshot, read the
    # clipboard back, OCR it, and record the text if any was found.
    clear_clipboard()
    take_screenshot()
    img = get_clipboard_image_data()
    extracted_text = extract_text_from_image(img)

    # Both a missing image (None) and an empty OCR result are falsy.
    if not extracted_text:
        print("No image or text found in clipboard.")
    else:
        append_text_to_dataframe(extracted_text)


# Output: No image or text found in clipboard.
