In [2]:
import os
import re

import cv2
import imutils
import pytesseract
from imutils.perspective import four_point_transform

# Manually set the image path instead of using argparse
image_path = "larry_page.jpg"  # Change this to your actual image file
debug = -1  # values > 0 enable the intermediate outline preview window
min_conf = 0  # NOTE(review): not referenced in the visible part of this script

# Set Tesseract path (for Windows users).
# Only override pytesseract's command when the default Windows install
# location actually exists; otherwise leave pytesseract's own setting
# untouched so the script also works where Tesseract lives elsewhere.
_windows_tesseract = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_windows_tesseract):
    pytesseract.pytesseract.tesseract_cmd = _windows_tesseract

# Read the source image; a None result is treated as "could not load"
orig = cv2.imread(image_path)
if orig is None:
    raise Exception("Error: Image not found!")

# Detection runs on a 600px-wide copy. `ratio` records how much wider the
# original is, so coordinates found on the copy can be scaled back later.
image = imutils.resize(orig.copy(), width=600)
ratio = orig.shape[1] / float(image.shape[1])

# Edge map of the resized image: grayscale -> 5x5 Gaussian blur -> Canny
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 30, 150)

# External contours of the edge map, keeping only the five largest by area
cnts = imutils.grab_contours(
    cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
)
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# The card outline is the first (largest) contour whose polygonal
# approximation has exactly four vertices.
cardCnt = None
for candidate in cnts:
    perimeter = cv2.arcLength(candidate, True)
    polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
    if len(polygon) == 4:
        cardCnt = polygon
        break
else:
    # Loop finished without a break: no four-point contour was found
    raise Exception("Could not find business card outline.")

# Optional preview: draw the detected outline on a copy of the resized image
if debug > 0:
    output = image.copy()
    cv2.drawContours(output, [cardCnt], -1, (0, 255, 0), 2)
    cv2.imshow("Business Card Outline", output)
    cv2.waitKey(0)

# Perspective transformation.
# cardCnt was found on the resized image, so its four points are multiplied
# by `ratio` before warping the full-resolution original.
card = four_point_transform(orig, cardCnt.reshape(4, 2) * ratio)
cv2.imshow("Business Card Transform", card)
cv2.waitKey(0)
# Close the preview windows once the user has dismissed them; otherwise they
# stay open for the rest of the run.
cv2.destroyAllWindows()

# OCR processing: reorder channels from BGR to RGB, then run Tesseract
rgb = cv2.cvtColor(card, cv2.COLOR_BGR2RGB)
text = pytesseract.image_to_string(rgb)

# Extract phone numbers and emails from the OCR text.
# Phone: optional leading "+" or "(", a non-zero digit, at least eight
# digits/separators, and a final digit.
phoneNums = re.findall(r'[\+\(]?[1-9][0-9 .\-\(\)]{8,}[0-9]', text)
# Email: the character classes list only lowercase letters, so match
# case-insensitively; otherwise an address containing capitals
# (e.g. "Larry@Google.com") is dropped entirely.
emails = re.findall(
    r"[a-z0-9\.\-+_]+@[a-z0-9\.\-+_]+\.[a-z]+", text, flags=re.IGNORECASE
)

# Extract names (not always accurate).
# "^" is used without re.MULTILINE, so the pattern anchors only at the very
# start of the OCR text. The negated class also matches newlines, so the
# single possible match may span several lines (name, title, company) until
# a digit or one of the excluded symbols is reached.
nameExp = r"^[\w'\-,.][^0-9_!¡?÷?¿/\\+=@#$%ˆ&*(){}|~<>;:[\]]{2,}"
names = re.findall(nameExp, text)

# Print results: each section is a header followed by its stripped entries
sections = (
    ("PHONE NUMBERS\n=============", phoneNums),
    ("\nEMAILS\n======", emails),
    ("\nNAME/JOB TITLE\n==============", names),
)
for header, entries in sections:
    print(header)
    for entry in entries:
        print(entry.strip())


PHONE NUMBERS
650 330-0100
650 618-1499

EMAILS
larry@google.com

NAME/JOB TITLE
Larry Page

CEO
Google
