In [1]:
!pip install opencv-python requests numpy pytesseract -q git+https://github.com/sunsmarterjie/yolov12.git

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m116.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m86.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m61.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.2 MB/s[0m eta [36m0:

In [2]:
# Imports
import cv2
import numpy as np
import re
import pytesseract
import requests
from google.colab import files
import pandas as pd
from ultralytics import YOLO
import os

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/yolov12/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
FlashAttention is not available on this device. Using scaled_dot_product_attention instead.


In [5]:
# Ask the user for the trained YOLO weights and load them into the global
# `model` that find_id_box() relies on.
print("Upload YOLO Model:")
uploaded = files.upload()
if not uploaded:
    # Nothing was uploaded: stop with a clear message instead of the bare
    # IndexError that indexing an empty key list would raise.
    raise ValueError("No YOLO model file was uploaded.")
path = list(uploaded.keys())[0]  # name of the first uploaded file
model = YOLO(path)  # Your trained model

Upload YOLO Model:


Saving best.pt to best.pt


In [6]:
# Functions
def find_id_box(img):
    """Run the globally loaded YOLO ``model`` on ``img`` and return its boxes.

    Returns the ``boxes`` attribute of the first prediction result, or ``None``
    when that attribute is ``None``, contains no detections, or when anything
    in the prediction step raises (the error is printed, not propagated).
    """
    try:
        detections = model.predict(img)[0].boxes
        found_any = detections is not None and len(detections) > 0
        return detections if found_any else None
    except Exception as e:
        print(f"Error while running model on image\nException: {e}")
        return None


def crop_image(img):
    """Crop ``img`` to the first detected YOLO box.

    Args:
        img: Image array indexed as ``img[row, column]`` (it is sliced with
            ``img[y1:y2, x1:x2]`` and must expose ``.shape``).

    Returns:
        The cropped sub-array, or ``None`` when no box was detected or the
        box has no area inside the image.
    """
    boxes = find_id_box(img)
    if boxes is None:
        return None

    # Use the first detected box; each xyxy row is (x1, y1, x2, y2).
    x1, y1, x2, y2 = map(int, boxes.xyxy[0])

    # Clamp to the image bounds: a negative coordinate would otherwise be
    # interpreted by NumPy slicing as an offset from the end of the axis.
    height, width = img.shape[:2]
    x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
    y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))

    # A zero-area box would produce an empty array; report it as "no crop"
    # so the caller does not hand an empty image to OCR.
    if x2 <= x1 or y2 <= y1:
        return None

    return img[y1:y2, x1:x2]


def download_image(image_url, timeout=10):
    """Download an image from a URL and decode it into an OpenCV array.

    Args:
        image_url: URL handed to ``requests.get``.
        timeout: Seconds ``requests`` waits on the server before giving up.
            requests applies no timeout by default, so without this a single
            stalled server would block the whole batch.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` for a 200
        response with a non-empty body; ``None`` for any other status code or
        an empty body.

    Raises:
        requests.RequestException: Connection errors and timeouts are not
            caught here and propagate to the caller.
    """
    response = requests.get(image_url, timeout=timeout)
    if response.status_code != 200 or not response.content:
        return None

    # View the response bytes as uint8 without the extra bytearray copy.
    image_array = np.frombuffer(response.content, dtype=np.uint8)
    return cv2.imdecode(image_array, cv2.IMREAD_COLOR)


def process_image_url(image_url):
    """Download, crop, OCR and parse one screenshot URL.

    Returns whatever ``extract_transaction_details`` yields for the OCR text of
    the cropped region. Returns ``None`` when the URL is falsy, the download or
    crop yields ``None``, or any step raises (the error is printed).
    """
    try:
        if not image_url:
            return None

        image = download_image(image_url)
        if image is None:
            return None

        region = crop_image(image)
        if region is None:
            return None

        ocr_text = pytesseract.image_to_string(region)
        return extract_transaction_details(ocr_text)
    except Exception as e:
        print(
            f"[ERROR] Failed to process image URL: {image_url}\nException: {e}")
        return None


def extract_transaction_details(text):
    """Extract a UPI reference / UTR / transaction ID from OCR text.

    The text is scanned line by line. A line that contains a known label but
    no ID is remembered and joined with the next non-blank line, so a label on
    one line followed by the ID on a later line is still matched.

    Args:
        text: OCR output; lines are separated by ``\\n``.

    Returns:
        The first run of nine or more alphanumeric characters that follows a
        recognised label, or ``None`` when nothing matches or ``text`` is
        empty/None.
    """
    if not text:
        return None

    # Label followed (after optional colons/whitespace) by the ID itself.
    # NOTE(review): the ID group also accepts purely alphabetic tokens of 9+
    # characters; tighten it if IDs are known to always contain digits.
    id_pattern = re.compile(
        r"(?:UPI Ref(?:erence)?|UTR|Ref(?:erence)? ID|UPI transaction ID)[:\s]*([A-Za-z0-9]{9,})", re.IGNORECASE)
    # Label on its own, used to decide whether a line should be buffered.
    label_pattern = re.compile(
        r"(UPI Ref(?:erence)?|UTR|Ref(?:erence)? ID|UPI transaction ID|Bank Reference Id)", re.IGNORECASE)

    buffer = ""
    for line in text.split('\n'):
        # Blank lines between a label and its ID must not clear the buffered
        # label, otherwise the ID on the following line is never matched.
        if not line.strip():
            continue

        # Combine previous buffer if present (handles label on one line, ID on next)
        combined = buffer + " " + line if buffer else line

        match_upi = id_pattern.search(combined)
        if match_upi:
            return match_upi.group(1)

        # Buffer this line if it might be a label but doesn't contain the ID
        buffer = line if label_pattern.search(line) else ""

    return None  # No transaction ID found


def process_transactions(reg_path):
    """Read the registration CSV and add an ``extracted_transaction_id`` column.

    Every column is read as a string. Each non-null value of the ``screenshot``
    column is passed to ``process_image_url``; rows with a missing screenshot
    are skipped and end up with a missing value in the new column.
    """
    registrations = pd.read_csv(reg_path, dtype=str)
    screenshot_urls = registrations["screenshot"].dropna()
    extracted_ids = screenshot_urls.apply(process_image_url)
    registrations["extracted_transaction_id"] = extracted_ids
    return registrations


def save_and_download(df, output_filename="processed_transactions.xlsx"):
    """Write ``df`` to an Excel file (without the index) and offer it for download.

    The file is written to ``output_filename`` and then passed to
    ``files.download``.
    """
    destination = output_filename
    df.to_excel(excel_writer=destination, index=False)
    files.download(destination)


def main():
    """Prompt for the registration file, process it and download the result.

    Only the first uploaded file is processed. If nothing is uploaded, an
    error message is printed and the function returns without doing any work.
    """
    print("Upload the registration data")
    file_paths = list(files.upload().keys())

    if not file_paths:
        print("Error: Please upload all required files.")
        return

    save_and_download(process_transactions(file_paths[0]))

In [8]:
# Entry point: start the upload -> extract -> download pipeline when this cell
# (or the exported script) is executed directly rather than imported.
if __name__ == "__main__":
    main()

Upload the registration data


Saving 19032025-reg-data-pasc.csv to 19032025-reg-data-pasc.csv

0: 640x384 1 upi_id, 52.3ms
Speed: 14.5ms preprocess, 52.3ms inference, 316.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x320 1 upi_id, 55.9ms
Speed: 3.6ms preprocess, 55.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 320)

0: 640x288 (no detections), 53.4ms
Speed: 3.7ms preprocess, 53.4ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 288)

0: 640x448 (no detections), 58.3ms
Speed: 3.0ms preprocess, 58.3ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 448)

0: 640x320 (no detections), 17.6ms
Speed: 2.8ms preprocess, 17.6ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 320)

0: 640x352 1 upi_id, 56.4ms
Speed: 3.0ms preprocess, 56.4ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 352)

0: 640x384 1 upi_id, 17.5ms
Speed: 2.8ms preprocess, 17.5ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x288 1 upi_id, 17.3ms
S

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>