# 🚗 PDF Car Number Extractor
Split a PDF into 2-page chunks and extract the car registration number from each chunk.

In [None]:
!pip install PyPDF2 pandas openpyxl

In [None]:
from PyPDF2 import PdfReader, PdfWriter
import pandas as pd
import os, re

In [None]:
def extract_registration(text):
    """Return the first car registration number found in *text*.

    A registration is matched as 1-3 uppercase letters, a hyphen, 1-2
    uppercase letters, a single space and 1-4 digits (for example
    ``"B-AB 1234"``), delimited by word boundaries.

    Args:
        text: The text to search. ``None`` or an empty string is treated
            as containing no registration.

    Returns:
        The matched registration string, or ``'Not found'`` when nothing
        matches.
    """
    # Guard first: re.search raises TypeError on None.
    if not text:
        return 'Not found'
    match = re.search(r'\b[A-Z]{1,3}-[A-Z]{1,2} \d{1,4}\b', text)
    return match.group(0) if match else 'Not found'

In [None]:
def split_and_extract(input_pdf):
    """Split a PDF into 2-page files and record each file's registration number.

    The input is walked two pages at a time. For every pair, the text of
    the first page is searched with ``extract_registration``; the pair is
    then written to ``splits/<name>.pdf``, where ``<name>`` is derived from
    the registration (spaces and hyphens replaced by underscores) or is
    ``Not_found_<page>`` when no registration was found. A trailing single
    page (odd page count) is written on its own.

    If several chunks yield the same registration, later files receive a
    numeric suffix (``_2``, ``_3``, ...) so that no chunk overwrites another.

    Args:
        input_pdf: Path (or other source accepted by ``PdfReader``) of the
            PDF to split.

    Returns:
        The string ``"result.xlsx"``: the name of the workbook, written to
        the current working directory, listing each output file and its
        registration number.
    """
    reader = PdfReader(input_pdf)
    pages = reader.pages
    total_pages = len(pages)
    results = []
    used_names = set()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("splits", exist_ok=True)

    for i in range(0, total_pages, 2):
        # Normalise a missing extraction result to "" so the search never sees None.
        text = pages[i].extract_text() or ""
        reg_number = extract_registration(text)

        if reg_number != "Not found":
            base_name = reg_number.replace(" ", "_").replace("-", "_")
        else:
            # 1-based page number keeps unnamed chunks distinct and sortable.
            base_name = f"Not_found_{i+1:03d}"

        # The same registration may appear on several chunks; pick a unique
        # file name instead of silently overwriting the earlier PDF.
        safe_name = base_name
        duplicate = 2
        while safe_name in used_names:
            safe_name = f"{base_name}_{duplicate}"
            duplicate += 1
        used_names.add(safe_name)

        name = f"{safe_name}.pdf"
        path = os.path.join("splits", name)

        writer = PdfWriter()
        writer.add_page(pages[i])
        # The last chunk has only one page when the page count is odd.
        if i + 1 < total_pages:
            writer.add_page(pages[i + 1])
        with open(path, "wb") as f_out:
            writer.write(f_out)

        results.append({"File": name, "Registration No": reg_number})

    # Explicit columns keep the header row even when the PDF has no pages.
    df = pd.DataFrame(results, columns=["File", "Registration No"])
    df.to_excel("result.xlsx", index=False)
    return "result.xlsx"

In [None]:
# Upload PDF
from google.colab import files

# files.upload() returns a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Fail with a clear message instead of a bare IndexError further down.
    raise ValueError("No file was uploaded; re-run this cell and choose a PDF.")
# Only the first uploaded file is used; its name is later passed to
# split_and_extract as the input path.
pdf_file = next(iter(uploaded))

In [None]:
# Run extraction
# Requires `pdf_file` and `files` from the upload cell. split_and_extract
# writes the 2-page PDFs into "splits/" and returns the name of the summary
# workbook; only that workbook is passed to files.download (the split PDFs
# themselves are not downloaded here).
output_file = split_and_extract(pdf_file)
files.download(output_file)

In [None]:
# Upload your PDF file
from google.colab import files

# files.upload() returns a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Fail with a clear message instead of a bare IndexError further down.
    raise ValueError("No file was uploaded; re-run this cell and choose a PDF.")
# Only the first uploaded file is used; its name is later passed to
# split_and_extract as the input path.
pdf_file = next(iter(uploaded))

In [None]:
# Run extraction and download result
# NOTE(review): this repeats the earlier "Run extraction" cell. Running it
# again re-processes `pdf_file` and rewrites result.xlsx and any
# same-named files in "splits/".
# Only the summary workbook returned by split_and_extract is downloaded.
output_file = split_and_extract(pdf_file)
files.download(output_file)