# 🚗 PDF Car Number Extractor
Split a PDF into 2-page files and extract car registration numbers from the first page of each chunk.

In [None]:
!pip install PyPDF2 pandas openpyxl

In [None]:
from PyPDF2 import PdfReader, PdfWriter
import pandas as pd
import os, re

In [None]:
def extract_registration(text):
    """Return the first car registration number found in *text*.

    The pattern matched is 1-3 capital letters, a hyphen, 1-2 capital
    letters, a single space and 1-4 digits (for example ``"B-MW 1234"``),
    delimited by word boundaries on both sides.

    Args:
        text: Text to search. ``None`` or an empty string is treated as
            containing no registration, so callers can pass the result of
            a text extraction that produced nothing.

    Returns:
        The matched registration string, or ``'Not found'`` when *text*
        is empty/``None`` or contains no match.
    """
    # re.search raises TypeError on None; treat "no text" as "no match".
    if not text:
        return 'Not found'
    match = re.search(r'\b[A-Z]{1,3}-[A-Z]{1,2} \d{1,4}\b', text)
    return match.group(0) if match else 'Not found'

In [None]:
def split_and_extract(input_pdf, *, pages_per_split=2, output_dir="splits",
                      result_path="result.xlsx"):
    """Split a PDF into fixed-size chunks and record a registration per chunk.

    Each chunk is written to ``output_dir`` as
    ``split_<first>_<last>.pdf`` (1-based, zero-padded page numbers). The
    text of the first page of every chunk is searched with
    ``extract_registration`` and the findings are saved to an Excel sheet.

    Args:
        input_pdf: Path (or file object) accepted by ``PdfReader``.
        pages_per_split: Number of pages per output file; the last chunk
            may be shorter. Defaults to 2.
        output_dir: Directory that receives the split PDFs; created if
            missing. Defaults to ``"splits"``.
        result_path: Path of the Excel workbook to write. Defaults to
            ``"result.xlsx"``.

    Returns:
        ``result_path``, the location of the written workbook.

    Raises:
        ValueError: If ``pages_per_split`` is smaller than 1.
    """
    if pages_per_split < 1:
        raise ValueError("pages_per_split must be at least 1")

    reader = PdfReader(input_pdf)
    total_pages = len(reader.pages)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    results = []
    for start in range(0, total_pages, pages_per_split):
        stop = min(start + pages_per_split, total_pages)
        name = f"split_{start + 1:03d}_{stop:03d}.pdf"

        # Assemble the chunk before opening the output file so a failure
        # while adding pages does not leave an empty PDF behind.
        writer = PdfWriter()
        for page_index in range(start, stop):
            writer.add_page(reader.pages[page_index])
        with open(os.path.join(output_dir, name), "wb") as f_out:
            writer.write(f_out)

        # Only the first page of each chunk is searched. Fall back to an
        # empty string in case extraction yields nothing for this page.
        text = reader.pages[start].extract_text() or ""
        reg_number = extract_registration(text)
        results.append({"File": name, "Registration No": reg_number})

    # Explicit columns keep the header row even when the PDF has no pages.
    df = pd.DataFrame(results, columns=["File", "Registration No"])
    df.to_excel(result_path, index=False)
    return result_path

In [None]:
# Upload PDF
from google.colab import files

# files.upload() returns a mapping keyed by the uploaded file names.
uploaded = files.upload()
if not uploaded:
    # Nothing came back (e.g. the upload was dismissed): fail with a clear
    # message instead of an opaque IndexError on the lookup below.
    raise ValueError("No file was uploaded; re-run this cell and choose a PDF.")
# Only the first uploaded file is processed.
pdf_file = next(iter(uploaded))

In [None]:
# Run extraction: split the uploaded PDF and write the results workbook,
# then pass the workbook path returned by split_and_extract to Colab's
# files.download. Relies on `files` and `pdf_file` from the upload cell.
output_file = split_and_extract(pdf_file)
files.download(output_file)