In [7]:
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import random
import json
from faker import Faker
from skimage.util import random_noise
from tqdm import tqdm

# Shared Faker instance that supplies the fake personal details
fake = Faker()

# Make sure the dataset folder layout exists before anything is written
for _subdir in ('images', 'labels', 'annotations'):
    os.makedirs(f'synthetic_nid_dataset/{_subdir}', exist_ok=True)

# NID template parameters: card size passed to Image.new, and RGB colours
CARD_WIDTH, CARD_HEIGHT = 850, 540
CARD_COLOR = (240, 240, 245)   # Light grayish background
TEXT_COLOR = (10, 10, 10)      # Dark text
ACCENT_COLOR = (0, 80, 150)    # Blue for headers

# Font setup (replace with actual font path if needed).
# ImageFont.truetype raises OSError when the font file cannot be read, so
# only that failure triggers the fallback. Any other exception (including
# KeyboardInterrupt) now propagates instead of being silently swallowed.
try:
    font_bold = ImageFont.truetype("arialbd.ttf", 28)
    font_regular = ImageFont.truetype("arial.ttf", 24)
    font_small = ImageFont.truetype("arial.ttf", 20)
except OSError:
    # Fallback to Pillow's built-in default font if Arial is not available.
    # NOTE(review): the default font ignores the 28/24/20 sizes requested
    # above and may not honour the anchor= argument used when drawing the
    # card — verify the rendering on machines without Arial.
    font_bold = font_regular = font_small = ImageFont.load_default()

def generate_nid_template():
    """Render a pristine NID card filled with freshly generated fake details.

    Returns:
        tuple: ``(img, person_data)`` where ``img`` is the drawn RGB PIL image
        of size ``CARD_WIDTH`` x ``CARD_HEIGHT`` and ``person_data`` is the
        dict of string fields printed on the card.
    """
    card = Image.new('RGB', (CARD_WIDTH, CARD_HEIGHT), color=CARD_COLOR)
    pen = ImageDraw.Draw(card)

    # Header band with the centred card title
    pen.rectangle([(0, 0), (CARD_WIDTH, 80)], fill=ACCENT_COLOR)
    pen.text((CARD_WIDTH//2, 40), "NATIONAL IDENTITY CARD",
             fill=(255, 255, 255), font=font_bold, anchor='mm')

    # Fake identity; the key order matches the order of the rows drawn below
    birth_date = fake.date_of_birth(minimum_age=18, maximum_age=90)
    person_data = {
        'full_name': fake.name().upper(),
        'nid_number': ''.join(random.choices('0123456789', k=12)),
        'date_of_birth': birth_date.strftime('%d/%m/%Y'),
        'place_of_birth': fake.city().upper(),
        'address': fake.address().replace('\n', ', ').upper(),
        'issue_date': fake.date_between(start_date='-10y', end_date='today').strftime('%d/%m/%Y'),
        'expiry_date': fake.date_between(start_date='today', end_date='+10y').strftime('%d/%m/%Y')
    }

    # Square photo placeholder near the right edge, labelled at its centre
    photo_size = 180
    left = CARD_WIDTH - photo_size - 40
    top = 100
    pen.rectangle([(left, top), (left + photo_size, top + photo_size)],
                  outline=TEXT_COLOR, width=2)
    pen.text((left + photo_size//2, top + photo_size//2), "PHOTO",
             fill=TEXT_COLOR, font=font_bold, anchor='mm')

    # One row per field: (prefix, person_data key, font, gap to the next row).
    # The address row uses the small font and leaves a larger gap after it.
    rows = (
        ("NAME: ", 'full_name', font_regular, 40),
        ("NID NUMBER: ", 'nid_number', font_regular, 40),
        ("DATE OF BIRTH: ", 'date_of_birth', font_regular, 40),
        ("PLACE OF BIRTH: ", 'place_of_birth', font_regular, 40),
        ("ADDRESS: ", 'address', font_small, 60),
        ("ISSUE DATE: ", 'issue_date', font_regular, 40),
        ("EXPIRY DATE: ", 'expiry_date', font_regular, 40),
    )
    text_y = 100
    for prefix, key, row_font, gap in rows:
        pen.text((40, text_y), f"{prefix}{person_data[key]}", fill=TEXT_COLOR, font=row_font)
        text_y += gap

    # Footer line centred near the bottom edge
    pen.text((CARD_WIDTH//2, CARD_HEIGHT - 30), "GOVERNMENT OF YOUR COUNTRY",
             fill=ACCENT_COLOR, font=font_bold, anchor='mm')

    return card, person_data

def apply_random_transformations(img):
    """Degrade a clean card render so it looks more like a real capture.

    Steps, in order: an occasional perspective warp, an always-applied
    contrast/brightness change, an occasional 3x3 Gaussian blur, and
    occasional Gaussian noise.

    Args:
        img: Card image; it is converted with ``np.array`` before processing.

    Returns:
        PIL.Image.Image: The augmented card.
    """
    pixels = np.array(img)

    # Perspective warp, applied when the draw exceeds 0.7
    if random.random() > 0.7:
        height, width = pixels.shape[:2]
        corners = np.float32([[0, 0], [width, 0], [width, height], [0, height]])

        # Every corner coordinate moves by at most 10% of the shorter side
        jitter_limit = min(width, height) * 0.1
        jitter = np.random.uniform(-jitter_limit, jitter_limit, size=corners.shape)

        # Adding the jitter promotes to float64; cast back to float32 for OpenCV
        shifted_corners = (corners + jitter).astype(np.float32)

        homography = cv2.getPerspectiveTransform(corners, shifted_corners)
        pixels = cv2.warpPerspective(pixels, homography, (width, height),
                                     borderMode=cv2.BORDER_REPLICATE)

    # Contrast (alpha) and brightness (beta) are always perturbed
    contrast = random.uniform(0.8, 1.2)
    brightness = random.randint(-30, 30)
    pixels = cv2.convertScaleAbs(pixels, alpha=contrast, beta=brightness)

    # Light blur, applied when the draw exceeds 0.8
    if random.random() > 0.8:
        pixels = cv2.GaussianBlur(pixels, (3, 3), 0)

    # Gaussian noise, applied when the draw exceeds 0.7; the result of
    # random_noise is scaled by 255 and truncated back to uint8
    if random.random() > 0.7:
        noisy = random_noise(pixels, mode='gaussian', var=0.001)
        pixels = (noisy * 255).astype(np.uint8)

    return Image.fromarray(pixels)

def add_random_background(nid_img):
    """Place the NID card, randomly rotated, on a plain light background.

    The card is rotated first and the background is sized from the *rotated*
    footprint. Rotating with ``expand=True`` enlarges the image, so sizing the
    background and paste offsets from the unrotated card (as was done before)
    could push the pasted card past the background edge: the card was clipped
    and the YOLO box extended outside the [0, 1] range.

    Args:
        nid_img: PIL image of the card.

    Returns:
        tuple: ``(bg, (cx, cy, w, h))`` — the composed PIL image and the
        bounding box of the rotated card in YOLO format (centre x, centre y,
        width, height, each normalised by the background size).
    """
    margin = 50  # minimum gap between the rotated card and every background edge

    # Random background color (light colors work better). It is drawn before
    # rotating because the corners exposed by the rotation are filled with it.
    bg_color = (random.randint(200, 255), random.randint(200, 255), random.randint(200, 255))

    # Random rotation (-15 to +15 degrees); expand=True grows the canvas so
    # that no part of the card is cut off.
    angle = random.randint(-15, 15)
    rotated_nid = nid_img.rotate(angle, expand=True, fillcolor=bg_color)

    # Background is always at least one margin larger than the rotated card
    # on every side, so the paste position range below is never empty.
    bg_width = random.randint(rotated_nid.width + 2 * margin, rotated_nid.width + 500)
    bg_height = random.randint(rotated_nid.height + 2 * margin, rotated_nid.height + 500)
    bg = Image.new('RGB', (bg_width, bg_height), color=bg_color)

    # Randomly position the rotated card fully inside the background
    x_offset = random.randint(margin, bg_width - rotated_nid.width - margin)
    y_offset = random.randint(margin, bg_height - rotated_nid.height - margin)
    bg.paste(rotated_nid, (x_offset, y_offset))

    # Bounding box of the pasted (rotated) image, YOLO format: normalized cx, cy, w, h
    card_w = rotated_nid.width / bg_width
    card_h = rotated_nid.height / bg_height
    card_cx = (x_offset + rotated_nid.width / 2) / bg_width
    card_cy = (y_offset + rotated_nid.height / 2) / bg_height

    return bg, (card_cx, card_cy, card_w, card_h)

def generate_synthetic_dataset(num_images=100, output_dir='synthetic_nid_dataset', seed=None):
    """Generate a complete synthetic dataset of NID card images.

    For every sample this writes a JPEG to ``<output_dir>/images``, a
    one-line YOLO label (class 0) to ``<output_dir>/labels``, and finally a
    single ``annotations.json`` to ``<output_dir>/annotations`` holding the
    image path, bounding box and fake personal data of every sample.

    Args:
        num_images: Number of images to generate.
        output_dir: Root folder of the dataset. The three sub-folders are
            created here if missing, so the function no longer depends on
            directories prepared elsewhere.
        seed: Optional integer. When given, it seeds ``random``, NumPy's
            global RNG and Faker before generation so a run can be repeated.
            NOTE(review): libraries that draw from their own generator
            (possibly skimage's ``random_noise``) are not covered — verify
            if bit-exact reproducibility is required.
    """
    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)
        Faker.seed(seed)

    for subdir in ('images', 'labels', 'annotations'):
        os.makedirs(f'{output_dir}/{subdir}', exist_ok=True)

    all_person_data = []

    for i in tqdm(range(num_images), desc="Generating synthetic NID dataset"):
        # Generate clean NID template
        nid_img, person_data = generate_nid_template()

        # Apply random transformations to the NID
        transformed_nid = apply_random_transformations(nid_img)

        # Add random background and get bounding box
        final_img, bbox = add_random_background(transformed_nid)

        # Save the image
        img_path = f'{output_dir}/images/nid_{i:04d}.jpg'
        final_img.save(img_path, quality=95)

        # Save YOLO format label: class id followed by cx, cy, w, h
        label_path = f'{output_dir}/labels/nid_{i:04d}.txt'
        with open(label_path, 'w') as f:
            f.write(f"0 {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}")

        # Collect annotation data; written once after the loop
        all_person_data.append({
            'image_path': img_path,
            'bbox': bbox,
            'person_data': person_data
        })

    # Save all annotations
    with open(f'{output_dir}/annotations/annotations.json', 'w') as f:
        json.dump(all_person_data, f, indent=4)

# Build the dataset; adjust num_images to control how many samples are written
generate_synthetic_dataset(num_images=100)


Generating synthetic NID dataset: 100%|██████████| 100/100 [00:03<00:00, 28.91it/s]


In [None]:
import os
import random
import shutil
from pathlib import Path
import yaml

# Config
base_dir = Path("synthetic_nid_dataset")
image_dir = base_dir / "images"
label_dir = base_dir / "labels"
split_ratio = 0.8  # 80% train, 20% val
split_seed = 42    # fixed so re-running this cell reproduces the same split

# Output folders. Any previous split is removed first: copying a fresh
# split on top of an old one can leave the same image in both train and
# val, which leaks validation data into training.
for split in ["train", "val"]:
    split_root = base_dir / split
    if split_root.exists():
        shutil.rmtree(split_root)
    (split_root / "images").mkdir(parents=True, exist_ok=True)
    (split_root / "labels").mkdir(parents=True, exist_ok=True)

# Get all image files in a stable (sorted) order, then shuffle them with a
# dedicated seeded generator so the split does not depend on global RNG state
image_files = sorted(image_dir.glob("*.jpg"))
random.Random(split_seed).shuffle(image_files)

# Split
split_index = int(len(image_files) * split_ratio)
train_files = image_files[:split_index]
val_files = image_files[split_index:]

# Copy files
def copy_files(file_list, split):
    """Copy each image, plus its label when one exists, into a split folder.

    Args:
        file_list: Iterable of image paths (``Path`` objects) to copy.
        split: Name of the split folder under ``base_dir``.

    The label is looked up in ``label_dir`` as ``<image stem>.txt``. A
    missing label only prints a warning; the image is still copied.
    """
    images_out = base_dir / split / "images"
    labels_out = base_dir / split / "labels"

    for image_path in file_list:
        shutil.copy(image_path, images_out / image_path.name)

        label_file = label_dir / (image_path.stem + ".txt")
        if not label_file.exists():
            print(f"Warning: Label for {image_path.name} not found.")
            continue
        shutil.copy(label_file, labels_out / label_file.name)

copy_files(train_files, "train")
copy_files(val_files, "val")

# Create YAML file describing the dataset: absolute root path, image
# folders relative to that root, and the class-id -> name mapping
data_yaml = {
    "path": str(base_dir.resolve()),
    "train": "train/images",
    "val": "val/images",
    "names": {
        0: "nid_card"
    }
}

yaml_path = base_dir / "nid_card.yaml"
with open(yaml_path, "w") as f:
    yaml.dump(data_yaml, f)

# Confirmation message; the recorded cell output shows it, but the print
# itself was missing from the code
print(f"✅ Dataset split and YAML file created at: {yaml_path}")


✅ Dataset split and YAML file created at: synthetic_nid_dataset\nid_card.yaml


In [9]:
pip install mlflow


Collecting mlflowNote: you may need to restart the kernel to use updated packages.

  Downloading mlflow-2.17.2-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.17.2 (from mlflow)
  Downloading mlflow_skinny-2.17.2-py3-none-any.whl.metadata (30 kB)
Collecting Flask<4 (from mlflow)
  Downloading flask-3.0.3-py3-none-any.whl.metadata (3.2 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting scikit-learn<2 (from mlflow)
  Downloading scikit_learn-1.3.2-cp38-cp38-win_amd64.whl.metadata (11 kB)
Collecting sqlalchemy<3,>=1.4.0 (from mlflow)
  Downloading sqlalchemy-2.0.40-cp38-cp38-win_amd64.whl.metadata (9.9 kB)
Collecting waitress<4 (from mlflow)
  Downloading waitress-3.0.0-py3-none-any.whl.metadata (4.

In [1]:
import mlflow
import os
import subprocess

# Single source of truth for the epoch count: the value passed to the
# trainer and the value logged to MLflow can no longer drift apart
# (previously training used epochs=5 while MLflow recorded 1).
EPOCHS = 5

mlflow.set_experiment("YOLO_NID_Detection")

with mlflow.start_run():
    # Log parameters first so they are recorded even if training fails
    mlflow.log_param("model", "yolov8n")
    mlflow.log_param("epochs", EPOCHS)

    # check=True raises CalledProcessError when the yolo command exits with a
    # non-zero status, instead of silently continuing and logging artifacts
    # of a failed run. Passing an argument list avoids shell parsing.
    subprocess.run(
        [
            "yolo",
            "task=detect",
            "mode=train",
            "model=yolov8n.pt",
            "data=synthetic_nid_dataset/nid_card.yaml",
            f"epochs={EPOCHS}",
            "imgsz=640",
        ],
        check=True,
    )

    # You can manually log metrics if needed or parse results
    # NOTE(review): presumably the trainer writes to a new numbered folder
    # (train2, train3, ...) when runs/detect/train already exists — verify
    # that this path points at the run that was just produced.
    mlflow.log_artifacts("runs/detect/train")  # Save model weights, plots, etc.


In [2]:
import pandas as pd