In [70]:
import os
import cv2
import random
import numpy as np

from skimage.feature import hog, local_binary_pattern

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import joblib

import torch
import torchvision.transforms as transforms
import torch.optim as optim
import torchvision.models as models
from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
from torchvision.datasets import CIFAR10



# Seed every RNG this notebook draws from (style-image selection uses `random`,
# the noisy initial image uses torch) so a Restart & Run All starts from the
# same random state each time.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cuda


In [71]:
def load_and_preprocess_image(img_path, img_size=(256, 256)):
    """
    Read an image file as grayscale and return a smoothed, rescaled copy.

    The image is resized to ``img_size``, blurred with a 3x3 Gaussian kernel
    and divided by 255.0. Returns None when OpenCV cannot read ``img_path``.
    """
    gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread signals an unreadable/missing file with None, not an exception.
        return None

    resized = cv2.resize(gray, img_size)
    blurred = cv2.GaussianBlur(resized, (3, 3), 0)
    return blurred / 255.0

def extract_features(image_path):
    """
    Build the HOG + LBP feature vector for one image file.

    The image is read as grayscale and resized to 128x128 so every sample
    yields a vector of the same length.

    Args:
        image_path: Path of the image file to read.

    Returns:
        1-D numpy array holding the HOG descriptor followed by the LBP
        histogram, or None (after printing a warning) when the file cannot be
        read as an image.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    if image is None:
        print(f"Warning: Unable to load image {image_path}")
        return None  # Skip this image

    image = cv2.resize(image, (128, 128))  # Resize for consistency

    # HOG descriptor. The visualisation image is not requested because it was
    # never used; the returned descriptor is the same either way.
    hog_features = hog(image, pixels_per_cell=(16, 16), cells_per_block=(2, 2))

    # Uniform LBP with P=24 produces P + 2 = 26 distinct codes (0..25); the 27
    # bin edges 0..26 give one histogram bin per code.
    lbp = local_binary_pattern(image, P=24, R=3, method="uniform")
    lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(27), density=True)

    return np.hstack((hog_features, lbp_hist))


def load_dataset(real_folder, fake_folder):
    """
    Extract features for every readable image in the two folders.

    Args:
        real_folder: Directory of genuine signatures (label 1).
        fake_folder: Directory of forged signatures (label 0).

    Returns:
        Tuple ``(X, y)`` of numpy arrays: one feature row per image that
        ``extract_features`` could process, and the matching labels. Real
        samples come first, then fake ones.
    """
    X, y = [], []

    for folder, label in ((real_folder, 1), (fake_folder, 0)):
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) stable across machines.
        for file in sorted(os.listdir(folder)):
            if file.lower() == "thumbs.db":
                continue  # Windows thumbnail cache, not an image
            features = extract_features(os.path.join(folder, file))
            if features is not None:
                X.append(features)
                y.append(label)

    return np.array(X), np.array(y)


In [3]:
# Load dataset
dataset_dir = './signature_dataset'
real_path = os.path.join(dataset_dir, "full_org")
fake_path = os.path.join(dataset_dir, "full_forg")
X, y = load_dataset(real_path, fake_path)

# Fail with a clear message instead of an opaque error from train_test_split.
if len(X) == 0:
    raise ValueError(f"No usable images found under {dataset_dir}")

# Split dataset (80% training, 20% testing), keeping the class ratio in both parts
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train an SVM classifier (using RBF kernel).
# probability=True fits the probability model with an internal shuffled
# cross-validation; random_state pins that shuffle so predict_proba is
# reproducible between runs.
clf = SVC(kernel='rbf', probability=True, random_state=42)
clf.fit(X_train, y_train)

# Evaluate classifier
y_pred = clf.predict(X_test)
print("=== CLASSIFICATION REPORT ===")
print(classification_report(y_test, y_pred))
print("=== CONFUSION MATRIX ===")
print(confusion_matrix(y_test, y_pred))

# Save the model for later use
joblib.dump(clf, "svm_signature_classifier.pkl")

=== CLASSIFICATION REPORT ===
              precision    recall  f1-score   support

           0       0.99      0.98      0.98       264
           1       0.98      0.99      0.98       264

    accuracy                           0.98       528
   macro avg       0.98      0.98      0.98       528
weighted avg       0.98      0.98      0.98       528

=== CONFUSION MATRIX ===
[[258   6]
 [  3 261]]


['svm_signature_classifier.pkl']

In [58]:
class ContentLoss(nn.Module):
    """Pass-through layer that records the MSE between its input and a fixed target.

    After each forward call the loss is available as ``loss_value``; the input
    itself is returned unchanged so the layer can sit inside an nn.Sequential.
    """

    def __init__(self, target):
        super().__init__()
        self.loss = nn.MSELoss()
        # Detached so the target acts as a constant and is left out of autograd.
        self.target = target.detach()

    def forward(self, x):
        mse = self.loss(x, self.target)
        self.loss_value = mse
        return x

class StyleLoss(nn.Module):
    """Pass-through layer that records the MSE between Gram matrices.

    ``target`` is the Gram matrix of the style features at this depth. After
    each forward call the loss is available as ``loss_value``; the input is
    returned unchanged so the layer can sit inside an nn.Sequential.
    """

    def __init__(self, target):
        super().__init__()
        # Detached so the target acts as a constant. It stays on whatever
        # device it was computed on; forward() moves it if the input lives
        # elsewhere, so the layer does not depend on a module-level `device`.
        self.target = target.detach()
        self.loss = nn.MSELoss()

    @staticmethod
    def gram_matrix(x):
        """Return the normalised Gram matrix of a (batch, channels, h, w) tensor.

        The batch and channel dimensions are flattened together, so the result
        has shape (batch * channels, batch * channels). For batch size 1 this
        is the (channels, channels) matrix divided by channels * h * w.
        """
        b, c, h, w = x.size()
        # reshape (not view) so non-contiguous inputs are accepted as well.
        features = x.reshape(b * c, h * w)
        return torch.mm(features, features.t()) / (b * c * h * w)

    def forward(self, x):
        G = self.gram_matrix(x)
        if self.target.device != G.device:
            # Move once and keep it, so later calls skip the transfer.
            self.target = self.target.to(G.device)
        self.loss_value = self.loss(G, self.target)
        return x

def get_style_model_and_losses(cnn, normalization_mean, normalization_std, style_img, content_img,
                               style_layers=None, content_layers=None):
    """
    Assemble a feature extractor with content/style loss layers inserted.

    Walks the children of ``cnn`` in order, copies Conv2d / ReLU / MaxPool2d /
    BatchNorm2d layers into a new nn.Sequential (other layer types are
    skipped), and inserts a ContentLoss / StyleLoss module right after each
    requested convolution. Targets are computed by running ``content_img`` /
    ``style_img`` through the model built so far.

    Args:
        cnn: Module whose direct children are the layers to copy.
        normalization_mean, normalization_std: Passed to transforms.Normalize,
            which is the first layer of the returned model.
        style_img: Image tensor that the style targets are computed from.
        content_img: Image tensor that the content targets are computed from.
        style_layers: Names (``conv_<n>``) after which a StyleLoss is added.
            Defaults to conv_1 .. conv_5.
        content_layers: Names after which a ContentLoss is added.
            Defaults to conv_4.

    Returns:
        ``(model, style_losses, content_losses)``. The model stops at the
        deepest requested layer: anything after the last loss module cannot
        influence a loss, so it is left out instead of being run on every
        forward pass.
    """
    if style_layers is None:
        style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']
    if content_layers is None:
        content_layers = ['conv_4']

    normalization = nn.Sequential(
        transforms.Normalize(normalization_mean, normalization_std)
    ).to(device)

    model = nn.Sequential(normalization)
    content_losses = []
    style_losses = []

    # Layer names that still need a loss module; once empty we can stop.
    pending = set(style_layers) | set(content_layers)

    i = 0  # index of the most recent convolution; also used to name its relu/pool/bn
    for layer in cnn.children():
        if not pending:
            break

        if isinstance(layer, nn.Conv2d):
            i += 1
            name = f"conv_{i}"
        elif isinstance(layer, nn.ReLU):
            name = f"relu_{i}"
            # Use an out-of-place ReLU: an in-place one would overwrite the
            # activations that the loss layer just before it was computed on.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = f"pool_{i}"
        elif isinstance(layer, nn.BatchNorm2d):
            name = f"bn_{i}"
        else:
            continue

        model.add_module(name, layer)

        if name in content_layers:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module(f"content_loss_{i}", content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            target_feature = model(style_img).detach()
            target_gram = StyleLoss.gram_matrix(target_feature)
            style_loss = StyleLoss(target_gram)
            model.add_module(f"style_loss_{i}", style_loss)
            style_losses.append(style_loss)

        pending.discard(name)

    return model, style_losses, content_losses


In [59]:
def run_style_transfer(
    cnn,
    normalization_mean,
    normalization_std,
    content_img,
    style_img,
    input_img,
    num_steps=300,
    style_weight=1e6,
    content_weight=1,
    device=None,  # Add device parameter
    log_dir='./runs/style_transfer'
):
    """
    Optimise ``input_img`` with L-BFGS so it matches the content and style targets.

    Args:
        cnn: Feature network handed to get_style_model_and_losses.
        normalization_mean, normalization_std: Tensors used to normalise the input.
        content_img, style_img: Target images.
        input_img: Starting image; it is the tensor being optimised.
        num_steps: Optimisation runs while the number of closure evaluations
            is <= this value. One optimizer.step() may evaluate the closure
            several times, so the final count can exceed it.
        style_weight, content_weight: Multipliers of the two loss terms.
        device: Device all tensors are moved to; defaults to CUDA when
            available, otherwise CPU.
        log_dir: Directory the TensorBoard scalars are written to.

    Returns:
        The optimised image tensor, clamped to [0, 1].

    Note:
        The parameters of the layers taken from ``cnn`` are set to
        ``requires_grad=False``. Only the image is optimised, and leaving the
        weights trainable makes every backward pass compute and accumulate
        gradients that nothing uses.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Explicit device placement
    style_img = style_img.to(device)
    content_img = content_img.to(device)
    input_img = input_img.to(device)
    normalization_mean = normalization_mean.to(device)
    normalization_std = normalization_std.to(device)

    model, style_losses, content_losses = get_style_model_and_losses(
        cnn, normalization_mean, normalization_std, style_img, content_img
    )
    model.requires_grad_(False)

    input_img = input_img.requires_grad_()
    optimizer = optim.LBFGS([input_img])

    # One-element list so the closure can update the counter in place.
    run = [0]

    writer = SummaryWriter(log_dir)
    try:
        while run[0] <= num_steps:
            def closure():
                # Keep the image in the valid range before each evaluation.
                with torch.no_grad():
                    input_img.clamp_(0, 1)

                optimizer.zero_grad()
                # The forward pass is run for its side effect: every loss
                # layer stores its current loss_value.
                model(input_img)

                style_score = torch.tensor(0., device=device)
                content_score = torch.tensor(0., device=device)

                if style_losses:
                    style_score = sum(sl.loss_value for sl in style_losses)
                if content_losses:
                    content_score = sum(cl.loss_value for cl in content_losses)

                total_loss = style_weight * style_score + content_weight * content_score
                total_loss.backward()

                writer.add_scalar('Loss/Style', style_score.item(), run[0])
                writer.add_scalar('Loss/Content', content_score.item(), run[0])
                writer.add_scalar('Loss/Total', total_loss.item(), run[0])

                run[0] += 1
                return total_loss

            optimizer.step(closure)

        with torch.no_grad():
            input_img.clamp_(0, 1)
    finally:
        # Close the event file even when optimisation raises.
        writer.close()

    return input_img

def nst_signature_forgery(
    content_dir, 
    style_dataset, 
    save_dir, 
    cnn, 
    normalization_mean, 
    normalization_std,
    device,
    imsize=128,
    num_steps=300,
    num_images=50,
    style_weight=1e5,
    content_weight=1
):
    """
    Generate synthetic forgeries by style-transferring signature images.

    For up to ``num_images`` image files in ``content_dir`` (taken in sorted
    filename order), a random item of ``style_dataset`` is used as the style
    image and the result is saved as ``forged_<original name>`` in ``save_dir``.

    Args:
        content_dir: Directory holding the content images (jpg / png / jpeg).
        style_dataset: Indexable dataset whose items are ``(image_tensor, label)``.
        save_dir: Output directory; created when missing.
        cnn, normalization_mean, normalization_std: Forwarded to run_style_transfer.
        device: Device the tensors are placed on.
        imsize: Size forwarded to image_loader.
        num_steps, style_weight, content_weight: Forwarded to run_style_transfer.
        num_images: Maximum number of content images to process.

    Returns:
        List of paths of the files that were written. Files that fail are
        reported on stdout and skipped.

    NOTE(review): ``image_loader`` is not defined in the cells visible here; it
    must be defined elsewhere in the notebook before this function is called.
    """
    os.makedirs(save_dir, exist_ok=True)
    generated_forgeries = []
    processed_count = 0

    # Sort before slicing: os.listdir order is arbitrary, so without it the
    # subset picked by [:num_images] can differ between machines and runs.
    content_files = sorted(
        f for f in os.listdir(content_dir) if f.lower().endswith(("jpg", "png", "jpeg"))
    )[:num_images]
    # Progress is reported against what is actually available, which can be
    # fewer than num_images.
    total = len(content_files)

    for content_file in content_files:
        content_img_path = os.path.join(content_dir, content_file)

        try:
            content_img = image_loader(content_img_path, device, imsize)
            idx = random.randrange(len(style_dataset))
            style_img, _ = style_dataset[idx]
            style_img = style_img.unsqueeze(0).to(device, torch.float)

            # Start from the content image plus a little Gaussian noise.
            input_img = content_img.clone() + torch.randn_like(content_img) * 0.1

            output = run_style_transfer(
                cnn,
                normalization_mean,
                normalization_std,
                content_img,
                style_img,
                input_img,
                num_steps=num_steps,
                style_weight=style_weight,
                content_weight=content_weight,
                device=device
            )

            output_path = os.path.join(save_dir, f"forged_{content_file}")
            save_image(output, output_path)

            generated_forgeries.append(output_path)
            processed_count += 1
            print(f"Generated {processed_count}/{total}: {content_file}")

        except Exception as e:
            # Best effort: one bad file must not abort the whole batch.
            print(f"Failed to process {content_file}: {str(e)}")

    return generated_forgeries

def save_image(tensor, filename):
    """Write an image tensor to ``filename`` through PIL.

    The tensor is copied to the CPU, a leading dimension of size 1 is squeezed
    away, and the result is converted with transforms.ToPILImage before being
    saved. Pixel values are passed to the converter as they are.
    """
    to_pil = transforms.ToPILImage()
    cpu_copy = tensor.cpu().clone().squeeze(0)
    to_pil(cpu_copy).save(filename)

In [60]:
class ArtBench10(CIFAR10):
    """CIFAR10 subclass whose class attributes point at the ArtBench-10 archive.

    Only class-level constants are overridden here (folder name, download URL,
    archive name, batch file names and their md5 checksums); all loading
    behaviour is inherited from CIFAR10.
    """

    # Folder name, download location and checksum of the archive.
    base_folder = "artbench-10-batches-py"
    url = "https://artbench.eecs.berkeley.edu/files/artbench-10-python.tar.gz"
    filename = "artbench-10-python.tar.gz"
    tgz_md5 = "9df1e998ee026aae36ec60ca7b44960e"
    # [file name, md5] pairs of the training batches.
    train_list = [
        ["data_batch_1", "c2e02a78dcea81fe6fead5f1540e542f"],
        ["data_batch_2", "1102a4dcf41d4dd63e20c10691193448"],
        ["data_batch_3", "177fc43579af15ecc80eb506953ec26f"],
        ["data_batch_4", "566b2a02ccfbafa026fbb2bcec856ff6"],
        ["data_batch_5", "faa6a572469542010a1c8a2a9a7bf436"],
    ]

    # [file name, md5] pair of the test batch.
    test_list = [
        ["test_batch", "fa44530c8b8158467e00899609c19e52"],
    ]
    # Metadata file; "styles" is the key looked up in it
    # (presumably the list of class names; verify against CIFAR10's loader).
    meta = {
        "filename": "meta",
        "key": "styles",
        "md5": "5bdcafa7398aa6b75d569baaec5cd4aa",
    }

In [61]:
def generate_forgeries(
    num_images=5,
    content_dir="./signature_dataset/full_org/",
    save_dir="./synthetic_forged/",
    style_weight=1e5,
    num_steps=500,
    imsize=128,
    style_root="artbench10-py"
):
    """
    Set up VGG-19 and the ArtBench-10 style set, then generate forgeries.

    Args:
        num_images: Maximum number of content images to process.
        content_dir: Directory holding the genuine signature images.
        save_dir: Directory the generated images are written to.
        style_weight: Weight of the style term in the optimisation.
        num_steps: Optimisation step budget per image.
        imsize: Side length style images are resized to; the same value is
            forwarded for the content images so both agree.
        style_root: Root directory of the already-downloaded ArtBench-10 data
            (``download=False``, so nothing is fetched).

    Returns:
        List of paths of the generated files.
    """
    # Initialize model and parameters
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load VGG model. `pretrained=True` is deprecated in recent torchvision in
    # favour of the weights enum; fall back to it only when the enum is absent.
    if hasattr(models, "VGG19_Weights"):
        vgg = models.vgg19(weights=models.VGG19_Weights.DEFAULT)
    else:
        vgg = models.vgg19(pretrained=True)
    cnn = vgg.features.to(device).eval()
    # The network is only a fixed feature extractor here; freezing it avoids
    # computing weight gradients during the image optimisation.
    cnn.requires_grad_(False)

    # Normalization parameters
    cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
    cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)

    # Load style dataset
    style_dataset = ArtBench10(
        root=style_root,
        train=True,
        transform=transforms.Compose([
            transforms.Resize((imsize, imsize)),
            transforms.ToTensor(),
        ]),
        download=False
    )

    # Generate forgeries WITH NUM_IMAGES CONTROL
    generated_forgeries = nst_signature_forgery(
        content_dir, 
        style_dataset, 
        save_dir, 
        cnn, 
        cnn_normalization_mean, 
        cnn_normalization_std,
        device,
        imsize=imsize,  # previously relied on the callee default matching
        num_steps=num_steps,
        num_images=num_images,
        style_weight=style_weight  # Pass explicitly
    )

    print(f"\nGenerated {len(generated_forgeries)}/{num_images} synthetic forgeries in {save_dir}")
    return generated_forgeries

# Execute with controlled output: process at most 5 content images and keep
# the list of written file paths returned by generate_forgeries.
generated_files = generate_forgeries(num_images=5)

Generated 1/5: original_10_1.png
Generated 2/5: original_10_10.png
Generated 3/5: original_10_11.png
Generated 4/5: original_10_12.png
Generated 5/5: original_10_13.png

Generated 5/5 synthetic forgeries in ./synthetic_forged/


In [74]:
def evaluate_forgeries(
    model_path="svm_signature_classifier.pkl",
    forged_dir="./synthetic_forged/"
):
    """
    Classify every image in ``forged_dir`` with the saved SVM and print the results.

    Args:
        model_path: Path of the classifier saved with joblib.
        forged_dir: Directory of images to evaluate (png / jpg / jpeg).

    Returns:
        List of dicts with keys ``"file"`` (base name), ``"prediction"``
        (0 = forged, 1 = real) and ``"probabilities"`` (the predict_proba row,
        ordered like ``clf.classes_``). Unreadable files are skipped.
    """
    # Load the trained classifier.
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files from a trusted source.
    clf = joblib.load(model_path)

    # predict_proba columns follow clf.classes_; look the positions up instead
    # of assuming which column belongs to which label.
    class_order = list(clf.classes_)
    fake_idx = class_order.index(0)
    real_idx = class_order.index(1)

    # Get list of generated forgeries (sorted for a stable output order)
    forged_files = [
        os.path.join(forged_dir, f)
        for f in sorted(os.listdir(forged_dir))
        if f.lower().endswith(("png", "jpg", "jpeg"))
    ]

    results = []
    for forgery_path in forged_files:
        # Directly pass the file path to extract_features
        synthetic_features = extract_features(forgery_path)

        # If extract_features returned None (e.g., couldn't load), skip
        if synthetic_features is None:
            print(f"Warning: Could not process {forgery_path}")
            continue

        # Reshape for SVM: a single sample must be a 2-D (1, n_features) array
        synthetic_features = synthetic_features.reshape(1, -1)

        # Predict with SVM
        prediction = clf.predict(synthetic_features)
        probabilities = clf.predict_proba(synthetic_features)

        # Save result
        results.append({
            "file": os.path.basename(forgery_path),
            "prediction": prediction[0],
            "probabilities": probabilities[0]
        })

    # Print results
    for res in results:
        label_str = "Forged" if res['prediction'] == 0 else "Real"
        fake_prob = res['probabilities'][fake_idx]
        real_prob = res['probabilities'][real_idx]
        print(f"\n=== {res['file']} ===")
        print(f"Prediction: {label_str}")
        print(f"Probabilities: [Fake: {fake_prob:.2f}, Real: {real_prob:.2f}]")

    return results
# Evaluate everything in the default forged_dir with the saved classifier and
# keep the per-file results.
evaluation_results = evaluate_forgeries()



=== forged_original_10_1.png ===
Prediction: Real
Probabilities: [Fake: 0.00, Real: 1.00]

=== forged_original_10_10.png ===
Prediction: Real
Probabilities: [Fake: 0.00, Real: 1.00]

=== forged_original_10_11.png ===
Prediction: Real
Probabilities: [Fake: 0.01, Real: 0.99]

=== forged_original_10_12.png ===
Prediction: Real
Probabilities: [Fake: 0.00, Real: 1.00]

=== forged_original_10_13.png ===
Prediction: Real
Probabilities: [Fake: 0.00, Real: 1.00]

=== forgeries_1_24.png ===
Prediction: Forged
Probabilities: [Fake: 1.00, Real: 0.00]

=== original_1_1.png ===
Prediction: Real
Probabilities: [Fake: 0.00, Real: 1.00]
