In [1]:
# Signature Detection using FastAI by @madferreiro
# ==================================================

# Libraries
from fastai.vision.all import *
from pathlib import Path

In [2]:
# Config variables
data_path = Path('/kaggle/input/testdataset2/data')
os.listdir(data_path)
print(list(data_path.glob('*/*')))  # List all image file paths

[Path('/kaggle/input/testdataset2/data/person2/signature1.png'), Path('/kaggle/input/testdataset2/data/person2/signature2.png'), Path('/kaggle/input/testdataset2/data/person2/signature3.png'), Path('/kaggle/input/testdataset2/data/person2/signature4.png'), Path('/kaggle/input/testdataset2/data/person1/signature1.png'), Path('/kaggle/input/testdataset2/data/person1/signature2.png'), Path('/kaggle/input/testdataset2/data/person1/signature3.png'), Path('/kaggle/input/testdataset2/data/person1/signature4.png')]


In [3]:
# 1. DATA LOAD AND PREP
# ===============================

# Function to get all signature paths
# This assumes the folder structure as below
#/data/
#    /person1/
#        signature1.jpg
#        signature2.jpg
#    /person2/
#        signature1.jpg
#        signature2.jpg

# Create DataBlock
dblock = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,  # Automatically fetch all image files
    splitter=RandomSplitter(valid_pct=0.3, seed=42),  # 70-30 train-validation split
    get_y=parent_label,  # Use folder name as label
    item_tfms=Resize(224),  # Resize images to 224x224
    batch_tfms=aug_transforms(mult=1.0)  # Apply basic augmentations
)

# Create DataLoaders
dls = dblock.dataloaders(data_path, bs=3)

In [4]:
# 2. MODEL DESIGN: SIAMESE NETWORK
# =================================

# Define the SiameseModel
# I use a siamese network since its recomended for face recognition and signature is a similar problem. 
class SignatureModel(Module):
    def __init__(self, num_classes, embedding_dim=128):
        self.cnn = create_vision_model(resnet34, n_out=embedding_dim, pretrained=True)
        self.classifier = nn.Linear(embedding_dim, num_classes)

    def forward(self, x):
        embeddings = self.cnn(x)  # Extract embeddings
        logits = self.classifier(embeddings)  # Classify using embeddings
        return embeddings, logits  # Return both for use in loss and metrics

# Define the loss function combining Triplet Loss and Cross Entropy
class TripletCrossEntropyLoss(Module):
    def __init__(self, margin=1.0):
        self.margin = margin
        self.ce_loss = CrossEntropyLossFlat()

    def forward(self, outputs, targets):
        embeddings, logits = outputs  # Unpack tuple
        ce_loss = self.ce_loss(logits, targets)  # Cross-entropy loss

        # Generate triplets for triplet loss
        anchor_embeddings = embeddings
        positive_embeddings = embeddings[targets]
        negative_embeddings = embeddings[torch.roll(targets, 1, dims=0)]
        triplet_loss = F.triplet_margin_loss(anchor_embeddings, positive_embeddings, negative_embeddings, margin=self.margin)

        return ce_loss + triplet_loss
        
class LogitsAccuracy(Metric):
    def __init__(self):
        self.correct = 0
        self.total = 0

    def reset(self):
        self.correct = 0
        self.total = 0

    def accumulate(self, learn):
        # Extract logits (second element of the output tuple)
        _, logits = learn.pred
        preds = logits.argmax(dim=-1)
        self.correct += (preds == learn.y).sum().item()
        self.total += len(learn.y)

    @property
    def value(self):
        return self.correct / self.total if self.total > 0 else None

In [5]:
# 4. TRAINING LOOP
# =================

# Create the Learner
loss_func = TripletCrossEntropyLoss()
metrics = [LogitsAccuracy()]  # Use the fixed custom metric

learn = Learner(
    dls,
    model=SignatureModel(num_classes=len(dls.vocab), embedding_dim=128),
    loss_func=loss_func,
    metrics=metrics,
    opt_func=Adam
)

# Train the model
learn.fine_tune(5)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 155MB/s]


epoch,train_loss,valid_loss,logits_accuracy,time
0,20.507938,1.205404,0.5,00:03


epoch,train_loss,valid_loss,logits_accuracy,time
0,1.37487,1.044712,0.5,00:03
1,6.78665,0.318696,1.0,00:03
2,9.102486,1.625116,0.5,00:03
3,8.460338,0.78884,0.5,00:03
4,8.167493,0.416007,0.5,00:03


In [6]:
# 5. INFERENCE
# ============
# Create a mapping from indices to labels
idx_to_label = {v: k for k, v in dls.vocab.o2i.items()}

# Function to detect owner given a new signature
def detect_owner(signature_path, learn):
    signature_img = PILImage.create(signature_path) # Load the signature image
    test_dl = dls.test_dl([signature_img]) # Create a DataLoader for the test image
    preds, _ = learn.get_preds(dl=test_dl) # Get predictions
    logits = preds[1]  # Extract logits from the tuple (embeddings, logits)
    owner = idx_to_label[logits.argmax().item()] # Find the index with the highest probability
    return owner

# Example inference
sample_signature = '/kaggle/input/testdataset2/data/person2/signature1.png'  # Path to test signature
owner = detect_owner(sample_signature, learn)
print(f"The detected owner is: {owner}")

The detected owner is: person2
