In [None]:
#unfiltered dataset
import os
import random
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Step 1: Paths
# DATA_DIR is the dataset root handed to the splitter; MODEL_PATH is where
# the trained weights are written.
DATA_DIR = "/content/drive/MyDrive/data"
MODEL_PATH = "writer_id_model.pth"

# Step 2: Device
# Use CUDA when torch reports it as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("🖥️ Using device:", device)

# Step 3: Model with pretrained weights
def build_model(num_classes):
    """Build a ResNet-18 classifier that accepts single-channel images.

    The network is loaded with the ImageNet weights, its first convolution is
    swapped for a one-channel version, and its final fully connected layer is
    replaced by a fresh ``nn.Linear`` with ``num_classes`` outputs.

    Args:
        num_classes: Number of output classes for the new ``fc`` layer.

    Returns:
        The model, moved to the module-level ``device``.
    """
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

    # A freshly constructed conv would be randomly initialised and throw away
    # the pretrained stem that every later pretrained layer was trained on.
    # Summing the RGB kernels over the input-channel axis keeps the response
    # the pretrained stem would give for an image with R == G == B.
    pretrained_conv = model.conv1
    gray_conv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)  # For grayscale
    with torch.no_grad():
        gray_conv.weight.copy_(pretrained_conv.weight.sum(dim=1, keepdim=True))
    model.conv1 = gray_conv

    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model.to(device)

# Step 4: Dataset Preparation — per-writer split
def split_dataset_per_writer(root_dir, transform, split_ratio=0.8):
    """Split every writer's images into a train part and a test part.

    Each sub-directory of ``root_dir`` is treated as one writer (class). The
    images of each writer are shuffled with the ``random`` module and the
    first ``split_ratio`` fraction goes to the training set, the rest to the
    test set.

    Args:
        root_dir: Directory containing one sub-directory per writer.
        transform: Transform passed to both returned datasets.
        split_ratio: Fraction of each writer's images used for training.

    Returns:
        A tuple ``(train_dataset, test_dataset, class_names, test_samples)``
        where ``class_names`` is the sorted list of writer directory names and
        ``test_samples`` is the list of ``(path, class_index)`` pairs backing
        ``test_dataset``.
    """
    image_suffixes = ('.jpg', '.png', '.jpeg')

    # Only directories are writers; a stray file in the root would otherwise
    # become a class and make os.listdir() raise below.
    class_names = sorted(
        entry for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
    )
    class_to_idx = {cls_name: i for i, cls_name in enumerate(class_names)}
    train_samples, test_samples = [], []

    for writer in class_names:
        writer_dir = os.path.join(root_dir, writer)
        # Sort before shuffling so a seeded `random` gives the same split
        # regardless of the order the filesystem lists files in.
        images = [
            os.path.join(writer_dir, f)
            for f in sorted(os.listdir(writer_dir))
            if f.lower().endswith(image_suffixes)  # also accept .PNG / .JPG
        ]
        random.shuffle(images)
        split = int(len(images) * split_ratio)
        if split_ratio > 0:
            # A writer with very few images would otherwise end up only in the
            # test set, i.e. be evaluated without ever being trained on.
            split = max(split, 1)
        label = class_to_idx[writer]
        train_samples += [(img, label) for img in images[:split]]
        test_samples += [(img, label) for img in images[split:]]

    return CustomImageDataset(train_samples, transform), CustomImageDataset(test_samples, transform), class_names, test_samples

# Step 5: Custom Dataset
class CustomImageDataset(torch.utils.data.Dataset):
    """Dataset over a list of ``(image_path, label)`` pairs.

    Images are opened lazily in ``__getitem__`` and converted to 8-bit
    grayscale (PIL mode ``"L"``) before the optional transform is applied.
    """

    def __init__(self, samples, transform=None):
        """Store the samples and the optional per-image transform.

        Args:
            samples: Sequence of ``(path, label)`` pairs.
            transform: Optional callable applied to the grayscale PIL image.
        """
        self.samples = samples
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        path, label = self.samples[idx]
        # convert() returns an independent image, so the file handle can be
        # released right away instead of waiting for garbage collection.
        with Image.open(path) as source:
            image = source.convert("L")
        # Compare against None: a valid but "empty" callable container would
        # be falsy and silently skipped by a plain truthiness check.
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# Step 6: Train
def train_model(train_loader, num_classes, epochs=50, lr=1e-4):
    """Train a fresh model and save its weights to ``MODEL_PATH``.

    A model from ``build_model`` is optimised with Adam against a
    label-smoothed cross-entropy loss. After each epoch the sum of the
    per-batch losses is printed. The state dict is saved once training ends.

    Args:
        train_loader: Iterable yielding ``(images, labels)`` batches.
        num_classes: Number of classes passed to ``build_model``.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for Adam.

    Returns:
        The trained model.
    """
    net = build_model(num_classes)
    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
    adam = optim.Adam(net.parameters(), lr=lr)

    for epoch_no in range(1, epochs + 1):
        net.train()
        running_loss = 0
        progress = tqdm(train_loader, desc=f"Epoch {epoch_no}/{epochs}")
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            adam.zero_grad()
            batch_loss = loss_fn(net(batch_images), batch_labels)
            batch_loss.backward()
            adam.step()

            # Accumulated as a plain sum over batches, not a mean.
            running_loss += batch_loss.item()
        print(f"📉 Epoch {epoch_no} Loss: {running_loss:.4f}")

    torch.save(net.state_dict(), MODEL_PATH)
    print("✅ Model saved:", MODEL_PATH)
    return net

# Step 7: Evaluate
def evaluate_model(model, test_loader):
    """Print and return top-1 accuracy of ``model`` over ``test_loader``.

    The model is switched to eval mode and run without gradient tracking.

    Args:
        model: Network mapping an image batch to per-class scores.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy in percent, or ``0.0`` when ``test_loader`` yields no
        samples (a warning is printed instead of the accuracy line).
    """
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        # Avoid ZeroDivisionError when the test split turned out empty.
        print("⚠️ Test set is empty; nothing to evaluate.")
        return 0.0

    accuracy = correct / total * 100
    print(f"✅ Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Step 8: Predict
def predict_random_test_image(model, test_samples, class_names, transform):
    """Classify one randomly chosen test sample and print the outcome.

    Args:
        model: Network mapping an image batch to per-class scores.
        test_samples: Sequence of ``(path, class_index)`` pairs to draw from.
        class_names: Sequence mapping a class index to its writer name.
        transform: Callable turning a grayscale PIL image into a tensor.
    """
    sample_path, actual_idx = random.choice(test_samples)

    grayscale = Image.open(sample_path).convert("L")
    # unsqueeze(0) adds the leading batch dimension of size one.
    batch = transform(grayscale).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        scores = model(batch)
        predicted_idx = scores.argmax(dim=1).item()

    predicted_name = class_names[predicted_idx]
    actual_name = class_names[actual_idx]
    file_name = os.path.basename(sample_path)
    print(f"\n🖼️ Predicted Writer: {predicted_name}")
    print(f"✅ Actual Writer:    {actual_name}")
    print(f"📂 File: {file_name}")

# Step 9: Run
def main():
    """Split the data per writer, train, evaluate and show one prediction.

    Random augmentation (flip, rotation) is applied to training images only.
    The test set and the single-image prediction use a deterministic
    resize/normalise pipeline so the reported accuracy does not depend on
    random augmentation draws.
    """
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    eval_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    train_ds, test_ds, class_names, test_samples = split_dataset_per_writer(DATA_DIR, train_transform)
    # The splitter gives both datasets the same transform; swap the test one
    # for the augmentation-free pipeline.
    test_ds.transform = eval_transform

    train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=16, shuffle=False)

    model = train_model(train_loader, num_classes=len(class_names), epochs=50, lr=1e-4)
    evaluate_model(model, test_loader)
    predict_random_test_image(model, test_samples, class_names, eval_transform)


if __name__ == "__main__":
    main()


🖥️ Using device: cuda


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 177MB/s]
Epoch 1/50: 100%|██████████| 53/53 [01:10<00:00,  1.33s/it]


📉 Epoch 1 Loss: 330.9981


Epoch 2/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 2 Loss: 278.0735


Epoch 3/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 3 Loss: 247.3800


Epoch 4/50: 100%|██████████| 53/53 [01:07<00:00,  1.26s/it]


📉 Epoch 4 Loss: 224.1148


Epoch 5/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 5 Loss: 202.4938


Epoch 6/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 6 Loss: 186.3491


Epoch 7/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 7 Loss: 171.6436


Epoch 8/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 8 Loss: 157.3038


Epoch 9/50: 100%|██████████| 53/53 [01:09<00:00,  1.31s/it]


📉 Epoch 9 Loss: 145.3284


Epoch 10/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 10 Loss: 138.5014


Epoch 11/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 11 Loss: 128.7659


Epoch 12/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 12 Loss: 114.8371


Epoch 13/50: 100%|██████████| 53/53 [01:06<00:00,  1.26s/it]


📉 Epoch 13 Loss: 111.4543


Epoch 14/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 14 Loss: 105.4068


Epoch 15/50: 100%|██████████| 53/53 [01:07<00:00,  1.26s/it]


📉 Epoch 15 Loss: 97.2125


Epoch 16/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 16 Loss: 91.5564


Epoch 17/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 17 Loss: 89.1958


Epoch 18/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 18 Loss: 84.6637


Epoch 19/50: 100%|██████████| 53/53 [01:08<00:00,  1.30s/it]


📉 Epoch 19 Loss: 84.1990


Epoch 20/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 20 Loss: 76.3286


Epoch 21/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 21 Loss: 74.1647


Epoch 22/50: 100%|██████████| 53/53 [01:07<00:00,  1.26s/it]


📉 Epoch 22 Loss: 70.4612


Epoch 23/50: 100%|██████████| 53/53 [01:08<00:00,  1.28s/it]


📉 Epoch 23 Loss: 70.0201


Epoch 24/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 24 Loss: 68.4598


Epoch 25/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 25 Loss: 66.6794


Epoch 26/50: 100%|██████████| 53/53 [01:07<00:00,  1.26s/it]


📉 Epoch 26 Loss: 66.5441


Epoch 27/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 27 Loss: 66.3502


Epoch 28/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 28 Loss: 65.0952


Epoch 29/50: 100%|██████████| 53/53 [01:07<00:00,  1.26s/it]


📉 Epoch 29 Loss: 64.6833


Epoch 30/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 30 Loss: 64.0297


Epoch 31/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 31 Loss: 63.1153


Epoch 32/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 32 Loss: 63.4145


Epoch 33/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 33 Loss: 63.1432


Epoch 34/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 34 Loss: 60.7572


Epoch 35/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 35 Loss: 61.9915


Epoch 36/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 36 Loss: 60.5402


Epoch 37/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 37 Loss: 61.4156


Epoch 38/50: 100%|██████████| 53/53 [01:09<00:00,  1.32s/it]


📉 Epoch 38 Loss: 61.5982


Epoch 39/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 39 Loss: 61.9691


Epoch 40/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 40 Loss: 60.6198


Epoch 41/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 41 Loss: 62.5544


Epoch 42/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 42 Loss: 62.0561


Epoch 43/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 43 Loss: 60.5431


Epoch 44/50: 100%|██████████| 53/53 [01:07<00:00,  1.26s/it]


📉 Epoch 44 Loss: 60.8718


Epoch 45/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 45 Loss: 61.1190


Epoch 46/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 46 Loss: 62.0247


Epoch 47/50: 100%|██████████| 53/53 [01:07<00:00,  1.28s/it]


📉 Epoch 47 Loss: 63.5571


Epoch 48/50: 100%|██████████| 53/53 [01:08<00:00,  1.29s/it]


📉 Epoch 48 Loss: 60.5348


Epoch 49/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 49 Loss: 58.8951


Epoch 50/50: 100%|██████████| 53/53 [01:07<00:00,  1.27s/it]


📉 Epoch 50 Loss: 60.4404
✅ Model saved: writer_id_model.pth
✅ Test Accuracy: 31.73%

🖼️ Predicted Writer: 155
✅ Actual Writer:    155
📂 File: c03-081f.png


In [None]:
#filtered dataset
import os
import random
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler, random_split
from torchvision import transforms, models
from tqdm import tqdm

class FilteredWriterDataset(Dataset):
    """Image dataset keeping only writers that have enough images.

    Each sub-directory of ``root_dir`` is one writer (class). A writer is kept
    when its directory holds at least ``min_images`` image files. Images are
    opened lazily and converted to grayscale (PIL mode ``"L"``).

    Attributes set by ``__init__``: ``class_names`` (sorted kept writer
    names), ``class_to_idx`` (name -> index) and ``samples`` (list of
    ``(path, class_index)`` pairs).
    """

    # File extensions recognised as images (matched case-insensitively).
    IMAGE_SUFFIXES = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None, min_images=10):
        """Scan ``root_dir`` and index the images of every kept writer.

        Args:
            root_dir: Directory containing one sub-directory per writer.
            transform: Optional callable applied to each grayscale image.
            min_images: Minimum number of image files a writer needs.
        """
        self.transform = transform

        # Sorted traversal keeps class indices and sample order reproducible
        # regardless of the order the filesystem lists entries in.
        images_by_writer = {}
        for entry in sorted(os.listdir(root_dir)):
            folder = os.path.join(root_dir, entry)
            if not os.path.isdir(folder):
                # Stray files in the root are not writers.
                continue
            images = sorted(
                name for name in os.listdir(folder)
                if name.lower().endswith(self.IMAGE_SUFFIXES)
            )
            # Count actual images, not every directory entry, so a kept
            # writer is guaranteed to contribute at least min_images samples.
            if len(images) >= min_images:
                images_by_writer[entry] = [os.path.join(folder, name) for name in images]

        self.class_names = list(images_by_writer)
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.class_names)}
        self.samples = [
            (path, self.class_to_idx[cls])
            for cls in self.class_names
            for path in images_by_writer[cls]
        ]

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        path, label = self.samples[idx]
        # convert() returns an independent image, so the file handle can be
        # closed immediately.
        with Image.open(path) as source:
            image = source.convert("L")
        if self.transform is not None:
            image = self.transform(image)
        return image, label

class CosineClassifier(nn.Module):
    """Classification head producing scaled cosine similarities.

    Both the input features and the per-class weight vectors are
    L2-normalised, so each logit is ``scale * cos(feature, class_weight)``.
    """

    def __init__(self, in_features, num_classes, scale=10.0):
        """Create the class-weight matrix.

        Args:
            in_features: Dimensionality of the incoming feature vectors.
            num_classes: Number of classes (rows of the weight matrix).
            scale: Factor applied to the cosine similarities. Kept as a plain
                attribute, so the state dict still only contains ``weight``.
        """
        super().__init__()
        self.scale = scale
        # torch.empty replaces the legacy torch.Tensor(n, m) constructor; the
        # values are overwritten by the Xavier initialisation right below.
        self.weight = nn.Parameter(torch.empty(num_classes, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for 2-D ``x``."""
        x = F.normalize(x, dim=1)
        w = F.normalize(self.weight, dim=1)
        return torch.matmul(x, w.t()) * self.scale


class WriterClassifier(nn.Module):
    """ResNet-18 feature extractor followed by a cosine classification head.

    The backbone is loaded with ImageNet weights, adapted to single-channel
    input, and has its final ``fc`` layer replaced by ``nn.Identity`` so it
    outputs feature vectors for ``CosineClassifier``.
    """

    def __init__(self, num_classes):
        """Build the backbone and the classification head.

        Args:
            num_classes: Number of writers to classify.
        """
        super().__init__()
        self.backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

        # A freshly constructed conv would be randomly initialised and throw
        # away the pretrained stem. Summing the RGB kernels over the
        # input-channel axis keeps the response the pretrained stem would
        # give for an image with R == G == B.
        pretrained_conv = self.backbone.conv1
        gray_conv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        with torch.no_grad():
            gray_conv.weight.copy_(pretrained_conv.weight.sum(dim=1, keepdim=True))
        self.backbone.conv1 = gray_conv

        # Read the feature width from the layer being removed rather than
        # hard-coding it.
        feature_dim = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        self.classifier = CosineClassifier(in_features=feature_dim, num_classes=num_classes)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        features = self.backbone(x)
        return self.classifier(features)

def train_model(model, loader, criterion, optimizer, device, epochs=70):
    """Run ``epochs`` passes of gradient training over ``loader``.

    The model is put into train mode once up front. After every epoch the sum
    of the per-batch loss values is printed.

    Args:
        model: Network to optimise in place.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(output, labels)``.
        optimizer: Optimiser bound to the model's parameters.
        device: Device the batches are moved to.
        epochs: Number of passes over ``loader``.
    """
    model.train()
    for epoch_no in range(1, epochs + 1):
        running_loss = 0
        progress = tqdm(loader, desc=f"Epoch {epoch_no}/{epochs}")
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_images), batch_labels)
            batch_loss.backward()
            optimizer.step()

            # Accumulated as a plain sum over batches, not a mean.
            running_loss += batch_loss.item()
        print(f"📉 Epoch {epoch_no} Loss: {running_loss:.4f}")


def evaluate(model, loader, device):
    """Print and return top-1 accuracy of ``model`` over ``loader``.

    The model is switched to eval mode and run without gradient tracking.

    Args:
        model: Network mapping an image batch to per-class scores.
        loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Accuracy in percent, or ``0.0`` when ``loader`` yields no samples
        (a warning is printed instead of the accuracy line).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        # Avoid ZeroDivisionError when the evaluation split is empty.
        print("⚠️ No samples to evaluate.")
        return 0.0

    acc = correct / total * 100
    print(f"🎯 Total Accuracy: {acc:.2f}%")
    return acc

def run_pipeline(data_dir):
    """Train and evaluate the cosine-head writer classifier on ``data_dir``.

    Writers with fewer than 5 images are filtered out, the remaining samples
    are split 80/20 at random, and the model is trained for 50 epochs. Random
    augmentation (flip, rotation) is applied to the training split only; the
    test split uses a deterministic resize/normalise pipeline so the reported
    accuracy does not depend on random augmentation draws. The trained
    weights are saved to ``writer_classifier_cosine.pth``.

    Args:
        data_dir: Directory containing one sub-directory per writer.
    """
    import copy

    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    eval_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    dataset = FilteredWriterDataset(data_dir, train_transform, min_images=5)
    print(f"🧾 Writers after filtering: {len(dataset.class_names)}")

    # Train/test split
    train_size = int(0.8 * len(dataset))
    test_size = len(dataset) - train_size
    train_ds, test_split = random_split(dataset, [train_size, test_size])

    # Re-point the held-out indices at a shallow copy of the dataset that
    # shares the sample list but applies the augmentation-free transform.
    eval_dataset = copy.copy(dataset)
    eval_dataset.transform = eval_transform
    test_ds = torch.utils.data.Subset(eval_dataset, test_split.indices)

    train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=16, shuffle=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = WriterClassifier(num_classes=len(dataset.class_names)).to(device)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    train_model(model, train_loader, criterion, optimizer, device, epochs=50)
    evaluate(model, test_loader, device)

    torch.save(model.state_dict(), "writer_classifier_cosine.pth")
    print("✅ Model saved.")


if __name__ == "__main__":
    run_pipeline("/content/drive/MyDrive/data")  # or your actual dataset path


🧾 Writers after filtering: 94


Epoch 1/50: 100%|██████████| 34/34 [00:42<00:00,  1.25s/it]


📉 Epoch 1 Loss: 152.2293


Epoch 2/50: 100%|██████████| 34/34 [00:42<00:00,  1.26s/it]


📉 Epoch 2 Loss: 133.7400


Epoch 3/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 3 Loss: 118.2322


Epoch 4/50: 100%|██████████| 34/34 [00:44<00:00,  1.31s/it]


📉 Epoch 4 Loss: 107.5771


Epoch 5/50: 100%|██████████| 34/34 [00:42<00:00,  1.26s/it]


📉 Epoch 5 Loss: 98.1402


Epoch 6/50: 100%|██████████| 34/34 [00:41<00:00,  1.23s/it]


📉 Epoch 6 Loss: 90.8059


Epoch 7/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 7 Loss: 83.7735


Epoch 8/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 8 Loss: 77.4287


Epoch 9/50: 100%|██████████| 34/34 [00:43<00:00,  1.29s/it]


📉 Epoch 9 Loss: 71.5851


Epoch 10/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 10 Loss: 65.5415


Epoch 11/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 11 Loss: 62.6328


Epoch 12/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 12 Loss: 58.4772


Epoch 13/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 13 Loss: 53.8903


Epoch 14/50: 100%|██████████| 34/34 [00:42<00:00,  1.26s/it]


📉 Epoch 14 Loss: 50.9874


Epoch 15/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 15 Loss: 48.3827


Epoch 16/50: 100%|██████████| 34/34 [00:41<00:00,  1.23s/it]


📉 Epoch 16 Loss: 46.0183


Epoch 17/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 17 Loss: 44.5621


Epoch 18/50: 100%|██████████| 34/34 [00:43<00:00,  1.28s/it]


📉 Epoch 18 Loss: 40.9699


Epoch 19/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 19 Loss: 38.9780


Epoch 20/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 20 Loss: 37.6482


Epoch 21/50: 100%|██████████| 34/34 [00:41<00:00,  1.23s/it]


📉 Epoch 21 Loss: 37.3112


Epoch 22/50: 100%|██████████| 34/34 [00:42<00:00,  1.25s/it]


📉 Epoch 22 Loss: 35.6680


Epoch 23/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 23 Loss: 34.9793


Epoch 24/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 24 Loss: 33.7629


Epoch 25/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 25 Loss: 33.0882


Epoch 26/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 26 Loss: 32.5056


Epoch 27/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 27 Loss: 31.1142


Epoch 28/50: 100%|██████████| 34/34 [00:42<00:00,  1.26s/it]


📉 Epoch 28 Loss: 30.9407


Epoch 29/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 29 Loss: 30.0808


Epoch 30/50: 100%|██████████| 34/34 [00:43<00:00,  1.28s/it]


📉 Epoch 30 Loss: 30.1789


Epoch 31/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 31 Loss: 29.6610


Epoch 32/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 32 Loss: 30.0566


Epoch 33/50: 100%|██████████| 34/34 [00:42<00:00,  1.26s/it]


📉 Epoch 33 Loss: 29.1341


Epoch 34/50: 100%|██████████| 34/34 [00:42<00:00,  1.25s/it]


📉 Epoch 34 Loss: 28.7008


Epoch 35/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 35 Loss: 28.9085


Epoch 36/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 36 Loss: 28.8656


Epoch 37/50: 100%|██████████| 34/34 [00:42<00:00,  1.25s/it]


📉 Epoch 37 Loss: 28.8036


Epoch 38/50: 100%|██████████| 34/34 [00:44<00:00,  1.30s/it]


📉 Epoch 38 Loss: 29.1012


Epoch 39/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 39 Loss: 28.9769


Epoch 40/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 40 Loss: 28.1066


Epoch 41/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 41 Loss: 27.8779


Epoch 42/50: 100%|██████████| 34/34 [00:42<00:00,  1.25s/it]


📉 Epoch 42 Loss: 27.7346


Epoch 43/50: 100%|██████████| 34/34 [00:42<00:00,  1.26s/it]


📉 Epoch 43 Loss: 27.5495


Epoch 44/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 44 Loss: 27.9538


Epoch 45/50: 100%|██████████| 34/34 [00:42<00:00,  1.25s/it]


📉 Epoch 45 Loss: 27.9659


Epoch 46/50: 100%|██████████| 34/34 [00:43<00:00,  1.27s/it]


📉 Epoch 46 Loss: 28.1343


Epoch 47/50: 100%|██████████| 34/34 [00:42<00:00,  1.26s/it]


📉 Epoch 47 Loss: 27.7927


Epoch 48/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 48 Loss: 27.5545


Epoch 49/50: 100%|██████████| 34/34 [00:42<00:00,  1.25s/it]


📉 Epoch 49 Loss: 27.3119


Epoch 50/50: 100%|██████████| 34/34 [00:42<00:00,  1.24s/it]


📉 Epoch 50 Loss: 27.4303
🎯 Total Accuracy: 94.07%
✅ Model saved.
