In [5]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q torchvision



In [6]:
import copy
import os
import zipfile

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from torchvision import transforms, models
from PIL import Image
from google.colab import files

In [17]:
# Prompt for a file upload via google.colab; the keys of the returned mapping
# are used as file names further down.
uploaded = files.upload()

Saving training_id.zip to training_id.zip


In [20]:
# Extract the uploaded archive into the current working directory.
if not uploaded:
    # Nothing came back from the upload step; fail with an actionable message
    # instead of a bare IndexError on the lookup below.
    raise ValueError("No file was uploaded; re-run the upload cell and select the dataset zip.")

# The first uploaded entry is treated as the dataset archive.
zip_name = next(iter(uploaded))

with zipfile.ZipFile(zip_name, 'r') as zip_ref:
    zip_ref.extractall()

print("Dataset Extracted Successfully")

Dataset Extracted Successfully


In [21]:
# Folder (relative to the working directory) that is expected to hold the images.
data_dir = "training_id"

# Raw directory entries; os.listdir makes no promise about their order.
all_files = os.listdir(data_dir)
file_count = len(all_files)
preview = all_files[:10]

print("Total Images:", file_count)
print("Sample Files:", preview)

Total Images: 20
Sample Files: ['pan_02.jpeg', 'pan_05.jpeg', 'adhaar_01.jpeg', 'pan_01.jpeg', 'adhaar_10.jpeg', 'pan_10.jpeg', 'pan_04.jpeg', 'adhaar_06.jpeg', 'adhaar_08.jpeg', 'pan_03.jpeg']


In [22]:
# Per-channel statistics handed to Normalize (these match the ImageNet values
# torchvision documents for its pretrained models).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Steps run in list order: resize, random flip/rotation, tensor conversion,
# then channel normalization.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD),
]
transform = transforms.Compose(preprocessing_steps)

In [23]:
class DocumentDataset(Dataset):
    """Image dataset whose labels are derived from each file's name.

    A file whose lowercased name contains "adhaar" gets label 0; otherwise a
    file whose name contains "pan" gets label 1.

    Args:
        folder_path: Directory that holds the image files.
        transform: Optional callable applied to each loaded RGB image.
    """

    # Only files with these extensions are indexed, so stray directory entries
    # (text files, checkpoint folders, ...) cannot break iteration later on.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")

    def __init__(self, folder_path, transform=None):
        self.folder_path = folder_path
        self.transform = transform
        # Sorted so that index -> file is stable between runs; os.listdir
        # returns entries in arbitrary order. Hidden files and directories
        # are skipped even if their name ends in an image extension.
        self.images = sorted(
            name
            for name in os.listdir(folder_path)
            if not name.startswith(".")
            and name.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder_path, name))
        )

    def __len__(self):
        """Return the number of indexed image files."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load one sample.

        Args:
            idx: Position in the sorted file list.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image after the
            optional transform and ``label`` is 0 (adhaar) or 1 (pan).

        Raises:
            ValueError: If the file name contains neither "adhaar" nor "pan".
        """
        img_name = self.images[idx]

        # Assign label based on filename (checked before any file I/O).
        lowered = img_name.lower()
        if "adhaar" in lowered:
            label = 0
        elif "pan" in lowered:
            label = 1
        else:
            raise ValueError("Unknown file name format")

        img_path = os.path.join(self.folder_path, img_name)
        # The context manager releases the file handle; convert() returns a
        # fully loaded image that remains usable after the file is closed.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

In [24]:
# Index the image folder and attach the preprocessing pipeline.
dataset = DocumentDataset(folder_path=data_dir, transform=transform)

num_loaded = len(dataset)
print("Total Images Loaded:", num_loaded)

Total Images Loaded: 20


In [25]:
# Fixed seed so the split picks the same indices on every run.
SPLIT_SEED = 42

train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_split = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Validation should see un-augmented images: same resize/normalization as the
# training pipeline, but without the random flip and rotation.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# A shallow copy shares the file list (so the split indices stay valid) while
# letting the validation view carry its own transform; the training dataset
# object itself is left untouched.
eval_dataset = copy.copy(dataset)
eval_dataset.transform = eval_transform
val_dataset = Subset(eval_dataset, val_split.indices)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)

print("Training Samples:", len(train_dataset))
print("Validation Samples:", len(val_dataset))

Training Samples: 16
Validation Samples: 4


In [26]:
# Start from ImageNet-pretrained weights. The `weights=` enum replaces the
# deprecated `pretrained=True` flag and selects the same V1 checkpoint.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Swap the final fully connected layer for a 2-class head (adhaar vs. pan).
model.fc = nn.Linear(model.fc.in_features, 2)

# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print("Model Loaded on:", device)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 149MB/s]

Model Loaded on: cpu





In [27]:
# Step size used by the Adam optimizer below.
LEARNING_RATE = 0.001

# Cross-entropy loss on the model outputs; Adam is given every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [28]:
# Number of full passes over the training loader.
epochs = 5

for epoch in range(epochs):
    model.train()
    # Sum of per-batch losses for this epoch; averaged over batches when printed.
    running_loss = 0

    for batch_images, batch_labels in train_loader:
        batch_images, batch_labels = batch_images.to(device), batch_labels.to(device)

        # Clear gradients left over from the previous step before the new backward pass.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    print(f"Epoch [{epoch+1}/{epochs}] Loss: {running_loss/len(train_loader):.4f}")

Epoch [1/5] Loss: 0.6532
Epoch [2/5] Loss: 0.0276
Epoch [3/5] Loss: 0.0229
Epoch [4/5] Loss: 0.0008
Epoch [5/5] Loss: 0.0048


In [29]:
# Measure classification accuracy on the validation loader.
model.eval()
correct = 0
total = 0

# No gradients are needed while scoring.
with torch.no_grad():
    for batch_images, batch_labels in val_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Index of the largest output per row is the predicted class.
        predicted = model(batch_images).argmax(dim=1)

        total += batch_labels.shape[0]
        correct += (predicted == batch_labels).sum().item()

accuracy = 100 * correct / total
print(f"Validation Accuracy: {accuracy:.2f}%")

Validation Accuracy: 100.00%


In [30]:
# Deterministic preprocessing for inference: the same resize and normalization
# as the training pipeline, but without the random flip/rotation, so the same
# image always yields the same prediction.
inference_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

# Index i is the name for label i as assigned by DocumentDataset.
CLASS_NAMES = ["adhaar", "pan"]


def predict_image(image_path):
    """Classify a single image file with the trained model.

    Args:
        image_path: Path to the image file to classify.

    Returns:
        "adhaar" or "pan", according to the highest-scoring model output.
    """
    # The context manager releases the file handle; convert() returns a fully
    # loaded image that remains usable after the file is closed.
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Add a leading batch dimension and move to the model's device.
    batch = inference_transform(image).unsqueeze(0).to(device)

    model.eval()
    with torch.no_grad():
        outputs = model(batch)
        _, predicted = torch.max(outputs, 1)

    return CLASS_NAMES[predicted.item()]

In [32]:
# Upload an image and classify it with the trained model.
uploaded_test = files.upload()
if not uploaded_test:
    # Nothing came back from the upload step; fail with an actionable message
    # instead of a bare IndexError on the lookup below.
    raise ValueError("No file was uploaded; re-run this cell and select an image to classify.")

# Only the first uploaded file is classified.
test_path = next(iter(uploaded_test))

prediction = predict_image(test_path)
print("Predicted Document Type:", prediction)

Saving pan_14.jpeg to pan_14.jpeg
Predicted Document Type: pan
