In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets
from torchvision import models
import os
import tqdm

In [6]:
# Define data transforms
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
}

In [7]:
# Define data directory
data_dir = r'./Dataset'

In [8]:
# Load the entire datas
data = datasets.ImageFolder(data_dir, transform=data_transforms['train'])

# Split the dataset into training and validation sets
train_size = int(0.8 * len(data))
val_size = len(data) - train_size
train_data, val_data = random_split(data, [train_size, val_size])


In [9]:
# Create data loaders
train_data_loader = DataLoader(train_data, batch_size=128, shuffle=True, num_workers=4)
val_data_loader = DataLoader(val_data, batch_size=128, shuffle=False, num_workers=4)


In [11]:
# Get the first batch
train_features_batch, train_labels_batch = next(iter(train_data_loader))

In [12]:
# Shape of the first batch
train_features_batch.shape,train_labels_batch.shape

(torch.Size([128, 3, 224, 224]), torch.Size([128]))

In [13]:
# Get the first image and label from the first batch
train_features_batch[0],train_labels_batch[0]

(tensor([[[0.4314, 0.4157, 0.4392,  ..., 0.1804, 0.1529, 0.1216],
          [0.4431, 0.4392, 0.4431,  ..., 0.1294, 0.0902, 0.0745],
          [0.4275, 0.4392, 0.4392,  ..., 0.0549, 0.0471, 0.0431],
          ...,
          [0.0196, 0.0196, 0.0235,  ..., 0.0196, 0.0235, 0.0235],
          [0.0196, 0.0196, 0.0196,  ..., 0.0196, 0.0196, 0.0196],
          [0.0196, 0.0196, 0.0196,  ..., 0.0196, 0.0196, 0.0196]],
 
         [[0.2784, 0.2588, 0.2745,  ..., 0.1176, 0.0980, 0.0706],
          [0.2863, 0.2824, 0.2824,  ..., 0.0784, 0.0471, 0.0392],
          [0.2549, 0.2667, 0.2863,  ..., 0.0235, 0.0196, 0.0235],
          ...,
          [0.0196, 0.0196, 0.0196,  ..., 0.0275, 0.0275, 0.0235],
          [0.0196, 0.0196, 0.0196,  ..., 0.0196, 0.0196, 0.0196],
          [0.0196, 0.0196, 0.0196,  ..., 0.0196, 0.0196, 0.0196]],
 
         [[0.1647, 0.1569, 0.1843,  ..., 0.1137, 0.0863, 0.0431],
          [0.1804, 0.1765, 0.1843,  ..., 0.0824, 0.0431, 0.0235],
          [0.1608, 0.1647, 0.1686,  ...,

In [14]:
class_names = data.classes
class_names

['other_documents', 'receipt']

In [15]:
class DocumentClassifier(nn.Module):
    def __init__(self, num_classes=2, pretrained=True):
        super(DocumentClassifier, self).__init__()
        self.resnet = models.resnet18(pretrained=pretrained)
        in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.resnet(x)
    

In [19]:
# Device agnostic code
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [28]:
# Instantiate the model & define loss function and optimizer
model = DocumentClassifier(num_classes=2, pretrained=True).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [29]:
# Calculate Accuracy
def accuracy_fn(label_true, label_pred):
    correct = torch.eq(label_true, label_pred).sum().item()
    acc = (correct / len(label_pred)) * 100
    return acc

In [30]:
# Import tqdm for progress bar
from tqdm.auto import tqdm

num_epochs = 10

for epoch in tqdm(range(num_epochs)):
    # Training phase
    model.train()
    train_loss = 0

    for batch, (inputs, labels) in enumerate(train_data_loader):
        inputs, labels = inputs.to(device), labels.to(device)

         # Forward pass
        outputs = model(inputs)
        # Calculate loss (per batch)
        loss = criterion(outputs, labels)
        # Cumulatively add up the loss per epoch
        train_loss += loss
        # Optimizer zero grad
        optimizer.zero_grad()
        # Loss backward
        loss.backward()
        # Optimizer step
        optimizer.step()

        # Print out how many samples have been seen
        if batch % 400 == 0:
            print(f"Looked at {batch * len(inputs)}/{len(train_data_loader.dataset)} samples")

    # Divide total train loss by length of train dataloader (average loss per batch per epoch)
    train_loss /= len(train_data_loader)

    # Testing phase
    model.eval()
    val_loss, val_acc = 0, 0

    with torch.inference_mode():
        for inputs, labels in val_data_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            # Calculate loss
            loss = criterion(outputs, labels)
            # Calculate loss (cumulatively)
            val_loss += loss
            # 3. Calculate accuracy
            val_acc += accuracy_fn(label_true=labels, label_pred=outputs.argmax(dim=1))

        # Divide total val loss by length of val dataloader (average loss per batch per epoch)
        val_loss /= len(val_data_loader)

        # Divide total val accuracy by length of val dataloader (average acc per batch per epoch)
        val_acc /= len(val_data_loader)

    ## Print out what's happening
    print(f"\nTrain loss: {train_loss:.5f} | Test loss: {val_loss:.5f}, Test acc: {val_acc:.2f}%\n")



  0%|          | 0/10 [00:00<?, ?it/s]

Looked at 0/4352 samples

Train loss: 0.26719 | Test loss: 0.20171, Test acc: 92.82%

Looked at 0/4352 samples

Train loss: 0.10104 | Test loss: 0.34789, Test acc: 85.35%

Looked at 0/4352 samples

Train loss: 0.09164 | Test loss: 0.09773, Test acc: 96.19%

Looked at 0/4352 samples

Train loss: 0.07635 | Test loss: 0.46796, Test acc: 89.95%

Looked at 0/4352 samples

Train loss: 0.07659 | Test loss: 0.09964, Test acc: 96.02%

Looked at 0/4352 samples

Train loss: 0.07785 | Test loss: 0.09482, Test acc: 97.14%

Looked at 0/4352 samples

Train loss: 0.05877 | Test loss: 0.09611, Test acc: 95.59%

Looked at 0/4352 samples

Train loss: 0.06556 | Test loss: 0.10686, Test acc: 95.50%

Looked at 0/4352 samples

Train loss: 0.05789 | Test loss: 0.36376, Test acc: 88.14%

Looked at 0/4352 samples

Train loss: 0.08082 | Test loss: 0.17037, Test acc: 95.05%



In [308]:
import pickle

with open('Trained_Model.pkl', 'wb') as file:
    pickle.dump(model, file)

--------------------------------------------------------------------------------------------------------------------------------