In [None]:
import os
from torchvision import transforms
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Read data and unzip it

In [None]:
!unzip '/content/drive/MyDrive/Computer_vision/Datasets/Classification_dataset.zip'

Archive:  /content/drive/MyDrive/Computer_vision/Datasets/Classification_dataset.zip
   creating: Classification_dataset_v3/
  inflating: Classification_dataset_v3/Dataset_info.txt  
   creating: Classification_dataset_v3/images/
   creating: Classification_dataset_v3/images/test/
   creating: Classification_dataset_v3/images/test/Cat/
  inflating: Classification_dataset_v3/images/test/Cat/cat_1000.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1001.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1002.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1003.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1004.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1005.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1006.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1007.jpg  
  inflating: Classification_dataset_v3/images/test/Cat/cat_1008.jpg  
  inflating: Classification_data

# Custom dataset

In [None]:
class ImageDataset(Dataset):
  """Image-classification dataset backed by a `root/<class name>/<image file>` folder layout.

  Every sub-directory of `image_dir` is treated as one class and every regular
  file inside it as one sample of that class.

  Args:
    image_dir: Root directory that contains one sub-directory per class.
    transforms: Optional callable applied to each PIL image in `__getitem__`.
    class_names: Optional sequence of sub-directory names. When given, label `i`
      is assigned to `class_names[i]`, which pins the label mapping. When
      omitted, the classes are the sub-directories in the order returned by
      `os.listdir`, which is not guaranteed to be stable across directories
      or machines.

  Attributes set by the constructor:
    features: list of image file paths.
    labels: list of integer labels, parallel to `features`.
    class_name: dict mapping integer label -> class (directory) name.
  """

  def __init__(self, image_dir, transforms=None, class_names=None):
    self.image_dir = image_dir
    self.features = []
    self.labels = []
    self.class_name = {}
    self.transforms = transforms

    if class_names is None:
      # Ignore stray files in the root (e.g. an info .txt): they are not classes,
      # and listing them as directories would raise NotADirectoryError.
      class_names = [
          entry for entry in os.listdir(image_dir)
          if os.path.isdir(os.path.join(image_dir, entry))
      ]

    # Enumerate after filtering so the labels stay contiguous (0 .. n_classes - 1).
    for label, label_name in enumerate(class_names):
      self.class_name[label] = label_name
      class_dir = os.path.join(image_dir, label_name)
      for file_name in os.listdir(class_dir):
        img_full_path = os.path.join(class_dir, file_name)
        if not os.path.isfile(img_full_path):
          continue  # nested directories are not samples
        self.features.append(img_full_path)
        self.labels.append(label)

  def __len__(self):
    """Return the number of samples."""
    return len(self.features)

  def __getitem__(self, indx):
    """Return `(image, label)` for sample `indx`; the image is converted to RGB and then transformed."""
    img_path = self.features[indx]
    # Context manager closes the underlying file as soon as the pixels are decoded.
    with Image.open(img_path) as img_file:
      image = img_file.convert("RGB")
    label = self.labels[indx]

    if self.transforms is not None:
      image = self.transforms(image)

    return image, label

In [None]:
# Import the module under an alias: the pipeline is bound to the name `transforms`
# (which the dataset cells below use), and without the alias that assignment would
# shadow the torchvision module and make this cell fail when it is run a second time.
from torchvision import transforms as T

transforms = T.Compose([
    T.Resize((128, 128)),                          # fixed spatial size for every image
    T.ToTensor(),                                  # PIL image -> tensor
    T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])  # per channel: (x - 0.5) / 0.5
])

In [None]:
# One dataset per split of the extracted archive, both using the same preprocessing
train_dir = "/content/Classification_dataset_v3/images/train"
test_dir = "/content/Classification_dataset_v3/images/test"

train_dataset, test_dataset = (
    ImageDataset(split_dir, transforms) for split_dir in (train_dir, test_dir)
)

In [None]:
# Shuffle only the training split. The evaluation split is iterated in a fixed
# order so repeated runs see the same batches.
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

### Check that the datasets loaded successfully

In [None]:
# Show the label -> class-name mapping of each split
print(train_dataset.class_name)
print(test_dataset.class_name)

# The mappings come from directory listings whose order is not guaranteed, so
# fail loudly here if the two splits disagree instead of evaluating against
# the wrong labels later.
assert train_dataset.class_name == test_dataset.class_name, \
    "train and test splits assign different labels to the classes"

{0: 'Dog', 1: 'person', 2: 'Cat'}
{0: 'Dog', 1: 'person', 2: 'Cat'}


### Plot a sample image with its label

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Take one batch and show its first image together with its class name
image, label = next(iter(train_dataloader))
img = image[0].numpy()
lbl = label[0]
print(img.shape)
img = img.transpose((1, 2, 0))  # channels-first -> channels-last for matplotlib
print(img.shape)

# The pipeline normalised each channel with mean 0.5 / std 0.5; undo that so the
# values are back in [0, 1], otherwise imshow clips the negative half of the range.
img = np.clip(img * 0.5 + 0.5, 0.0, 1.0)

label_name = train_dataset.class_name[lbl.item()]
print(label_name)
plt.imshow(img)
plt.title(label_name)
plt.axis('off')
plt.show()

# Create Custom CNN model

In [None]:
import torch.nn as nn
from torch.optim import Adam

### CNN architecture
- 4 convolution blocks, each made of a convolution, batch normalization, a ReLU activation, and max pooling
- then flatten the resulting feature maps
- apply a fully connected layer whose input size equals the flattened feature size
- followed by a hidden layer and the output layer

In [None]:
class CnnModel(nn.Module):
  """Four-block convolutional classifier for square RGB images.

  Each block is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d(2),
  with 32, 64, 128 and 256 output channels. The flattened feature map feeds a
  fully connected head of sizes 512 -> 128 -> `out_dim`.

  Args:
    in_dim: Height/width of the (square) input images; used to size the first
      linear layer.
    out_dim: Number of output logits (classes).
  """

  def __init__(self, in_dim, out_dim):
    super().__init__()

    self.convolution = nn.Sequential(
        # convolution layer 1
        nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # convolution layer 2
        nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # convolution layer 3
        nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),

        # convolution layer 4
        nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    )

    # Filled in by count_features(): number of values per sample after flattening.
    self._to_linear = None
    self.count_features(in_dim)

    self.connected = nn.Sequential(
        nn.Linear(self._to_linear, 512),
        nn.ReLU(),
        nn.Linear(512, 128),
        nn.ReLU(),
        nn.Linear(128, out_dim)
    )

  def count_features(self, in_dim=128):
    """Set `self._to_linear` to the flattened size of the convolution output.

    A single all-zero image of shape (1, 3, in_dim, in_dim) is pushed through
    `self.convolution`. The pass runs in eval mode and without autograd so it
    neither updates the BatchNorm running statistics nor builds a graph; the
    previous train/eval mode is restored afterwards.
    """
    reference = next(self.convolution.parameters())
    was_training = self.convolution.training
    self.convolution.eval()
    try:
      with torch.no_grad():
        # Create the probe on the layers' own device/dtype so this also works
        # after the model has been moved.
        dummy_img = torch.zeros(
            (1, 3, in_dim, in_dim), device=reference.device, dtype=reference.dtype
        )
        outputs = self.convolution(dummy_img)
    finally:
      self.convolution.train(was_training)
    self._to_linear = outputs.view(1, -1).shape[1]

  def forward(self, img):
    """Return class logits of shape (batch, out_dim) for a batch of images."""
    features = self.convolution(img)
    features = torch.flatten(features, start_dim=1)  # keep the batch axis, flatten the rest
    return self.connected(features)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 128x128 input images, 3 output classes
model = CnnModel(128, 3).to(device)
model

CnnModel(
  (convolution): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

### Loss function and optimizer

In [None]:
# Cross-entropy over the class logits, optimised with Adam on every model parameter
loss_fun = nn.CrossEntropyLoss()
optimizer = Adam(params=model.parameters(), lr=1e-2)

In [None]:
# training loop
epochs = 100

# Directory that receives the trained weights; exist_ok=True already makes this
# a no-op when the directory is present, so no separate existence check is needed.
PATH = '/content/drive/MyDrive/Computer_vision/model'
os.makedirs(PATH, exist_ok=True)

try:
  for epoch in range(epochs):
    model.train()
    running_loss = 0.0 # sum of the batch losses of this epoch
    for img, label in train_dataloader:
      img, label = img.to(device), label.to(device)
      optimizer.zero_grad()

      outputs = model(img)
      loss = loss_fun(outputs, label)
      running_loss += loss.item()
      loss.backward()
      optimizer.step()

    # mean batch loss of the epoch
    print(f'Epoch [{epoch + 1}/{epochs}]: loss = {running_loss/len(train_dataloader)}')

  saved_path = os.path.join(PATH, 'cnn_model_100.pth')
  torch.save(model.state_dict(), saved_path)
  print('Saved Successfully..')
except Exception as e:
  print('Something went wrong!')
  print(e)
  # Re-raise so the traceback is shown and "Run All" stops here instead of
  # continuing with a partially trained model and no saved checkpoint.
  raise

Epoch [1/100]: loss = 6.007481330946872
Epoch [2/100]: loss = 0.9397631974596726
Epoch [3/100]: loss = 0.8126112479912607
Epoch [4/100]: loss = 0.7643955117777774
Epoch [5/100]: loss = 0.6918584440883837
Epoch [6/100]: loss = 0.6430793291644046
Epoch [7/100]: loss = 0.6372488006165153
Epoch [8/100]: loss = 0.6088922582174602
Epoch [9/100]: loss = 0.5837187054910158
Epoch [10/100]: loss = 0.5785722986647958
Epoch [11/100]: loss = 0.5602207968109533
Epoch [12/100]: loss = 0.5847384095191955
Epoch [13/100]: loss = 0.5632211923599243
Epoch [14/100]: loss = 0.5484103212231084
Epoch [15/100]: loss = 0.5034564030797858
Epoch [16/100]: loss = 0.49500601134802164
Epoch [17/100]: loss = 0.4421647431034791
Epoch [18/100]: loss = 0.46008656228843486
Epoch [19/100]: loss = 0.43644455012522243
Epoch [20/100]: loss = 0.42955122590065004
Epoch [21/100]: loss = 0.40975488800751536
Epoch [22/100]: loss = 0.3875958535232042
Epoch [23/100]: loss = 0.36345316046162657
Epoch [24/100]: loss = 0.3568905079051

### Validate model

In [None]:
# Tally correct predictions over the whole test split
count = 0.0   # correctly classified samples
total = 0.0   # samples seen

model.eval()
with torch.no_grad():
  for img, label in test_dataloader:
    img = img.to(device)
    label = label.to(device)

    predicted = model(img)
    label_indx = predicted.argmax(dim=1)  # index of the largest logit per sample

    matches = (label_indx == label).sum().item()
    total += label.size(0)
    count += matches

acc = count / total
print(f'Model Accuracy: {acc * 100:.2f}%')

Model Accuracy: 82.50%


In [None]:
import cv2
from torchvision import transforms

# Create Model Inference
Required items:
- Model class
- Model path
- Transforms object
- Class names for labeling

In [None]:
class ImageClassification:
  """Single-image inference wrapper around a trained `CnnModel`.

  Args:
    model_path: Path to a state-dict checkpoint saved with `torch.save`.
    class_name: Mapping from predicted label index to class name; its length
      determines the number of output classes of the model.
  """

  def __init__(self, model_path, class_name):
    # Local aliased import: the class keeps working even when the global name
    # `transforms` has been rebound to something other than the torchvision module.
    from torchvision import transforms as T

    self.class_name = class_name
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # One logit per class so the head always matches the supplied mapping.
    self.model = CnnModel(128, len(class_name)).to(self.device)
    # NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
    self.model.load_state_dict(torch.load(model_path, map_location=self.device))
    self.model.eval()
    # Same preprocessing as the pipeline defined in this class originally:
    # resize to 128x128, convert to tensor, normalise with mean/std 0.5.
    self.transforms = T.Compose([
        T.Resize((128, 128)),
        T.ToTensor(),
        T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])

  def predict_label(self, image_path, output_path='predicte_image.jpg'):
    """Classify one image file and write an annotated copy to disk.

    Args:
      image_path: Path of the image to classify.
      output_path: Where the annotated image is written (defaults to the
        file name this method has always used).

    Returns:
      `(image, label_name)`: the annotated image array and the predicted class name.
    """
    # Context manager closes the file once the pixels are decoded.
    with Image.open(image_path) as img_file:
      pil_img = img_file.convert("RGB")

    batch = self.transforms(pil_img).unsqueeze(0).to(self.device)  # add the batch axis
    with torch.no_grad():
      out = self.model(batch)
    label = torch.argmax(out, 1).item()
    label_name = self.class_name[label]

    image = cv2.imread(image_path)
    if image is None:
      # cv2.imread signals failure by returning None rather than raising; fall
      # back to the pixels PIL already decoded, reordered to OpenCV's BGR.
      import numpy as np
      image = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

    image = cv2.putText(image, label_name, (10, 40), cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (255, 23, 0), 2)
    cv2.imwrite(output_path, image)

    return image, label_name


In [None]:
# Reuse the label mapping the model was trained with. It is derived from a
# directory listing, so a hard-coded copy can silently go out of sync with it.
class_name = dict(train_dataset.class_name)
classify = ImageClassification('/content/drive/MyDrive/Computer_vision/model/cnn_model_100.pth', class_name)

In [None]:
# Classify one held-out image and report the predicted class
sample_path = "/content/Classification_dataset_v3/images/test/person/person_2008.jpg"
_img, label = classify.predict_label(sample_path)
print(f'The image of {label}.')

The image of person.
