<a href="https://colab.research.google.com/github/ovaheb/hello-world/blob/master/AI_CA5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import time
from datetime import timedelta
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader, random_split
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.datasets import ImageFolder

from google.colab import drive
# Mount Google Drive into the Colab filesystem; the image dataset is read
# from a folder under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
class CustomToTensor(object):
    """Callable transform that turns an image into a float32 torch tensor.

    The image is expected to be grayscaled beforehand, so the resulting
    tensor is 2-D (H x W). Pixel values are only cast to float32; they are
    not rescaled.
    """

    def __call__(self, image):
        # Copy the pixels into a float32 numpy array, then wrap it as a tensor.
        pixels = np.array(image, dtype = np.float32)
        tensor = torch.from_numpy(pixels)
        return tensor

In [0]:
# Per-image preprocessing pipeline: convert to grayscale first, then to a
# float32 tensor (CustomToTensor requires a grayscaled input).
transform = transforms.Compose([
    transforms.Grayscale(),
    CustomToTensor(),
])

# Load the images from the mounted Drive folder, applying the pipeline above
# to every sample.
dataset = ImageFolder(
    root = "/content/drive/My Drive/categorized_products",
    transform = transform,
)

In [0]:
batch_size = 64
validation_split = 0.2
random_seed = 7
# Never request more loader processes than the host has CPUs: asking for 64
# workers on a small machine only adds start-up cost and memory pressure.
num_workers = min(4, os.cpu_count() or 1)

# Seed both RNGs used below: numpy drives the index shuffle, torch drives the
# order in which SubsetRandomSampler draws indices.
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Creating data indices for training and validation splits:
n = len(dataset)  # total number of examples
indices = list(range(n))  # indices of the dataset
split = int(np.floor(validation_split * n))  # size of the held-out part
np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)

train_loader = torch.utils.data.DataLoader(dataset, batch_size = batch_size, sampler = train_sampler, num_workers = num_workers)
test_loader = torch.utils.data.DataLoader(dataset, batch_size = batch_size, sampler = test_sampler, num_workers = num_workers)

In [0]:
# Class names exposed by the dataset; the integer labels yielded by the
# loaders are used as indices into this list.
classes = dataset.classes

In [0]:
def imshow(img_array, labels, classes, rows = 3, cols = 4):
    """Display images in a ``rows`` x ``cols`` grid, each titled with its class name.

    Args:
        img_array: indexable collection of 2-D (grayscale) images.
        labels: indexable collection of integer class indices, aligned with
            ``img_array``.
        classes: sequence mapping a class index to its display name.
        rows: number of grid rows (default 3, the previous fixed value).
        cols: number of grid columns (default 4, the previous fixed value).

    At most ``rows * cols`` images are drawn. When fewer images are supplied,
    the remaining grid cells are left blank instead of raising IndexError.
    """
    # squeeze=False keeps `axs` two-dimensional even for a single row/column.
    _, axs = plt.subplots(rows, cols, squeeze = False)
    shown = min(len(img_array), len(labels), rows * cols)
    for idx in range(rows * cols):
        ax = axs[idx // cols][idx % cols]
        if idx < shown:
            ax.imshow(img_array[idx], cmap = 'gray')
            ax.set_title(classes[labels[idx]])
        # Hide the frame/ticks for drawn and empty cells alike.
        ax.axis('off')
    plt.show()

# Pull training batches until one contains enough distinct classes for a
# varied preview grid.
# NOTE: iterating a tensor yields 0-d tensors, which hash by identity, so
# set(labels) never removes duplicates; convert to plain ints first.
min_distinct_classes = min(7, len(classes))  # cannot require more classes than exist
data_iter = iter(train_loader)
# Use the built-in next(): the iterator's .next() method is not available in
# recent PyTorch releases.
images, labels = next(data_iter)
while len(set(labels.tolist())) < min_distinct_classes:
  batch = next(data_iter, None)
  if batch is None:
    break  # loader exhausted: fall back to the last batch that was read
  images, labels = batch
imshow(images[0:12], labels[0:12], classes)

In [0]:
def get_number_distribution(loader, classes):
  """Count how many samples of each class a loader yields.

  Args:
    loader: iterable of ``(inputs, labels)`` pairs; only ``labels`` is read,
      and each label must be usable as an index into ``classes``.
    classes: sequence of class names.

  Returns:
    A list of counts, ordered like ``classes``.
  """
  counts_by_name = dict.fromkeys(classes, 0)
  for _, batch_labels in loader:
    for class_idx in batch_labels:
      counts_by_name[classes[class_idx]] += 1
  return [counts_by_name[name] for name in classes]

In [0]:
# Per-class sample counts for the training split (iterates the whole
# training loader once), ordered like `classes`.
total_category_count = get_number_distribution(train_loader, classes)

In [0]:
# Tabulate the per-class counts, then draw them as a bar chart.
df = pd.DataFrame({"Category": classes, "Count": total_category_count})

plt.figure(figsize=(20, 10))
plt.bar('Category', 'Count', data=df)
plt.title("Barplot of Category Distribution", size=18)
plt.xlabel("Category", size=15)
plt.ylabel("Count", size=15)
plt.xticks(rotation=90)  # draw the category names vertically
plt.show()

In [0]:
# Inspect the shape of the first sample's image tensor. The model's first
# Linear layer takes 1 * 80 * 60 flattened inputs, so this presumably shows
# an 80 x 60 (or 60 x 80) image -- confirm against the output.
dataset[0][0].shape

In [0]:
# Training hyperparameters and loss shared by the cells below.
criterion = nn.CrossEntropyLoss()
batch_size = 64
number_of_epochs = 10
learning_rate = 0.01
activation_function = F.relu
class Model(nn.Module):
    """Fully connected classifier: four hidden layers plus a linear output layer.

    Every input sample is flattened to a vector before the first layer, and
    the same activation is applied after each of the four hidden layers. The
    output layer returns raw scores (no softmax), one per class.

    Args:
        class_num: number of output classes.
        act: activation applied after each hidden layer (defaults to the
            module-level ``activation_function``).
        input_size: number of features per flattened sample. The default,
            1 * 80 * 60, is the value that used to be hard-coded; it
            presumably matches an 80 x 60 single-channel image -- confirm
            against the dataset.
        hidden_sizes: widths of the four hidden layers, in order. The default
            reproduces the previously hard-coded widths.

    Raises:
        ValueError: if ``hidden_sizes`` does not contain exactly four widths.
    """
    def __init__(self, class_num, act = activation_function,
                 input_size = 1 * 80 * 60,
                 hidden_sizes = (24 * 20 * 15, 72 * 10 * 5, 144 * 5 * 5, 82)):
        super(Model, self).__init__()

        # Fail before allocating any (potentially very large) weight matrix.
        if len(hidden_sizes) != 4:
            raise ValueError(
                "hidden_sizes must contain exactly 4 widths, got %d" % len(hidden_sizes))
        width1, width2, width3, width4 = hidden_sizes

        # Attribute names layer1..layer5 / act1..act4 are kept so that existing
        # state dicts and external references continue to work.
        self.layer1 = nn.Linear(input_size, width1)
        self.act1 = act

        self.layer2 = nn.Linear(width1, width2)
        self.act2 = act

        self.layer3 = nn.Linear(width2, width3)
        self.act3 = act

        self.layer4 = nn.Linear(width3, width4)
        self.act4 = act

        self.layer5 = nn.Linear(width4, class_num)

    def forward(self, x):
        """Return class scores of shape (batch, class_num) for a batch ``x``."""
        # Collapse everything but the batch dimension into one feature vector.
        x = x.view(x.size(0), -1)

        x = self.act1(self.layer1(x))
        x = self.act2(self.layer2(x))
        x = self.act3(self.layer3(x))
        x = self.act4(self.layer4(x))

        # No activation on the output: CrossEntropyLoss expects raw scores.
        return self.layer5(x)

In [0]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [0]:
# One output score per dataset class; build the network and place it on the
# selected device in a single step.
model = Model(len(classes)).to(device)

In [0]:
# Plain stochastic gradient descent over all model parameters, using the
# learning rate set alongside the other hyperparameters.
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [0]:
def fit(model, train_loader, device, criterion, optimizer, num_epochs):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    After every epoch the mean per-batch loss and the epoch duration are
    printed; the total training time is printed at the end.

    Args:
        model: network to optimise; it is switched to training mode.
        train_loader: re-iterable source of ``(images, labels)`` batches. It
            does not need to implement ``__len__``.
        device: device the batches are moved to before the forward pass.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer holding ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.

    Raises:
        ValueError: if an epoch yields no batches (the average loss would be
            undefined).
    """
    total_time = 0.

    # Training-only layers (dropout, batch norm, ...) must be in train mode,
    # even if an earlier evaluation switched the model to eval mode.
    model.train()

    for epoch in range(num_epochs):
        train_loss = 0.
        num_batches = 0
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments.
        epoch_start = time.perf_counter()
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Clear gradients w.r.t. parameters
            optimizer.zero_grad()

            # Forward pass to get output/logits
            outputs = model(images)

            # Calculate Loss: softmax --> cross entropy loss
            loss = criterion(outputs, labels)

            # Getting gradients w.r.t. parameters
            loss.backward()

            # Updating parameters
            optimizer.step()
            train_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; cannot compute an average loss")

        # Count batches ourselves rather than relying on len(train_loader).
        average_loss = train_loss / num_batches
        seconds = time.perf_counter() - epoch_start
        total_time += seconds
        print('epoch %d, train_loss: %.3f, time elapsed: %s seconds' % (epoch + 1, average_loss, seconds))
    print('total training time: %.3f minutes' % (total_time / 60))

In [0]:
# Train for number_of_epochs epochs; fit prints the per-epoch loss and timing.
fit(model, train_loader, device, criterion, optimizer, number_of_epochs)

In [0]:
def test_model_accuracy(model, test_loader):
    # Calculate Accuracy         
    correct = 0.
    total = 0.
    # Iterate through test dataset
    with torch.no_grad():
      for images, labels in test_loader:
        outputs = model(images.to(device))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.to('cpu') == labels).sum().item()

    accuracy = 100 * correct / total
    print('Accuracy: {}%'.format(accuracy))
# Report accuracy on the held-out split.
test_model_accuracy(model, test_loader)