In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from time import time
from matplotlib import pyplot as plt
from IPython.display import display
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
from torchvision import models
import torch.optim as optim

In [2]:
# Pick the compute device once; later cells move the model and batches to it.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using:', device)

Using: cuda


In [3]:
# Training hyper-parameters shared by the data loaders and the training loop.
epochs_range = 15  # number of full passes over the training set
batch_size = 32    # images per mini-batch for both loaders

In [4]:
# Locations of the train / test image folders (Google Drive mount in Colab).
train_dataset_path = '/content/drive/MyDrive/Visual Recognition/Mask_Data/Train_Data'
test_dataset_path = '/content/drive/MyDrive/Visual Recognition/Mask_Data/Test_Data/test'

# Shared preprocessing: resize, take the central 100x100 patch, convert to a
# tensor and normalise each channel. The mean/std values are the ones commonly
# used for ImageNet-pretrained torchvision models.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(100),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ImageFolder derives each sample's label from the sub-directory it lives in.
train_dataset = torchvision.datasets.ImageFolder(train_dataset_path, transform=transform)
test_dataset = torchvision.datasets.ImageFolder(test_dataset_path, transform=transform)

# Shuffle only the training data. The evaluation loader keeps a fixed order so
# that test runs are deterministic and per-sample results can be traced back.
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [5]:
# Sanity check: pull one mini-batch and print the shape of its image tensor.
i = 0
for data in trainloader:
    batch_images = data[0]  # data is [images, labels]
    print(batch_images.shape)
    break

torch.Size([32, 3, 100, 100])


In [6]:
# Build an index -> class-name lookup from the training folder layout.
data_path = '/content/drive/MyDrive/Visual Recognition/Mask_Data/Train_Data'

# os.listdir returns entries in arbitrary order and may include stray files.
# torchvision's ImageFolder assigns class indices to the *sorted* sub-directory
# names, so keep only directories and sort them to get the same numbering.
categories = sorted(
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)
labels = list(range(len(categories)))

label_dict = dict(zip(labels, categories))

print(label_dict)
print(labels)
print(categories)

{0: 'with_mask', 1: 'without_mask'}
[0, 1]
['with_mask', 'without_mask']


In [11]:
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    """Small two-stage CNN for 3-channel 100x100 images with two output classes.

    Layout::

        conv(3 -> 200, 3x3) -> ReLU -> max-pool 2x2
        conv(200 -> 100, 2x2) -> ReLU -> max-pool 2x2
        dropout(p=0.5) -> flatten -> fc(57600 -> 50) -> ReLU -> fc(50 -> 2)

    ``forward`` returns raw class scores (logits) of shape ``(batch, 2)``.
    ``nn.CrossEntropyLoss`` applies log-softmax internally, so adding a softmax
    here would normalise twice and flatten the gradients. Arg-max predictions
    are identical either way; call ``F.softmax(logits, dim=1)`` on the output
    when probabilities are needed.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=200, kernel_size=(3, 3))
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(in_channels=200, out_channels=100, kernel_size=(2, 2))
        self.Dropout = nn.Dropout(0.5)
        # 24 * 24 * 100 is the flattened feature size for a 100x100 input.
        self.fc1 = nn.Linear(in_features=24 * 24 * 100, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=2)

    def forward(self, x):
        """Map a ``(batch, 3, 100, 100)`` tensor to ``(batch, 2)`` logits."""
        x = F.relu(self.conv1(x))  # 200 x 98 x 98
        x = self.pool(x)           # 200 x 49 x 49
        x = F.relu(self.conv2(x))  # 100 x 48 x 48
        x = self.pool(x)           # 100 x 24 x 24
        x = self.Dropout(x)
        # Keep the batch dimension explicit so a wrongly sized input fails in
        # fc1 instead of being silently regrouped across samples.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Build the classifier and move its parameters onto the selected device.
net = ConvNet()
net = net.to(device)

In [12]:
# Loss: cross-entropy over the two output classes.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all network parameters.
# (Earlier experiments, kept for reference: a pretrained AlexNet backbone via
#  models.alexnet(pretrained=True), and
#  optim.SGD(net.parameters(), lr=0.001, momentum=0.9).)
optimizer = optim.Adam(params=net.parameters(), lr=0.001)

In [13]:
# Report the element count of every trainable tensor, followed by the sum.
print('Trainable parameters:')
trainable_sizes = {
    name: param.numel()
    for name, param in net.named_parameters()
    if param.requires_grad
}
for name, count in trainable_sizes.items():
    print(name, '\t\t', count)
total = sum(trainable_sizes.values())
print('----------------------------------')
print('Total', '\t\t    ', total)

Trainable parameters:
conv1.weight 		 5400
conv1.bias 		 200
conv2.weight 		 80000
conv2.bias 		 100
fc1.weight 		 2880000
fc1.bias 		 50
fc2.weight 		 100
fc2.bias 		 2
----------------------------------
Total 		     2965852


In [14]:
# Train the network for `epochs_range` passes over the training loader.

# Make sure dropout is active even if this cell is re-run after the network
# has been switched to evaluation mode elsewhere in the session.
net.train()

log_every = 43  # report the mean loss once per this many mini-batches

for epoch in range(epochs_range):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # each batch is [inputs, labels]; move both to the compute device
        inputs, labels = inputs.to(device), labels.to(device)

        # gradients accumulate by default, so clear them before each step
        optimizer.zero_grad()

        # forward + backward + optimize
        output = net(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # running statistics, averaged over the last `log_every` batches
        running_loss += loss.item()
        if (i + 1) % log_every == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_every))
            running_loss = 0.0

print('Finished Training of Model')



Finished Training of Model


In [15]:
# Measure top-1 accuracy on the held-out test loader.
correct = 0
total = 0

# Dropout must be disabled while scoring, otherwise half of the features are
# randomly zeroed and the reported accuracy is noisy. Remember the previous
# mode and restore it afterwards so that re-running the training cell later
# behaves exactly as before.
was_training = net.training
net.eval()
try:
    with torch.no_grad():  # no gradients needed for inference
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            # index of the highest score per row is the predicted class
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
finally:
    net.train(was_training)

print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))



Accuracy of the network on the test images: 95 %


In [18]:
# Persist the trained network object to Drive. Passing the module itself
# (rather than a state_dict) stores it via pickle, so the ConvNet class must
# be defined before the file is loaded again.
path = "/content/drive/MyDrive/Visual Recognition/Mask_Data/model95.pth"
torch.save(obj=net, f=path)
# To restore later:
# model = torch.load(filepath)

In [19]:
# Reload the saved network and switch it to inference mode.
# map_location lets a checkpoint written on a GPU runtime load on whatever
# device this session selected (e.g. a CPU-only machine).
# NOTE(security): the file holds a fully pickled module, and unpickling can
# execute arbitrary code -- only load checkpoints you created yourself.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects full-module pickles; pass
# weights_only=False there if this call fails -- confirm against the installed version.
model = torch.load(path, map_location=device)
model.eval()

ConvNet(
  (conv1): Conv2d(3, 200, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(200, 100, kernel_size=(2, 2), stride=(1, 1))
  (Dropout): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=57600, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=2, bias=True)
)