<a href="https://colab.research.google.com/github/jovinod/image/blob/main/Crop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import torch
import warnings
warnings.simplefilter('ignore')
import numpy as np
import torchvision
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch.optim as optim
import torch.nn.functional as F
from google.colab import drive

In [3]:
# Mount Google Drive at /content/drive; the dataset paths used below live under
# '/content/drive/My Drive/crop'.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# While training a CNN model we may need to transform the input image data by normalizing it. Normalizing
# standardizes every channel as (x - mean) / std. A commonly used default is
# transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), where the first tuple is the mean and the second
# tuple is the standard deviation of the R, G and B channels. If the input data is in the range 0-1, that
# transformation maps it into the range -1 to 1, e.g. (0 - 0.5) / 0.5 = -1 and (1 - 0.5) / 0.5 = 1.

# Do we need to normalize our data?
img_loader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder('/content/drive/My Drive/crop/train',
                                     transform=transforms.ToTensor()),
    batch_size=64,
    shuffle=False,
    num_workers=4)

# Use the built-in next(): the iterator's legacy `.next()` alias has been removed from recent
# PyTorch releases and raises AttributeError there.
first_batch = next(iter(img_loader))
feature, label = first_batch
# feature[:1] selects only the first image of the batch.
print(f'Min pixel value {feature[:1,:,:].min()}')
print(f'Max pixel value {feature[:1,:,:].max()}')


# From the output you can see that our data is between 0 and 1. The data is in that range because the
# transforms.ToTensor transform converts the input data from the range 0-255 to 0-1.

Min pixel value 0.0
Max pixel value 0.886274516582489


In [None]:
# To normalize our data we need the mean and standard deviation of each channel, computed from the
# training images themselves. With 70k+ records the data may be too large for a single batch, so we stream
# it in batches of 4096 and accumulate per-channel running totals (sum, sum of squares, pixel count).
# The statistics are derived from those totals at the end, which yields the dataset-wide values.
# Averaging per-batch means / standard deviations instead would over-weight a smaller final batch and
# ignore the variation of the mean between batches.

img_loader = torch.utils.data.DataLoader(
    torchvision.datasets.ImageFolder('/content/drive/My Drive/crop/train',
                                     transform=transforms.ToTensor()),
    batch_size=4096,
    shuffle=False,
    num_workers=4)

# One accumulator entry per R, G, B channel; float64 keeps the running totals accurate.
channel_sum = np.zeros(3, dtype=np.float64)
channel_sq_sum = np.zeros(3, dtype=np.float64)
pixel_count = 0  # number of pixels seen per channel

for images, _ in img_loader:
    numpy_image = images.numpy()

    # Reduce over the batch axis (0) and both spatial axes (2, 3), leaving one value per channel (axis 1).
    channel_sum += numpy_image.sum(axis=(0, 2, 3), dtype=np.float64)
    channel_sq_sum += np.square(numpy_image).sum(axis=(0, 2, 3), dtype=np.float64)
    pixel_count += numpy_image.shape[0] * numpy_image.shape[2] * numpy_image.shape[3]

pop_mean = channel_sum / pixel_count
# Sum of squared deviations from the mean; clamp at 0 to absorb tiny negative rounding errors.
sq_dev_sum = np.maximum(channel_sq_sum - pixel_count * pop_mean ** 2, 0.0)
pop_std0 = np.sqrt(sq_dev_sum / pixel_count)        # population standard deviation (ddof=0)
pop_std1 = np.sqrt(sq_dev_sum / (pixel_count - 1))  # sample standard deviation (ddof=1, i.e. N-1)

print(pop_mean)
print(pop_std0)
print(pop_std1)

In [None]:
# Final preprocessing pipeline applied to every image before it is used for training: convert the image
# to a tensor, then standardize each of the three channels with the mean / standard deviation below.
channel_mean = (0.4743617, 0.49847862, 0.4265874)
channel_std = (0.21134755, 0.19044809, 0.22679578)
normalize_step = transforms.Normalize(channel_mean, channel_std)
transform = transforms.Compose([transforms.ToTensor(), normalize_step])

crop_dataset = datasets.ImageFolder(root='/content/drive/My Drive/crop/train', transform=transform)
# Display the class-name -> integer-label mapping of the dataset.
crop_dataset.class_to_idx

In [None]:
# Loader that serves the normalized dataset in shuffled batches of 128 images for training.
crop_loader = DataLoader(dataset=crop_dataset, shuffle=True, batch_size=128)

In [None]:
# Preview one training batch as a 16 x 8 grid (128 images, i.e. one batch of crop_loader).
# next() replaces the iterator's legacy `.next()` alias, which recent PyTorch releases no longer provide.
feature, label = next(iter(crop_loader))

# The batch is normalized, so undo (x - mean) / std before plotting; otherwise imshow clips the
# out-of-range values. The statistics are read from the dataset's Normalize step, which is the last
# transform in crop_dataset.transform.
normalize_step = crop_dataset.transform.transforms[-1]
mean = torch.as_tensor(normalize_step.mean).view(-1, 1, 1)
std = torch.as_tensor(normalize_step.std).view(-1, 1, 1)

fig, axes = plt.subplots(figsize=(200, 100), nrows=16, ncols=8)
for idx, ax in enumerate(axes.flat):
    if idx >= len(feature):
        # Fewer images than grid cells: leave the remaining cells empty.
        ax.axis('off')
        continue
    image = (feature[idx] * std + mean).clamp(0, 1)
    ax.imshow(image.permute(1, 2, 0))
    # crop_loader shuffles, so position idx in the batch is NOT crop_dataset.imgs[idx]; the label
    # that came with the batch is the only reliable description of the image.
    ax.set_title(crop_dataset.classes[label[idx].item()])

In [None]:
# Here we look at a single image. Our intent is to see how an image is transformed as it passes through
# three convolution -> ReLU -> 2x2 max-pooling stages like the ones in the model we are proposing.
# NOTE: the layers below are freshly initialised (untrained). They use padding=1 and 36 output channels
# in the third stage, so they are not identical to the layers of the Net class defined later in this
# notebook (no padding, 18 output channels).


def show_feature_maps(activation, ncols, figsize=(200, 100)):
    """Draw every channel of the first sample in ``activation`` as its own image.

    Args:
        activation: 4-D tensor indexed as (sample, channel, row, column).
        ncols: number of images per grid row; as many rows as needed are created.
        figsize: figure size in inches, forwarded to ``plt.subplots``.

    Returns:
        The matplotlib figure that holds the grid.
    """
    # detach() drops the autograd graph so the maps can be converted for matplotlib. It replaces the
    # torch.tensor(existing_tensor) copy-construct, which PyTorch warns against.
    maps = activation[0].detach().cpu()
    nrows = -(-maps.shape[0] // ncols)  # ceiling division
    fig, axes = plt.subplots(figsize=figsize, nrows=nrows, ncols=ncols, squeeze=False)
    # axes.flat walks the grid row by row, so channel k lands in row k // ncols, column k % ncols.
    for ax, fmap in zip(axes.flat, maps):
        ax.imshow(fmap.numpy())
    return fig


def run_stage(conv, pool, x, ncols):
    """Apply conv -> ReLU -> pool to ``x`` and plot the feature maps after each of the three steps.

    Returns:
        The pooled activation, ready to be fed into the next stage.
    """
    out = conv(x)
    show_feature_maps(out, ncols)
    out = F.relu(out)
    show_feature_maps(out, ncols)
    out = pool(out)
    show_feature_maps(out, ncols)
    return out


img_one_feature = feature[:1]
img_one_label = label[:1]
print(img_one_feature.shape, img_one_label.shape)

plt.imshow(img_one_feature[0].permute(1, 2, 0))

# The same 2x2 max-pooling (stride 2) is applied after every stage.
pool = nn.MaxPool2d(2, 2)

# Stage 1: 3 input (RGB) channels -> 6 feature maps, kernel size 9, stride 1, padding 1.
cnv1 = nn.Conv2d(3, 6, kernel_size=9, padding=1, stride=1)
# Stage 2: 6 -> 12 feature maps, kernel size 6, stride 1, padding 1.
cnv2 = nn.Conv2d(6, 12, kernel_size=6, padding=1, stride=1)
# Stage 3: 12 -> 36 feature maps, kernel size 3, stride 1, padding 1.
cnv3 = nn.Conv2d(12, 36, kernel_size=3, padding=1, stride=1)

# Nothing is trained here, so gradient tracking is switched off for the forward passes.
with torch.no_grad():
    layer1 = run_stage(cnv1, pool, img_one_feature, ncols=6)
    layer2 = run_stage(cnv2, pool, layer1, ncols=12)
    layer3 = run_stage(cnv3, pool, layer2, ncols=12)  # 36 maps -> 3 rows of 12

In [None]:
# Now create a model that can be trained for disease detection
class Net(nn.Module):
    """Convolutional classifier: three conv -> ReLU -> max-pool stages followed by four linear layers.

    The first linear layer expects 18 x 28 x 28 features after the last pooling step. With the
    unpadded 9/6/3 kernels and 2x2 pooling used here, that is what a 3 x 256 x 256 input produces
    (256 -> 248 -> 124 -> 119 -> 59 -> 57 -> 28).

    Args:
        num_classes: size of the output layer, i.e. number of logits per image. Defaults to 38.
    """

    def __init__(self, num_classes=38):
        super().__init__()
        # Attribute names are part of the state_dict keys, so they must stay stable for saved weights.
        self.conv1 = nn.Conv2d(3, 6, 9)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 12, 6)
        self.conv3 = nn.Conv2d(12, 18, 3)
        self.fc1 = nn.Linear(18 * 28 * 28, 4096)
        self.fc2 = nn.Linear(4096, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the raw class scores (logits) of shape (batch, num_classes) for image batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # Keep the batch dimension and flatten channels and spatial positions into one feature vector.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # No activation on the last layer; nn.CrossEntropyLoss works on raw logits.
        x = self.fc4(x)
        return x

In [None]:
# Instantiate the network together with its loss function and its optimizer
# (stochastic gradient descent with momentum).
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), momentum=0.9, lr=0.001)

In [None]:
# List every entry of the model's state_dict together with its tensor size.
print("Model's state_dict:")
model_state = net.state_dict()
for entry_name, entry_tensor in model_state.items():
    print(entry_name, "\t", entry_tensor.size())

In [None]:
# List every top-level entry of the optimizer's state_dict.
print("Optimizer's state_dict:")
optimizer_state = optimizer.state_dict()
for var_name, entry in optimizer_state.items():
    print(var_name, "\t", entry)

In [None]:
# Use the first CUDA device when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Move the model to the selected device. As the last expression of the cell, the result of the call
# is also displayed by the notebook.
net.to(device)

In [None]:
# Train for five passes over the dataset and report progress once every 500 mini-batches.
for epoch in range(5):
    net.train()  # put the network into training mode
    # running_loss is reset after every report; correct / total keep accumulating for the whole epoch.
    running_loss, correct, total = 0.0, 0, 0
    for i, (images, targets) in enumerate(crop_loader):
        inputs = images.to(device)
        labels = targets.to(device)

        # One optimisation step: clear old gradients, forward pass, backward pass, weight update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Bookkeeping for the progress report.
        running_loss += loss.item()
        predicted = torch.max(outputs, dim=1)[1]  # index of the highest score per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        if (i + 1) % 500 != 0:
            continue

        # Report the mean loss of the last 500 mini-batches and the accuracy so far in this epoch.
        accu = 100. * correct / total
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 500:.3f} accuracy:{accu:.3f}')
        running_loss = 0.0
print('Finished Training')

In [None]:
# Save only the model's state_dict (not the Net object itself) to the mounted Google Drive folder.
torch.save(net.state_dict(), '/content/drive/My Drive/crop/model')