Importing required libraries

In [1]:
import os
import json
import time
import random
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models

In [3]:
######################################################
# Mixed Precision with Apex and Monitoring with Wandb
import wandb
from apex import amp
from apex.optimizers import FusedAdam
######################################################

### Login to [Wandb](https://wandb.ai/home) 

Save API Key once login

In [4]:
##################
wandb.login()
#####################

[34m[1mwandb[0m: Currently logged in as: [33mbgiddwani[0m (use `wandb login --relogin` to force relogin)


True

### Set GPU Device if multiple

In [5]:
!nvidia-smi

Fri Jan 14 19:49:24 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.5     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-DGXS...  On   | 00000000:07:00.0 Off |                    0 |
| N/A   48C    P0    56W / 300W |  21335MiB / 32505MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-DGXS...  On   | 00000000:08:00.0 Off |                    0 |
| N/A   45C    P0    41W / 300W |      5MiB / 32508MiB |      0%      Default |
|       

In [6]:
##############################################################
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  
os.environ["CUDA_VISIBLE_DEVICES"]="0"
##############################################################

### Use device for `cuda` or `cpu` based on availability

```python
cuda = torch.device('cuda')     # Default CUDA device
cuda0 = torch.device('cuda:0')  # GPU 0 
cuda2 = torch.device('cuda:2')  # GPU 2 (these are 0-indexed)


x = torch.tensor([1., 2.], device=cuda0)
# x.device is device(type='cuda', index=0)
y = torch.tensor([1., 2.]).cuda()
# y.device is device(type='cuda', index=0)

with torch.cuda.device(1):
    # allocates a tensor on GPU 1
    a = torch.tensor([1., 2.], device=cuda)

    # transfers a tensor from CPU to GPU 1
    b = torch.tensor([1., 2.]).cuda()
    # a.device and b.device are device(type='cuda', index=1)

    # You can also use ``Tensor.to`` to transfer a tensor:
    b2 = torch.tensor([1., 2.]).to(device=cuda)
    # b.device and b2.device are device(type='cuda', index=1)

    c = a + b
    # c.device is device(type='cuda', index=1)

    z = x + y
    # z.device is device(type='cuda', index=0)

    # even within a context, you can specify the device
    # (or give a GPU index to the .cuda call)
    d = torch.randn(2, device=cuda2)
    e = torch.randn(2).to(cuda2)
    f = torch.randn(2).cuda(cuda2)
    # d.device, e.device, and f.device are all device(type='cuda', index=2)
```

In [7]:
####################################################################
#GPU using CUDA
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
####################################################################

device(type='cuda')

Create a Model Dir

In [8]:
try:
    os.makedirs("./saved")
except FileExistsError:
    # directory already exists
    pass

Neccesaary and Performance Tuning Hyperparameters

In [9]:
config = dict(
    #Neccessary
    TRAIN_CSV = "../data/train.csv",
    TEST_CSV = "../data/test.csv",
    IMAGE_PATH= "../data/images",
    VOCAB = "labels.json",
    saved_path="./saved/resnet18.pt",
    lr=0.001, 
    EPOCHS = 10,
    BATCH_SIZE = 32,
    IMAGE_SIZE = 224,
    TRAIN_VALID_SPLIT = 0.2,
    device=device,)

### Initiate a Wandb Project

In [10]:
#Initiate the Project and Entity
wandb.init(project="pytorch-lab", config=config)
# access all HPs through wandb.config, so logging matches execution!
config = wandb.config

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade

CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



### Data Manipulation (Can be written Separately too)

In [11]:
train_df = pd.read_csv(config.TRAIN_CSV)
test_df = pd.read_csv(config.TEST_CSV)
f = open(config.VOCAB)
vocab = json.load(f)

df_fnames = train_df["image_id"].append(test_df["image_id"],ignore_index=True).tolist()
def create_fname(path,extension):
    def add_extension(fname):
        return os.path.join(path,fname)+extension
    return add_extension

jpeg_extension_creator = create_fname(config.IMAGE_PATH,".jpg")
train_df["image_id"] = train_df["image_id"].apply(jpeg_extension_creator)
test_df["image_id"] = test_df["image_id"].apply(jpeg_extension_creator)
for label in vocab:
    train_df.loc[train_df[label] == 1, "label" ] = vocab[label] 
train_df["label"] = train_df["label"].astype(int)

Data Split: Train and Val

In [12]:
train_df_X, valid_df_X, train_df_y, valid_df_y = train_test_split(train_df["image_id"],
                                                                  train_df["label"], 
                                                                  test_size=config.TRAIN_VALID_SPLIT, 
                                                                  random_state=0)

In [13]:
train_df_split = pd.DataFrame(data={"image_id": train_df_X, "label": train_df_y})
train_df_split.to_csv("../data/train_split.csv", sep=',',index=False)

valid_df_split = pd.DataFrame(data={"image_id": valid_df_X, "label": valid_df_y})
valid_df_split.to_csv("../data/val_split.csv", sep=',',index=False)

In [14]:
print("Number of train input samples is {}".format(len(train_df_X)))
print("Number of valid input samples is {}".format(len(valid_df_X)))
print("Number of train output samples is {}".format(len(train_df_y)))
print("Number of valid output samples is {}".format(len(valid_df_y)))

Number of train input samples is 1456
Number of valid input samples is 365
Number of train output samples is 1456
Number of valid output samples is 365


In [15]:
np.array(Image.open(train_df_X[0])).dtype 

dtype('uint8')

```
--> Image_File_Path (String) 
--> Image.open(File_Path) 
--> np.array(Image.open(File_Path))
--> Images [0-255] uint8 
--> [0-1]; float32 
--> x - Mean_training_dataset  / Std_training_dataset```

Apply Data Transforms (Aumentations + Processing)

In [16]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop((config.IMAGE_SIZE,config.IMAGE_SIZE)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize((config.IMAGE_SIZE,config.IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

Custom Dataset and Dataloader for Plant Pathology Images

In [17]:
class PlantPathologyDataset(Dataset):
    def __init__(self,x,y,vocab,transforms):
        self.x = x # File Path in CSV
        self.y = y # Label in CSV
        self.vocab = vocab # Dictionary
        self.transforms = transforms
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self,idx): #File Name --> Preprocessed 3-D Tensor
        fname = self.x.iloc[idx]        
        label = self.y.iloc[idx]
        image = Image.open(fname)
        
        if self.transforms:
            image = self.transforms(image)

        return image, label #[3,224,224], [0-3] 

In [18]:
train_ds = PlantPathologyDataset(train_df_X, 
                                 train_df_y, 
                                 vocab,
                                 data_transforms["train"])
valid_ds = PlantPathologyDataset(valid_df_X, 
                                 valid_df_y,
                                 vocab,
                                 data_transforms["val"])

Optimizers:
Gradient Descent:-
    a. Stoicastic Gradient Descent bs = 1; 'n' number of examples. 'n / 1' number of data loader/steps for 1 Epoch
    b. Mini-Batch Gradient Descent bs = 32; 'n' number of examples. 'n / 32' number of dataloaders/step for 1 Epoch 
    c. Full Batch Gradient Descent bs = total_number_of_samples number of dataloader/steps = 1 for 1 Epoch

In [19]:
len(train_ds)

1456

In [20]:
train_ds[0][0].shape #3,224,224

torch.Size([3, 224, 224])

In [21]:
#Number of Iterations
1456 / 32

45.5

In [22]:
##########################################################
train_dl = DataLoader(train_ds,
                      batch_size=config.BATCH_SIZE,
                      shuffle=True,)
valid_dl = DataLoader(valid_ds,
                      batch_size=config.BATCH_SIZE,
                      shuffle=False,)
############################################################

In [23]:
len(train_dl)

46

Load Model : Pretrained from torchvision model zoo or Saved model

In [24]:
model = models.resnet18(pretrained=True)

#Modify the classifier for agriculture data
num_ftrs = model.fc.in_features
model.fc = nn.Sequential(nn.Linear(num_ftrs,512),
                        nn.ReLU(),
                        nn.Dropout(p=0.3),
                        nn.Linear(512,4))

In [25]:
model = model.to(config.device)

In [26]:
## BackPropagation & Optimization
## W_new = W_old - LR * W_gradient ; Gradient Descent Optimization Formulation

In [27]:
optimizer = optim.Adam(model.parameters(),lr=config.lr)

CrossEntropyLoss = Softmax(Final Activation Function for Normalizing the output of the FC Layer) + Negative Log Likelihood (NLL) Loss

In [28]:
# Loss Function
criterion = nn.CrossEntropyLoss()

### Training Pipeline Starts

In [29]:
def train_model(model,criterion,optimizer,num_epochs=10):
    ############################################################
    # tell wandb to watch what the model gets up to: gradients, weights, and more!
    wandb.watch(model, criterion, log="all", log_freq=10)
    ############################################################

    since = time.time()                                            
    batch_ct = 0
    example_ct = 0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        #Training
        model.train()
        for x,y in train_dl: #BS=32 ([BS,3,224,224], [BS,4])            
            x = x.to(config.device)
            y = y.to(config.device) #CHW --> #HWC
            
            #######################################################################
            # The second code snippet does not zero the memory of each individual parameter, 
            # also the subsequent backward pass uses assignment instead of addition to store gradients,
            # this reduces the number of memory operations.
            
            optimizer.zero_grad()

            ######################################################################
            
            train_logits = model(x) #Input = [BS,3,224,224] (Image) -- Model --> [BS,4] (Output Scores)
            _, train_preds = torch.max(train_logits, 1)
            train_loss = criterion(train_logits,y)
            

            train_loss.backward() # Backpropagation this is where your W_gradient
            loss=train_loss

            optimizer.step() # W_new = W_old - LR * W_gradient 
            example_ct += len(x) 
            batch_ct += 1
            
            ########################################################################
            # Stores Wandb Logs here
            # Report metrics every 25th batch
            if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
            ########################################################################
        
        #validation
        model.eval()
        running_loss = 0.0
        running_corrects = 0
        total = 0
        # Disable gradient calculation for validation or inference using torch.no_rad()
        with torch.no_grad():
            for x,y in valid_dl:
                x = x.to(config.device)
                y = y.to(config.device) #CHW --> #HWC
                valid_logits = model(x)
                _, valid_preds = torch.max(valid_logits, 1)
                valid_loss = criterion(valid_logits,y)
                running_loss += valid_loss.item() * x.size(0)
                running_corrects += torch.sum(valid_preds == y.data)
                total += y.size(0)
                ########################################################################
                # Test Accuracy Logs
                wandb.log({"test_accuracy": running_corrects / total})
                ########################################################################
            
        epoch_loss = running_loss / len(valid_ds)
        epoch_acc = running_corrects.double() / len(valid_ds)
        print("Validation Loss is {}".format(epoch_loss))
        print("Validation Accuracy is {}".format(epoch_acc.cpu()))

            
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    
    torch.save(model.state_dict(), config.saved_path)

In [30]:
def train_log(loss, example_ct, epoch):
    loss = float(loss)
    # where the magic happens
    wandb.log({"epoch": epoch, "loss": loss}, step=example_ct)
    print(f"Loss after " + str(example_ct).zfill(5) + f" examples: {loss:.3f}")

In [31]:
train_model(model, criterion, optimizer, num_epochs=config.EPOCHS)

Epoch 0/9
----------
Loss after 00768 examples: 0.804
Validation Loss is 1.3900960204535968
Validation Accuracy is 0.715068493150685
Epoch 1/9
----------
Loss after 01552 examples: 0.472
Loss after 02352 examples: 0.615
Validation Loss is 0.9468017408292587
Validation Accuracy is 0.7205479452054795
Epoch 2/9
----------
Loss after 03136 examples: 0.341
Loss after 03936 examples: 0.596
Validation Loss is 0.7305804726195662
Validation Accuracy is 0.7972602739726028
Epoch 3/9
----------
Loss after 04720 examples: 0.447
Loss after 05520 examples: 0.388
Validation Loss is 0.35017603968512523
Validation Accuracy is 0.9095890410958904
Epoch 4/9
----------
Loss after 06304 examples: 0.386
Loss after 07104 examples: 0.588
Validation Loss is 0.4408817216958085
Validation Accuracy is 0.8904109589041096
Epoch 5/9
----------
Loss after 07888 examples: 0.452
Loss after 08688 examples: 0.575
Validation Loss is 0.43791362414621327
Validation Accuracy is 0.8876712328767123
Epoch 6/9
----------
Loss afte

## Thank You