# PROJECT: Early crop disease detection

# Preliminaries

In [1]:
!nvidia-smi # to see what GPU you have

Fri Nov 11 01:41:50 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.73.08    Driver Version: 510.73.08    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   25C    P8    14W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
!pip install wandb --quiet

In [3]:
import torch
from torchsummary import summary
import torchvision #This library is used for image-based operations (Augmentations)
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms, models  # datsets  , transforms
from torch.utils.data.sampler import SubsetRandomSampler
import os
import gc
from tqdm import tqdm
from PIL import Image
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
# from warmup_scheduler import GradualWarmupScheduler
import glob
import wandb
from numba import cuda
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device: ", device)

Device:  cuda


In [4]:
# from google.colab import drive # Link your drive if you are a colab user
# drive.mount('/content/drive') # Models in this HW take a long time to get trained and make sure to save it her

# TODOs
As you go, please read the code and keep an eye out for TODOs!

# Download Data from Mendley

In [5]:
!mkdir './content/'
!mkdir './content/data'
# importing libraries
import requests, zipfile, io

"""
	Download images folder from given url, move it to dataset folder.
"""

#url for data without augmentation
url = "https://data.mendeley.com/datasets/tywbtsjrjv/1/files/d5652a28-c1d8-4b76-97f3-72fb80f94efc/Plant_leaf_diseases_dataset_without_augmentation.zip?dl=1"

# # url for data with augmentation
# url = "https://data.mendeley.com/datasets/tywbtsjrjv/1/files/b4e3a32f-c0bd-4060-81e9-6144231f2520/Plant_leaf_diseases_dataset_with_augmentation.zip?dl=1"
response = requests.get(url)
z = zipfile.ZipFile(io.BytesIO(response.content))
z.extractall()

# Configs

In [6]:
config = {
    'batch_size': 4, # Increase this if your GPU can handle it
    'lr': 0.001,
    'checkpointPath' : 'checkpointdiseasef.pth',
    'epochs': 10, # 10 epochs is recommended ONLY for the early submission - you will have to train for much longer typically.
}

# Augmentations

In [8]:
transforms = {
    'train': torchvision.transforms.Compose([
        torchvision.transforms.RandomResizedCrop(224),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': torchvision.transforms.Compose([
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': torchvision.transforms.Compose([
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

data_dir = 'Plant_leave_diseases_dataset_without_augmentation'
image_datasets = {x: datasets.ImageFolder(data_dir, transform=transforms[x])
                  for x in ['train', 'val', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4, shuffle=True, num_workers=4)
               for x in ['train', 'val', 'test']}
data_size = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [9]:
# You can do this with ImageFolder as well, but it requires some tweaking
class ClassificationTestDataset(torch.utils.data.Dataset):

    def __init__(self, data_dir, transforms):
        self.data_dir   = data_dir
        self.transforms = transforms

        # This one-liner basically generates a sorted list of full paths to each image in the test directory
        self.img_paths  = list(map(lambda fname: os.path.join(self.data_dir, fname), sorted(os.listdir(self.data_dir))))

    def __len__(self):
        return len(self.img_paths)
    
    def __getitem__(self, idx):
        return self.transforms(Image.open(self.img_paths[idx]))

# Network

In [14]:
class CNN(torch.nn.Module):
    def __init__(self, K):
        super(CNN, self).__init__()
        self.conv_layers = torch.nn.Sequential(
            # conv1
            torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(32),
            torch.nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(32),
            torch.nn.MaxPool2d(2),
            # conv2
            torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(64),
            torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(64),
            torch.nn.MaxPool2d(2),
            # conv3
            torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(128),
            torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(128),
            torch.nn.MaxPool2d(2),
            # conv4
            torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(256),
            torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            torch.nn.ReLU(),
            torch.nn.BatchNorm2d(256),
            torch.nn.MaxPool2d(2),
        )

        self.dense_layers = torch.nn.Sequential(
            torch.nn.Dropout(0.4),
            torch.nn.Linear(50176, 1024),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.4),
            torch.nn.Linear(1024, K),
        )

    def forward(self, X):
        out = self.conv_layers(X)

        # Flatten
        out = out.view(-1, 50176)

        # Fully connected
        out = self.dense_layers(out)

        return out

targets_size =39
model = CNN(targets_size)
model.to(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"))
summary(model, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 224, 224]             896
              ReLU-2         [-1, 32, 224, 224]               0
       BatchNorm2d-3         [-1, 32, 224, 224]              64
            Conv2d-4         [-1, 32, 224, 224]           9,248
              ReLU-5         [-1, 32, 224, 224]               0
       BatchNorm2d-6         [-1, 32, 224, 224]              64
         MaxPool2d-7         [-1, 32, 112, 112]               0
            Conv2d-8         [-1, 64, 112, 112]          18,496
              ReLU-9         [-1, 64, 112, 112]               0
      BatchNorm2d-10         [-1, 64, 112, 112]             128
           Conv2d-11         [-1, 64, 112, 112]          36,928
             ReLU-12         [-1, 64, 112, 112]               0
      BatchNorm2d-13         [-1, 64, 112, 112]             128
        MaxPool2d-14           [-1, 64,

# Setup everything for training

In [15]:
criterion = torch.nn.CrossEntropyLoss()
optimizer_conv = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)


# Let's train!

In [16]:
def train(model, criterion, optimizer, scheduler):

    
    model.train()
    batch_bar = tqdm(total=len(dataloaders['train']), dynamic_ncols=True, leave=False, position=0, desc='Train', ncols=5) 

    running_loss = 0.0
    running_corrects = 0
    
    for i,(inputs, labels) in enumerate(dataloaders['train']):
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            
            # if phase == 'train':
            loss.backward()
            optimizer.step()
                
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)


        scheduler.step()
        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * running_corrects / (config['batch_size']*(i + 1))),
            loss="{:.04f}".format(float(running_loss / (i + 1))),
            running_corrects=running_corrects,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        
        # scaler.scale(loss).backward() # This is a replacement for loss.backward()
        # scaler.step(optimizer) # This is a replacement for optimizer.step()
        # scaler.update() 

        # TODO? Depending on your choice of scheduler,
        # You may want to call some schdulers inside the train function. What are these?
      
        batch_bar.update() # Update tqdm bar
        # clear computation cache
        torch.cuda.empty_cache()
        # del images
        # del labels
        # del loss

    batch_bar.close() # You need this to close the tqdm bar

    # acc = 100 * num_correct / (config['batch_size']* len(dataloader))
    # total_loss = float(total_loss / len(dataloader))
    epoch_loss = running_loss / data_size['train']
    epoch_acc = 100*running_corrects.double() / data_size['train']

    return epoch_acc, epoch_loss


In [17]:
def validate(model, criterion, optimizer, scheduler):
  
    model.eval()
    batch_bar = tqdm(total=len(dataloaders['val']), dynamic_ncols=True, position=0, leave=False, desc='Val', ncols=5)

    # model.train()

    running_loss = 0.0
    running_corrects = 0
    
    for i,(inputs, labels) in enumerate(dataloaders['val']):
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        with torch.set_grad_enabled(True):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            
            # if phase == 'train':
            loss.backward()
            optimizer.step()
                
        running_loss += loss.item() * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    # if phase == 'train':
    # scheduler.step()
                
    # epoch_loss = running_loss / data_size['train']
    # epoch_acc = 100*running_corrects.double() / data_size['train']
            
            # print('{} Loss: {:.4f} Train Acc: {:.4f}'.format(
            #     phase, epoch_loss, epoch_acc))

        # tqdm lets you add some details so you can monitor training as you train.
        batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * running_corrects / (config['batch_size']*(i + 1))),
            loss="{:.04f}".format(float(running_loss / (i + 1))),
            running_corrects=running_corrects,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
        
        # scaler.scale(loss).backward() # This is a replacement for loss.backward()
        # scaler.step(optimizer) # This is a replacement for optimizer.step()
        # scaler.update() 

        # TODO? Depending on your choice of scheduler,
        # You may want to call some schdulers inside the train function. What are these?
      
        batch_bar.update() # Update tqdm bar
        # clear computation cache
        torch.cuda.empty_cache()
        # del images
        # del labels
        # del loss

    batch_bar.close() # You need this to close the tqdm bar

    # acc = 100 * num_correct / (config['batch_size']* len(dataloader))
    # total_loss = float(total_loss / len(dataloader))
    epoch_loss = running_loss / data_size['val']
    epoch_acc = 100*running_corrects.double() / data_size['val']

    return epoch_acc, epoch_loss

In [18]:
import time
import copy
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
                batch_bar = tqdm(total=len(dataloaders['train']), dynamic_ncols=True, position=0, leave=False, desc='Train', ncols=5)
            else:
                model.eval()
                batch_bar = tqdm(total=len(dataloaders['val']), dynamic_ncols=True, position=0, leave=False, desc='Val', ncols=5)
                
            running_loss = 0.0
            running_corrects = 0
            
            for i,(inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
            if phase == 'train':
                scheduler.step()
                
            epoch_loss = running_loss / data_size[phase]
            epoch_acc = running_corrects.double() / data_size[phase]
            
            batch_bar.set_postfix(
            acc="{:.04f}%".format(100 * running_corrects / (config['batch_size']*(i + 1))),
            loss="{:.04f}".format(float(running_loss / (i + 1))),
            running_corrects=running_corrects,
            lr="{:.04f}".format(float(optimizer.param_groups[0]['lr'])))
            
#             print("Val Acc {:.04f}%\t Val Loss {:.04f}".format(val_acc, val_loss))

            wandb.log({"train_loss":epoch_loss, 'train_Acc': epoch_acc})
            
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            
            if phase == 'val' and epoch_acc > best_acc:
#                 val_=epoch_acc
                best_acc = epoch_acc
                
                # best_model_wts = copy.deepcopy(model.state_dict())
                print("Saving model")
                torch.save({'model_state_dict':model.state_dict(),
                            'optimizer_state_dict':optimizer_conv.state_dict(),
                            #'scheduler_state_dict':scheduler.state_dict(),
                            'epoch_acc': epoch_acc, 
                            'epoch': epoch}, './checkpointdiseasef.pth')
                # best_valacc = val_acc
                wandb.save('checkpointdiseasef.pth')
                
#             print("Val Acc {:.04f}%\t Val Loss {:.04f}".format(epoch_acc, val_loss))
            if phase=='val':
                wandb.log({'validation_Acc':epoch_acc, 
                   'validation_loss': epoch_loss})
        print()
    
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    
    model.load_state_dict(best_model_wts)
    return model

In [19]:
# criterion = torch.nn.CrossEntropyLoss()
# optimizer_conv = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# exp_lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

In [20]:
gc.collect() # These commands help you when you face CUDA OOM error
torch.cuda.empty_cache()
# free_gpu_cache()  

# Wandb

In [21]:
wandb.login(key="0df4b2e8d09c7bfafcc61974b29dd9ab7ed479b6") #API Key is in your wandb account, under settings (wandb.ai/settings)

[34m[1mwandb[0m: Currently logged in as: [33msntivugu[0m ([33mruffers[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc


True

In [22]:
# Create your wandb run
run = wandb.init(
    name = "Crop_disease_midterm", ## Wandb creates random run names if you skip this field
#     reinit = True, ### Allows reinitalizing runs when you re-run this cell
    id = '1tex8yye', ###Insert specific run id here if you want to resume a previous run
    resume = True, ### You need this to resume previous runs, but comment out reinit = True when using this
    project = "Crop_disease_midterm", ### Project should be created in your wandb account 
    config = config ### Wandb Config for your run
)

# Experiments

In [None]:
# free_gpu_cache() 

model_resnet = train_model(model, criterion, optimizer_conv, exp_lr_scheduler, num_epochs=25)

Epoch 0/24
----------


Train:   0%|                         | 0/13862 [08:42<?, ?it/s, acc=88.2791%, loss=1.5055, lr=0.0000, running_corrects=tensor(48949, device='cuda:0')]

train Loss: 0.3764 Acc: 0.8828


Val:   0%|                           | 0/13862 [02:30<?, ?it/s, acc=96.6762%, loss=0.5713, lr=0.0000, running_corrects=tensor(53605, device='cuda:0')]

val Loss: 0.1428 Acc: 0.9668
Saving model

Epoch 1/24
----------


Train:   0%|                         | 0/13862 [08:49<?, ?it/s, acc=88.0609%, loss=1.5045, lr=0.0000, running_corrects=tensor(48828, device='cuda:0')]

train Loss: 0.3761 Acc: 0.8806


Val:   0%|                           | 0/13862 [02:30<?, ?it/s, acc=95.2153%, loss=1.5014, lr=0.0000, running_corrects=tensor(52795, device='cuda:0')]

val Loss: 0.3754 Acc: 0.9522

Epoch 2/24
----------


Train:   0%|                         | 0/13862 [08:47<?, ?it/s, acc=88.4522%, loss=1.4693, lr=0.0000, running_corrects=tensor(49045, device='cuda:0')]

train Loss: 0.3673 Acc: 0.8845


Val:   0%|                           | 0/13862 [02:30<?, ?it/s, acc=95.8592%, loss=0.9107, lr=0.0000, running_corrects=tensor(53152, device='cuda:0')]

val Loss: 0.2277 Acc: 0.9586

Epoch 3/24
----------


Train:   0%|                         | 0/13862 [08:47<?, ?it/s, acc=88.1330%, loss=1.5388, lr=0.0000, running_corrects=tensor(48868, device='cuda:0')]

train Loss: 0.3847 Acc: 0.8813


Val:   0%|                           | 0/13862 [02:30<?, ?it/s, acc=96.0251%, loss=0.8433, lr=0.0000, running_corrects=tensor(53244, device='cuda:0')]

val Loss: 0.2108 Acc: 0.9603

Epoch 4/24
----------


Train:   0%|                         | 0/13862 [08:46<?, ?it/s, acc=88.4180%, loss=1.4899, lr=0.0000, running_corrects=tensor(49026, device='cuda:0')]

train Loss: 0.3725 Acc: 0.8842


Val:   0%|                           | 0/13862 [02:30<?, ?it/s, acc=96.4868%, loss=0.6485, lr=0.0000, running_corrects=tensor(53500, device='cuda:0')]

val Loss: 0.1621 Acc: 0.9649

Epoch 5/24
----------


Train:   0%|                         | 0/13862 [08:47<?, ?it/s, acc=88.3368%, loss=1.5062, lr=0.0000, running_corrects=tensor(48981, device='cuda:0')]

train Loss: 0.3765 Acc: 0.8834


Val:   0%|                           | 0/13862 [02:30<?, ?it/s, acc=96.3443%, loss=0.7428, lr=0.0000, running_corrects=tensor(53421, device='cuda:0')]

val Loss: 0.1857 Acc: 0.9634

Epoch 6/24
----------


Train:   0%|                         | 0/13862 [08:47<?, ?it/s, acc=88.0952%, loss=1.5149, lr=0.0000, running_corrects=tensor(48847, device='cuda:0')]

train Loss: 0.3787 Acc: 0.8810


Val:   0%|                           | 0/13862 [02:31<?, ?it/s, acc=96.3010%, loss=0.7150, lr=0.0000, running_corrects=tensor(53397, device='cuda:0')]

val Loss: 0.1787 Acc: 0.9630

Epoch 7/24
----------


Train:   0%|                         | 0/13862 [08:48<?, ?it/s, acc=88.3855%, loss=1.4881, lr=0.0000, running_corrects=tensor(49008, device='cuda:0')]

train Loss: 0.3720 Acc: 0.8839


Val:   0%|                           | 0/13862 [02:31<?, ?it/s, acc=95.1107%, loss=1.4311, lr=0.0000, running_corrects=tensor(52737, device='cuda:0')]

val Loss: 0.3578 Acc: 0.9511

Epoch 8/24
----------


Train:   0%|                         | 0/13862 [08:48<?, ?it/s, acc=88.1565%, loss=1.5177, lr=0.0000, running_corrects=tensor(48881, device='cuda:0')]

train Loss: 0.3794 Acc: 0.8816


Val:   0%|                           | 0/13862 [02:31<?, ?it/s, acc=95.7293%, loss=1.0440, lr=0.0000, running_corrects=tensor(53080, device='cuda:0')]

val Loss: 0.2610 Acc: 0.9573

Epoch 9/24
----------


                                                                                                                                                      

In [None]:
best_valacc = 0.0

for epoch in range(config['epochs']):

    curr_lr = float(optimizer_conv.param_groups[0]['lr'])

    train_acc, train_loss = train(model, criterion, optimizer_conv, exp_lr_scheduler)
    # train_losses, validation_losses = batch_gd(
    # model, criterion, train_loader, validation_loader, 5)
    
    print("\nEpoch {}/{}: \nTrain Acc {:.04f}%\t Train Loss {:.04f}\t Learning Rate {:.04f}".format(
        epoch + 1,
        config['epochs'],
        train_acc,
        train_loss,
        curr_lr))
    
    val_acc, val_loss = validate(model, criterion, optimizer_conv, exp_lr_scheduler)
    
    print("Val Acc {:.04f}%\t Val Loss {:.04f}".format(val_acc, val_loss))

    wandb.log({"train_loss":train_loss, 'train_Acc': train_acc, 'validation_Acc':val_acc, 
               'validation_loss': val_loss, "learning_Rate": curr_lr})
    
    # If you are using a scheduler in your train function within your iteration loop, you may want to log
    # your learning rate differently 

    # #Save model in drive location if val_acc is better than best recorded val_acc
    if val_acc >= best_valacc:
      #path = os.path.join(root, model_directory, 'checkpoint' + '.pth')
      print("Saving model")
      torch.save({'model_state_dict':model.state_dict(),
                  'optimizer_state_dict':optimizer_conv.state_dict(),
                  #'scheduler_state_dict':scheduler.state_dict(),
                  'val_acc': val_acc, 
                  'epoch': epoch}, './drive/MyDrive/checkpointdiseasefv3.pth')
      best_valacc = val_acc
      wandb.save('checkpointdiseasefv2.pth')
      # You may find it interesting to exlplore Wandb Artifcats to version your models
run.finish()

# Classification Task: Testing

In [None]:
# test_results = test(model, test_loader)