# Model for disease prediction on tomato leaves

   Thanks to the dataset PlantVillage, we have access to a dataset with disease on different plant's leaves. Here, we took disease only from tomato in order to prevent the disease. We can try different model. The first one that we tried is the model AlexNet.

In [1]:
import torch
import os
from PIL import Image
import cv2
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
import time
from datetime import datetime


from torch.utils.tensorboard import SummaryWriter

import torchvision.transforms as transforms
import torch
import torch.nn as nn

from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader


data_path = os.path.realpath("PlantVillage")
labels_name = [name[0].split("\\")[-1]  for name in os.walk(data_path) 
               if "Tomato" in name[0]]
directory_paths = [os.path.join(data_path, name) for name in labels_name]

num_classes = len(labels_name)

# Data transformation

# Model creation 

In [2]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', weights="AlexNet_Weights.IMAGENET1K_V1")
model.eval()

Using cache found in C:\Users\yassi/.cache\torch\hub\pytorch_vision_v0.10.0


AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [3]:
model.classifier[6] = nn.Linear(4096, len(labels_name))
model.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [4]:
data_transforms = transforms.Compose([
        transforms.Resize((224,224)),             # resize the input to 224x224
        transforms.ToTensor(),              # put the input to tensor format
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # normalize the input
        # the normalization is based on images from ImageNet
    ])

In [7]:
dataset = ImageFolder(root=data_path, transform=data_transforms)

train_size = int(0.66 * len(dataset))
test_size = int(0.005 * len(dataset))
rest = len(dataset) - train_size - test_size
train_set, test_set, validation_set = torch.utils.data.random_split(dataset, [train_size, test_size, rest])

training_loader = DataLoader(train_set, batch_size=10, shuffle=True)
testing_loader = DataLoader(test_set, batch_size=10, shuffle=True)


In [8]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [9]:
def train_one_epoch(epoch_index, tb_writer, training_loader):
    running_loss = 0.
    last_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in tqdm(enumerate(training_loader), total = len(training_loader)):
        # Every data instance is an input + label pair
        inputs, labels = data
        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        if i % 1000 == 999:
            last_loss = running_loss / 1000 # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            tb_x = epoch_index * len(training_loader) + i + 1
            tb_writer.add_scalar('Loss/train', last_loss, tb_x)
            running_loss = 0.

    return last_loss

In [13]:
# Initializing in a separate cell so we can easily add more epochs to the same run
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/plant_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 5

best_vloss = 1_000_000.

for epoch in tqdm(range(EPOCHS)):
    print('EPOCH {}:'.format(epoch_number + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    model.train(True)
    avg_loss = train_one_epoch(epoch_number, writer, training_loader)

    # We don't need gradients on to do reporting
    model.train(False)
    

    running_vloss = 0.0
    for i, vdata in enumerate(testing_loader):
        vinputs, vlabels = vdata
        voutputs = model(vinputs)
        vloss = loss_fn(voutputs, vlabels)
        running_vloss += vloss

    avg_vloss = running_vloss / (i + 1)
    print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

    # Log the running loss averaged per batch
    # for both training and validation
    writer.add_scalars('Training vs. Validation Loss',
                    { 'Training' : avg_loss, 'Validation' : avg_vloss },
                    epoch_number + 1)
    writer.flush()

    # Track best performance, and save the model's state
    if avg_vloss < best_vloss:
        best_vloss = avg_vloss
        model_path = 'model_{}_{}'.format(timestamp, epoch_number)
        torch.save(model.state_dict(), model_path)

    epoch_number += 1

  0%|          | 0/5 [00:00<?, ?it/s]

EPOCH 1:


  0%|          | 0/80 [00:00<?, ?it/s]

LOSS train 0.0 valid 0.3934473991394043
EPOCH 2:


  0%|          | 0/80 [00:00<?, ?it/s]

LOSS train 0.0 valid 0.28817296028137207
EPOCH 3:


  0%|          | 0/80 [00:00<?, ?it/s]

LOSS train 0.0 valid 0.4646357595920563
EPOCH 4:


  0%|          | 0/80 [00:00<?, ?it/s]

LOSS train 0.0 valid 0.4125227630138397
EPOCH 5:


  0%|          | 0/80 [00:00<?, ?it/s]

LOSS train 0.0 valid 0.3104753792285919


RuntimeError: File models cannot be opened.

# Evaluate our model:

In [15]:
model_name = "model_20230419_051231_0"

model.load_state_dict(torch.load(model_name))
model.eval()

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [26]:
test_image = "PlantVillage/Tomato_healthy/00bce074-967b-4d50-967a-31fdaa35e688___RS_HL 0223.JPG"

img = Image.open(test_image)

pred = model(data_transforms(img).unsqueeze(0))

In [27]:
pred

tensor([[-3.1607, -4.0064,  0.0470, -1.4172, -2.2360,  1.0947,  6.5668, -5.3828,
         -3.8390, 11.4823]], grad_fn=<AddmmBackward0>)

In [28]:
import numpy as np

print(pred.argmax())

tensor(9)
