<a href="https://colab.research.google.com/github/s25337/plant-disease-ai/blob/main/plant_illness_detect.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import kagglehub
import os
import pandas as pd

dataset_path = kagglehub.dataset_download("vipoooool/new-plant-diseases-dataset")
train_dir = os.path.join(dataset_path, "New Plant Diseases Dataset(Augmented)/New Plant Diseases Dataset(Augmented)")

dataset_path_train = os.path.join(train_dir, "train")
dataset_path_valid = os.path.join(train_dir, "valid")

def create_dataset_csv(dataset_dir, output_csv):
    image_paths = []
    labels = []

    for subfolder in os.listdir(dataset_dir):
        subfolder_path = os.path.join(dataset_dir, subfolder)

        if os.path.isdir(subfolder_path):
            label = 'healthy' if subfolder.endswith('healthy') else 'not_healthy'

            for file in os.listdir(subfolder_path):
                if file.endswith(('.jpg', '.png', '.jpeg')):
                    image_path = os.path.join(subfolder_path, file)

                    image_paths.append(image_path)
                    labels.append(label)

    data = pd.DataFrame({
        'image_path': image_paths,
        'label': labels
    })

    data.to_csv(output_csv, index=False)
    print(f"Dataset CSV has been created at {output_csv}.")


create_dataset_csv(dataset_path_train, 'train_labels.csv')
create_dataset_csv(dataset_path_valid, 'valid_labels.csv')

Dataset CSV has been created at train_labels.csv.
Dataset CSV has been created at valid_labels.csv.


In [24]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
import torch.nn as nn
import torch.optim as optim

In [25]:
class PlantDiseaseDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data_frame = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        return len(self.data_frame)

    def __getitem__(self, idx):
        img_path = self.data_frame.iloc[idx, 0]
        label = self.data_frame.iloc[idx, 1]
        img = Image.open(img_path).convert("RGB")

        #(1 = healthy, 0 = not_healthy)
        label = 1 if label == 'healthy' else 0

        if self.transform:
            img = self.transform(img)

        return img, label

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [26]:
train_dataset = PlantDiseaseDataset(csv_file='train_labels.csv', transform=transform)
valid_dataset = PlantDiseaseDataset(csv_file='valid_labels.csv', transform=transform)

# Create data loaders for batch processing
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

In [27]:
def train_model(model, train_loader, valid_loader, num_epochs=10, lr=0.001, device='cuda', model_path="./content/default.pth"):

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_accuracy = 100 * correct / total
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.2f}%")

        model.eval()
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_accuracy = 100 * val_correct / val_total
        print(f"Validation Accuracy: {val_accuracy:.2f}%")
    model_path = model_path
    torch.save(model.state_dict(), model_path)
    print(f"Model saved to {model_path}")
    return model

In [28]:
import torch.nn as nn
class RainforestNN(nn.Module):
    def __init__(self):
        super(RainforestNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(128 * 16 * 16, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 2)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(-1, 128 * 16 * 16)
        x = self.classifier(x)
        return x

In [29]:
import torch

device = torch.device("cpu")

print(f"Using device: {device}")
model_rain_forest = RainforestNN()
model = model_rain_forest.to(device)

rain_forest_trained = train_model(model_rain_forest, train_loader, valid_loader, num_epochs=10, lr=0.001, device='cpu', model_path="./content/rain_forest.pth")

Using device: cpu
Epoch [1/10], Loss: 0.1505, Accuracy: 95.67%
Validation Accuracy: 99.17%
Epoch [2/10], Loss: 0.0444, Accuracy: 98.94%
Validation Accuracy: 99.17%
Epoch [3/10], Loss: 0.0490, Accuracy: 99.31%
Validation Accuracy: 99.33%
Epoch [4/10], Loss: 0.0203, Accuracy: 99.63%
Validation Accuracy: 99.83%
Epoch [5/10], Loss: 0.0191, Accuracy: 99.75%
Validation Accuracy: 99.67%
Epoch [6/10], Loss: 0.0460, Accuracy: 99.18%
Validation Accuracy: 98.50%
Epoch [7/10], Loss: 0.0185, Accuracy: 99.63%
Validation Accuracy: 97.83%
Epoch [8/10], Loss: 0.0279, Accuracy: 99.35%
Validation Accuracy: 99.33%
Epoch [9/10], Loss: 0.0370, Accuracy: 99.59%
Validation Accuracy: 99.83%
Epoch [10/10], Loss: 0.0113, Accuracy: 99.80%
Validation Accuracy: 99.50%
Model saved to ./content/rain_forest.pth


In [30]:
os.makedirs(os.path.dirname("./content/rain_forest.pth"), exist_ok=True)

In [31]:
os.makedirs(os.path.dirname("./content/cnn.pth"), exist_ok=True)

In [32]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, 2)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 32 * 32 * 32)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [33]:
model_cnn = SimpleCNN()
model = model_cnn.to(device)
rain_forest_trained = train_model(model_cnn, train_loader, valid_loader, num_epochs=10, lr=0.001, device='cpu', model_path="./content/cnn.pth")

Epoch [1/10], Loss: 0.1067, Accuracy: 96.04%
Validation Accuracy: 99.33%
Epoch [2/10], Loss: 0.0246, Accuracy: 99.51%
Validation Accuracy: 99.50%
Epoch [3/10], Loss: 0.0103, Accuracy: 99.75%
Validation Accuracy: 99.50%
Epoch [4/10], Loss: 0.0133, Accuracy: 99.63%
Validation Accuracy: 99.50%
Epoch [5/10], Loss: 0.0104, Accuracy: 99.75%
Validation Accuracy: 99.67%
Epoch [6/10], Loss: 0.0079, Accuracy: 99.88%
Validation Accuracy: 99.50%
Epoch [7/10], Loss: 0.0006, Accuracy: 100.00%
Validation Accuracy: 99.50%
Epoch [8/10], Loss: 0.0002, Accuracy: 100.00%
Validation Accuracy: 99.50%
Epoch [9/10], Loss: 0.0001, Accuracy: 100.00%
Validation Accuracy: 99.50%
Epoch [10/10], Loss: 0.0001, Accuracy: 100.00%
Validation Accuracy: 99.67%
Model saved to ./content/cnn.pth


In [34]:
!pip install efficientnet_pytorch

from efficientnet_pytorch import EfficientNet

model_efficientnet = EfficientNet.from_pretrained('efficientnet-b0', num_classes=2)

model_efficientnet = model_efficientnet.to(device)

efficientnet_trained = train_model(model_efficientnet, train_loader, valid_loader, num_epochs=10, lr=0.001, device='cpu', model_path="content/efficientnet.pth")

Loaded pretrained weights for efficientnet-b0
Epoch [1/10], Loss: 0.0737, Accuracy: 97.26%
Validation Accuracy: 97.66%
Epoch [2/10], Loss: 0.0149, Accuracy: 99.67%
Validation Accuracy: 98.16%
Epoch [3/10], Loss: 0.0119, Accuracy: 99.55%
Validation Accuracy: 99.33%
Epoch [4/10], Loss: 0.0052, Accuracy: 99.80%
Validation Accuracy: 100.00%
Epoch [5/10], Loss: 0.0151, Accuracy: 99.59%
Validation Accuracy: 99.50%
Epoch [6/10], Loss: 0.0174, Accuracy: 99.43%
Validation Accuracy: 99.33%
Epoch [7/10], Loss: 0.0068, Accuracy: 99.84%
Validation Accuracy: 99.83%
Epoch [8/10], Loss: 0.0031, Accuracy: 99.88%
Validation Accuracy: 99.67%
Epoch [9/10], Loss: 0.0139, Accuracy: 99.55%
Validation Accuracy: 100.00%
Epoch [10/10], Loss: 0.0029, Accuracy: 99.96%
Validation Accuracy: 100.00%
Model saved to content/efficientnet.pth


In [35]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from matplotlib.backends.backend_pdf import PdfPages

In [36]:
loaded_model_rain_forest = RainforestNN() 
loaded_model_rain_forest.load_state_dict(torch.load("./content/rain_forest.pth"))  # Load weights
loaded_model_rain_forest = loaded_model_rain_forest.to(device)  
loaded_model_rain_forest.eval()  # evaluation mode
loaded_model_cnn = SimpleCNN()  # new instance
loaded_model_cnn.load_state_dict(torch.load("./content/cnn.pth"))  # Load the saved weights
loaded_model_cnn = loaded_model_cnn.to(device)  # Move the loaded model to the device
loaded_model_cnn.eval()  # evaluation mode
test_loader = valid_loader
loaded_model_efficientnet = model_efficientnet
loaded_model_efficientnet.load_state_dict(torch.load("content/efficientnet.pth"))
loaded_model_efficientnet = loaded_model_efficientnet.to(device)
loaded_model_efficientnet.eval()

  loaded_model_rain_forest.load_state_dict(torch.load("./content/rain_forest.pth"))  # Load the saved weights
  loaded_model_cnn.load_state_dict(torch.load("./content/cnn.pth"))  # Load the saved weights
  loaded_model_efficientnet.load_state_dict(torch.load("content/efficientnet.pth"))


EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d((0, 1, 0, 1))
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d((1, 1, 1, 1))
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
    

In [37]:
y_pred_rain_forest = []
y_true_rain_forest = []
for images, labels in test_loader:
  images = images.to(device)  # Move images to the same device as the model
  labels = labels.to(device)
  outputs = loaded_model_rain_forest(images)
  _, predicted = torch.max(outputs, 1)
  y_pred_rain_forest.extend(predicted.tolist())
  y_true_rain_forest.extend(labels.tolist())

# Uzyskaj predykcje z model2
y_pred_cnn = []
y_true_cnn = []
for images, labels in test_loader:
  images = images.to(device)  # Move images to the same device as the model
  labels = labels.to(device)
  outputs = loaded_model_cnn(images)
  _, predicted = torch.max(outputs, 1)
  y_pred_cnn.extend(predicted.tolist())
  y_true_cnn.extend(labels.tolist())
  
y_pred_eficientnet = []
y_true_efficientnet = []
for images, labels in test_loader:
  images = images.to(device)
  labels = labels.to(device)
  outputs = loaded_model_cnn(images)
  _, predicted = torch.max(outputs, 1)
  y_pred_eficientnet.extend(predicted.tolist())
  y_true_efficientnet.extend(labels.tolist())

In [38]:
cm1 = confusion_matrix(y_true_rain_forest, y_pred_rain_forest)
cm2 = confusion_matrix(y_true_cnn, y_pred_cnn)
cm3 = confusion_matrix(y_true_efficientnet, y_pred_eficientnet)

In [39]:
with PdfPages('./content/confusion_matrices.pdf') as pdf:
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm1, annot=True, fmt="d", cmap="RdPu")
    plt.title("Macierz błędów dla RainForest")
    plt.xlabel("Predykcje")
    plt.ylabel("Rzeczywiste wartości")
    #plt.show()
    pdf.savefig() 
    plt.close()
    
    # Wyświetl macierz błędów dla model2
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm2, annot=True, fmt="d", cmap="RdPu")
    plt.title("Macierz błędów dla CNN")
    plt.xlabel("Predykcje")
    plt.ylabel("Rzeczywiste wartości")
    #plt.show()
    pdf.savefig() 
    plt.close()
    
    
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm3, annot=True, fmt="d", cmap="RdPu")
    plt.title("Macierz błędów dla EfficientNet")
    plt.xlabel("Predykcje")
    plt.ylabel("Rzeczywiste wartości")
    #plt.show()
    pdf.savefig()
    plt.close()