## Plant disease detection using a fine-tuned ResNet50

----------------

Define the path to the dataset.

In [1]:
# Root folder of the colour-image variant of the dataset (one sub-folder per class).
path = "/kaggle/input/plantvillage-dataset/color"

In [2]:
from torchvision import datasets

# Index every image under `path`; labels come from the sub-folder names.
# No transform is attached here: each split gets its own transform later on.
full_dataset = datasets.ImageFolder(root=path, transform=None)

In [3]:
import torch

# Fixed seed so that train/val/test membership is identical on every run.
# Without it, each kernel restart produces a different split and the reported
# metrics are not comparable between runs.
SPLIT_SEED = 42

# 60% / 20% / 20% split; the test split absorbs any rounding remainder so the
# three sizes always add up to len(full_dataset).
train_size = int(0.6 * len(full_dataset))
val_size = int(0.2 * len(full_dataset))
test_size = len(full_dataset) - train_size - val_size

# Split the dataset with a dedicated, seeded generator (does not disturb the
# global RNG state used elsewhere, e.g. for weight init or augmentation).
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")

Training set size: 32583
Validation set size: 10861
Test set size: 10861


In [4]:
from torchvision import transforms

# Training pipeline: random crop + random horizontal flip as augmentation,
# then tensor conversion and per-channel normalisation.
train_transform = transforms.Compose(
    [
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Validation/test pipeline: deterministic resize + centre crop (no augmentation),
# followed by the same tensor conversion and normalisation as for training.
val_test_transform = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [10]:
from torchvision.datasets import ImageFolder
from torch.utils.data import Subset

def apply_transform(subset, transform):
    """Return a copy of ``subset`` whose images go through ``transform``.

    A fresh ``ImageFolder`` is built over the same root directory as
    ``full_dataset`` with the given transform attached, and the indices of
    ``subset`` are re-applied to it so the same samples are selected.

    Args:
        subset: object exposing an ``indices`` attribute (e.g. the result of
            ``random_split``).
        transform: transform to attach to the newly created ``ImageFolder``.

    Returns:
        A ``Subset`` over the new ``ImageFolder`` restricted to ``subset.indices``.
    """
    transformed_source = ImageFolder(root=full_dataset.root, transform=transform)
    selected_indices = subset.indices
    return Subset(transformed_source, selected_indices)

# Attach the augmenting transform to the training split and the plain
# preprocessing transform to the validation and test splits.
train_dataset, val_dataset, test_dataset = (
    apply_transform(train_dataset, train_transform),
    apply_transform(val_dataset, val_test_transform),
    apply_transform(test_dataset, val_test_transform),
)

In [6]:
# Number of images per mini-batch; tune to the available memory.
batch_size = 32

# Training batches are reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Validation and test batches keep a fixed order (no shuffling).
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [11]:
from torchvision import models

# Load a pre-trained ResNet-50 as the starting point for fine-tuning
model = models.resnet50(pretrained=True)

# Size the classification head from the dataset itself instead of a hard-coded
# number: one output per class folder found by ImageFolder. This keeps the model
# in step with `full_dataset.classes`, which is also what the per-class report
# uses for its row names.
num_classes = len(full_dataset.classes)
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, num_classes)

# Move the model to the GPU if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [12]:
# Multi-class classification loss applied to the raw model outputs.
criterion = torch.nn.CrossEntropyLoss()

# SGD with momentum over every model parameter (the whole network is updated).
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)
# Alternative optimizer kept for reference:
# optimizer = torch.optim.Adagrad(model.parameters(), lr=0.001)

In [14]:
import csv
import time

def _run_phase(model, criterion, optimizer, loader, device, training):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Both values are plain Python floats averaged over the samples actually
    seen. Weights are only updated when ``training`` is true.
    """
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    correct = 0
    seen = 0

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        if training:
            optimizer.zero_grad()

        # Gradients are only tracked while training.
        with torch.set_grad_enabled(training):
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

        # Weight the batch-mean loss by batch size so a short last batch does
        # not skew the epoch average; .item() keeps the counters as Python
        # numbers rather than (possibly GPU-resident) tensors.
        n_batch = inputs.size(0)
        loss_sum += loss.item() * n_batch
        correct += (preds == labels).sum().item()
        seen += n_batch

    if seen == 0:
        # Empty loader: report zeros instead of dividing by zero.
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=25):
    """Train ``model`` and validate it after every epoch, logging to CSV.

    Each epoch runs a training pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. Per-epoch loss/accuracy for both
    phases and the epoch wall-clock time are printed and appended to
    ``training_log.csv`` in the current working directory (the file is
    overwritten at the start of the call).

    Args:
        model: ``torch.nn.Module`` to optimise; batches are moved to the device
            its parameters live on.
        criterion: loss callable taking ``(outputs, labels)``.
        optimizer: optimizer updating ``model``'s parameters.
        train_loader: iterable of ``(inputs, labels)`` batches for training.
        val_loader: iterable of ``(inputs, labels)`` batches for validation.
        num_epochs: number of epochs to run.

    Returns:
        The same ``model`` object, after training.
    """
    results_file = "training_log.csv"
    header = ['epoch', 'train_loss', 'train_acc', 'val_loss', 'val_acc', 'time_elapsed']

    # Follow the model's own device rather than relying on a global variable;
    # fall back to CPU for a model without parameters.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    # Start a fresh log containing only the header row.
    with open(results_file, 'w', newline='') as f:
        csv.writer(f).writerow(header)

    for epoch in range(num_epochs):
        start_time = time.time()
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)

        metrics = {'train_loss': 0.0, 'train_acc': 0.0, 'val_loss': 0.0, 'val_acc': 0.0}

        for phase, loader in (('train', train_loader), ('val', val_loader)):
            epoch_loss, epoch_acc = _run_phase(
                model, criterion, optimizer, loader, run_device, training=(phase == 'train')
            )
            metrics[f'{phase}_loss'] = epoch_loss
            metrics[f'{phase}_acc'] = epoch_acc

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        # Append after every epoch so the log survives an interrupted run.
        time_elapsed = time.time() - start_time
        with open(results_file, 'a', newline='') as f:
            csv.writer(f).writerow([
                epoch + 1,
                metrics['train_loss'], metrics['train_acc'],
                metrics['val_loss'], metrics['val_acc'],
                time_elapsed,
            ])

    print(f"\nTraining complete! Results saved to {results_file}")
    return model


In [None]:
# Fine-tune for 10 epochs; per-epoch metrics are printed below.
model = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=10,
)

Epoch 1/10
----------
train Loss: 0.3284 Acc: 0.9102
val Loss: 0.0627 Acc: 0.9803
Epoch 2/10
----------
train Loss: 0.1122 Acc: 0.9676
val Loss: 0.0439 Acc: 0.9874
Epoch 3/10
----------
train Loss: 0.0878 Acc: 0.9743
val Loss: 0.0451 Acc: 0.9852
Epoch 4/10
----------
train Loss: 0.0717 Acc: 0.9801
val Loss: 0.0299 Acc: 0.9913
Epoch 5/10
----------
train Loss: 0.0637 Acc: 0.9815
val Loss: 0.0277 Acc: 0.9913
Epoch 6/10
----------
train Loss: 0.0560 Acc: 0.9836
val Loss: 0.0260 Acc: 0.9926
Epoch 7/10
----------
train Loss: 0.0537 Acc: 0.9846
val Loss: 0.0265 Acc: 0.9913
Epoch 8/10
----------
train Loss: 0.0474 Acc: 0.9860
train Loss: 0.0454 Acc: 0.9866
val Loss: 0.0195 Acc: 0.9945
Epoch 10/10
----------


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Evaluate the trained model on the held-out TEST split. The validation split
# was already used to monitor training, so reporting it as "test" would
# overstate how well the model generalises.
model.eval()

# Running totals over the whole split
test_loss = 0.0
test_correct = 0
test_total = 0

# Every prediction / label, kept for the sklearn metrics below
all_preds = []
all_labels = []

# No gradients are needed for evaluation
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move inputs and labels to the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs, 1)

        # Weight the loss by the number of samples in the batch so that a
        # shorter final batch does not skew the average (same convention as
        # the training loop).
        n_batch = labels.size(0)
        test_loss += loss.item() * n_batch
        test_total += n_batch
        test_correct += (predicted == labels).sum().item()

        # Store predictions and labels for metric calculation
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Convert lists to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Per-sample averages over the whole split
test_accuracy = 100 * test_correct / test_total
test_loss /= test_total

# Precision, Recall, F1-Score (averaged over classes, weighted by class support)
precision = precision_score(all_labels, all_preds, average='weighted')
recall = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')

# Print metrics
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%")
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

# Save results to a tab-separated file; the path is held in one variable so the
# confirmation message below always names the file that was actually written.
results_path = "evaluation_results_momentum.txt"
with open(results_path, "w") as f:
    f.write("Metric\tValue\n")
    f.write(f"Test Loss\t{test_loss:.4f}\n")
    f.write(f"Test Accuracy\t{test_accuracy:.2f}%\n")
    f.write(f"Precision\t{precision:.4f}\n")
    f.write(f"Recall\t{recall:.4f}\n")
    f.write(f"F1-Score\t{f1:.4f}\n")

print(f"Evaluation results saved to '{results_path}'")

In [None]:
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd

# Per-class metrics on the test split.
model.eval()

class_names = full_dataset.classes
# Label ids in the same order as `class_names`. Passing them explicitly keeps
# every report row tied to the right class even when some class never occurs
# in this split's labels or predictions; without it sklearn infers the label
# set from the data and can fail or misalign against `target_names`.
class_ids = list(range(len(class_names)))

# Ground-truth labels and predictions for the whole test split
all_labels = []
all_predictions = []

# No gradients are needed for evaluation
with torch.no_grad():
    for inputs, labels in test_loader:
        # Move inputs and labels to the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass, then take the highest-scoring class per sample
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)

        # Collect labels and predictions for metric calculation
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

# Overall accuracy
accuracy = accuracy_score(all_labels, all_predictions)

# Per-class precision / recall / F1 / support. zero_division=0 reports 0 for a
# class with no predictions or no samples, without emitting a warning per class.
report = classification_report(
    all_labels,
    all_predictions,
    labels=class_ids,
    target_names=class_names,
    output_dict=True,
    zero_division=0,
)

# One row per class (plus the aggregate rows) for easier saving
report_df = pd.DataFrame(report).transpose()

# Save the report to a file
report_df.to_csv("class_specific_metrics.csv", index=True)

# Print accuracy and classification report
print(f"Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(report_df)