# Setup

In [12]:
import os
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Data Loader

In [13]:
# Define transformations
transform_inception = transforms.Compose([
    transforms.Resize(299),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_vit = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [14]:
# Load dataset
data_dir = './potato'
train_dir = os.path.join(data_dir, 'Train')
test_dir = os.path.join(data_dir, 'Test')

In [15]:
train_dataset_inception = datasets.ImageFolder(train_dir, transform=transform_inception)
train_dataset_vit = datasets.ImageFolder(train_dir, transform=transform_vit)

test_dataset_inception = datasets.ImageFolder(test_dir, transform=transform_inception)
test_dataset_vit = datasets.ImageFolder(test_dir, transform=transform_vit)

# Create data loaders
train_loader_inception = DataLoader(train_dataset_inception, batch_size=32, shuffle=True)
train_loader_vit = DataLoader(train_dataset_vit, batch_size=32, shuffle=True)

test_loader_inception = DataLoader(test_dataset_inception, batch_size=32, shuffle=False)
test_loader_vit = DataLoader(test_dataset_vit, batch_size=32, shuffle=False)

# Inception V3

In [17]:
# Load pre-trained InceptionV3 model
model_inception = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)

# Modify the classifier for the dataset
num_classes = len(train_dataset_inception.classes)
model_inception.fc = nn.Linear(model_inception.fc.in_features, num_classes)

# Move model to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_inception.to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_inception = torch.optim.Adam(model_inception.parameters(), lr=0.001)

# Train InceptionV3
for epoch in range(10):
    model_inception.train()
    for images, labels in train_loader_inception:
        images, labels = images.to(device), labels.to(device)
        optimizer_inception.zero_grad()
        outputs = model_inception(images)
        if isinstance(outputs, tuple):
            outputs = outputs[0]  # Select primary output
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_inception.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 0.808927059173584
Epoch 2, Loss: 0.08275655657052994
Epoch 3, Loss: 0.22340166568756104
Epoch 4, Loss: 0.5032994747161865
Epoch 5, Loss: 3.0533785820007324
Epoch 6, Loss: 0.004764764104038477
Epoch 7, Loss: 8.379476639674976e-05
Epoch 8, Loss: 0.17812642455101013
Epoch 9, Loss: 2.1143958568573
Epoch 10, Loss: 0.023470256477594376


In [18]:
# Evaluate InceptionV3
model_inception.eval()
predicted_classes_list = []
labels_list = []

with torch.no_grad():
    for images, labels in test_loader_inception:
        images, labels = images.to(device), labels.to(device)
        outputs = model_inception(images)
        if isinstance(outputs, tuple):
            outputs = outputs[0]
        predicted_classes = torch.argmax(outputs, dim=1)
        predicted_classes_list.extend(predicted_classes.cpu().numpy())
        labels_list.extend(labels.cpu().numpy())

# Compute evaluation metrics
accuracy = accuracy_score(labels_list, predicted_classes_list)
print(f"InceptionV3 Accuracy: {accuracy}")

report = classification_report(labels_list, predicted_classes_list)
print("InceptionV3 Classification Report:\n", report)

conf_mat = confusion_matrix(labels_list, predicted_classes_list)
print("InceptionV3 Confusion Matrix:\n", conf_mat)

InceptionV3 Accuracy: 0.9433333333333334
InceptionV3 Classification Report:
               precision    recall  f1-score   support

           0       0.92      1.00      0.96       100
           1       1.00      0.83      0.91       100
           2       0.93      1.00      0.96       100

    accuracy                           0.94       300
   macro avg       0.95      0.94      0.94       300
weighted avg       0.95      0.94      0.94       300

InceptionV3 Confusion Matrix:
 [[100   0   0]
 [  9  83   8]
 [  0   0 100]]


# ViT

In [22]:
# Load pre-trained ViT model
model_vit = models.vit_b_16(weights=models.ViT_B_16_Weights.DEFAULT)

# Modify the classifier for the dataset
num_classes = len(train_dataset_vit.classes)
model_vit.heads.head = nn.Linear(model_vit.heads.head.in_features, num_classes)

# Move model to device
model_vit.to(device)

# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer_vit = torch.optim.Adam(model_vit.parameters(), lr=0.001)

# Train ViT
for epoch in range(10):
    model_vit.train()
    for images, labels in train_loader_vit:
        images, labels = images.to(device), labels.to(device)
        optimizer_vit.zero_grad()
        outputs = model_vit(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_vit.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 0.5770184397697449
Epoch 2, Loss: 0.8810605406761169
Epoch 3, Loss: 0.37967926263809204
Epoch 4, Loss: 0.08804091066122055
Epoch 5, Loss: 0.6425436735153198
Epoch 6, Loss: 0.05353998765349388
Epoch 7, Loss: 0.1216326355934143
Epoch 8, Loss: 0.019034769386053085
Epoch 9, Loss: 0.002046598121523857
Epoch 10, Loss: 0.005959389731287956


In [23]:
# Evaluate ViT
model_vit.eval()
predicted_classes_list = []
labels_list = []

with torch.no_grad():
    for images, labels in test_loader_vit:
        images, labels = images.to(device), labels.to(device)
        outputs = model_vit(images)
        predicted_classes = torch.argmax(outputs, dim=1)
        predicted_classes_list.extend(predicted_classes.cpu().numpy())
        labels_list.extend(labels.cpu().numpy())

# Compute evaluation metrics
accuracy = accuracy_score(labels_list, predicted_classes_list)
print(f"ViT Accuracy: {accuracy}")

report = classification_report(labels_list, predicted_classes_list)
print("ViT Classification Report:\n", report)

conf_mat = confusion_matrix(labels_list, predicted_classes_list)
print("ViT Confusion Matrix:\n", conf_mat)

ViT Accuracy: 0.9033333333333333
ViT Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.85      0.90       100
           1       0.85      0.87      0.86       100
           2       0.90      0.99      0.94       100

    accuracy                           0.90       300
   macro avg       0.91      0.90      0.90       300
weighted avg       0.91      0.90      0.90       300

ViT Confusion Matrix:
 [[85 14  1]
 [ 3 87 10]
 [ 0  1 99]]


# Ensemble

In [25]:
def predict_ensemble(images_inception, images_vit):
    images_inception, images_vit = images_inception.to(device), images_vit.to(device)
    
    # Ensure both batches have the same size
    batch_size = min(images_inception.shape[0], images_vit.shape[0])
    images_inception = images_inception[:batch_size]
    images_vit = images_vit[:batch_size]
    
    output_inception = model_inception(images_inception)
    if isinstance(output_inception, tuple):
        output_inception = output_inception[0]  # Select the primary output
    output_vit = model_vit(images_vit)
    
    # Calculate weighted average (example weights: 0.6 for Inception, 0.4 for ViT)
    weights = [0.6, 0.4]
    output_inception_softmax = torch.nn.functional.softmax(output_inception, dim=1)
    output_vit_softmax = torch.nn.functional.softmax(output_vit, dim=1)
    
    output = weights[0] * output_inception_softmax + weights[1] * output_vit_softmax
    
    return torch.argmax(output, dim=1)

class_names = ['early_blight', 'healthy', 'late_blight']
predicted_classes_list = []
labels_list = []

# Example usage with aligned batches
test_loader_inception_iter = iter(test_loader_inception)
test_loader_vit_iter = iter(test_loader_vit)

for _ in range(len(test_loader_inception)):
    images_inception, labels_inception = next(test_loader_inception_iter)
    images_vit, labels_vit = next(test_loader_vit_iter)
    
    predicted_classes = predict_ensemble(images_inception, images_vit)
    
    # Append predictions and labels to lists
    predicted_classes_list.extend(predicted_classes.cpu().numpy())
    labels_list.extend(labels_inception.cpu().numpy())
    

# Evaluation Metrics

In [26]:
accuracy = accuracy_score(labels_list, predicted_classes_list)
print(f"Accuracy: {accuracy}")

# Compute classification report
report = classification_report(labels_list, predicted_classes_list)
print("Classification Report:\n", report)

# Compute confusion matrix
conf_mat = confusion_matrix(labels_list, predicted_classes_list)
print("Confusion Matrix:\n", conf_mat)

Accuracy: 0.9766666666666667
Classification Report:
               precision    recall  f1-score   support

           0       0.96      1.00      0.98       100
           1       1.00      0.93      0.96       100
           2       0.97      1.00      0.99       100

    accuracy                           0.98       300
   macro avg       0.98      0.98      0.98       300
weighted avg       0.98      0.98      0.98       300

Confusion Matrix:
 [[100   0   0]
 [  4  93   3]
 [  0   0 100]]
