In [14]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from matplotlib import image as mp_image
import seaborn as sns

# Required magic to display matplotlib plots in notebooks
%matplotlib inline

from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import shutil
#for dirname, _, filenames in os.walk('/kaggle/input'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

In [16]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import confusion_matrix
import seaborn as sns
import pandas as pd
import os
from PIL import Image
import shutil

# Set the path to the SenMangoFruitDDS dataset
# (read-only Kaggle input; the resize cell walks this tree for class sub-folders)
training_folder_name = '/kaggle/input/mangofruitdds/MangoFruitDDS/SenMangoFruitDDS_original'
# Define the classes based on the diseases and "Healthy" category
# NOTE(review): these names differ from the folder names listed in the
# sample-image cell ('Black Mould Rot', 'Stem end Rot'), and the order is not
# alphabetical. torchvision's ImageFolder assigns label indices by sorted
# folder name, so confirm the order before using this list to label outputs.
# Its length is what sizes the model's output layer.
classes = ['Alternaria', 'Anthracnose', 'Black_Mould_Rot', 'Stem_and_Rot', 'Healthy']

# Set image size for the CNN model
# (width, height) in pixels; the resize cell uses its own identical `size` tuple.
img_size = (128, 128)

# Create a folder for resized images
# Only the path is defined here; the directory itself is (re)created by the resize cell.
train_folder = '/kaggle/working/Mango'

In [12]:
# Function to resize image
def resize_image(src_image, size=(128, 128), bg_color="white"):
    """Return a letterboxed RGB copy of ``src_image`` that is exactly ``size``.

    The image is shrunk (never enlarged) to fit inside ``size`` while keeping
    its aspect ratio, then centred on a solid ``bg_color`` canvas.

    Args:
        src_image: A PIL image. It is left unmodified.
        size: Target ``(width, height)`` in pixels.
        bg_color: Fill colour for the padding around the scaled image.

    Returns:
        A new ``"RGB"`` PIL image of dimensions ``size``.
    """
    # Image.thumbnail() resizes in place, so operate on a copy to avoid
    # silently shrinking the caller's image.
    scaled = src_image.copy()
    scaled.thumbnail(size, Image.LANCZOS)

    canvas = Image.new("RGB", size, bg_color)
    # thumbnail() guarantees scaled.size <= size, so both offsets are >= 0.
    offset = (
        (size[0] - scaled.size[0]) // 2,
        (size[1] - scaled.size[1]) // 2,
    )
    canvas.paste(scaled, offset)
    return canvas

# Create resized copies of all the images
size = (128, 128)

# Start from an empty output folder so files left over from a previous run
# cannot leak into the dataset.
if os.path.exists(train_folder):
    shutil.rmtree(train_folder)

# Every directory found under the dataset root is treated as a class folder and
# mirrored (by name) directly under train_folder.
for root, folders, files in os.walk(training_folder_name):
    for sub_folder in folders:
        source_folder = os.path.join(root, sub_folder)
        save_folder = os.path.join(train_folder, sub_folder)
        os.makedirs(save_folder, exist_ok=True)

        for file_name in os.listdir(source_folder):
            file_path = os.path.join(source_folder, file_name)
            # listdir() also returns nested directories; Image.open() would fail on them.
            if not os.path.isfile(file_path):
                continue
            # The context manager closes the underlying file handle; without it
            # one descriptor per image stays open for the whole run.
            with Image.open(file_path) as image:
                resized_image = resize_image(image, size)
            resized_image.save(os.path.join(save_folder, file_name))

In [13]:
# Data augmentation and preprocessing
# Training transform: random flips for augmentation, then scale pixels to [-1, 1].
data_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Evaluation transform: identical preprocessing but NO random augmentation, so
# test metrics are deterministic and measured on unmodified images.
eval_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Load the dataset
# Two views of the same folder; ImageFolder enumerates files in sorted order,
# so index i refers to the same image in both views.
full_dataset = torchvision.datasets.ImageFolder(
    root=train_folder,
    transform=data_transform
)
eval_dataset = torchvision.datasets.ImageFolder(
    root=train_folder,
    transform=eval_transform
)

# Split into training and testing datasets
train_size = int(0.7 * len(full_dataset))
test_size = len(full_dataset) - train_size

# Seeded permutation makes the 70/30 split reproducible across kernel restarts.
split_seed = 42
split_generator = torch.Generator().manual_seed(split_seed)
shuffled_indices = torch.randperm(len(full_dataset), generator=split_generator).tolist()

train_dataset = torch.utils.data.Subset(full_dataset, shuffled_indices[:train_size])
test_dataset = torch.utils.data.Subset(eval_dataset, shuffled_indices[train_size:])

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/working/data'

In [None]:
# Define the CNN model
class Net(nn.Module):
    """Small two-convolution CNN classifier for 3-channel images.

    Architecture: conv(3->12) -> maxpool(2) -> ReLU -> conv(12->24) ->
    maxpool(2) -> ReLU -> dropout -> flatten -> linear -> log-softmax.

    Args:
        num_classes: Number of output classes.
        input_size: ``(height, width)`` of the images the model will receive.
            Used only to size the final linear layer. The default matches the
            original hard-coded 128x128 assumption.
    """

    def __init__(self, num_classes=5, input_size=(128, 128)):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.drop = nn.Dropout2d(p=0.2)

        # The 3x3/padding=1 convolutions keep the spatial size; each of the two
        # 2x2 max-pools halves it (floor), hence the division by 4.
        height, width = input_size
        flat_features = (height // 4) * (width // 4) * 24
        self.fc = nn.Linear(in_features=flat_features, out_features=num_classes)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, num_classes)``."""
        x = F.relu(self.pool(self.conv1(x)))
        x = F.relu(self.pool(self.conv2(x)))
        # Channel-wise dropout followed by element-wise dropout, kept as in the
        # original. NOTE(review): confirm that stacking both is intended.
        x = F.dropout(self.drop(x), training=self.training)
        # Flatten everything except the batch dimension. Unlike view(-1, N),
        # this can never fold samples into each other when the input size is
        # unexpected; the linear layer raises a shape error instead.
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return torch.log_softmax(x, dim=1)

# Instantiate the model
# One output unit per entry in `classes`.
model = Net(num_classes=len(classes))

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Set up the optimizer and loss criterion
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Net.forward already returns log-probabilities (log_softmax), so the matching
# criterion is NLLLoss. CrossEntropyLoss would apply log_softmax a second time;
# that is numerically a no-op on log-probabilities but redundant and misleading.
loss_criteria = nn.NLLLoss()

# TRAINING FUNCTION

In [None]:
# Training function
def train(model, device, train_loader, optimizer, epoch, criterion=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: The network to optimise; switched to training mode.
        device: Device the batches are moved to before the forward pass.
        train_loader: Sized iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer bound to ``model``'s parameters.
        epoch: Epoch number, used only in the progress message.
        criterion: Loss callable ``criterion(output, target)``. When omitted,
            the notebook-level ``loss_criteria`` is used, which preserves the
            original behaviour.

    Returns:
        The sum of batch losses divided by the number of batches.
    """
    # Resolve the loss once; the global is only touched when no explicit
    # criterion is supplied, so the function no longer depends on hidden state.
    loss_fn = criterion if criterion is not None else loss_criteria

    model.train()
    train_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()
    avg_loss = train_loss / len(train_loader)
    print('Epoch {}: Train set: Average loss: {:.6f}'.format(epoch, avg_loss))
    return avg_loss

# TESTING FUNCTION

In [None]:
# Testing function
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += loss_criteria(output, target).item()
            _, predicted = torch.max(output.data, 1)
            correct += torch.sum(target == predicted).item()
    avg_loss = test_loss / len(test_loader)
    accuracy = correct / len(test_loader.dataset)
    print('Test set: Average loss: {:.6f}, Accuracy: {:.2f}%'.format(avg_loss, accuracy * 100))
    return avg_loss

# TRAINING LOOP

In [None]:
# Training loop
# Train for a fixed number of epochs, evaluating after each one and recording
# both losses so they can be plotted afterwards.
epochs = 10
epoch_nums, training_loss, validation_loss = [], [], []

epoch = 1
while epoch <= epochs:
    epoch_train_loss = train(model, device, train_loader, optimizer, epoch)
    epoch_val_loss = test(model, device, test_loader)

    # History is appended only after both phases finished, so the three lists
    # always have equal length.
    epoch_nums += [epoch]
    training_loss += [epoch_train_loss]
    validation_loss += [epoch_val_loss]
    epoch += 1

# PLOTTING LOSS HISTORY

In [None]:
# Plot loss history
import matplotlib.pyplot as plt

# Draw the training and validation curves on one explicitly created axes.
loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
for loss_series, series_label in (
    (training_loss, 'Training Loss'),
    (validation_loss, 'Validation Loss'),
):
    loss_ax.plot(epoch_nums, loss_series, label=series_label)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
plt.show()

# CONFUSION MATRIX

In [None]:
# Confusion matrix
# Take the class names from the dataset itself: ImageFolder assigns label
# index i to the i-th sub-folder in sorted order, which is not the order of the
# hand-written `classes` list (there 'Healthy' is last). Labelling the axes
# from `classes` would attach the wrong names to rows and columns.
class_names = full_dataset.classes

truelabels = []
predictions = []
model.eval()
with torch.no_grad():
    for data, target in test_loader:
        # Only the inputs need to be on the model's device; targets are just recorded.
        output = model(data.to(device))
        truelabels.extend(target.tolist())
        predictions.extend(output.argmax(dim=1).cpu().tolist())

# Plot confusion matrix
# Passing explicit labels keeps the matrix square even if some class never
# appears in the test split.
cm = confusion_matrix(truelabels, predictions, labels=list(range(len(class_names))))
df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)
plt.figure(figsize=(8, 6))
sns.heatmap(df_cm, annot=True, fmt='g', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Sample Images

In [None]:
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Specify the main folder containing the subdirectories
main_folder = '/kaggle/working/Mango'

# How many images to show per class, and which files count as images.
samples_per_class = 3
image_extensions = ('.jpg', '.jpeg', '.png')

# Discover the class subdirectories on disk instead of hard-coding their names:
# a hand-written list breaks with FileNotFoundError as soon as one name does
# not match the real folder. Sorting gives a stable row order.
subdirectories = sorted(
    name for name in os.listdir(main_folder)
    if os.path.isdir(os.path.join(main_folder, name))
)

# Display sample images from each subdirectory, one row per class
plt.figure(figsize=(15, 15))

for i, subdir in enumerate(subdirectories):
    subdir_path = os.path.join(main_folder, subdir)
    # Case-insensitive extension match so files such as 'IMG_001.JPG' are not
    # skipped; sorted so the same samples are picked on every run.
    image_files = sorted(
        f for f in os.listdir(subdir_path)
        if f.lower().endswith(image_extensions)
    )[:samples_per_class]

    for j, image_file in enumerate(image_files):
        img_path = os.path.join(subdir_path, image_file)
        img = mpimg.imread(img_path)

        # Calculate the position in the subplot grid (1-based, row-major)
        position = i * samples_per_class + j + 1

        plt.subplot(len(subdirectories), samples_per_class, position)
        plt.imshow(img)
        plt.title(f"{subdir}\n{image_file}")
        plt.axis('off')

plt.show()

