# Cassava Leaf Disease Classification

## Import

In [6]:
import numpy as np 
import pandas as pd 
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
import os
import pandas as pd
from torchvision.io import read_image
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

## Data Loading

In [3]:
# Root folder of the dataset. Set the CASSAVA_DATA_DIR environment variable to
# point at another location; when it is unset the original local path is used,
# so the default values below are unchanged.
data_root = os.environ.get(
    "CASSAVA_DATA_DIR", "C:/Users/aakas/cassava-leaf-disease-classification"
).rstrip("/\\")  # drop trailing separators so the joins below stay clean

# Folder containing the training images (trailing slash kept as in the original).
img_dir = f"{data_root}/train_images/"
# CSV read by CassavaImageDataset: first column = image file name, second = label.
annotations_file = f"{data_root}/train.csv"

In [4]:
class CassavaImageDataset(Dataset):
    """Dataset pairing image files with the labels listed in a CSV file.

    Each item is built from one CSV row: the first column is joined onto
    ``img_dir`` to locate the image file, and the second column is the label.

    Args:
        annotations_file: Path of the CSV file, loaded with ``pandas.read_csv``.
        img_dir: Directory the file names in the first CSV column are relative to.
        transform: Optional callable applied to the image returned by
            ``read_image``. Defaults to ``None`` (image returned as decoded).
        target_transform: Optional callable applied to the label. Defaults to
            ``None`` (label returned as stored in the CSV).
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self) -> int:
        """Return the number of rows in the annotations CSV."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx`` of the CSV.

        The image is loaded from disk on every call; nothing is cached.
        """
        row = self.img_labels.iloc[idx]
        # Columns are addressed by position, not name: 0 = file name, 1 = label.
        img_path = os.path.join(self.img_dir, row.iloc[0])
        image = read_image(img_path)
        label = row.iloc[1]
        # Explicit None checks: only a missing transform is skipped.
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label

In [7]:
# Preprocessing and augmentation pipeline passed to the dataset as `transform`.
# NOTE(review): Resize to 224x224 followed by RandomResizedCrop(224) looks
# redundant (the crop already outputs 224x224) — confirm the Resize is intended.
# NOTE(review): the random steps run for every sample of the dataset this
# pipeline is attached to, including any held-out split taken from it.
data_transforms = transforms.Compose(
    [
        # Convert the decoded image to a PIL image for the steps below.
        transforms.ToPILImage(),
        transforms.Resize(size=(224, 224)),
        transforms.RandomResizedCrop(size=224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=45),
        transforms.ToTensor(),
        # Per-channel mean/std; these match the values torchvision documents
        # for its ImageNet-pretrained models — presumably chosen for that reason.
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [8]:
# Full labelled dataset: CSV annotations + image folder + the transform pipeline.
cassava_dataset = CassavaImageDataset(
    annotations_file=annotations_file,
    img_dir=img_dir,
    transform=data_transforms,
)

In [9]:
# 75% of the samples go to training; whatever is left becomes the test split.
total_size = len(cassava_dataset)
train_size = int(0.75 * total_size)
test_size = total_size - train_size  # remainder, so the two sizes always sum to the total

# A seeded generator makes the random split reproducible across runs, so
# results from different sessions are measured on the same held-out samples.
train_dataset, test_dataset = torch.utils.data.random_split(
    cassava_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

In [11]:
# Report how many samples ended up in each split.
n_train = len(train_dataset)
n_test = len(test_dataset)
print(f"train size: {n_train}")
print(f"test size: {n_test}")

train size: 16047
test size: 5350


In [12]:
# Batched loaders over the two splits.
# NOTE(review): num_workers=2 uses worker processes; with a dataset class
# defined inside a notebook on Windows this may fail to start — if loading
# hangs or errors, try num_workers=0.
# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(train_dataset, batch_size=8, num_workers=2, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps per-sample
# outputs comparable between runs.
test_dataloader = DataLoader(test_dataset, batch_size=8, num_workers=2, shuffle=False)