In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torchvision import models
from torch.utils.data import DataLoader
from torch import nn, optim

In [3]:
#Google Colab has a weird relationship with local files so I used the Kaggle package to import it with their API
import kagglehub
path = kagglehub.dataset_download("mdwaquarazam/agricultural-crops-image-classification")

print("Path to dataset files:", path)

data_dir = '/root/.cache/kagglehub/datasets/mdwaquarazam/agricultural-crops-image-classification/versions/1/Agricultural-crops'

Downloading from https://www.kaggle.com/api/v1/datasets/download/mdwaquarazam/agricultural-crops-image-classification?dataset_version_number=1...


100%|██████████| 79.0M/79.0M [00:04<00:00, 19.8MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/mdwaquarazam/agricultural-crops-image-classification/versions/1


In [4]:
#The data I found was not split into respective folders so I needed to break it up myself
input_folder=data_dir
!pip install split-folders
import splitfolders
split_ratio = (0.7,0.2,0.1)
splitfolders.ratio( input_folder,
                  output='/content/sample_data', seed=9,
                  ratio=split_ratio,
                  group_prefix=None,
                  )

target_to_class = {v: k for k, v in ImageFolder(data_dir).class_to_idx.items()}
print(target_to_class)

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl.metadata (6.2 kB)
Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


Copying files: 829 files [00:00, 2936.25 files/s]

{0: 'Cherry', 1: 'Coffee-plant', 2: 'Cucumber', 3: 'Fox_nut(Makhana)', 4: 'Lemon', 5: 'Olive-tree', 6: 'Pearl_millet(bajra)', 7: 'Tobacco-plant', 8: 'almond', 9: 'banana', 10: 'cardamom', 11: 'chilli', 12: 'clove', 13: 'coconut', 14: 'cotton', 15: 'gram', 16: 'jowar', 17: 'jute', 18: 'maize', 19: 'mustard-oil', 20: 'papaya', 21: 'pineapple', 22: 'rice', 23: 'soyabean', 24: 'sugarcane', 25: 'sunflower', 26: 'tea', 27: 'tomato', 28: 'vigna-radiati(Mung)', 29: 'wheat'}





In [5]:
#Defining the transform function to resize images and turn them into tensors
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [6]:
#Breaking folder into datasets and turning those sets into batches through the DataLoader
train_folder = '/content/sample_data/train'
test_folder = '/content/sample_data/test'
valid_folder = '/content/sample_data/val'

train_dataset = torchvision.datasets.ImageFolder(train_folder, transform=transform)
test_dataset = torchvision.datasets.ImageFolder(test_folder, transform=transform)
valid_dataset = torchvision.datasets.ImageFolder(valid_folder, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

target_to_class = {v: k for k, v in torchvision.datasets.ImageFolder(train_folder).class_to_idx.items()}
print(target_to_class)

{0: 'Cherry', 1: 'Coffee-plant', 2: 'Cucumber', 3: 'Fox_nut(Makhana)', 4: 'Lemon', 5: 'Olive-tree', 6: 'Pearl_millet(bajra)', 7: 'Tobacco-plant', 8: 'almond', 9: 'banana', 10: 'cardamom', 11: 'chilli', 12: 'clove', 13: 'coconut', 14: 'cotton', 15: 'gram', 16: 'jowar', 17: 'jute', 18: 'maize', 19: 'mustard-oil', 20: 'papaya', 21: 'pineapple', 22: 'rice', 23: 'soyabean', 24: 'sugarcane', 25: 'sunflower', 26: 'tea', 27: 'tomato', 28: 'vigna-radiati(Mung)', 29: 'wheat'}


In [7]:
#I tried using EfficientNet-B0 but my loss would not decay over each epoch. I heard ResNet50 is better for image classification
model = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 134MB/s]


In [8]:
#Defining the number of features that the model can look for as output
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(train_dataset.classes))

In [9]:
#Setting up the optimizer Adam and the loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [10]:
#Took about 2 hours to train (might need to add cuda), but thankfully the loss value is decaying over each iteration.
#The model seems to have hit a wall in terms of training. I may need to tweek the weights and rerun
num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total}%')

Epoch 1/10, Loss: 3.0212834676106772
Test Accuracy: 14.285714285714286%
Epoch 2/10, Loss: 2.1703756716516285
Test Accuracy: 27.61904761904762%
Epoch 3/10, Loss: 1.733240630891588
Test Accuracy: 23.80952380952381%
Epoch 4/10, Loss: 1.540731085671319
Test Accuracy: 22.857142857142858%
Epoch 5/10, Loss: 1.0892149209976196
Test Accuracy: 44.76190476190476%
Epoch 6/10, Loss: 0.8015559795829985
Test Accuracy: 38.095238095238095%
Epoch 7/10, Loss: 0.5086444583204057
Test Accuracy: 51.42857142857143%
Epoch 8/10, Loss: 0.5500633551014794
Test Accuracy: 34.285714285714285%
Epoch 9/10, Loss: 0.49004090080658597
Test Accuracy: 44.76190476190476%
Epoch 10/10, Loss: 0.4639886799785826
Test Accuracy: 42.857142857142854%
