In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader
from torch import nn, optim

In [None]:
# Colab does not handle local files well, so the dataset is fetched through the Kaggle API (kagglehub).
import os

import kagglehub

# dataset_download returns the local directory where this dataset version is cached.
path = kagglehub.dataset_download("mdwaquarazam/agricultural-crops-image-classification")

print("Path to dataset files:", path)

# Build the image root from the returned path rather than hard-coding the cache
# location, so the notebook keeps working if the dataset version or cache dir changes.
data_dir = os.path.join(path, 'Agricultural-crops')

Path to dataset files: /root/.cache/kagglehub/datasets/mdwaquarazam/agricultural-crops-image-classification/versions/1


In [None]:
#The data I found was not split into respective folders so I needed to break it up myself
input_folder=data_dir
!pip install split-folders
import splitfolders
split_ratio = (0.7,0.2,0.1)
splitfolders.ratio( input_folder,
                  output='/content/sample_data', seed=9,
                  ratio=split_ratio,
                  group_prefix=None,
                  )

target_to_class = {v: k for k, v in ImageFolder(data_dir).class_to_idx.items()}
print(target_to_class)



Copying files: 829 files [00:00, 2554.42 files/s]

{0: 'Cherry', 1: 'Coffee-plant', 2: 'Cucumber', 3: 'Fox_nut(Makhana)', 4: 'Lemon', 5: 'Olive-tree', 6: 'Pearl_millet(bajra)', 7: 'Tobacco-plant', 8: 'almond', 9: 'banana', 10: 'cardamom', 11: 'chilli', 12: 'clove', 13: 'coconut', 14: 'cotton', 15: 'gram', 16: 'jowar', 17: 'jute', 18: 'maize', 19: 'mustard-oil', 20: 'papaya', 21: 'pineapple', 22: 'rice', 23: 'soyabean', 24: 'sugarcane', 25: 'sunflower', 26: 'tea', 27: 'tomato', 28: 'vigna-radiati(Mung)', 29: 'wheat'}





In [None]:
# Preprocessing pipeline applied to every image: resize to 224x224, convert to a
# tensor, then normalize each channel with the mean/std listed below.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Load each split folder as an ImageFolder dataset and wrap it in a DataLoader that yields batches of 32.
train_folder = '/content/sample_data/train'
test_folder = '/content/sample_data/test'
valid_folder = '/content/sample_data/val'

train_dataset = torchvision.datasets.ImageFolder(train_folder, transform=transform)
test_dataset = torchvision.datasets.ImageFolder(test_folder, transform=transform)
valid_dataset = torchvision.datasets.ImageFolder(valid_folder, transform=transform)

# Only the training data is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
valid_loader = DataLoader(valid_dataset, batch_size=32, shuffle=False)

# Map class index -> class name. Reuse the mapping already built by train_dataset
# instead of constructing a second ImageFolder and re-scanning the directory.
target_to_class = {idx: name for name, idx in train_dataset.class_to_idx.items()}
print(target_to_class)

{0: 'Cherry', 1: 'Coffee-plant', 2: 'Cucumber', 3: 'Fox_nut(Makhana)', 4: 'Lemon', 5: 'Olive-tree', 6: 'Pearl_millet(bajra)', 7: 'Tobacco-plant', 8: 'almond', 9: 'banana', 10: 'cardamom', 11: 'chilli', 12: 'clove', 13: 'coconut', 14: 'cotton', 15: 'gram', 16: 'jowar', 17: 'jute', 18: 'maize', 19: 'mustard-oil', 20: 'papaya', 21: 'pineapple', 22: 'rice', 23: 'soyabean', 24: 'sugarcane', 25: 'sunflower', 26: 'tea', 27: 'tomato', 28: 'vigna-radiati(Mung)', 29: 'wheat'}


In [None]:
# EfficientNet-B0 was tried first, but its loss did not decrease across epochs, so ResNet50 is used instead.
# `pretrained=True` is deprecated in torchvision; the explicit `weights=` enum below
# selects the same ImageNet weights that `pretrained=True` used to load.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)



In [None]:
# Replace the pretrained classification head with a new linear layer that takes the
# same number of input features and produces one output per class in the training set.
num_ftrs = model.fc.in_features
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(in_features=num_ftrs, out_features=num_classes)

In [None]:
# Adam (learning rate 0.001) updates every model parameter; cross-entropy is the training loss.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:
# Train for num_epochs epochs, printing the mean training loss and the validation accuracy after each one.
# The original run took about an hour because nothing was moved off the CPU; the model
# and every batch are now placed on the GPU when one is available.
num_epochs = 10

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# nn.Module.to() modifies the module in place, so the optimizer created earlier
# keeps referencing these same parameters.
model.to(device)

for epoch in range(num_epochs):
    # --- training pass ---
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

    # --- validation pass (eval mode, no gradient tracking) ---
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Predicted class is the index of the largest logit in each row.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Validation Accuracy: {100 * correct / total}%')

Epoch 1/10, Loss: 2.552346494462755
Validation Accuracy: 8.387096774193548%
Epoch 2/10, Loss: 1.8085828555954828
Validation Accuracy: 14.838709677419354%
Epoch 3/10, Loss: 1.4721281958950891
Validation Accuracy: 29.032258064516128%
Epoch 4/10, Loss: 1.2089709407753415
Validation Accuracy: 37.41935483870968%
Epoch 5/10, Loss: 1.0426735215716891
Validation Accuracy: 40.0%
Epoch 6/10, Loss: 0.7684086859226227
Validation Accuracy: 37.41935483870968%
Epoch 7/10, Loss: 0.5762355766362615
Validation Accuracy: 48.38709677419355%
Epoch 8/10, Loss: 0.42229797028832966
Validation Accuracy: 47.74193548387097%
Epoch 9/10, Loss: 0.41490552491611904
Validation Accuracy: 50.32258064516129%
Epoch 10/10, Loss: 0.41345889535215163
Validation Accuracy: 53.54838709677419%
