A simplified notebook that trains the soil-classification model by calling the modular pipeline in `src` (preprocessing, model, and training modules).

1: Fix Import Path

In [4]:
import sys
import os

# Register the parent of the current working directory on sys.path so that
# the `src` package can be imported from this notebook.
parent_dir = os.path.join(os.getcwd(), os.pardir)
project_root = os.path.abspath(parent_dir)
# Append only once, so re-running this cell does not add duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)


 2: Imports

In [5]:
from src.preprocessing import load_data, get_transforms, SoilDataset, split_data
from src.model import get_resnet_model
from src.train import train_model
from torch.utils.data import DataLoader
import torch


3: Set Paths and Load Data

In [8]:
# Dataset locations, relative to the notebook's working directory
TRAIN_DIR = "../data/soil_classification-2025/train"
TRAIN_CSV = "../data/soil_classification-2025/train_labels.csv"

# Build the transform pair first; this call takes no inputs from the data
train_transform, val_transform = get_transforms()

# Read the label CSV, then split the resulting frame into train/validation parts
df, label_encoder, class_names = load_data(TRAIN_CSV)
train_df, val_df = split_data(df)


4: Create Datasets and DataLoaders

In [9]:
# One batch size shared by both loaders
BATCH_SIZE = 32

# Wrap each split in a SoilDataset. Both splits come from the same labels
# file, so both are given TRAIN_DIR as their directory argument.
train_dataset = SoilDataset(train_df, TRAIN_DIR, transform=train_transform)
val_dataset = SoilDataset(val_df, TRAIN_DIR, transform=val_transform)

# Batch the datasets; only the training loader shuffles its samples
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)


5: Initialize Model and Device

In [10]:
# Seed torch's global RNG before anything stochastic runs, so that the
# torch-driven randomness that follows (e.g. the shuffle=True training
# loader) is repeatable across kernel restarts.
# Some CUDA kernels can still be non-deterministic even with a fixed seed.
SEED = 42
torch.manual_seed(SEED)

# Set device: use the GPU when one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create model with one output class per entry in class_names
# NOTE(review): the model is not moved to `device` in this notebook;
# presumably train_model does that with the device it receives -- confirm.
model = get_resnet_model(num_classes=len(class_names))




 6: Train the Model

In [11]:
# Number of passes over the training loader
NUM_EPOCHS = 5

# Run training with validation on the selected device.
# NOTE(review): the recorded output prints "Model saved" only after some
# epochs; presumably train_model checkpoints when Min F1 improves -- confirm
# in src/train.py.
train_model(model, train_loader, val_loader, device, num_epochs=NUM_EPOCHS)


Epoch [1/5], Loss: 0.4480, Min F1: 0.8696
✅ Model saved.
Epoch [2/5], Loss: 0.1494, Min F1: 0.9500
✅ Model saved.
Epoch [3/5], Loss: 0.0818, Min F1: 0.9195
Epoch [4/5], Loss: 0.0644, Min F1: 0.9610
✅ Model saved.
Epoch [5/5], Loss: 0.0563, Min F1: 0.9512
