# In [None]:
# notebooks/training.ipynb

import os
import pandas as pd
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import models
import torch.nn as nn
import torch.optim as optim
from src.preprocessing import SoilDataset, get_transforms

# Fine-tune a ResNet-18 on the soil images and save the resulting weights.

# Set paths
train_csv_path = "/kaggle/input/soil-classification/soil_classification-2025/train_labels.csv"
train_dir = "/kaggle/input/soil-classification/soil_classification-2025/train/"
model_path = "/kaggle/working/soil_resnet18.pth"

# Resolve the device once so the model and every batch are placed on the same
# one; falls back to CPU when CUDA is not available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load labels
labels_df = pd.read_csv(train_csv_path)

# Create dataset and dataloader
# SoilDataset and get_transforms are project helpers from src.preprocessing;
# the training loop below unpacks each batch as an (inputs, labels) pair.
transform = get_transforms(train=True)
dataset = SoilDataset(train_dir, labels_df, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Load model
# The final fully connected layer is replaced with a 4-output head.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
# of `weights=` - confirm the installed version before switching.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 4)
model = model.to(device)

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 5
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in dataloader:
        # Batches must live on the same device as the model; hard-coding
        # "cuda" here would fail on CPU-only machines.
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss for the epoch.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(dataloader):.4f}")

# Save model
# Only the state_dict (parameters and buffers) is written, not the full module.
torch.save(model.state_dict(), model_path)
print("Model saved at", model_path)
