In [None]:
!pip install torchmetrics --quiet

In [None]:
!pip install opendatasets --quiet
import opendatasets as od
od.download("https://www.kaggle.com/datasets/mssmartypants/rice-type-classification")

In [None]:
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import torchmetrics  # For better metric calculation
import matplotlib.pyplot as plt

# Reproducibility: seed PyTorch's global RNG before any weights are initialised,
# any dropout mask is drawn, or any DataLoader shuffles, so that
# Restart Kernel -> Run All gives repeatable numbers.
SEED = 42
torch.manual_seed(SEED)

# Device setup: use the GPU when one is visible, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

## Data Loading

In [None]:
# Load the CSV from the dataset folder created by the download cell.
# The path is relative to the working directory instead of hard-coding Colab's
# /content prefix, so the cell also runs outside Colab; when the working
# directory is /content it resolves to exactly the same file as before.
DATA_PATH = "rice-type-classification/riceClassification.csv"

# 'id' is a row identifier, not a feature, so it is dropped straight away.
data_df = pd.read_csv(DATA_PATH).drop(columns='id')

# Separate features (X) and target (y)
X = data_df.drop(columns='Class')
y = data_df['Class']

# Create a 3-way split (Train 70% / Validation 15% / Test 15%).
# stratify keeps the class proportions of `y` the same in every split;
# random_state pins the shuffling so the split is reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, stratify=y_temp, random_state=42
)

print(f"Train shape: {X_train.shape}, Val shape: {X_val.shape}, Test shape: {X_test.shape}")

## Feature Engineering

# Preprocessing pipeline: a single standardisation step applied to every column.
preprocessor = Pipeline(steps=[('scaler', StandardScaler())])

# Learn the scaling statistics from the training split only, so that nothing
# about the validation/test rows leaks into the fitted parameters.
X_train_scaled = preprocessor.fit_transform(X_train)

# Re-use the already fitted pipeline (no refit) for the held-out splits.
X_val_scaled, X_test_scaled = (
    preprocessor.transform(held_out) for held_out in (X_val, X_test)
)

In [None]:
# Sanity check: show the container type of the labels, first as the underlying
# array (`.values`) and then as the pandas object itself.
for labels_view in (y_train.values, y_train):
    print(type(labels_view))

## Scalable Dataloaders

In [None]:
# Mini-batch size shared by all three loaders.
BATCH_SIZE = 32

# Features and labels are both stored as float32 tensors. The labels are float
# (not long) because the training cell feeds them to nn.BCEWithLogitsLoss as
# targets alongside float logits.
X_train_t, X_val_t, X_test_t = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train_scaled, X_val_scaled, X_test_scaled)
)
y_train_t, y_val_t, y_test_t = (
    torch.tensor(labels.values, dtype=torch.float32)
    for labels in (y_train, y_val, y_test)
)

# Pair each feature tensor with its label tensor.
train_dataset = TensorDataset(X_train_t, y_train_t)
val_dataset = TensorDataset(X_val_t, y_val_t)
test_dataset = TensorDataset(X_test_t, y_test_t)

# Only the training loader shuffles; evaluation order is kept fixed.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

## Model

class ProductionModel(nn.Module):
    """Three-layer MLP for binary classification that outputs raw logits.

    Architecture: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear(1).
    No sigmoid is applied; pair the output with a logits-based loss such as
    ``nn.BCEWithLogitsLoss``.

    Args:
        input_features: Number of input columns per sample.
        hidden_units: Width of the first hidden layer. The second hidden layer
            has ``hidden_units // 2`` units, so this must be at least 2.
        dropout_rates: Dropout probabilities after the first and second hidden
            layers. Defaults to ``(0.3, 0.2)``, the values that were previously
            hard-coded.

    Raises:
        ValueError: If ``hidden_units`` is smaller than 2, which would produce a
            zero-width second hidden layer.
    """

    def __init__(self, input_features, hidden_units, dropout_rates=(0.3, 0.2)):
        super().__init__()
        if hidden_units < 2:
            raise ValueError(
                f"hidden_units must be >= 2 so the second hidden layer is non-empty, got {hidden_units}"
            )
        first_dropout, second_dropout = dropout_rates
        narrow_units = hidden_units // 2

        # Layer positions inside the Sequential are unchanged, so state_dict
        # keys (layer_stack.0, .3, .6) stay compatible with saved checkpoints.
        self.layer_stack = nn.Sequential(
            nn.Linear(input_features, hidden_units),
            nn.ReLU(),
            nn.Dropout(first_dropout),
            nn.Linear(hidden_units, narrow_units),
            nn.ReLU(),
            nn.Dropout(second_dropout),
            nn.Linear(narrow_units, 1),  # single raw logit per sample
        )

    def forward(self, x):
        """Return one raw logit per input row (last dimension of size 1)."""
        return self.layer_stack(x)

# X_train_scaled.shape[1] is the number of input feature columns.
model = ProductionModel(
    input_features=X_train_scaled.shape[1],
    hidden_units=64,
).to(device)

# Architecture overview using only PyTorch. The previous `summary(...)` call
# referenced a name that is never imported in this notebook and raised NameError.
print(model)
n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {n_trainable:,}")

In [None]:
# --- Hyperparameters ---
EPOCHS = 20
LR = 0.001

# --- Loss and Optimizer ---
# BCEWithLogitsLoss takes RAW LOGITS (the sigmoid is folded into the loss),
# which is numerically more stable than a Sigmoid layer followed by BCELoss.
criterion = nn.BCEWithLogitsLoss()
optimizer = Adam(model.parameters(), lr=LR)

# --- Metrics ---
# One metric object per split so their running state never mixes.
train_acc = torchmetrics.Accuracy(task="binary").to(device)
val_acc = torchmetrics.Accuracy(task="binary").to(device)

# --- History ---
# Per-epoch values, consumed by the plotting cell.
history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}


for epoch in range(EPOCHS):
    # --- Training ---
    model.train()  # enables Dropout
    train_loss = 0.0
    train_seen = 0
    for X_batch, y_batch in train_loader:
        # Move this batch to the device; labels become (batch, 1) to match the logits.
        X_batch, y_batch = X_batch.to(device), y_batch.to(device).unsqueeze(1)

        # Forward pass and loss
        y_logits = model(X_batch)
        loss = criterion(y_logits, y_batch)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so that a smaller final
        # batch does not count as much as a full one in the epoch average.
        n_batch = X_batch.size(0)
        train_loss += loss.item() * n_batch
        train_seen += n_batch

        # Hand the metric explicit probabilities and integer labels. With raw
        # logits the metric has to guess from the value range whether a sigmoid
        # is needed, which is ambiguous when a batch's logits all lie in [0, 1].
        train_acc.update(torch.sigmoid(y_logits.detach()), y_batch.int())

    # Per-sample average loss and accuracy for the epoch
    avg_train_loss = train_loss / train_seen
    epoch_train_acc = train_acc.compute()
    train_acc.reset()  # start the next epoch from a clean state

    # --- Validation ---
    model.eval()  # disables Dropout
    val_loss = 0.0
    val_seen = 0
    with torch.no_grad():
        for X_batch_val, y_batch_val in val_loader:
            X_batch_val, y_batch_val = X_batch_val.to(device), y_batch_val.to(device).unsqueeze(1)

            y_val_logits = model(X_batch_val)
            loss = criterion(y_val_logits, y_batch_val)

            n_batch = X_batch_val.size(0)
            val_loss += loss.item() * n_batch
            val_seen += n_batch
            val_acc.update(torch.sigmoid(y_val_logits), y_batch_val.int())

    avg_val_loss = val_loss / val_seen
    epoch_val_acc = val_acc.compute()
    val_acc.reset()

    print(f"Epoch {epoch+1}/{EPOCHS} | "
          f"Train Loss: {avg_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f} | "
          f"Val Loss: {avg_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}")

    # Store plain Python floats (not tensors) so the history is easy to plot.
    history["train_loss"].append(avg_train_loss)
    history["train_acc"].append(epoch_train_acc.item())
    history["val_loss"].append(avg_val_loss)
    history["val_acc"].append(epoch_val_acc.item())

In [None]:
# Plot the training history: one panel per metric, training vs. validation curve.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# (history key suffix, human-readable metric name) for each panel
panels = [("loss", "Loss"), ("acc", "Accuracy")]
for ax, (key, metric_name) in zip(axs, panels):
    ax.plot(history[f"train_{key}"], label=f"Training {metric_name}")
    ax.plot(history[f"val_{key}"], label=f"Validation {metric_name}")
    ax.set_title(f"{metric_name} over Epochs")
    # Axis labels let the figure stand on its own when the notebook is skimmed.
    ax.set_xlabel("Epoch")
    ax.set_ylabel(metric_name)
    ax.legend()

plt.show()

# --- Final Test ---
# Evaluate the trained model once on the held-out test split.
test_acc = torchmetrics.Accuracy(task="binary").to(device)
model.eval()  # disables Dropout
with torch.no_grad():
    for X_batch_test, y_batch_test in test_loader:
        X_batch_test, y_batch_test = X_batch_test.to(device), y_batch_test.to(device).unsqueeze(1)
        y_test_logits = model(X_batch_test)
        # Hand the metric explicit probabilities and integer labels. With raw
        # logits the metric has to guess from the value range whether a sigmoid
        # is needed, which is ambiguous when a batch's logits all lie in [0, 1].
        test_acc.update(torch.sigmoid(y_test_logits), y_batch_test.int())

final_test_accuracy = test_acc.compute()
print(f"Final Test Accuracy: {final_test_accuracy.item():.4f}")