### Preparing the Environment

In [1]:
import os
import torch
import numpy as np
import pandas as pd
from torch import nn, optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

### Reading the Data

In [2]:
# Load the training table (contains the "y" label column) and the test table
# (no label column) from the notebook's working directory.
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

# Quick sanity check: (rows, columns) of both tables.
print("Train:", train_df.shape, "Test:", test_df.shape)


Train: (1200, 3074) Test: (1200, 3073)


### Extracting IDs, Labels, and Pixel Arrays


In [3]:
# Class labels: the "y" column shifted down by one so the classes start at 0,
# which is what nn.CrossEntropyLoss expects as class indices. (The inference
# cell adds the 1 back before writing the submission.)
# NOTE(review): assumes "y" is 1-based in train.csv -- confirm against the data.
train_labels = train_df["y"].values.astype(np.int64) - 1

# Row identifiers of the test set, kept for the submission file.
test_ids = test_df["id"].values

# Pixel matrices: every column except the bookkeeping ones, as float32.
# train_df / test_df are deliberately NOT rebound here, so this cell can be
# re-run without a KeyError on the already-dropped "id" / "y" columns.
train_pixels = train_df.drop(columns=["id", "y"]).values.astype(np.float32)
test_pixels = test_df.drop(columns=["id"]).values.astype(np.float32)

### Reshaping

The `image_array` function reshapes each flat 3072-element vector into a 3×32×32 image array (channels-first format).

In [4]:
def image_array(flat_array, channels=3, height=32, width=32):
    """Reshape flat pixel rows into channels-first image arrays.

    Parameters
    ----------
    flat_array : array-like with a ``reshape`` method
        Flattened images; the total number of elements must be a multiple of
        ``channels * height * width``.
    channels, height, width : int, optional
        Target image geometry. The defaults (3, 32, 32) reproduce the
        original hard-coded behaviour for 3072-element rows.

    Returns
    -------
    Array of shape ``(N, channels, height, width)`` where ``N`` is inferred
    from the input size.

    Raises
    ------
    ValueError
        Propagated from ``reshape`` when the element count does not fit the
        requested geometry.
    """
    return flat_array.reshape(-1, channels, height, width)

# Turn both flat pixel matrices into (N, 3, 32, 32) image arrays.
X_train, X_test = map(image_array, (train_pixels, test_pixels))

### Data Split to Training and Validation

Stratified split into 80% train / 20% validation.

In [5]:
# Hold out 20% of the labelled images for validation. Stratifying on the
# labels keeps the class proportions the same in both parts, and the fixed
# random_state makes the split repeatable.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train,
    train_labels,
    test_size=0.2,
    random_state=42,
    stratify=train_labels,
)

### Building a PyTorch Dataset and DataLoaders

The `FarmImageDataset` class wraps the NumPy arrays in a PyTorch `Dataset`; the `DataLoader`s then serve them in batches.

In [6]:
class FarmImageDataset(Dataset):
    """Dataset over an in-memory image array with optional labels.

    Indexing yields ``(image, label)`` when labels were supplied and just
    ``image`` otherwise (used for the unlabelled test set).
    """

    def __init__(self, images, labels=None):
        # torch.from_numpy keeps the NumPy dtype and shares memory with the
        # source array instead of copying it.
        self.images = torch.from_numpy(images)
        self.labels = torch.from_numpy(labels) if labels is not None else None

    def __len__(self):
        """Number of images held by the dataset."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the sample at ``idx``, paired with its label if available."""
        image = self.images[idx]
        if self.labels is not None:
            return image, self.labels[idx]
        return image


batch_size = 64

# One dataset per split; the test split carries no labels.
train_ds = FarmImageDataset(X_tr, y_tr)
val_ds = FarmImageDataset(X_val, y_val)
test_ds = FarmImageDataset(X_test)

# Only the training loader shuffles. Validation and test keep their order,
# so test predictions line up with test_ids.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

### Defining the CNN Model

Two convolutional blocks:

- Conv2d(3→32), ReLU, MaxPool → outputs 32×16×16
- Conv2d(32→64), ReLU, MaxPool → outputs 64×8×8

In [7]:
class SimpleCNN(nn.Module):
    """Small two-block convolutional classifier for 3x32x32 images.

    Parameters
    ----------
    num_classes : int, optional
        Width of the final linear layer, i.e. number of output logits
        (default 3).
    """

    def __init__(self, num_classes=3):
        super().__init__()

        # Feature extractor: each 3x3 convolution with padding=1 keeps the
        # spatial size and each 2x2 max-pool halves it (32 -> 16 -> 8).
        feature_layers = [
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        self.conv_layers = nn.Sequential(*feature_layers)

        # Classification head: flatten the 64x8x8 feature map, apply one
        # hidden layer with dropout, then project to the class logits.
        head_layers = [
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a batch of images to unnormalised class scores (logits)."""
        return self.classifier(self.conv_layers(x))



### Training and Validation Loops

The model is trained with the Adam optimizer (learning rate 1e-3) and the standard cross-entropy loss.

In [8]:
# Seed PyTorch's global RNG so that weight initialisation, dropout masks and
# DataLoader shuffling are repeatable across "Restart & Run All" executions
# (GPU kernels may still introduce small run-to-run differences).
# The value matches the random_state used for the train/validation split.
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model, loss and optimizer are notebook-level globals that train_epoch and
# eval_epoch read directly.
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

### One epoch of training

Forward pass → compute loss → backward → optimizer step.

Accumulates total loss and total correct predictions to return average loss & accuracy.

Evaluation on validation (no gradient updates).

In [13]:
# import os
# import numpy as np
# import pandas as pd
# import torch
# import optuna

# from torch import nn, optim
# from torch.utils.data import Dataset, DataLoader
# from sklearn.model_selection import train_test_split

# class TunableCNN(nn.Module):
#     def __init__(self, dropout_rate):
#         super().__init__()
#         self.conv_layers = nn.Sequential(
#             nn.Conv2d(3, 32, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2),
#             nn.Conv2d(32, 64, kernel_size=3, padding=1),
#             nn.ReLU(),
#             nn.MaxPool2d(2)
#         )
#         self.classifier = nn.Sequential(
#             nn.Flatten(),
#             nn.Linear(64 * 8 * 8, 128),
#             nn.ReLU(),
#             nn.Dropout(dropout_rate),
#             nn.Linear(128, 3)
#         )

#     def forward(self, x):
#         return self.classifier(self.conv_layers(x))


# def train_epoch(model, loader, criterion, optimizer):
#     model.train()
#     total_loss, total_correct = 0, 0
#     for Xb, yb in loader:
#         Xb, yb = Xb.to(device), yb.to(device)
#         preds  = model(Xb)
#         loss   = criterion(preds, yb)
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         total_loss    += loss.item() * Xb.size(0)
#         total_correct += (preds.argmax(1) == yb).sum().item()
#     return total_loss / len(loader.dataset), total_correct / len(loader.dataset)

# def eval_epoch(model, loader, criterion):
#     model.eval()
#     total_loss, total_correct = 0, 0
#     with torch.no_grad():
#         for Xb, yb in loader:
#             Xb, yb = Xb.to(device), yb.to(device)
#             preds  = model(Xb)
#             total_loss    += criterion(preds, yb).item() * Xb.size(0)
#             total_correct += (preds.argmax(1) == yb).sum().item()
#     return total_loss / len(loader.dataset), total_correct / len(loader.dataset)


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# def objective(trial):
#     lr           = trial.suggest_loguniform('lr',    1e-5, 1e-2)
#     momentum     = trial.suggest_float('momentum',  0.0, 0.99)
#     dropout_rate = trial.suggest_float('dropout',   0.2, 0.7, step=0.1)
#     batch_size   = trial.suggest_categorical('batch_size',[32,64,128])
#     n_epochs     = trial.suggest_int('epochs', 10, 50, step=5)
    
#     model     = TunableCNN(dropout_rate).to(device)
#     criterion = nn.CrossEntropyLoss()
#     optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

#     train_loader = DataLoader(FarmImageDataset(X_tr, y_tr),
#                               batch_size=batch_size, shuffle=True)
#     val_loader   = DataLoader(FarmImageDataset(X_val, y_val),
#                               batch_size=batch_size, shuffle=False)

#     best_val_acc = 0.0
#     for epoch in range(1, n_epochs + 1):
#         train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
#         val_loss, val_acc     = eval_epoch(model, val_loader,   criterion)


#         trial.report(val_acc, epoch)
#         if trial.should_prune():
#             raise optuna.TrialPruned()

#         if val_acc > best_val_acc:
#             best_val_acc = val_acc

#     return best_val_acc


In [14]:
# study = optuna.create_study(direction='maximize',
#                             pruner=optuna.pruners.MedianPruner())
# study.optimize(objective, n_trials=50)

# print("Best hyperparameters:", study.best_params)
# print("Best validation accuracy:", study.best_value)

[I 2025-05-06 23:18:09,314] A new study created in memory with name: no-name-140066df-c182-4b88-829e-71bf97c67fa8
  lr           = trial.suggest_loguniform('lr',    1e-5, 1e-2)
[I 2025-05-06 23:18:25,996] Trial 0 finished with value: 0.8333333333333334 and parameters: {'lr': 0.001193584091184172, 'momentum': 0.8175326261230483, 'dropout': 0.30000000000000004, 'batch_size': 128, 'epochs': 50}. Best is trial 0 with value: 0.8333333333333334.
[I 2025-05-06 23:18:40,856] Trial 1 finished with value: 0.375 and parameters: {'lr': 9.361512409633552e-05, 'momentum': 0.9094694864943689, 'dropout': 0.30000000000000004, 'batch_size': 128, 'epochs': 45}. Best is trial 0 with value: 0.8333333333333334.
[I 2025-05-06 23:18:45,090] Trial 2 finished with value: 0.3875 and parameters: {'lr': 0.00013197597580010998, 'momentum': 0.06969746923246922, 'dropout': 0.2, 'batch_size': 32, 'epochs': 10}. Best is trial 0 with value: 0.8333333333333334.
[I 2025-05-06 23:18:51,680] Trial 3 finished with value: 0.3

Best hyperparameters: {'lr': 0.0035722662860218075, 'momentum': 0.9713655185904604, 'dropout': 0.5, 'batch_size': 64, 'epochs': 50}
Best validation accuracy: 0.9541666666666667


In [None]:
# import torch
# from torch import optim, nn
# from torch.utils.data import DataLoader

# best_lr        = 0.0035722662860218075
# best_momentum  = 0.9713655185904604
# best_dropout   = 0.5   
# best_batch     = 64
# best_epochs    = 50

# train_loader = DataLoader(train_ds, batch_size=best_batch, shuffle=True)
# val_loader   = DataLoader(val_ds,   batch_size=best_batch)

# model     = SimpleCNN().to(device)   
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(
#     model.parameters(),
#     lr=best_lr,
#     momentum=best_momentum
# )

# best_val_acc = 0.0
# for epoch in range(1, best_epochs + 1):
   
#     model.train()
#     total_loss, total_correct = 0.0, 0
#     for Xb, yb in train_loader:
#         Xb, yb = Xb.to(device), yb.to(device)
#         preds  = model(Xb)
#         loss   = criterion(preds, yb)
#         optimizer.zero_grad()
#         loss.backward()
#         optimizer.step()
#         total_loss    += loss.item() * Xb.size(0)
#         total_correct += (preds.argmax(1) == yb).sum().item()
#     train_acc = total_correct / len(train_loader.dataset)

  
#     model.eval()
#     val_loss, val_correct = 0.0, 0
#     with torch.no_grad():
#         for Xb, yb in val_loader:
#             Xb, yb = Xb.to(device), yb.to(device)
#             preds  = model(Xb)
#             val_loss    += criterion(preds, yb).item() * Xb.size(0)
#             val_correct += (preds.argmax(1) == yb).sum().item()
#     val_acc = val_correct / len(val_loader.dataset)

#     if val_acc > best_val_acc:
#         best_val_acc = val_acc
#         torch.save(model.state_dict(), "cnn_model_tuned.pth")

#     print(f"Epoch {epoch:02d} – train_acc: {train_acc:.3f}, val_acc: {val_acc:.3f}")

# print(f"Best validation accuracy: {best_val_acc:.3f}")


Epoch 01 – train_acc: 0.369, val_acc: 0.338
Epoch 02 – train_acc: 0.418, val_acc: 0.512
Epoch 03 – train_acc: 0.539, val_acc: 0.617
Epoch 04 – train_acc: 0.665, val_acc: 0.725
Epoch 05 – train_acc: 0.733, val_acc: 0.754
Epoch 06 – train_acc: 0.792, val_acc: 0.825
Epoch 07 – train_acc: 0.767, val_acc: 0.762
Epoch 08 – train_acc: 0.789, val_acc: 0.688
Epoch 09 – train_acc: 0.756, val_acc: 0.758
Epoch 10 – train_acc: 0.797, val_acc: 0.758
Epoch 11 – train_acc: 0.847, val_acc: 0.792
Epoch 12 – train_acc: 0.852, val_acc: 0.875
Epoch 13 – train_acc: 0.866, val_acc: 0.863
Epoch 14 – train_acc: 0.889, val_acc: 0.879
Epoch 15 – train_acc: 0.907, val_acc: 0.904
Epoch 16 – train_acc: 0.911, val_acc: 0.904
Epoch 17 – train_acc: 0.918, val_acc: 0.904
Epoch 18 – train_acc: 0.944, val_acc: 0.883
Epoch 19 – train_acc: 0.926, val_acc: 0.904
Epoch 20 – train_acc: 0.915, val_acc: 0.892
Epoch 21 – train_acc: 0.910, val_acc: 0.900
Epoch 22 – train_acc: 0.924, val_acc: 0.921
Epoch 23 – train_acc: 0.929, val

In [9]:
def train_epoch(loader):
    """Run one optimisation pass over ``loader``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device``. Returns ``(mean_loss, accuracy)``, both averaged over
    ``len(loader.dataset)``. Accuracy is measured on the same forward pass
    used for the update, i.e. with the model in training mode.
    """
    model.train()
    loss_sum = 0
    n_correct = 0
    for batch_x, batch_y in loader:
        inputs = batch_x.to(device)
        targets = batch_y.to(device)

        logits = model(inputs)
        batch_loss = criterion(logits, targets)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size
        # to obtain a correct per-sample average at the end.
        loss_sum += batch_loss.item() * inputs.size(0)
        n_correct += (logits.argmax(1) == targets).sum().item()

    n_samples = len(loader.dataset)
    return loss_sum / n_samples, n_correct / n_samples

def eval_epoch(loader):
    """Score the notebook-level ``model`` on ``loader`` without updating it.

    Puts the model in eval mode and disables gradient tracking. Returns
    ``(mean_loss, accuracy)``, both averaged over ``len(loader.dataset)``.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            inputs = batch_x.to(device)
            targets = batch_y.to(device)
            logits = model(inputs)
            # Weight the batch-mean loss by the batch size (see return value).
            loss_sum += criterion(logits, targets).item() * inputs.size(0)
            n_correct += (logits.argmax(1) == targets).sum().item()

    n_samples = len(loader.dataset)
    return loss_sum / n_samples, n_correct / n_samples

# Train for epochs 1..39. Whenever the validation accuracy strictly improves,
# overwrite the checkpoint so "cnn_model.pth" always holds the best weights
# seen so far.
best_val_acc = 0
for epoch in range(1, 40):
    train_loss, train_acc = train_epoch(train_loader)
    val_loss, val_acc = eval_epoch(val_loader)

    if val_acc > best_val_acc:
        torch.save(model.state_dict(), "cnn_model.pth")
        best_val_acc = val_acc

    print(f"Epoch {epoch:02d} – train_acc: {train_acc:.3f}, val_acc: {val_acc:.3f}")


Epoch 01 – train_acc: 0.600, val_acc: 0.812
Epoch 02 – train_acc: 0.803, val_acc: 0.846
Epoch 03 – train_acc: 0.847, val_acc: 0.858
Epoch 04 – train_acc: 0.882, val_acc: 0.879
Epoch 05 – train_acc: 0.898, val_acc: 0.867
Epoch 06 – train_acc: 0.887, val_acc: 0.904
Epoch 07 – train_acc: 0.904, val_acc: 0.904
Epoch 08 – train_acc: 0.931, val_acc: 0.921
Epoch 09 – train_acc: 0.927, val_acc: 0.908
Epoch 10 – train_acc: 0.936, val_acc: 0.938
Epoch 11 – train_acc: 0.944, val_acc: 0.950
Epoch 12 – train_acc: 0.954, val_acc: 0.942
Epoch 13 – train_acc: 0.952, val_acc: 0.946
Epoch 14 – train_acc: 0.947, val_acc: 0.963
Epoch 15 – train_acc: 0.951, val_acc: 0.908
Epoch 16 – train_acc: 0.942, val_acc: 0.933
Epoch 17 – train_acc: 0.961, val_acc: 0.967
Epoch 18 – train_acc: 0.968, val_acc: 0.950
Epoch 19 – train_acc: 0.950, val_acc: 0.942
Epoch 20 – train_acc: 0.964, val_acc: 0.912
Epoch 21 – train_acc: 0.966, val_acc: 0.963
Epoch 22 – train_acc: 0.963, val_acc: 0.933
Epoch 23 – train_acc: 0.965, val

### Measuring the Final Model's Performance on the Validation Set

In [10]:
# Restore the best checkpoint written by the training loop. map_location
# remaps the stored tensors onto the current device, so a checkpoint saved
# on a GPU still loads when the notebook is re-run on a CPU-only machine.
model.load_state_dict(torch.load("cnn_model.pth", map_location=device))

# Re-score the restored weights on the validation split.
val_loss, val_acc = eval_epoch(val_loader)
print(f"Final validation accuracy (re-loaded best model): {val_acc*100:.2f}%")


Final validation accuracy (re-loaded best model): 97.08%


### Retraining the Final Model on the Combination of Train and Val Data Sets

In [12]:
# Merge the training and validation splits back into one labelled set.
X_full = np.concatenate((X_tr, X_val))
y_full = np.concatenate((y_tr, y_val))

full_ds = FarmImageDataset(X_full, y_full)
full_loader = DataLoader(full_ds, shuffle=True, batch_size=64)

# Start again from a freshly initialised model and optimizer. train_epoch
# reads `model` and `optimizer` from the notebook globals, so rebinding the
# names here is what makes it train the new network.
model = SimpleCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Same schedule as the validated run (epochs 1..39). No validation split is
# left at this point, so only the training accuracy is reported.
for epoch in range(1, 40):
    train_loss, train_acc = train_epoch(full_loader)
    print(f"[Full train] Epoch {epoch:02d} – train_acc: {train_acc:.3f}")


[Full train] Epoch 01 – train_acc: 0.657
[Full train] Epoch 02 – train_acc: 0.813
[Full train] Epoch 03 – train_acc: 0.835
[Full train] Epoch 04 – train_acc: 0.880
[Full train] Epoch 05 – train_acc: 0.906
[Full train] Epoch 06 – train_acc: 0.905
[Full train] Epoch 07 – train_acc: 0.922
[Full train] Epoch 08 – train_acc: 0.933
[Full train] Epoch 09 – train_acc: 0.951
[Full train] Epoch 10 – train_acc: 0.938
[Full train] Epoch 11 – train_acc: 0.950
[Full train] Epoch 12 – train_acc: 0.958
[Full train] Epoch 13 – train_acc: 0.971
[Full train] Epoch 14 – train_acc: 0.965
[Full train] Epoch 15 – train_acc: 0.963
[Full train] Epoch 16 – train_acc: 0.963
[Full train] Epoch 17 – train_acc: 0.963
[Full train] Epoch 18 – train_acc: 0.927
[Full train] Epoch 19 – train_acc: 0.958
[Full train] Epoch 20 – train_acc: 0.973
[Full train] Epoch 21 – train_acc: 0.973
[Full train] Epoch 22 – train_acc: 0.983
[Full train] Epoch 23 – train_acc: 0.975
[Full train] Epoch 24 – train_acc: 0.981
[Full train] Epo

### Feeding the Test Set to the Final Model

In [13]:
# The last call before this cell was train_epoch, which leaves the model in
# training mode. Switch to eval mode so Dropout is disabled; otherwise random
# units are zeroed during inference and the test predictions are noisy.
model.eval()

batch_preds = []
with torch.no_grad():
    for Xb in test_loader:
        logits = model(Xb.to(device))
        # argmax gives the 0-based class index; add 1 to undo the shift that
        # was applied to the training labels.
        batch_preds.append(logits.argmax(1).cpu().numpy() + 1)
all_preds = np.concatenate(batch_preds)

# test_loader does not shuffle, so predictions are in the same order as test_ids.
submission = pd.DataFrame({"id": test_ids, "y": all_preds})
submission.to_csv("cnn_submission_8.csv", index=False)
print("cnn_submission_8.csv with", len(submission), "rows.")

cnn_submission_8.csv with 1200 rows.
