# CNN From Scratch for Image Classification
### Issue #134 – Forget Accuracy, Try from Scratch

This notebook implements a Convolutional Neural Network (CNN) **from scratch**
for image classification using PyTorch.

The workflow connects previous issues by:
- Reusing dataset organization (train / test / eval)
- Training a CNN from random initialization
- Avoiding all pretrained models

The focus is on understanding CNN fundamentals rather than achieving high accuracy.


In [47]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import shutil
from pathlib import Path
from PIL import Image
from PIL import UnidentifiedImageError
torch.backends.cudnn.benchmark = True



In [32]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if torch.cuda.is_available():
    print("GPU is enabled")
    print("Device name:", torch.cuda.get_device_name(0))
    print("Number of GPUs:", torch.cuda.device_count())
else:
    print("Running on CPU")


GPU is enabled
Device name: Tesla P100-PCIE-16GB
Number of GPUs: 1


In [33]:
def safe_pil_loader(path):
    try:
        with open(path, "rb") as f:
            img = Image.open(f)
            img = img.convert("RGB")
            return img
    except (UnidentifiedImageError, OSError, ValueError):
        return None


In [34]:
from torchvision.datasets import ImageFolder

class SafeImageFolder(ImageFolder):
    def __getitem__(self, index):
        path, target = self.samples[index]
        sample = safe_pil_loader(path)

        if sample is None:
            raise RuntimeError(f"Corrupted image slipped through filter: {path}")

        if self.transform is not None:
            sample = self.transform(sample)

        return sample, target


## **Dataset Sanitization**

In [35]:
from tqdm import tqdm

def filter_corrupted_samples(image_folder):
    """
    Removes corrupted image paths from ImageFolder.samples
    """
    valid_samples = []
    corrupted = 0

    for path, label in tqdm(image_folder.samples, desc="Filtering corrupted images"):
        try:
            with open(path, "rb") as f:
                img = Image.open(f)
                img.verify()  # quick integrity check
            valid_samples.append((path, label))
        except Exception:
            corrupted += 1

    image_folder.samples = valid_samples
    image_folder.targets = [label for _, label in valid_samples]

    print(f"Removed {corrupted} corrupted images.")
    print(f"Remaining valid images: {len(valid_samples)}")


## Dataset Splitter Class

In [36]:
class DatasetSplitter:
    def __init__(self, source_dir, output_dir, split_ratio=(0.7, 0.2, 0.1), seed=42):
        self.source_dir = Path(source_dir)
        self.output_dir = Path(output_dir)
        self.train_ratio, self.test_ratio, self.eval_ratio = split_ratio
        self.seed = seed
        random.seed(self.seed)

    def _create_dirs(self, classes):
        for split in ["train", "test", "eval"]:
            for cls in classes:
                (self.output_dir / split / cls).mkdir(parents=True, exist_ok=True)

    def split(self):
        classes = [d.name for d in self.source_dir.iterdir() if d.is_dir()]
        self._create_dirs(classes)

        for cls in classes:
            images = list((self.source_dir / cls).glob("*"))
            random.shuffle(images)

            total = len(images)
            train_end = int(self.train_ratio * total)
            test_end = train_end + int(self.test_ratio * total)

            splits = {
                "train": images[:train_end],
                "test": images[train_end:test_end],
                "eval": images[test_end:]
            }

            for split, files in splits.items():
                for file in files:
                    target = self.output_dir / split / cls / file.name
                    if not target.exists():
                        os.symlink(file, target)


In [37]:
SOURCE_DATASET_PATH = "/kaggle/input/spacenet-an-optimally-distributed-astronomy-data/SpaceNet.FLARE.imam_alam"
OUTPUT_DATASET_PATH = "/kaggle/working/dataset"

splitter = DatasetSplitter(
    source_dir=SOURCE_DATASET_PATH,
    output_dir=OUTPUT_DATASET_PATH,
    split_ratio=(0.7, 0.2, 0.1)
)

splitter.split()

print("Dataset successfully split into train / test / eval directories.")


Dataset successfully split into train / test / eval directories.


In [38]:
DATASET_DIR = "/kaggle/working/dataset"

required_dirs = ["train", "test", "eval"]
for d in required_dirs:
    path = os.path.join(DATASET_DIR, d)
    assert os.path.exists(path), f"Missing directory: {path}"

print("Dataset split verified: train / test / eval present")


Dataset split verified: train / test / eval present


## Dataset Loading and Preprocessing

Images are resized and converted to tensors before being passed into the CNN.
No normalization is applied to keep preprocessing minimal and educational.


In [48]:
BATCH_SIZE = 64
IMG_SIZE = 128

train_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

# ---- Datasets ----
train_dataset = SafeImageFolder(
    root=os.path.join(DATASET_DIR, "train"),
    transform=train_transform
)
filter_corrupted_samples(train_dataset)

val_dataset = SafeImageFolder(
    root=os.path.join(DATASET_DIR, "eval"),
    transform=train_transform
)
filter_corrupted_samples(val_dataset)

test_dataset = SafeImageFolder(
    root=os.path.join(DATASET_DIR, "test"),
    transform=train_transform
)
filter_corrupted_samples(test_dataset)

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,
    pin_memory=torch.cuda.is_available()
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=torch.cuda.is_available()
)

# ---- Metadata ----
num_classes = len(train_dataset.classes)
print("Detected Classes:", train_dataset.classes)
print("Number of classes:", num_classes)


Filtering corrupted images: 100%|██████████| 8974/8974 [02:34<00:00, 58.13it/s]  


Removed 960 corrupted images.
Remaining valid images: 8014


Filtering corrupted images: 100%|██████████| 1289/1289 [00:37<00:00, 34.64it/s]


Removed 143 corrupted images.
Remaining valid images: 1146


Filtering corrupted images: 100%|██████████| 2561/2561 [01:14<00:00, 34.29it/s] 

Removed 273 corrupted images.
Remaining valid images: 2288
Detected Classes: ['asteroid', 'black hole', 'comet', 'constellation', 'galaxy', 'nebula', 'planet', 'star']
Number of classes: 8





## CNN Architecture (Built From Scratch)

A simple CNN architecture is implemented using:
- Convolution layers
- ReLU activations
- Max pooling
- Fully connected layers

No pretrained networks are used.


In [49]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()

        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * (IMG_SIZE // 8) * (IMG_SIZE // 8), 256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x


In [41]:
model = SimpleCNN(num_classes).to(device)
print(model)


SimpleCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=32768, out_features=256, bias=True)
    (2): ReLU()
    (3): Linear(in_features=256, out_features=8, bias=True)
  )
)


## Loss Function and Optimizer

Cross-entropy loss is used for multi-class classification.
The Adam optimizer is used to train the network.


In [50]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [51]:
model = model.to(device)
criterion = criterion.to(device)

print("Model parameters on:", next(model.parameters()).device)


Model parameters on: cuda:0


## Model Training

The CNN is trained from random initialization using the training dataset.


In [52]:
import time

EPOCHS = 5
total_start_time = time.time()

for epoch in range(EPOCHS):
    print(f">>> Starting Epoch {epoch+1}")
    epoch_start_time = time.time()

    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    if device.type == "cuda":
        torch.cuda.synchronize()

    epoch_time = time.time() - epoch_start_time
    avg_loss = running_loss / len(train_loader)

    print(
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Loss: {avg_loss:.4f} | "
        f"Time: {epoch_time:.2f}s"
    )

total_time = time.time() - total_start_time
print(f"\nTotal Training Time: {total_time:.2f}s")


>>> Starting Epoch 1
Epoch [1/5] | Loss: 0.2161 | Time: 563.33s
>>> Starting Epoch 2
Epoch [2/5] | Loss: 0.1541 | Time: 619.99s
>>> Starting Epoch 3
Epoch [3/5] | Loss: 0.1390 | Time: 572.46s
>>> Starting Epoch 4
Epoch [4/5] | Loss: 0.1252 | Time: 558.03s
>>> Starting Epoch 5
Epoch [5/5] | Loss: 0.1255 | Time: 565.60s

Total Training Time: 2879.41s


## Validation Evaluation


In [53]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_accuracy = 100 * correct / total
print(f"Validation Accuracy: {val_accuracy:.2f}%")


Validation Accuracy: 58.29%


## Test Evaluation


In [54]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")


Test Accuracy: 58.87%


## Summary

In this notebook, a Convolutional Neural Network was implemented and trained
entirely from scratch using PyTorch. The model was trained from random
initialization and evaluated on validation and test datasets.

This task focuses on understanding CNN internals rather than maximizing accuracy,
as required by Issue #134.
