# Image Classification using CNN from Scratch
In this notebook, we will build a Convolutional Neural Network (CNN) from scratch for image classification to predict whether an image is of a cat or a dog. We will use basic Python and numpy to achieve this without any high-level deep learning libraries like Keras or TensorFlow.


In [5]:
import numpy as np

# Assuming images are 100x100 RGB (3 channels)
IMG_HEIGHT = 100
IMG_WIDTH = 100
IMG_CHANNELS = 3


## Convolutional Layer
We define a Convolutional layer that applies filters to the input image.

In [6]:
class Conv2D:
    def __init__(self, num_filters, filter_size):
        self.num_filters = num_filters
        self.filter_size = filter_size
        self.filters = np.random.randn(num_filters, filter_size, filter_size, IMG_CHANNELS) / 9

    def iterate_regions(self, image):
        h, w, _ = image.shape
        new_h = h - self.filter_size + 1
        new_w = w - self.filter_size + 1
        for i in range(new_h):
            for j in range(new_w):
                region = image[i:i+self.filter_size, j:j+self.filter_size]
                yield region, i, j

    def forward(self, input):
        self.last_input = input
        h, w, _ = input.shape
        output = np.zeros((h - self.filter_size + 1, w - self.filter_size + 1, self.num_filters))
        
        for region, i, j in self.iterate_regions(input):
            output[i, j] = np.sum(region * self.filters, axis=(1, 2, 3))
        
        return output

    def backward(self, d_L_d_out, learn_rate):
        d_L_d_filters = np.zeros(self.filters.shape)
        for region, i, j in self.iterate_regions(self.last_input):
            for f in range(self.num_filters):
                d_L_d_filters[f] += d_L_d_out[i, j, f] * region

        self.filters -= learn_rate * d_L_d_filters
        return None  # Convolutional layers typically return None in backward pass


## Max Pooling Layer
We define a MaxPooling layer to down-sample the input.


In [8]:
class MaxPooling2D:
    def __init__(self, pool_size):
        self.pool_size = pool_size

    def iterate_regions(self, image):
        h, w, num_filters = image.shape
        new_h = h // self.pool_size
        new_w = w // self.pool_size
        for i in range(new_h):
            for j in range(new_w):
                region = image[(i*self.pool_size):(i*self.pool_size+self.pool_size),
                               (j*self.pool_size):(j*self.pool_size+self.pool_size)]
                yield region, i, j

    def forward(self, input):
        self.last_input = input
        h, w, num_filters = input.shape
        output = np.zeros((h // self.pool_size, w // self.pool_size, num_filters))
        
        for region, i, j in self.iterate_regions(input):
            output[i, j] = np.amax(region, axis=(0, 1))
        
        return output

    def backward(self, d_L_d_out):
        d_L_d_input = np.zeros(self.last_input.shape)
        
        for region, i, j in self.iterate_regions(self.last_input):
            h, w, f = region.shape
            amax = np.amax(region, axis=(0, 1))
            
            for i2 in range(h):
                for j2 in range(w):
                    for f2 in range(f):
                        if region[i2, j2, f2] == amax[f2]:
                            d_L_d_input[i*self.pool_size + i2, j*self.pool_size + j2, f2] = d_L_d_out[i, j, f2]
        
        return d_L_d_input


## Dense Layer
We define a Dense (fully connected) layer to process the flattened input from previous layers.


In [9]:
class Dense:
    def __init__(self, input_len, nodes):
        self.weights = np.random.randn(input_len, nodes) / input_len
        self.biases = np.zeros(nodes)

    def forward(self, input):
        self.last_input_shape = input.shape
        input = input.flatten()
        self.last_input = input
        input_len, nodes = self.weights.shape
        totals = np.dot(input, self.weights) + self.biases
        self.last_totals = totals
        return totals

    def backward(self, d_L_d_out, learn_rate):
        d_L_d_t = d_L_d_out
        
        d_t_d_w = self.last_input
        d_t_d_b = 1
        d_t_d_inputs = self.weights
        
        d_L_d_w = np.dot(d_t_d_w[:, np.newaxis], d_L_d_t[np.newaxis, :])
        d_L_d_b = d_L_d_t * d_t_d_b
        d_L_d_inputs = np.dot(d_t_d_inputs, d_L_d_t)
        
        self.weights -= learn_rate * d_L_d_w
        self.biases -= learn_rate * d_L_d_b
        
        return d_L_d_inputs.reshape(self.last_input_shape)


## Helper Functions
We define helper functions for softmax activation, cross-entropy loss, and image preprocessing.


In [10]:
def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / exp_x.sum(axis=0)

def cross_entropy_loss(output, y):
    return -np.log(output[int(y)])  # Ensure y is an integer

def preprocess_image(image_array):
    return image_array.reshape(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)


## Forward and Train Functions
We define functions to perform the forward pass and training (including backpropagation) of the network.


In [12]:
def forward(image, label):
    out = conv.forward((image / 255) - 0.5)
    out = pool.forward(out)
    out = softmax(dense.forward(out))
    
    loss = cross_entropy_loss(out, label)
    acc = 1 if np.argmax(out) == int(label) else 0  # Ensure label is an integer
    
    return out, loss, acc

def train(im, label, lr=.005):
    out, loss, acc = forward(im, label)
    
    grad = np.zeros(2)  # Since we have 2 classes (cat, dog)
    grad[int(label)] = -1 / out[int(label)]  # Ensure label is an integer
    
    grad = dense.backward(grad, lr)
    grad = pool.backward(grad)
    grad = conv.backward(grad, lr)
    
    return loss, acc


## Load Dataset
Load the dataset from CSV files and preprocess it.


In [13]:
# Load dataset 
X_train = np.loadtxt(r'C:\Users\khush\Downloads\Image Classification using CNN Keras Dataset Compress\compress\input_compress.csv', delimiter=',')
Y_train = np.loadtxt(r'C:\Users\khush\Downloads\Image Classification using CNN Keras Dataset Compress\compress\labels_compress.csv', delimiter=',')
X_test = np.loadtxt(r'C:\Users\khush\Downloads\Image Classification using CNN Keras Dataset Compress\compress\input_test_compress.csv', delimiter=',')
Y_test = np.loadtxt(r'C:\Users\khush\Downloads\Image Classification using CNN Keras Dataset Compress\compress\labels_test_compress.csv', delimiter=',')
    
# Reshape dataset
X_train = np.array([preprocess_image(x) for x in X_train])
X_test = np.array([preprocess_image(x) for x in X_test])

# Convert labels to integers
Y_train = Y_train.astype(int)
Y_test = Y_test.astype(int)


## Initialize Layers
Initialize the CNN layers.


In [14]:
# Initialize layers
conv = Conv2D(8, 3)
pool = MaxPooling2D(2)
dense = Dense((IMG_HEIGHT - 2) // 2 * (IMG_WIDTH - 2) // 2 * 8, 2)  # Adjust based on input image size


## Training Loop
Train the CNN using the training data.


In [17]:
epochs = 3
learning_rate = 0.005

for epoch in range(epochs):
    print(f'--- Epoch {epoch + 1} ---')
    permutation = np.random.permutation(len(X_train))
    X_train = X_train[permutation]
    Y_train = Y_train[permutation]
    
    loss = 0
    num_correct = 0
    
    for i, (im, label) in enumerate(zip(X_train, Y_train)):
        l, acc = train(im, label, lr=learning_rate)
        loss += l
        num_correct += acc
    
    print(f'Epoch {epoch + 1}, Loss: {loss:.3f}, Accuracy: {num_correct / len(X_train):.3f}')

# Evaluation on test data
test_loss = 0
test_correct = 0

for im, label in zip(X_test, Y_test):
    _, l, acc = forward(im, label)
    test_loss += l
    test_correct += acc

print(f'Test Loss: {test_loss:.3f}, Test Accuracy: {test_correct / len(X_test):.3f}')


--- Epoch 1 ---


  return -np.log(output[int(y)])  # Ensure y is an integer
  grad[int(label)] = -1 / out[int(label)]  # Ensure label is an integer
  d_L_d_filters[f] += d_L_d_out[i, j, f] * region


Epoch 1, Loss: nan, Accuracy: 0.507
--- Epoch 2 ---
Epoch 2, Loss: nan, Accuracy: 0.500
--- Epoch 3 ---
Epoch 3, Loss: nan, Accuracy: 0.500
Test Loss: nan, Test Accuracy: 0.500


This code includes the essential components: convolutional layer (Conv2D), max-pooling layer (MaxPooling2D), and dense layer (Dense). It also contains a simple train function that includes forward and backward propagation to update the weights.

This implementation does not include certain advanced features and optimizations found in libraries like Keras or PyTorch, but it provides a fundamental understanding of how these layers and training processes work under the hood

Note: This code provides a very basic implementation and lacks many features of robust deep learning frameworks. For practical purposes, using libraries like TensorFlow/Keras or PyTorch is recommended.

Done by khush jay brahmbhatt
