In [None]:
import numpy as np
from scipy import signal
import struct

In [19]:
class Convolutional:
    def __init__(self, input_shape, out_channels, kernel_size):
        self.in_channels, self.H, self.W = input_shape
        self.out_channels = out_channels
        self.K = kernel_size
        self.kernels = np.random.randn(out_channels, self.in_channels, kernel_size, kernel_size) * 0.1
        self.biases = np.zeros((out_channels, 1))
        self.H_out = self.H - kernel_size + 1
        self.W_out = self.W - kernel_size + 1

    def forward(self, X):
        self.input = X
        output = np.zeros((self.out_channels, self.H_out, self.W_out))
        for d in range(self.out_channels):
            for c in range(self.in_channels):
                output[d] += signal.correlate2d(X[c], self.kernels[d, c], mode='valid')
            output[d] += self.biases[d]
        self.output = output
        return output

    def backward(self, d_out, lr):
        d_kernels = np.zeros_like(self.kernels)
        d_input = np.zeros_like(self.input)
        for d in range(self.out_channels):
            for c in range(self.in_channels):
                d_kernels[d, c] = signal.correlate2d(self.input[c], d_out[d], mode='valid')
                d_input[c] += signal.convolve2d(d_out[d], self.kernels[d, c], mode='full')
        self.kernels -= lr * d_kernels
        bias_grad = np.sum(d_out, axis=(1,2)) 
        self.biases -= lr * bias_grad.reshape(self.out_channels, 1)
        return d_input

In [3]:
class MaxPooling:
    def __init__(self, size):
        self.size = size

    def forward(self, X):
        self.input = X
        C, H, W = X.shape
        H_out = H // self.size
        W_out = W // self.size
        output = np.zeros((C, H_out, W_out))
        self.max_mask = np.zeros_like(X)

        for c in range(C):
            for i in range(H_out):
                for j in range(W_out):
                    region = X[c, i*self.size:(i+1)*self.size, j*self.size:(j+1)*self.size]
                    max_val = np.max(region)
                    output[c, i, j] = max_val
                    pos = np.unravel_index(np.argmax(region), region.shape)
                    self.max_mask[c, i*self.size+pos[0], j*self.size+pos[1]] = 1
        self.output = output
        return output

    def backward(self, d_out, lr):
        d_input = np.zeros_like(self.input)
        C, H_out, W_out = d_out.shape
        for c in range(C):
            for i in range(H_out):
                for j in range(W_out):
                    d_input[c, i*self.size:(i+1)*self.size, j*self.size:(j+1)*self.size] += \
                        self.max_mask[c, i*self.size:(i+1)*self.size, j*self.size:(j+1)*self.size] * d_out[c,i,j]
        return d_input


In [4]:
class ReLU:
    def forward(self, X):
        self.input = X
        return np.maximum(0, X)
    def backward(self, d_out, lr):
        return d_out * (self.input > 0)


In [5]:
class Flatten:
    def forward(self, X):
        self.input_shape = X.shape
        return X.reshape(-1,1)
    def backward(self, d_out, lr):
        return d_out.reshape(self.input_shape)


In [6]:
class Dense:
    def __init__(self, input_size, output_size):
        self.weights = np.random.randn(output_size, input_size) * 0.1
        self.biases = np.zeros((output_size,1))
    def forward(self, X):
        self.input = X
        return self.weights @ X + self.biases
    def backward(self, d_out, lr):
        d_input = self.weights.T @ d_out
        dW = d_out @ self.input.T
        db = d_out
        self.weights -= lr * dW
        self.biases -= lr * db
        return d_input


In [7]:
class Sigmoid:
    def forward(self, X):
        self.output = 1/(1+np.exp(-X))
        return self.output
    def backward(self, d_out, lr):
        return d_out * self.output * (1 - self.output)

def binary_cross_entropy(y_pred, y_true):
    return - (y_true*np.log(y_pred+1e-8) + (1-y_true)*np.log(1-y_pred+1e-8))

def binary_cross_entropy_grad(y_pred, y_true):
    return y_pred - y_true


In [8]:
class Softmax:
    def forward(self, X):
        exp_shifted = np.exp(X - np.max(X))
        self.output = exp_shifted / np.sum(exp_shifted, axis=0, keepdims=True)
        return self.output
    def backward(self, d_out, lr):
        return d_out 

def categorical_cross_entropy(y_pred, y_true):
    y_pred = np.clip(y_pred, 1e-8, 1 - 1e-8)
    return -np.sum(y_true * np.log(y_pred))

def categorical_cross_entropy_grad(y_pred, y_true):
    return y_pred - y_true


In [9]:
def one_hot(y, num_classes=10):
    oh = np.zeros((num_classes,1))
    oh[y] = 1
    return oh

In [None]:
network = [
    Convolutional((1,28,28), out_channels=8, kernel_size=3), 
    ReLU(),
    MaxPooling(2),                                          
    Convolutional((1,13,13), out_channels=16, kernel_size=3), 
    ReLU(),
    MaxPooling(2), 
    Flatten(),                                             
    Dense(400, 128),
    ReLU(),
    Dense(128,10),
    Softmax()
]

In [27]:
def load_mnist_images(filename):
    with open(filename, 'rb') as f:
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        data = np.frombuffer(f.read(), dtype=np.uint8)
        data = data.reshape(num, rows, cols)
        data = data.astype(np.float32) / 255.0
        data = data.reshape(num, 1, rows, cols)
        return data

def load_mnist_labels(filename):
    with open(filename, 'rb') as f:
        magic, num = struct.unpack(">II", f.read(8))
        labels = np.frombuffer(f.read(), dtype=np.uint8)
        return labels

X = load_mnist_images("./t10k-images.idx3-ubyte")
Y = load_mnist_labels("./t10k-labels.idx1-ubyte")

X = X[:1000]
Y = Y[:1000]

num_samples = X.shape[0]
indices = np.arange(num_samples)
np.random.shuffle(indices) 
X = X[indices]           
Y = Y[indices] 
learning_rate = 0.01
epochs = 20

for epoch in range(epochs):
    total_loss = 0

    idx = np.random.randint(0, len(X))  
    first_x = X[idx]
    first_y = one_hot(Y[idx])
    out_first = first_x
    for layer in network:
        out_first = layer.forward(out_first)

    true_index = np.argmax(first_y)       
    pred_index = np.argmax(out_first) 
    
    print(f"Epoch {epoch+1} başlangıcı -> gerçek: {true_index} tahmin: {pred_index}")

    for i in range(len(X)):
        x = X[i]
        y = one_hot(Y[i])
        
        # Forward
        out = x
        for layer in network:
            out = layer.forward(out)
        
        # Loss
        loss = binary_cross_entropy(out, y).sum()
        total_loss += loss

        # Backward
        grad = binary_cross_entropy_grad(out, y)
        for layer in reversed(network):
            grad = layer.backward(grad, learning_rate)
    
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(X)}\n")


Epoch 1 başlangıcı -> gerçek: 4 tahmin: 7
Epoch 1, Loss: 3.2500134907524467

Epoch 2 başlangıcı -> gerçek: 5 tahmin: 1
Epoch 2, Loss: 3.1320232376343147

Epoch 3 başlangıcı -> gerçek: 5 tahmin: 0
Epoch 3, Loss: 1.7184046580617722

Epoch 4 başlangıcı -> gerçek: 2 tahmin: 2
Epoch 4, Loss: 1.091769797560393

Epoch 5 başlangıcı -> gerçek: 9 tahmin: 9
Epoch 5, Loss: 0.7727688036379128

Epoch 6 başlangıcı -> gerçek: 3 tahmin: 3
Epoch 6, Loss: 0.5725316945340652

Epoch 7 başlangıcı -> gerçek: 5 tahmin: 5
Epoch 7, Loss: 0.392473153948304

Epoch 8 başlangıcı -> gerçek: 3 tahmin: 3
Epoch 8, Loss: 0.2474033175972263

Epoch 9 başlangıcı -> gerçek: 3 tahmin: 3
Epoch 9, Loss: 0.20095531534631877

Epoch 10 başlangıcı -> gerçek: 1 tahmin: 1
Epoch 10, Loss: 0.16274186324582562

Epoch 11 başlangıcı -> gerçek: 9 tahmin: 9
Epoch 11, Loss: 0.08093839532549933

Epoch 12 başlangıcı -> gerçek: 5 tahmin: 5
Epoch 12, Loss: 0.11614989361690305

Epoch 13 başlangıcı -> gerçek: 9 tahmin: 9
Epoch 13, Loss: 0.1827016