In [1]:
# Colab setup: enable GPU runtime, then run this cell.
import sys, os, random
print("Python", sys.version)
# Make sure TensorFlow version is compatible (TF 2.x)
try:
    import tensorflow as tf
    print("TensorFlow", tf.__version__)
except Exception as e:
    print("TensorFlow import error:", e)

# Task 1 — Feedforward neural network from scratch (NumPy)

# Task 1: NN from scratch (NumPy)
import numpy as np
np.random.seed(42)

# Create a small synthetic dataset (2D points, 2 classes)
def make_data(n=400):
    N = n//2
    # class 0
    x0 = np.random.randn(N,2) + np.array([-2,0])
    # class 1
    x1 = np.random.randn(N,2) + np.array([2,0])
    X = np.vstack([x0,x1])
    y = np.array([0]*N + [1]*N)
    # one-hot
    Y = np.eye(2)[y]
    # shuffle
    idx = np.random.permutation(n)
    return X[idx], Y[idx]

X, Y = make_data(400)

# Define model architecture
D = X.shape[1]
H1 = 16
H2 = 12
C = 2

# Initialize weights (small random)
def init():
    W1 = 0.01 * np.random.randn(D, H1)
    b1 = np.zeros((1, H1))
    W2 = 0.01 * np.random.randn(H1, H2)
    b2 = np.zeros((1, H2))
    W3 = 0.01 * np.random.randn(H2, C)
    b3 = np.zeros((1, C))
    return W1,b1,W2,b2,W3,b3

# Activation functions
def relu(x): return np.maximum(0, x)
def drelu(x): return (x>0).astype(float)

def softmax(z):
    z = z - np.max(z, axis=1, keepdims=True)
    ex = np.exp(z)
    return ex / np.sum(ex, axis=1, keepdims=True)

# Forward, loss, backward
def forward(X, params):
    W1,b1,W2,b2,W3,b3 = params
    z1 = X.dot(W1) + b1
    a1 = relu(z1)
    z2 = a1.dot(W2) + b2
    a2 = relu(z2)
    z3 = a2.dot(W3) + b3
    probs = softmax(z3)
    cache = (z1,a1,z2,a2,z3,probs)
    return probs, cache

def compute_loss(probs, Y):
    # cross-entropy
    N = Y.shape[0]
    loss = -np.sum(Y * np.log(probs + 1e-9)) / N
    return loss

def backward(X, Y, params, cache):
    W1,b1,W2,b2,W3,b3 = params
    z1,a1,z2,a2,z3,probs = cache
    N = X.shape[0]
    # dLoss/dz3
    dz3 = (probs - Y) / N
    dW3 = a2.T.dot(dz3)
    db3 = np.sum(dz3, axis=0, keepdims=True)
    da2 = dz3.dot(W3.T)
    dz2 = da2 * drelu(z2)
    dW2 = a1.T.dot(dz2)
    db2 = np.sum(dz2, axis=0, keepdims=True)
    da1 = dz2.dot(W2.T)
    dz1 = da1 * drelu(z1)
    dW1 = X.T.dot(dz1)
    db1 = np.sum(dz1, axis=0, keepdims=True)
    grads = (dW1, db1, dW2, db2, dW3, db3)
    return grads

# Training loop (SGD)
params = init()
W1,b1,W2,b2,W3,b3 = params
lr = 0.5
for epoch in range(1,301):
    probs, cache = forward(X, params)
    loss = compute_loss(probs, Y)
    preds = np.argmax(probs, axis=1)
    acc = np.mean(preds == np.argmax(Y,axis=1))
    if epoch % 50 == 0 or epoch==1:
        print(f"Epoch {epoch:03d} loss={loss:.4f} acc={acc:.3f}")
    grads = backward(X, Y, params, cache)
    # update
    dW1,db1,dW2,db2,dW3,db3 = grads
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2
    W3 -= lr * dW3
    b3 -= lr * db3
    params = (W1,b1,W2,b2,W3,b3)

# Final
probs, _ = forward(X, params)
final_acc = np.mean(np.argmax(probs,1) == np.argmax(Y,1))
print("Final training accuracy:", final_acc)


Python 3.11.13 (main, Jun  4 2025, 08:57:29) [GCC 11.4.0]
TensorFlow 2.19.0
Epoch 001 loss=0.6932 acc=0.500
Epoch 050 loss=0.6925 acc=0.975
Epoch 100 loss=0.0630 acc=0.975
Epoch 150 loss=0.0583 acc=0.975
Epoch 200 loss=0.0569 acc=0.975
Epoch 250 loss=0.0559 acc=0.975
Epoch 300 loss=0.0554 acc=0.975
Final training accuracy: 0.975


In [2]:
# Task 2 — Deep neural network using TensorFlow/Keras on MNIST

# Task 2: Deep NN on MNIST (Keras)
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
print("TF", tf.__version__)

# Load MNIST
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(-1, 28*28).astype("float32") / 255.0
x_test  = x_test.reshape(-1, 28*28).astype("float32") / 255.0

# Model
def build_mlp(input_dim=28*28, num_classes=10):
    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(512, activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

model = build_mlp()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

# Train
history = model.fit(x_train, y_train,
                    validation_split=0.1,
                    epochs=10,
                    batch_size=128)
# Evaluate
test_loss, test_acc = model.evaluate(x_test, y_test)
print("MNIST test accuracy:", test_acc)


TF 2.19.0
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 15ms/step - accuracy: 0.8678 - loss: 0.4423 - val_accuracy: 0.9688 - val_loss: 0.1044
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.9717 - loss: 0.0915 - val_accuracy: 0.9770 - val_loss: 0.0766
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.9815 - loss: 0.0573 - val_accuracy: 0.9757 - val_loss: 0.0818
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 17ms/step - accuracy: 0.9870 - loss: 0.0412 - val_accuracy: 0.9793 - val_loss: 0.0756
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.9907 - loss: 0.0277 - val_accuracy: 0.9773 - val_loss: 0.0876
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.9929 - loss: 0.0233 - val_accuracy: 0.9773 - val_loss: 0.0859
Epoch 7/10
[1m422/42

In [3]:
# Task 3 — Regularization (Dropout and L2)

# Task 3: MLP with Dropout + L2 regularization
from tensorflow.keras import regularizers

def build_mlp_reg(input_dim=28*28, num_classes=10, l2=1e-4, dropout_rate=0.3):
    model = models.Sequential([
        layers.Input(shape=(input_dim,)),
        layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(l2)),
        layers.Dropout(dropout_rate),
        layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(l2)),
        layers.Dropout(dropout_rate),
        layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(l2)),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

model_reg = build_mlp_reg()
model_reg.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
model_reg.summary()

history_reg = model_reg.fit(x_train, y_train, validation_split=0.1, epochs=10, batch_size=128)
test_loss_reg, test_acc_reg = model_reg.evaluate(x_test, y_test)
print("MNIST test accuracy with reg:", test_acc_reg)


Epoch 1/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 16ms/step - accuracy: 0.8133 - loss: 0.6912 - val_accuracy: 0.9698 - val_loss: 0.2030
Epoch 2/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.9551 - loss: 0.2411 - val_accuracy: 0.9760 - val_loss: 0.1733
Epoch 3/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.9676 - loss: 0.1962 - val_accuracy: 0.9773 - val_loss: 0.1712
Epoch 4/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 17ms/step - accuracy: 0.9718 - loss: 0.1786 - val_accuracy: 0.9810 - val_loss: 0.1546
Epoch 5/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 19ms/step - accuracy: 0.9782 - loss: 0.1599 - val_accuracy: 0.9823 - val_loss: 0.1489
Epoch 6/10
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - accuracy: 0.9792 - loss: 0.1527 - val_accuracy: 0.9825 - val_loss: 0.1501
Epoch 7/10
[1m422/4

In [None]:
# Task 4 — Build & train a CNN on CIFAR-10 (aim >= ~70%)

# Task 4: CNN on CIFAR-10
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load CIFAR-10
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
y_train = y_train.flatten()
y_test  = y_test.flatten()

# Normalize
x_train = x_train.astype("float32")/255.0
x_test  = x_test.astype("float32")/255.0

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)
datagen.fit(x_train)

# Build model
def build_cnn(input_shape=(32,32,3), num_classes=10):
    inputs = layers.Input(shape=input_shape)
    x = layers.Conv2D(64, (3,3), padding='same', activation='relu')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(64, (3,3), padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.25)(x)

    x = layers.Conv2D(128, (3,3), padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(128, (3,3), padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.25)(x)

    x = layers.Conv2D(256, (3,3), padding='same', activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPool2D()(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Flatten()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    model = models.Model(inputs, outputs)
    return model

model_cnn = build_cnn()
model_cnn.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
model_cnn.summary()

# Callbacks
callbacks = [
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=12, restore_best_weights=True)
]

# Train: use generator
batch_size = 64
epochs = 50  # Increase if needed to push accuracy up
history = model_cnn.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=len(x_train)//batch_size,
                        validation_data=(x_test, y_test),
                        epochs=epochs,
                        callbacks=callbacks)

# Evaluate
test_loss_cnn, test_acc_cnn = model_cnn.evaluate(x_test, y_test, verbose=2)
print("CIFAR-10 CNN test accuracy:", test_acc_cnn)


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 0us/step


Epoch 1/50


  self._warn_if_super_not_called()


[1m781/781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m961s[0m 1s/step - accuracy: 0.3720 - loss: 1.9095 - val_accuracy: 0.5144 - val_loss: 1.3881 - learning_rate: 0.0010
Epoch 2/50
[1m  1/781[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m13:16[0m 1s/step - accuracy: 0.5781 - loss: 1.2593



[1m781/781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 54ms/step - accuracy: 0.5781 - loss: 1.2593 - val_accuracy: 0.4914 - val_loss: 1.4512 - learning_rate: 0.0010
Epoch 3/50
[1m781/781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m925s[0m 1s/step - accuracy: 0.6032 - loss: 1.1199 - val_accuracy: 0.5027 - val_loss: 1.4391 - learning_rate: 0.0010
Epoch 4/50
[1m781/781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 54ms/step - accuracy: 0.5781 - loss: 0.9893 - val_accuracy: 0.5294 - val_loss: 1.3683 - learning_rate: 0.0010
Epoch 5/50
[1m781/781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m930s[0m 1s/step - accuracy: 0.6769 - loss: 0.9189 - val_accuracy: 0.7012 - val_loss: 0.8863 - learning_rate: 0.0010
Epoch 6/50
[1m781/781[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 105ms/step - accuracy: 0.7969 - loss: 0.5565 - val_accuracy: 0.6937 - val_loss: 0.9174 - learning_rate: 0.0010
Epoch 7/50
[1m714/781[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m1: