In [None]:
from keras import datasets, layers, models
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.applications import VGG16
from keras.preprocessing import image
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Load CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Human-readable class names, indexed by the integer label
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Cast the pixel data to float32 and scale it into [0, 1] in a single expression per split
train_images = train_images.astype('float32') / 255
test_images = test_images.astype('float32') / 255

# Upsample every image to 224x224 for VGG16.
# NOTE(review): each resized float32 image takes roughly 0.6 MB (224*224*3*4 bytes,
# assuming 3 channels), so these arrays are very large -- confirm the runtime has enough RAM.
TARGET_SIZE = (224, 224)
train_images_resized = tf.image.resize(train_images, TARGET_SIZE).numpy()
test_images_resized = tf.image.resize(test_images, TARGET_SIZE).numpy()

# One-hot encode the integer labels (10 classes)
train_labels, test_labels = (
    to_categorical(labels, num_classes=10) for labels in (train_labels, test_labels)
)

# Model inputs are the resized images; targets are the one-hot labels
X_train, Y_train = train_images_resized, train_labels
X_test, Y_test = test_images_resized, test_labels

# --- Part 14.1-2 (2 pts) --- Show the shape of datasets and one sample image ---
# These shapes are those of the normalised, *un-resized* image arrays.
print(f"Shape of train images: {train_images.shape}, Shape of train labels: {train_labels.shape}")
print(f"Shape of test images: {test_images.shape}, Shape of test labels: {test_labels.shape}")

# Display the first (resized) training image, titled with its class name
plt.imshow(X_train[0])
plt.title(f"Sample image: {class_names[np.argmax(Y_train[0])]}")
plt.show()

# --- Part 14.1-3 (4 pts) --- Create the model using transfer learning ---
def create_model(input_shape, fine_tune=0):
    """Build a classifier made of a VGG16 convolutional base plus a small dense head.

    Args:
        input_shape: Shape of one input image, passed straight to VGG16.
        fine_tune: Number of layers, counted from the end of the VGG16 base,
            that remain trainable. With a value of 0 or less every layer of
            the base is frozen.

    Returns:
        An uncompiled Sequential model: VGG16 base -> Flatten -> Dense(20, relu)
        -> Dense(10, relu) -> Dense(10, softmax).
    """
    # VGG16 with ImageNet weights and without its fully connected top
    conv_base = VGG16(include_top=False, weights='imagenet', input_shape=input_shape)

    # Pick the layers to freeze: all of them, or all but the last `fine_tune`
    frozen_layers = conv_base.layers[:-fine_tune] if fine_tune > 0 else conv_base.layers
    for frozen in frozen_layers:
        frozen.trainable = False

    # Classification head stacked on top of the convolutional base
    head = [
        Flatten(),
        Dense(20, activation='relu'),     # 1st dense layer with 20 units
        Dense(10, activation='relu'),     # 2nd dense layer with 10 units
        Dense(10, activation='softmax'),  # output layer, one unit per class
    ]
    return models.Sequential([conv_base, *head])

# Create the model with no fine-tuning initially (the whole VGG16 base is frozen)
input_shape = X_train[0].shape
model = create_model(input_shape, fine_tune=0)

# Display the model architecture.
# summary() prints the table itself and returns None, so wrapping it in print()
# would emit a stray "None" line after the table.
model.summary()

# --- Part 14.1-4 (4 pts) --- Compile, train the model and evaluate ---
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model; the test split is passed as validation data for per-epoch metrics
history = model.fit(X_train, Y_train, epochs=10, batch_size=64, validation_data=(X_test, Y_test))

# Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, Y_test, verbose=2)
print(f"Test accuracy: {test_acc}")

# --- Part 14.2 --- Plot accuracy vs number of trainable layers ---
def plot_accuracy_vs_layers(fine_tune_range=range(0, 16), epochs=5, batch_size=64):
    """Train one model per fine-tune setting and plot the resulting test accuracy.

    Reads the notebook globals ``input_shape``, ``X_train``, ``Y_train``,
    ``X_test`` and ``Y_test``.

    Args:
        fine_tune_range: Iterable of ``fine_tune`` values handed to
            ``create_model`` (how many trailing VGG16 layers stay trainable).
            Defaults to 0..15, the sweep this function originally hard-coded.
        epochs: Training epochs per model (default 5).
        batch_size: Mini-batch size for training (default 64).

    Returns:
        None. The accuracy curve is shown with matplotlib.
    """
    # Local import: only this function needs the backend helper.
    from keras import backend as keras_backend

    layer_counts = list(fine_tune_range)
    accuracies = []

    for fine_tune in layer_counts:
        # Every iteration builds a full VGG16. Reset Keras' global state first
        # so models from earlier iterations do not pile up in memory.
        keras_backend.clear_session()

        model = create_model(input_shape, fine_tune=fine_tune)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=0)
        _, test_acc = model.evaluate(X_test, Y_test, verbose=0)
        accuracies.append(test_acc)

        # Drop the reference so the finished model can be garbage collected.
        del model

    # Plotting the accuracy
    plt.plot(layer_counts, accuracies)
    plt.xlabel("Number of Trainable Layers")
    plt.ylabel("Test Accuracy")
    plt.title("Accuracy vs Number of Trainable Layers")
    plt.show()

# Plot the accuracy vs the number of trainable layers.
# plot_accuracy_vs_layers() trains a fresh model for every fine_tune value it
# sweeps over, so expect this call to take a long time.
plot_accuracy_vs_layers()

# --- Part 14.3 Bonus --- Use MNIST dataset as input and repeat the above process ---
from keras.datasets import mnist

# Load MNIST dataset
(train_images_mnist, train_labels_mnist), (test_images_mnist, test_labels_mnist) = mnist.load_data()

# Preprocess MNIST data: add a trailing channel axis, cast to float32, scale to [0, 1]
train_images_mnist = np.expand_dims(train_images_mnist, axis=-1).astype('float32')
test_images_mnist = np.expand_dims(test_images_mnist, axis=-1).astype('float32')
train_images_mnist /= 255
test_images_mnist /= 255

# The images have a single channel at this point, but create_model() builds
# VGG16 with ImageNet weights, which only accepts 3-channel input. Replicate the
# grey channel three times (done before resizing, while the arrays are small).
train_images_mnist = np.repeat(train_images_mnist, 3, axis=-1)
test_images_mnist = np.repeat(test_images_mnist, 3, axis=-1)

# Resize MNIST images to (224, 224, 3) for VGG16
# NOTE(review): the resized float32 arrays are very large -- confirm the runtime has enough RAM.
train_images_mnist_resized = tf.image.resize(train_images_mnist, (224, 224)).numpy()
test_images_mnist_resized = tf.image.resize(test_images_mnist, (224, 224)).numpy()

# Convert labels to one-hot encoding for MNIST
train_labels_mnist = to_categorical(train_labels_mnist, num_classes=10)
test_labels_mnist = to_categorical(test_labels_mnist, num_classes=10)

# Define the inputs and outputs for MNIST
X_train_mnist = train_images_mnist_resized
X_test_mnist = test_images_mnist_resized
Y_train_mnist = train_labels_mnist
Y_test_mnist = test_labels_mnist

# Create the model with no fine-tuning initially for MNIST
input_shape_mnist = X_train_mnist[0].shape
model_mnist = create_model(input_shape_mnist, fine_tune=0)

# Train and evaluate the model on MNIST
model_mnist.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
history_mnist = model_mnist.fit(X_train_mnist, Y_train_mnist, epochs=10, batch_size=64, validation_data=(X_test_mnist, Y_test_mnist))
test_loss_mnist, test_acc_mnist = model_mnist.evaluate(X_test_mnist, Y_test_mnist, verbose=2)
print(f"Test accuracy on MNIST: {test_acc_mnist}")


In [None]:
import numpy as np

# Define the function and its gradients
def f(x, y):
    '''Objective function f(x, y) = x^2 + 2y + 3x.'''
    square_term = x**2
    y_term = 2*y
    x_term = 3*x
    # Summed in the same left-to-right order as the formula above
    return square_term + y_term + x_term

# Compute the derivative with respect to x
def f_der_x(x):
    '''Partial derivative of f(x, y) with respect to x, i.e. 2x + 3.'''
    doubled = 2*x
    return doubled + 3

# Compute the derivative with respect to y
def f_der_y(y):
    '''Partial derivative of f(x, y) with respect to y.

    f is linear in y, so the result is the constant 2 whatever `y` is.
    '''
    constant_slope = 2
    return constant_slope

# Implement RMSProp
# Hyper-parameters: decay factor of the running average, learning rate,
# and a small constant that keeps the division away from zero
gamma, lrate, epsilon = 0.8, 0.01, 1e-8

# Running averages of the squared gradients for x and y
gt_x = gt_y = 0

# Starting point and iteration budget
x, y = 10.0, -10.0
no_iterations = 100

# RMSProp optimization loop
for i in range(no_iterations):
    # Gradients at the current point
    grad_x, grad_y = f_der_x(x), f_der_y(y)

    # x: refresh the running average, then take a step scaled by its root
    gt_x = gamma * gt_x + (1 - gamma) * grad_x**2
    scale_x = lrate / (np.sqrt(gt_x) + epsilon)
    x -= scale_x * grad_x

    # y: same update rule
    gt_y = gamma * gt_y + (1 - gamma) * grad_y**2
    scale_y = lrate / (np.sqrt(gt_y) + epsilon)
    y -= scale_y * grad_y

    # Show the progress
    print(f"Iteration = {i + 1}: x = {x}, y = {y}, f(x, y) = {f(x, y)}")

# Show the final x value and f value
print(f"Final: x = {x}, y = {y}, f(x, y) = {f(x, y)}")

# Bonus 1: L2 regularization
def f_with_l2(x, y, lambda_reg=0.1):
    '''Objective f(x, y) plus an L2 penalty lambda_reg * (x^2 + y^2).'''
    penalty = lambda_reg * (x**2 + y**2)
    return f(x, y) + penalty

def f_der_x_l2(x, lambda_reg=0.1):
    '''x-derivative of the L2-regularized objective: f_der_x(x) + 2 * lambda_reg * x.'''
    penalty_grad = 2 * lambda_reg * x
    return f_der_x(x) + penalty_grad

def f_der_y_l2(y, lambda_reg=0.1):
    '''y-derivative of the L2-regularized objective: f_der_y(y) + 2 * lambda_reg * y.'''
    penalty_grad = 2 * lambda_reg * y
    return f_der_y(y) + penalty_grad

# Implement RMSProp with L2 regularization
# Restart from the same point with fresh running averages
x, y = 10.0, -10.0
gt_x = gt_y = 0
lambda_reg = 0.1  # Regularization strength

for i in range(no_iterations):
    # Gradients of the regularized objective
    grad_x, grad_y = f_der_x_l2(x, lambda_reg), f_der_y_l2(y, lambda_reg)

    # x: refresh the running average, then take the scaled step
    gt_x = gamma * gt_x + (1 - gamma) * grad_x**2
    scale_x = lrate / (np.sqrt(gt_x) + epsilon)
    x -= scale_x * grad_x

    # y: same update rule
    gt_y = gamma * gt_y + (1 - gamma) * grad_y**2
    scale_y = lrate / (np.sqrt(gt_y) + epsilon)
    y -= scale_y * grad_y

    print(f"Iteration = {i + 1}: x = {x}, y = {y}, f(x, y) = {f_with_l2(x, y, lambda_reg)}")

# Show the final x value and f value with L2 regularization
print(f"Final (with L2): x = {x}, y = {y}, f(x, y) = {f_with_l2(x, y, lambda_reg)}")

# Bonus 2: Implement AdaDelta
def adadelta_optimizer(x, y, gamma=0.8, epsilon=1e-8, lrate=0.01, no_iterations=100):
    '''Minimize f(x, y) with the AdaDelta update rule.

    For each variable two running averages are kept:
      * gt -- decaying average of squared gradients,
      * st -- decaying average of squared *updates* (deltas).
    The step is  delta = sqrt(st_prev + eps) / sqrt(gt + eps) * grad,  and st is
    refreshed with delta**2 afterwards. (Feeding st with the squared gradient
    instead would make st == gt, the ratio 1, and the method plain gradient
    descent.)

    Args:
        x, y: Starting point.
        gamma: Decay factor of both running averages.
        epsilon: Small constant added inside the square roots; it also sets
            the size of the very first step, since st starts at 0.
        lrate: Extra scale applied to every step. NOTE(review): AdaDelta as
            usually stated uses no learning rate (equivalent to lrate=1.0);
            with the defaults here the steps are very small -- confirm the
            intended value.
        no_iterations: Number of update steps.

    Returns:
        Tuple (x, y) after the last iteration.
    '''
    # Accumulated squared gradients for x and y
    gt_x, gt_y = 0.0, 0.0
    # Accumulated squared updates for x and y
    st_x, st_y = 0.0, 0.0

    for i in range(no_iterations):
        # Compute the gradients for x and y
        grad_x = f_der_x(x)
        grad_y = f_der_y(y)

        # Update accumulated gradient squares (gt) for x and y
        gt_x = gamma * gt_x + (1 - gamma) * grad_x**2
        gt_y = gamma * gt_y + (1 - gamma) * grad_y**2

        # Step size uses st from the PREVIOUS iteration and the fresh gt
        delta_x = np.sqrt(st_x + epsilon) / np.sqrt(gt_x + epsilon) * grad_x
        delta_y = np.sqrt(st_y + epsilon) / np.sqrt(gt_y + epsilon) * grad_y

        # Update accumulated squared updates (st) with the step just computed
        st_x = gamma * st_x + (1 - gamma) * delta_x**2
        st_y = gamma * st_y + (1 - gamma) * delta_y**2

        # Update the variables x and y
        x -= lrate * delta_x
        y -= lrate * delta_y

        # Show the progress
        print(f"Iteration = {i + 1}: x = {x}, y = {y}, f(x, y) = {f(x, y)}")

    return x, y

# Run AdaDelta starting from (10.0, -10.0)
x, y = adadelta_optimizer(10.0, -10.0)

# Report the point AdaDelta ended at and the objective value there
print(f"Final (AdaDelta): x = {x}, y = {y}, f(x, y) = {f(x, y)}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Define the true function
# Suppose the true error function is the cosine function
def true_function(X):
    '''Ground-truth function used to generate targets: the element-wise cosine of X.'''
    cosine_values = np.cos(X)
    return cosine_values

# Define the RBF (Radial Basis Function) kernel (Gaussian Kernel)
def rbf_kernel(X1, X2, lambda_val=1.0):
    '''RBF (Gaussian) kernel matrix between two sets of points.

    K[i, j] = exp(-lambda_val * ||X1[i] - X2[j]||^2)

    Args:
        X1: n points, either a 1-D array of n scalars or a 2-D array of
            shape (n, d). A scalar is treated as a single 1-D point.
        X2: m points, in the same formats as X1 (same d).
        lambda_val: Kernel coefficient (lambda) multiplying the squared distance.

    Returns:
        numpy array K of shape (n, m).
    '''
    X1 = np.asarray(X1, dtype=float)
    X2 = np.asarray(X2, dtype=float)
    # Treat scalars / 1-D inputs as a column of one-dimensional points so the
    # row-wise sums below are always well defined.
    if X1.ndim < 2:
        X1 = X1.reshape(-1, 1)
    if X2.ndim < 2:
        X2 = X2.reshape(-1, 1)

    # Squared distances via ||a||^2 + ||b||^2 - 2 a.b
    sq_dist = np.sum(X1**2, axis=1).reshape(-1, 1) + np.sum(X2**2, axis=1) - 2 * np.dot(X1, X2.T)
    # The expansion above can go a hair below zero through round-off; a
    # squared distance is never negative, so clamp it.
    sq_dist = np.maximum(sq_dist, 0.0)

    # Apply the Gaussian kernel formula
    return np.exp(-lambda_val * sq_dist)

# Define the Gaussian Process Regression function
def gaussian_regression(X_train, y_train, X_test, lambda_val=1.0):
    '''Gaussian Process Regression with an RBF kernel.

    Args:
        X_train: Training inputs, forwarded to rbf_kernel.
        y_train: Training targets, one per training input.
        X_test: Query inputs, forwarded to rbf_kernel.
        lambda_val: RBF kernel coefficient, forwarded to rbf_kernel.

    Returns:
        (mu_star, sigma_star): posterior predictive mean at the query points
        and the posterior predictive covariance matrix between them.
    '''
    # Step 1: covariance of the training data, K(X_train, X_train)
    K = rbf_kernel(X_train, X_train, lambda_val)

    # Step 2: covariance between training and test points, K(X_train, X_test)
    K_s = rbf_kernel(X_train, X_test, lambda_val)

    # Step 3: covariance of the test data, K(X_test, X_test)
    K_ss = rbf_kernel(X_test, X_test, lambda_val)

    # Step 4: regularize with a small jitter so the system is not singular,
    # then solve the linear systems directly. This applies K^-1 without
    # forming the explicit inverse, which is numerically more accurate.
    K_reg = K + 1e-8 * np.eye(len(X_train))
    alpha = np.linalg.solve(K_reg, y_train)   # K^-1 y
    K_inv_K_s = np.linalg.solve(K_reg, K_s)   # K^-1 K_s

    # Step 5: mean of the posterior predictive distribution
    mu_star = np.dot(K_s.T, alpha)

    # Step 6: covariance of the posterior predictive distribution
    sigma_star = K_ss - np.dot(K_s.T, K_inv_K_s)

    return mu_star, sigma_star

# Suppose you did the following 6 experiments changing the values of 'X_train'.
# 'y_train' is its corresponding error function value
X_train = np.array([[1], [3], [5], [6], [7], [8]])
y_train = true_function(X_train).ravel()


def _posterior_sd(sigma):
    '''Per-point standard deviation from a posterior covariance matrix.

    The variances are the diagonal of `sigma`. Because gaussian_regression
    computes sigma as a difference of two matrices, round-off can leave a
    variance slightly below zero, which would turn np.sqrt into NaN; such
    values are clamped to 0 first.
    '''
    return np.sqrt(np.clip(np.diagonal(sigma), 0.0, None))


# Let's estimate the error function value when x=2.2
X_test = np.array([[2.2]])

# 16. 1)-3: Estimate mean and covariance of 'X_test'
mu_star, sigma_star = gaussian_regression(X_train, y_train, X_test)

# Compute standard deviation (from the diagonal values of the covariance matrix)
sd = _posterior_sd(sigma_star)

print(f"Mean for X_test=2.2: {mu_star}")
print(f"Standard deviation for X_test=2.2: {sd}")

# 2) Change X_test to X_test=np.array([[3.4]]). Compute mean and s.d. of X_test=3.4.
X_test = np.array([[3.4]])
mu_star, sigma_star = gaussian_regression(X_train, y_train, X_test)
sd = _posterior_sd(sigma_star)

print(f"Mean for X_test=3.4: {mu_star}")
print(f"Standard deviation for X_test=3.4: {sd}")

# 3) Change X_test to a set of values (10 evenly spaced points from 0 to 10)
X_test = np.linspace(0, 10, 10).reshape(-1, 1)

# Compute mean and standard deviation for the new test points
mu_star, sigma_star = gaussian_regression(X_train, y_train, X_test)
sd = _posterior_sd(sigma_star)

# Plotting the results: true curve, training points, posterior mean and a +/-1.96 sd band
plt.figure()
plt.plot(X_test, true_function(X_test), 'r:', label="True function")
plt.plot(X_train, y_train, 'r.', markersize=10, label="Training data")
plt.plot(X_test, mu_star, 'b-', label="Prediction")
plt.fill_between(X_test.ravel(), mu_star - 1.96*sd, mu_star + 1.96*sd, alpha=0.2, color='b', label="Confidence interval")
plt.xlabel('X')
plt.ylabel('y')
plt.title('Gaussian Process Regression')
plt.legend()
plt.show()

# 5) Lower Confidence Bound (LCB)
# For each value in X_test, compute LCB(x) = mu(x) - k * sigma(x)
k = 1.96  # For 95% confidence interval, you can adjust 'k' as needed
LCB = mu_star - k * sd

print("Lower Confidence Bound (LCB) for each X_test value:")
print(LCB)

# You can choose the next X value based on LCB values (e.g., choose the X value with the minimum LCB for exploration)


: 