# Problem Statement: **BONUS EXERCISE**

Imports and CUDA

In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Prefer the GPU whenever PyTorch reports a usable CUDA runtime; otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

Using device: cuda


Problem 1: **Gradient Descent for Demand Forecasting at AtliQ**

AtliQ wants to optimize the prediction of regional product demands using gradient descent.

Assume the loss function is

$$L(w) = (w - 4)^2$$

where **w** is a weight parameter initialized at 0.

**Write code to:**

* Perform 10 iterations of gradient descent using a learning rate of 0.1.

* Print the weight **w** at each step.



In [5]:
# Plain gradient descent on L(w) = (w - 4)^2, whose derivative is dL/dw = 2 * (w - 4)
w = 0.0              # starting weight
learning_rate = 0.1  # step size

for step in range(1, 11):
  slope = 2 * (w - 4)          # derivative of the loss at the current weight
  w -= learning_rate * slope   # move against the slope
  print(f"Step {step}: w = {w:.4f}")

Step 1: w = 0.8000
Step 2: w = 1.4400
Step 3: w = 1.9520
Step 4: w = 2.3616
Step 5: w = 2.6893
Step 6: w = 2.9514
Step 7: w = 3.1611
Step 8: w = 3.3289
Step 9: w = 3.4631
Step 10: w = 3.5705




---



Problem 2: **Momentum for Contour Navigation in AtliQ's Supply Chain**

AtliQ's supply chain optimization problem is represented by a contour map of a quadratic function:

$$f(x,y)=x^2 +3y^2$$

Write code to implement gradient descent with momentum (5 iterations) to minimize this function.

Use:
* Initial point (x, y) = (2, 2)
* Learning rate (η) = 0.1
* Momentum coefficient (β) = 0.9

In [None]:
def gradient(x, y):
  """Return the partial derivatives (df/dx, df/dy) of f(x, y) = x^2 + 3y^2."""
  grad_x = 2 * x
  grad_y = 6 * y
  return grad_x, grad_y

x, y = 2, 2
learning_rate = 0.1
momentum = 0.9
vx, vy = 0.0, 0.0 # initialized velocity

for i in range(5):
  dx, dy = gradient(x, y)
  # The velocity already carries the minus sign (it points downhill),
  # which is why the position update below is a plain addition.
  vx = momentum * vx - learning_rate * dx
  vy = momentum * vy - learning_rate * dy
  x += vx
  y += vy
  print(f"Step {i+1}: x = {x:.4f}, y = {y:.4f}")

In [6]:
# Minimise f(x, y) = x^2 + 3y^2 with momentum gradient descent
x, y = 2, 2                      # starting point
learning_rate, beta = 0.1, 0.9   # step size and momentum coefficient
iterations = 5                   # number of update steps
v_x = v_y = 0                    # velocities start at rest

print("Iteration |     x       |     y       |   f(x, y)")
print("------------------------------------------------")

for i in range(1, iterations + 1):
    # Analytic partial derivatives at the current point
    grad_x, grad_y = 2 * x, 6 * y

    # New velocity = decayed previous velocity + scaled current gradient
    v_x, v_y = (
        beta * v_x + learning_rate * grad_x,
        beta * v_y + learning_rate * grad_y,
    )

    # Step against the accumulated velocity
    x -= v_x
    y -= v_y

    # Report the objective at the updated point
    f_xy = x**2 + 3 * y**2
    print(f"{i:9} | {x:.6f} | {y:.6f} | {f_xy:.6f}")


Iteration |     x       |     y       |   f(x, y)
------------------------------------------------
        1 | 1.600000 | 0.800000 | 4.480000
        2 | 0.920000 | -0.760000 | 2.579200
        3 | 0.124000 | -1.708000 | 8.767168
        4 | -0.617200 | -1.536400 | 7.462511
        5 | -1.160840 | -0.460120 | 1.982681




---



Problem 3: **RMSProp for AtliQ's Dynamic Pricing Optimization**

AtliQ's AI model adjusts product prices dynamically. Implement the RMSProp optimizer for minimizing the function:

$$f(w) = w^2 + 5$$

Use:

* Initial weight (𝑤) = 5.0
* Learning rate (η) = 0.01
* Decay rate (β) = 0.9 (weight of the running average of squared gradients)


Run the optimization for 15 iterations and print the weight updates.

In [None]:
def gradient(w):
  """Return df/dw = 2w for f(w) = w^2 + 5."""
  grad = 2 * w
  return grad

w = 5.0
learning_rate = 0.01
beta = 0.9
epsilon = 1e-8  # keeps the division finite if the running average reaches zero
squared_gradient_average = 0.0 # initialized squared gradient average

for i in range(15):
  grad = gradient(w)
  # Exponentially weighted moving average of the squared gradient
  squared_gradient_average = beta * squared_gradient_average + (1 - beta) * grad**2
  # RMSProp step: scale the gradient by the root of that running average
  w = w - learning_rate * grad / (squared_gradient_average**0.5 + epsilon)
  print(f"Step {i+1}: w = {w:.4f}")



---



Problem 4: **Adam Optimizer for AtliQ AI Models**

AtliQ is training an AI model to recommend warehouse restocking schedules. Use the Adam optimizer to minimize the function:

$$f(x) = x^4 - 3x^3 + 2$$

Write code to:

* Initialize x = 3.0

Run the optimization for 19 iterations (t = 1 to 19) with:
* Learning rate (η) = 0.01
* Momentum Coefficients: β1 = 0.9, β2 = 0.09


In [None]:
def gradient(x):
  """Return df/dx = 4x^3 - 9x^2 for f(x) = x^4 - 3x^3 + 2."""
  return 4 * x**3 - 9 * x**2

x = 3.0
learning_rate = 0.01
# beta2 = 0.09 follows the problem statement (the commonly used Adam default is 0.999)
beta1, beta2 = 0.9, 0.09
epsilon = 1e-8  # keeps the division finite when the second moment is tiny
first_moment, second_moment = 0.0, 0.0 # initialized first and second moment

for t in range(1, 20):
  grad = gradient(x)
  m = beta1 * first_moment + (1 - beta1) * grad        # biased first moment (mean of gradients)
  v = beta2 * second_moment + (1 - beta2) * grad**2    # biased second moment (mean of squared gradients)
  # Bias correction: both moments start at 0, so early estimates are scaled up.
  # t starts at 1, which keeps the denominators non-zero.
  m_hat = m / (1 - beta1**t)
  v_hat = v / (1 - beta2**t)
  x = x - learning_rate * m_hat / (v_hat**0.5 + epsilon)
  first_moment, second_moment = m, v
  print(f"Step {t}: x = {x:.4f}")





---

