Name: Soumya Dayal \
Gradient descent implementation on synthetic polynomial data, and logistic regression on the Iris dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# --- Synthetic regression data ---
# Fix the global NumPy seed so every run draws the same samples.
np.random.seed(42)

# 200 inputs sampled uniformly from the interval [-10, 10)
x = np.random.uniform(low=-10, high=10, size=200)

# Target polynomial given by the assignment: y = 5x^4 + 2x^3 + 9x + 12
y = 5*x**4 + 2*x**3 + 9*x + 12

# Non-bias polynomial features, one column per power of x: x^4, x^3, x^2, x
# (x^2 is included as a feature even though the target has no x^2 term).
poly_no_bias = np.column_stack((x**4, x**3, x**2, x))

# Bias feature: a constant 1 for every sample, same shape and dtype as x.
bias = np.ones_like(x)

In [None]:
# Standardize every polynomial column (zero mean, unit population std) so the
# very different magnitudes of x^4 ... x do not stall gradient descent.
mean = poly_no_bias.mean(axis=0)
stds = poly_no_bias.std(axis=0, ddof=0)
stds = np.where(stds == 0, 1.0, stds)   # a constant column would otherwise divide by zero
poly_scaled = (poly_no_bias - mean) / stds

# Design matrix: the unscaled bias column first, then the scaled features.
x_matrix = np.column_stack((bias, poly_scaled))

# Quick sanity check of the result
print('Shape of x matrix:', x_matrix.shape)
print("first 3 rows: \n", x_matrix[:3])

Shape of x matrix: (200, 5)
first 3 rows: 
 [[ 1.00000000e+00 -7.81155155e-01  1.69306841e-05 -9.77379746e-01
  -3.72139705e-01]
 [ 1.00000000e+00  1.76866569e+00  1.98033190e+00  1.60087943e+00
   1.58661523e+00]
 [ 1.00000000e+00 -6.16492142e-01  3.06184935e-01 -4.53468178e-01
   8.43056075e-01]]


In [None]:
# Error metric: root mean squared error (RMSE).
# RMSE = sqrt( (1/n) * sum_i (y_i - y_hat_i)^2 ), i.e. the square root of the MSE.
def compute_rmse(y_true, y_hat):
  """Return the root mean squared error between targets and predictions.

  Both inputs are converted to float arrays before subtracting. This lets
  plain Python lists be passed, and keeps integer label arrays safe:
  unsigned integers would otherwise wrap around (0 - 1 == 255 for uint8)
  and boolean arrays would raise on the `-` operator.

  Args:
    y_true: array-like of observed values.
    y_hat: array-like of predicted values, broadcastable against y_true.

  Returns:
    The RMSE as a NumPy float.
  """
  y_true = np.asarray(y_true, dtype=float)
  y_hat = np.asarray(y_hat, dtype=float)
  residual = y_true - y_hat
  return np.sqrt(np.mean(residual**2))

In [None]:
# Batch gradient descent for a linear model trained on the MSE loss
def gradient_descent(x, y, lr=0.001, iterations=1000, verbose=False):
  """Run batch gradient descent on the mean-squared-error loss and track RMSE.

  Args:
    x: 2-D feature matrix; its shape is unpacked as (n_samples, n_features).
    y: target values, subtracted element-wise from the predictions x . theta.
    lr: step size applied to the gradient on every update.
    iterations: maximum number of update steps.
    verbose: if True, print a message when the loop stops early.

  Returns:
    List of RMSE values, one per executed iteration. Each value is measured on
    the predictions made at the start of that iteration (before its update).
    A non-finite RMSE is still recorded, after which the loop stops.
    The fitted weights themselves are not returned.
  """
  n_samples, n_features = x.shape
  grad_scale = 2.0 / n_samples

  # Start from all-zero weights: large random starts can produce huge first
  # predictions, hence huge residuals/gradients and eventually NaN RMSE values.
  theta = np.zeros(n_features, dtype=float)
  rmse_history = []

  for i in range(iterations):
    y_hat = np.dot(x, theta)                      # current predictions
    error = y_hat - y                             # residual vector
    gradients = grad_scale * np.dot(x.T, error)   # gradient of MSE: (2/n) X^T (X theta - y)
    theta -= lr * gradients

    # Record the error of this iteration's (pre-update) predictions.
    rmse = compute_rmse(y, y_hat)
    rmse_history.append(rmse)

    if np.isfinite(rmse):
      continue

    # The run diverged (inf/NaN); further updates are pointless.
    if verbose:
      print(f"Stopped early at iteration {i} (numerical instability).")
    break

  return rmse_history

In [None]:
# Hyperparameters and submission:
# train once per candidate learning rate and report the last recorded RMSE.

learning_rates = [0.001, 0.0005, 0.01]
for lr in learning_rates:
  rmse_history = gradient_descent(
      x_matrix,
      y,
      lr=lr,
      iterations=2000,
      verbose=False,
  )
  print(f"Learning Rate: {lr}; RMSE: {rmse_history[-1]:.4f}")

Learning Rate: 0.001; RMSE: 1555.5013
Learning Rate: 0.0005; RMSE: 2185.4615
Learning Rate: 0.01; RMSE: 377.8254


In [None]:
# Part 2 of the assignment starts here: binary classification on Iris
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset and keep only the first two classes
# (0 = setosa, 1 = versicolor), which makes the problem binary.
iris = load_iris()
mask = iris.target < 2
X = iris.data[mask]
Y = iris.target[mask]

# Hold out 20% of the samples for testing; the seed fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then reuse the same
# statistics to transform the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Sanity check of the resulting shapes and labels
print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)
print("Classes: ", np.unique(Y_train))

X_train shape: (80, 4)
Y_train shape: (80,)
X_test shape: (20, 4)
Y_test shape: (20,)
Classes:  [0 1]


In [None]:
# Model training: logistic regression on the scaled training split
from sklearn.linear_model import LogisticRegression

# Instantiate with the library defaults, then fit on features and labels.
logistic_regression = LogisticRegression()
logistic_regression.fit(X=X_train, y=Y_train)

In [None]:
# Predict a class label for every held-out sample and show the
# predictions next to the true labels for a visual comparison.
y_hat = logistic_regression.predict(X=X_test)
print("Predicted classes: ", y_hat)
print("Actual classes:    ", Y_test)

Predicted classes:  [1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]
Actual classes:     [1 1 1 0 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0]


In [None]:
# Performance metrics: confusion-matrix counts with class 1 as "positive".
# Each count is the number of test samples matching one (actual, predicted) pair.

TP = ((Y_test == 1) & (y_hat == 1)).sum()   # actual = 1, predicted = 1
TN = ((Y_test == 0) & (y_hat == 0)).sum()   # actual = 0, predicted = 0
FP = ((Y_test == 0) & (y_hat == 1)).sum()   # actual = 0, predicted = 1
FN = ((Y_test == 1) & (y_hat == 0)).sum()   # actual = 1, predicted = 0

# Report True Positives (TP), True Negatives (TN), False Positives (FP)
# and False Negatives (FN) derived from the actual and predicted classes.
print("True Positives (TP):", TP)
print("True Negatives (TN):", TN)
print("False Positives (FP):", FP)
print("False Negatives (FN):", FN)

True Positives (TP): 8
True Negatives (TN): 12
False Positives (FP): 0
False Negatives (FN): 0


In [None]:
# Root Mean Squared Error (RMSE) between the true and predicted class labels,
# computed with the compute_rmse helper defined earlier in the notebook.

RMSE = compute_rmse(y_true=Y_test, y_hat=y_hat)
print("Root Mean Squared Error (RMSE):", RMSE)

Root Mean Squared Error (RMSE): 0.0
