<a href="https://colab.research.google.com/github/raihanewubd/MLSummer24/blob/main/Gradient_DescentV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Libraries

In [None]:
!pip install ucimlrepo --quiet
!pip install seaborn --quiet
!pip install openpyxl==3.0.10 --quiet

# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression

# Load California Housing Dataset

In [None]:
# Fetch the California Housing data as pandas objects, then keep the feature
# columns and the target column in two separate DataFrames.
housing = fetch_california_housing(as_frame=True)
X = pd.DataFrame(housing.data, columns=housing.feature_names)
y = pd.DataFrame(housing.target, columns=['MedHouseVal'])

**EDA of the dataset**

In [None]:
# Print a preview of the first rows, followed by per-column summary statistics
for eda_view in (X.head(), X.describe()):
    print(eda_view)

In [None]:
# Draw seaborn's pairwise-relationship grid over all feature columns
sns.pairplot(data=X)
plt.show()

In [None]:
# Feature-to-feature correlations; printing the columns shows which variables
# the matrix covers.
correlation_matrix = X.corr()
print(correlation_matrix.columns)

# The target lives in the separate DataFrame `y`, so correlate every feature
# with it directly and list the results from highest to lowest.
correlations_with_target = X.corrwith(y['MedHouseVal'])
ranked_correlations = correlations_with_target.sort_values(ascending=False)
print(ranked_correlations)

**Split data into training and test**

In [None]:
# Hold out 20% of the rows for testing. Only the 'MedInc' column is used as the
# feature here; replace it with your selected features.
X_train, X_test, y_train, y_test = train_test_split(
    X.loc[:, ['MedInc']],
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Report the shape and container type of each split. The type is taken from the
# split being reported (previously every line printed type(X_train)).
for split_name, split in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_name} shape: {split.shape} type: {type(split)}")

# Simple Linear Regression with Gradient Descent

In [None]:
class SimpleLinearRegressionGD:
    """Single-feature linear regression trained with batch gradient descent.

    The model is ``y_hat = intercept + slope * x``. After ``fit`` the learned
    parameters are available as ``intercept`` and ``slope``, and the per-iteration
    values are kept in ``intercept_history``, ``slope_history`` and ``cost_history``.
    """

    def __init__(self):
        self.intercept = None
        self.slope = None
        self.intercept_history = []
        self.slope_history = []
        self.cost_history = []

    def fit(self, X, y, learning_rate=0.001, num_iterations=1000):
        """Fit the intercept and slope using the full dataset at every step.

        Args:
            X: Feature values (pandas DataFrame/Series or array-like); flattened to 1-D.
            y: Target values (pandas DataFrame/Series or array-like); flattened to 1-D.
            learning_rate: Step size applied to both parameter updates.
            num_iterations: Number of gradient-descent steps to run.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        print("Inside the fit method...")
        # np.asarray accepts pandas objects as well as plain arrays/lists.
        X = np.asarray(X, dtype=float).ravel()
        y = np.asarray(y, dtype=float).ravel()
        if len(y) == 0 or len(X) != len(y):
            raise ValueError("X and y must be non-empty and have the same number of samples")

        # Initialize parameters
        self.intercept = 0
        self.slope = 0

        # Start every fit with fresh histories so repeated calls do not append
        # to the trajectory of an earlier run.
        self.intercept_history = []
        self.slope_history = []
        self.cost_history = []

        m = len(y)

        for _ in range(num_iterations):
            # Residuals under the current (pre-update) parameters
            residuals = self.predict(X) - y
            dJ_dintercept = (1/m) * np.sum(residuals)
            dJ_dslope = (1/m) * np.sum(residuals * X)

            # Update parameters
            self.intercept -= learning_rate * dJ_dintercept
            self.slope -= learning_rate * dJ_dslope
            self.intercept_history.append(self.intercept)
            self.slope_history.append(self.slope)
            # The recorded cost is the MSE of the predictions made *before*
            # this iteration's update (same residuals as the gradient).
            self.cost_history.append(np.mean(residuals**2))

        print("Learned intercept:", self.intercept)
        print("Learned slope:", self.slope)

    def predict(self, X):
        """Return ``intercept + slope * X``; the result keeps the container type of ``X``."""
        return self.intercept + self.slope * X

**Training and predicting**

In [None]:
# Inspect the shapes and dtypes of the train/test splits before fitting
print(X_train.shape, y_train.shape, X_train.dtypes, y_train.dtypes)
print(X_test.shape, y_test.shape, X_test.dtypes, y_test.dtypes)

# Fit the batch gradient-descent model on the training split
model = SimpleLinearRegressionGD()
print("Starting the fit method...")
model.fit(X_train, y_train)
print("Fit method complete.")
print(f"self.intercept = {model.intercept}")
print(f"self.slope = {model.slope}")

# Evaluate on the held-out test split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`, which was
# deprecated and is rejected by recent scikit-learn releases.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print('y_pred:', y_pred.shape)
print('MSE:', mse)
print('RMSE:', rmse)
print('R-squared:', r2)

**Plot the learning curve**

In [None]:
# Assuming you have trained your model as 'model'
import plotly.graph_objects as go

# Overlay the per-iteration intercept, slope and cost recorded by `model`
# as three line traces on a single figure.
fig = go.Figure()
for trace_name, series in (
    ('Intercept', model.intercept_history),
    ('Slope', model.slope_history),
    ('Cost', model.cost_history),
):
    fig.add_trace(go.Scatter(y=series, mode='lines', name=trace_name))

fig.update_layout(title='Training History', xaxis_title='Iteration', yaxis_title='Value')

fig.show()

**Plot the search space of simple linear regression**

In [None]:
# Candidate slope and intercept values spanning the region to visualise
slope_range = np.linspace(-10, 10, 100)
intercept_range = np.linspace(-10, 10, 100)

# Every (slope, intercept) combination as a pair of 2-D grids
slope_grid, intercept_grid = np.meshgrid(slope_range, intercept_range)

# Pull the raw arrays out once instead of on every grid node
x_train_values = X_train.values
y_train_values = y_train.values

# Training-set MSE for each combination of slope and intercept
cost_grid = np.zeros_like(slope_grid)
for i in range(slope_grid.shape[0]):
    for j in range(slope_grid.shape[1]):
        # Deliberately not named `y_pred`: that notebook-level name holds the
        # model's test-set predictions and must not be overwritten here.
        grid_pred = slope_grid[i, j] * x_train_values + intercept_grid[i, j]
        cost_grid[i, j] = np.mean((grid_pred - y_train_values)**2)



In [None]:
import plotly.graph_objects as go
import numpy as np



# Render the cost grid as a 3-D surface over the (slope, intercept) plane
cost_surface = go.Surface(x=slope_grid, y=intercept_grid, z=cost_grid)
fig = go.Figure(data=[cost_surface])

# Title and axis labels
scene_axes = dict(xaxis_title='Slope',
                  yaxis_title='Intercept',
                  zaxis_title='Cost (MSE)')
fig.update_layout(title='Search Space for Single Linear Regression', scene=scene_axes)

# Display the plot
fig.show()

**Actual vs predicted plot**

In [None]:


import plotly.graph_objects as go

# Recompute the test-set predictions from the fitted model rather than reading
# the notebook-level `y_pred`, which is not guaranteed to still hold them (or
# to be a DataFrame with `.values`) by the time this cell runs.
test_features = X_test.values.flatten()
test_actual = y_test.values.flatten()
# np.asarray copes with predict() returning either a DataFrame or an ndarray
test_predicted = np.asarray(model.predict(X_test)).flatten()

fig = go.Figure()

# Add a scatter plot for the actual values
fig.add_trace(go.Scatter(x=test_features, y=test_actual, mode='markers', name='Actual', marker=dict(symbol='circle')))

# Add a scatter plot for the predicted values
fig.add_trace(go.Scatter(x=test_features, y=test_predicted, mode='markers', name='Predicted', marker=dict(symbol='diamond')))

# Update the layout
fig.update_layout(title='Actual vs Predicted Values',
                  xaxis_title='MedInc',
                  yaxis_title='MedHouseVal')

# Display the plot
fig.show()

**Plot Intercept, Slope and Cost**

In [None]:
# One marker per training iteration at (intercept, slope, cost); the marker
# colour also encodes the cost.
marker_style = {
    'size': 5,
    'color': model.cost_history,
    'colorscale': 'Viridis',
    'opacity': 0.8,
}
trajectory = go.Scatter3d(
    x=model.intercept_history,
    y=model.slope_history,
    z=model.cost_history,
    mode='markers',
    marker=marker_style,
)
fig = go.Figure(data=[trajectory])

scene_axes = {
    'xaxis_title': 'Intercept History',
    'yaxis_title': 'Slope History',
    'zaxis_title': 'Cost History',
}
fig.update_layout(title='3D Scatter Plot', scene=scene_axes)

fig.show()

**Plot Intercept, Slope and Cost with respect to search space**

In [None]:
import numpy as np

# Span the grid over exactly the intercept/slope region visited during training
intercept_range = np.linspace(min(model.intercept_history), max(model.intercept_history), 50)
slope_range = np.linspace(min(model.slope_history), max(model.slope_history), 50)
intercept_grid, slope_grid = np.meshgrid(intercept_range, slope_range)

# Training-set MSE at every (intercept, slope) node of the grid
cost_grid = np.zeros(intercept_grid.shape)
for node in np.ndindex(*intercept_grid.shape):
    intercept = intercept_grid[node]
    slope = slope_grid[node]
    predictions = intercept + slope * X_train.values
    cost_grid[node] = np.mean((predictions - y_train.values) ** 2)

# Overlay the semi-transparent cost surface on the existing trajectory figure
search_surface = go.Surface(
    x=intercept_grid,
    y=slope_grid,
    z=cost_grid,
    colorscale='Viridis',
    opacity=0.5,
    showscale=False,
)
fig.add_trace(search_surface)

fig.show()

# Simple Linear Regression with Stochastic Gradient Descent

In [None]:
class SimpleLinearRegressionSGD:
    """Single-feature linear regression trained with stochastic gradient descent.

    The model is ``y_hat = intercept + slope * x``. Each iteration updates the
    parameters from one randomly drawn sample. After ``fit`` the per-iteration
    values are kept in ``intercept_history``, ``slope_history`` and ``cost_history``.
    """

    def __init__(self):
        self.intercept = None
        self.slope = None
        self.intercept_history = []
        self.slope_history = []
        self.cost_history = []

    def fit(self, X, y, learning_rate=0.001, num_iterations=1000, random_state=None):
        """Fit the intercept and slope, one random sample per step.

        Args:
            X: Feature values (pandas DataFrame/Series or array-like); flattened to 1-D.
            y: Target values (pandas DataFrame/Series or array-like); flattened to 1-D.
            learning_rate: Step size applied to both parameter updates.
            num_iterations: Number of single-sample updates to run.
            random_state: Optional seed for the sample selection. When ``None``
                the global NumPy random state is used, as before.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        print("Inside the fit method...")
        X = np.asarray(X, dtype=float).ravel()
        y = np.asarray(y, dtype=float).ravel()
        if len(y) == 0 or len(X) != len(y):
            raise ValueError("X and y must be non-empty and have the same number of samples")

        # Initialize parameters
        self.intercept = 0
        self.slope = 0

        # Start every fit with fresh histories so repeated calls do not append
        # to the trajectory of an earlier run.
        self.intercept_history = []
        self.slope_history = []
        self.cost_history = []

        m = len(y)
        # A dedicated generator makes a seeded run reproducible without
        # touching the global NumPy random state.
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        for _ in range(num_iterations):
            # Draw one sample uniformly at random (with replacement)
            random_index = rng.randint(0, m)
            xi = X[random_index]
            yi = y[random_index]

            # Gradient of the squared error on that single sample
            residual_i = self.predict(xi) - yi
            dJ_dintercept = 2 * residual_i
            dJ_dslope = 2 * residual_i * xi

            # Update parameters
            self.intercept -= learning_rate * dJ_dintercept
            self.slope -= learning_rate * dJ_dslope

            self.intercept_history.append(self.intercept)
            self.slope_history.append(self.slope)

            # Cost is the MSE over the entire dataset after this update
            cost = np.mean((self.predict(X) - y)**2)
            self.cost_history.append(cost)

        print("Learned intercept:", self.intercept)
        print("Learned slope:", self.slope)

    def predict(self, X):
        """Return ``intercept + slope * X`` with ``X`` converted to a NumPy array."""
        return self.intercept + self.slope * np.asarray(X)

**Training and predicting**

In [None]:
# Inspect the shapes and dtypes of the train/test splits before fitting
print(X_train.shape, y_train.shape, X_train.dtypes, y_train.dtypes)
print(X_test.shape, y_test.shape, X_test.dtypes, y_test.dtypes)

# Fit the stochastic gradient-descent model on the training split
model = SimpleLinearRegressionSGD()
print("Starting the fit method...")
model.fit(X_train, y_train)
print("Fit method complete.")
print(f"self.intercept = {model.intercept}")
print(f"self.slope = {model.slope}")

# Evaluate on the held-out test split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`, which was
# deprecated and is rejected by recent scikit-learn releases.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print('y_pred:', y_pred.shape)
print('MSE:', mse)
print('RMSE:', rmse)
print('R-squared:', r2)

**Plot Intercept, Slope and Cost**

In [None]:
# One marker per training iteration at (intercept, slope, cost); the marker
# colour also encodes the cost.
marker_style = {
    'size': 5,
    'color': model.cost_history,
    'colorscale': 'Viridis',
    'opacity': 0.8,
}
trajectory = go.Scatter3d(
    x=model.intercept_history,
    y=model.slope_history,
    z=model.cost_history,
    mode='markers',
    marker=marker_style,
)
fig = go.Figure(data=[trajectory])

scene_axes = {
    'xaxis_title': 'Intercept History',
    'yaxis_title': 'Slope History',
    'zaxis_title': 'Cost History',
}
fig.update_layout(title='3D Scatter Plot', scene=scene_axes)

fig.show()

**Plot Intercept, Slope and Cost with respect to search space**

In [None]:
import numpy as np

# Span the grid over exactly the intercept/slope region visited during training
intercept_range = np.linspace(min(model.intercept_history), max(model.intercept_history), 50)
slope_range = np.linspace(min(model.slope_history), max(model.slope_history), 50)
intercept_grid, slope_grid = np.meshgrid(intercept_range, slope_range)

# Training-set MSE at every (intercept, slope) node of the grid
cost_grid = np.zeros(intercept_grid.shape)
for node in np.ndindex(*intercept_grid.shape):
    intercept = intercept_grid[node]
    slope = slope_grid[node]
    predictions = intercept + slope * X_train.values
    cost_grid[node] = np.mean((predictions - y_train.values) ** 2)

# Overlay the semi-transparent cost surface on the existing trajectory figure
search_surface = go.Surface(
    x=intercept_grid,
    y=slope_grid,
    z=cost_grid,
    colorscale='Viridis',
    opacity=0.5,
    showscale=False,
)
fig.add_trace(search_surface)

fig.show()

# Simple Linear Regression with Adam

In [None]:
import numpy as np

class SimpleLinearRegressionAdam:
    """Single-feature linear regression trained with the Adam update rule.

    The model is ``y_hat = intercept + slope * x``. Each iteration computes the
    gradient on one randomly drawn sample and applies a bias-corrected Adam step.
    After ``fit`` the per-iteration values are kept in ``intercept_history``,
    ``slope_history`` and ``cost_history``.
    """

    def __init__(self):
        self.intercept = None
        self.slope = None
        self.intercept_history = []
        self.slope_history = []
        self.cost_history = []

    def fit(self, X, y, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
            num_iterations=1000, random_state=None):
        """Fit the intercept and slope with single-sample Adam updates.

        Args:
            X: Feature values (pandas DataFrame/Series or array-like); flattened to 1-D.
            y: Target values (pandas DataFrame/Series or array-like); flattened to 1-D.
            learning_rate: Step size of the Adam update.
            beta1: Decay rate of the first-moment (mean of gradients) estimate.
            beta2: Decay rate of the second-moment (mean of squared gradients) estimate.
            epsilon: Small constant added to the denominator of the update.
            num_iterations: Number of single-sample updates to run.
            random_state: Optional seed for the sample selection. When ``None``
                the global NumPy random state is used, as before.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        print("Inside the fit method...")
        X = np.asarray(X, dtype=float).ravel()
        y = np.asarray(y, dtype=float).ravel()
        if len(y) == 0 or len(X) != len(y):
            raise ValueError("X and y must be non-empty and have the same number of samples")

        # Initialize parameters
        self.intercept = 0
        self.slope = 0

        # Start every fit with fresh histories so repeated calls do not append
        # to the trajectory of an earlier run.
        self.intercept_history = []
        self.slope_history = []
        self.cost_history = []

        m = len(y)
        # A dedicated generator makes a seeded run reproducible without
        # touching the global NumPy random state.
        rng = np.random if random_state is None else np.random.RandomState(random_state)

        # Adam state: v_* are first-moment estimates, s_* second-moment estimates
        v_intercept = 0
        v_slope = 0
        s_intercept = 0
        s_slope = 0

        # t starts at 1 so the bias-correction denominators are never zero
        for t in range(1, num_iterations + 1):
            # Draw one sample uniformly at random (with replacement)
            random_index = rng.randint(0, m)
            xi = X[random_index]
            yi = y[random_index]

            # Gradient of the squared error on that single sample
            residual_i = self.predict(xi) - yi
            dJ_dintercept = 2 * residual_i
            dJ_dslope = 2 * residual_i * xi

            # Update biased first moment estimates
            v_intercept = beta1 * v_intercept + (1 - beta1) * dJ_dintercept
            v_slope = beta1 * v_slope + (1 - beta1) * dJ_dslope

            # Update biased second raw moment estimates
            s_intercept = beta2 * s_intercept + (1 - beta2) * dJ_dintercept**2
            s_slope = beta2 * s_slope + (1 - beta2) * dJ_dslope**2

            # Bias-corrected moment estimates
            v_intercept_corrected = v_intercept / (1 - beta1**t)
            v_slope_corrected = v_slope / (1 - beta1**t)
            s_intercept_corrected = s_intercept / (1 - beta2**t)
            s_slope_corrected = s_slope / (1 - beta2**t)

            # Update parameters using Adam
            self.intercept -= learning_rate * v_intercept_corrected / (np.sqrt(s_intercept_corrected) + epsilon)
            self.slope -= learning_rate * v_slope_corrected / (np.sqrt(s_slope_corrected) + epsilon)

            self.intercept_history.append(self.intercept)
            self.slope_history.append(self.slope)

            # Cost is the MSE over the entire dataset after this update
            cost = np.mean((self.predict(X) - y)**2)
            self.cost_history.append(cost)

        print("Learned intercept:", self.intercept)
        print("Learned slope:", self.slope)

    def predict(self, X):
        """Return ``intercept + slope * X`` with ``X`` converted to a NumPy array."""
        return self.intercept + self.slope * np.asarray(X)

**Training and predicting**

In [None]:
# Inspect the shapes and dtypes of the train/test splits before fitting
print(X_train.shape, y_train.shape, X_train.dtypes, y_train.dtypes)
print(X_test.shape, y_test.shape, X_test.dtypes, y_test.dtypes)

# Fit the Adam-based model on the training split
model = SimpleLinearRegressionAdam()
print("Starting the fit method...")
model.fit(X_train, y_train)
print("Fit method complete.")
print(f"self.intercept = {model.intercept}")
print(f"self.slope = {model.slope}")

# Evaluate on the held-out test split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`, which was
# deprecated and is rejected by recent scikit-learn releases.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print('y_pred:', y_pred.shape)
print('MSE:', mse)
print('RMSE:', rmse)
print('R-squared:', r2)

**Plot Intercept, Slope and Cost**

In [None]:
# One marker per training iteration at (intercept, slope, cost); the marker
# colour also encodes the cost.
marker_style = {
    'size': 5,
    'color': model.cost_history,
    'colorscale': 'Viridis',
    'opacity': 0.8,
}
trajectory = go.Scatter3d(
    x=model.intercept_history,
    y=model.slope_history,
    z=model.cost_history,
    mode='markers',
    marker=marker_style,
)
fig = go.Figure(data=[trajectory])

scene_axes = {
    'xaxis_title': 'Intercept History',
    'yaxis_title': 'Slope History',
    'zaxis_title': 'Cost History',
}
fig.update_layout(title='3D Scatter Plot', scene=scene_axes)

fig.show()

**Plot Intercept, Slope and Cost with respect to search space**

In [None]:
# Span the grid over exactly the intercept/slope region visited during training
intercept_range = np.linspace(min(model.intercept_history), max(model.intercept_history), 50)
slope_range = np.linspace(min(model.slope_history), max(model.slope_history), 50)
intercept_grid, slope_grid = np.meshgrid(intercept_range, slope_range)

# Training-set MSE at every (intercept, slope) node of the grid
cost_grid = np.zeros(intercept_grid.shape)
for node in np.ndindex(*intercept_grid.shape):
    intercept = intercept_grid[node]
    slope = slope_grid[node]
    predictions = intercept + slope * X_train.values
    cost_grid[node] = np.mean((predictions - y_train.values) ** 2)

# Overlay the semi-transparent cost surface on the existing trajectory figure
search_surface = go.Surface(
    x=intercept_grid,
    y=slope_grid,
    z=cost_grid,
    colorscale='Viridis',
    opacity=0.5,
    showscale=False,
)
fig.add_trace(search_surface)

fig.show()

# Basic ANN

In [None]:
import numpy as np

class SimpleANN:
    """Feed-forward network with one sigmoid hidden layer and a sigmoid output layer.

    Trained with full-batch gradient descent on the squared error. Every weight
    update is recorded in ``weight_history`` (copies of both weight matrices) and
    the mean squared error after each update in ``cost_history``.
    """

    def __init__(self, input_size, hidden_size, output_size, random_state=None):
        """Create the network with random weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of neurons in the hidden layer.
            output_size: Number of output neurons.
            random_state: Optional seed for the weight initialisation. When
                ``None`` the global NumPy random state is used, as before.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.weight_history = []
        self.cost_history = []

        rng = np.random if random_state is None else np.random.RandomState(random_state)

        # Initialize weights with standard-normal values, biases with zeros
        self.weights1 = rng.randn(self.input_size, self.hidden_size)
        self.bias1 = np.zeros((1, self.hidden_size))
        self.weights2 = rng.randn(self.hidden_size, self.output_size)
        self.bias2 = np.zeros((1, self.output_size))

    def forward(self, X):
        """Run forward propagation, cache the layer values on ``self`` and return the output."""
        self.hidden_layer_input = np.dot(X, self.weights1) + self.bias1
        self.hidden_layer_output = self.sigmoid(self.hidden_layer_input)
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights2) + self.bias2
        self.predicted_output = self.sigmoid(self.output_layer_input)
        return self.predicted_output

    def sigmoid(self, z):
        """Sigmoid activation ``1 / (1 + exp(-z))``.

        Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so that
        large negative ``z`` does not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def sigmoid_derivative(self, z):
        """Derivative of the sigmoid with respect to its input ``z``."""
        activation = self.sigmoid(z)
        return activation * (1 - activation)

    def backward(self, X, y, learning_rate):
        """Backpropagate the error of the last ``forward`` call and update the parameters.

        Must be called after ``forward(X)`` on the same ``X``: it reads the
        layer values cached by that call.
        """
        error = y - self.predicted_output
        # The cached activations are sigmoid(layer_input), so a * (1 - a) is
        # the sigmoid derivative without recomputing the exponentials.
        d_predicted_output = error * self.predicted_output * (1 - self.predicted_output)

        # Hidden-layer error uses weights2 as they were before this update
        error_hidden_layer = d_predicted_output.dot(self.weights2.T)
        d_hidden_layer = error_hidden_layer * self.hidden_layer_output * (1 - self.hidden_layer_output)

        # `error` is (target - prediction), so adding these terms descends the cost
        self.weights2 += self.hidden_layer_output.T.dot(d_predicted_output) * learning_rate
        self.bias2 += np.sum(d_predicted_output, axis=0, keepdims=True) * learning_rate
        self.weights1 += X.T.dot(d_hidden_layer) * learning_rate
        self.bias1 += np.sum(d_hidden_layer, axis=0, keepdims=True) * learning_rate
        self.weight_history.append([self.weights1.copy(), self.weights2.copy()])

    def train(self, X, y, learning_rate, num_iterations):
        """Run ``num_iterations`` full-batch updates, recording the MSE after each one."""
        # One forward pass primes the cache; afterwards the pass that measures
        # the cost also serves as the forward pass of the next iteration.
        self.forward(X)
        for _ in range(num_iterations):
            self.backward(X, y, learning_rate)
            predictions = self.forward(X)
            cost = np.mean((predictions - y) ** 2)
            self.cost_history.append(cost)

# Demo on a tiny hand-made dataset: 2 inputs -> 5 hidden neurons -> 1 output.
# NOTE: this rebinds the notebook-level names `model`, `X` and `y`.
model = SimpleANN(2, 5, 1)

# Two samples with two features each, and one target value per sample
X = np.array([
    [0.5, 0.3],
    [0.2, 0.7],
])
y = np.array([
    [0.8],
    [0.3],
])

# 1000 full-batch updates with a learning rate of 0.1
model.train(X, y, 0.1, 1000)

# Network output for the training samples after training
predictions = model.forward(X)
print(predictions)

In [None]:
# Select two weights to visualize (adjust indices as needed)
weight1_idx = (0, 0)  # First weight in the first layer
weight2_idx = (0, 0)  # First weight in the second layer

# Create a grid of weight values
weight1_values = np.linspace(-10, 10, 30)
weight2_values = np.linspace(-10, 10, 30)
weight1_grid, weight2_grid = np.meshgrid(weight1_values, weight2_values)

# Remember the current values of the two weights once, read from the model
# itself so the sweep does not depend on `weight_history` being non-empty.
original_weight1 = model.weights1[weight1_idx]
original_weight2 = model.weights2[weight2_idx]

# Evaluate cost for each weight combination
cost_grid = np.zeros_like(weight1_grid)
try:
    for i in range(weight1_grid.shape[0]):
        for j in range(weight1_grid.shape[1]):
            # Temporarily overwrite the selected weights; all others stay fixed
            model.weights1[weight1_idx] = weight1_grid[i, j]
            model.weights2[weight2_idx] = weight2_grid[i, j]

            # Mean squared error of the network at this grid point
            cost_grid[i, j] = np.mean((model.forward(X) - y) ** 2)
finally:
    # Put the original weights back even if the sweep fails part-way (important!)
    model.weights1[weight1_idx] = original_weight1
    model.weights2[weight2_idx] = original_weight2

# Create the 3D surface plot
fig = go.Figure(data=[go.Surface(z=cost_grid, x=weight1_grid, y=weight2_grid)])
fig.update_layout(title='Search Space Visualization', scene=dict(xaxis_title='Weight 1', yaxis_title='Weight 2', zaxis_title='Cost'))
fig.show()

# Lab Task


1.   Predict the California house price using a basic ANN. The basic ANN will have three layers: 1) an input layer with one input, 2) a hidden layer with N neurons that uses the sigmoid activation function, and 3) an output layer with one output. Use the three different gradient descent algorithms (Gradient Descent, Stochastic Gradient Descent, and the Adam Optimizer) for backpropagation.
2.   Plot the Actual vs Predicted House Price
3.   Plot the learning curve
4.   Plot the weight and cost at each iteration in the search space for
      *   Gradient Descent
      *   Stochastic Gradient Descent
      *   Adam Optimizer

**Submission**

1.   Present the code to the instructor.
2.   Prepare a report in which you analyze the three different gradient descent algorithms and their performance.
3. Submit the code and the report in the classroom as a zip file.




