<a href="https://colab.research.google.com/github/np03cs4a240249-ctrl/ai/blob/main/week5workshop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [72]:
def cost_function(X, Y, W):
    """ Parameters:
    This function finds the Mean Square Error.
    Input parameters:
    X: Feature Matrix
    Y: Target Matrix
    W: Weight Matrix
    Output Parameters:
    cost: accumulated mean square error.
    """
    # Your code here:
    m=len(Y)

    j = np.sum((X.dot(W)-Y)** 2)/(2*m)

    return j

In [73]:
import numpy as np

# Test data
X_test = np.array([[1, 2], [3, 4], [5, 6]])
Y_test = np.array([3, 7, 11])
W_test = np.array([1, 1])

# Compute cost
cost = cost_function(X_test, Y_test, W_test)

# Check the cost
if np.isclose(cost, 0):
    print("Proceed Further")
else:
    print("Something went wrong: Reimplement the cost function")
    print("Cost function output:", cost)


Proceed Further


In [74]:
from google.colab import drive
drive.mount('/content/drive')




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [75]:
def gradient_descent(X, Y, W, alpha, iterations):
    """
    Perform gradient descent to optimize the parameters of a linear regression model.
    Parameters:
    X (numpy.ndarray): Feature matrix (m x n).
    Y (numpy.ndarray): Target vector (m x 1).
    W (numpy.ndarray): Initial guess for parameters (n x 1).
    alpha (float): Learning rate.
    iterations (int): Number of iterations for gradient descent.
    Returns:


    tuple: A tuple containing the final optimized parameters (W) and the history of cost values
    .
    W (numpy.ndarray): Updated parameters (n x 1).
    cost_history (list): History of cost values over iterations.
    """
    # Initialize cost history
    cost_history = [0] * iterations

    # Number of samples
    m = len(Y)

    for iteration in range(iterations):
        # Step 1: Hypothesis Values
        Y_pred = X.dot(W)

        # Step 2: Difference between Hypothesis and Actual Y
        loss = Y_pred - Y

        # Step 3: Gradient Calculation
        dw = (X.T.dot(loss)) / m

        # Step 4: Updating Values of W using Gradient
        W = W - alpha * dw

        # Step 5: New Cost Value
        cost = cost_function(X, Y, W)
        cost_history[iteration] = cost

    return W, cost_history

In [76]:
import numpy as np

# Generate random test data
np.random.seed(0) # For reproducibility
X = np.random.rand(100, 3) # 100 samples, 3 features
Y = np.random.rand(100)
W = np.random.rand(3) # Initial guess for parameters
# Set hyperparameters
alpha = 0.01
iterations = 1000
# Test the gradient_descent function
final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)
# Print the final parameters and cost history
print("Final Parameters:", final_params)
print("Cost History:", cost_history)

Final Parameters: [0.20551667 0.54295081 0.10388027]
Cost History: [np.float64(0.10711197094660153), np.float64(0.10634880599939901), np.float64(0.10559826315680616), np.float64(0.10486012948320558), np.float64(0.1041341956428534), np.float64(0.10342025583900626), np.float64(0.1027181077540776), np.float64(0.1020275524908062), np.float64(0.10134839451441931), np.float64(0.1006804415957737), np.float64(0.1000235047554587), np.float64(0.09937739820884377), np.float64(0.09874193931205609), np.float64(0.09811694850887098), np.float64(0.09750224927850094), np.float64(0.0968976680842672), np.float64(0.09630303432313951), np.float64(0.09571818027612913), np.float64(0.09514294105952065), np.float64(0.09457715457692842), np.float64(0.09402066147216397), np.float64(0.09347330508290015), np.float64(0.09293493139511913), np.float64(0.09240538899833017), np.float64(0.09188452904154543), np.float64(0.0913722051899995), np.float64(0.09086827358260123), np.float64(0.09037259279010502), np.float64(0.08

In [77]:
def rmse(Y, Y_pred):
    """
    This Function calculates the Root Mean Squres.
    Input Arguments:
    Y: Array of actual(Target) Dependent Varaibles.
    Y_pred: Array of predeicted Dependent Varaibles.
    Output Arguments:
    rmse: Root Mean Square.
    """
    # Your Code Here
    rmse = np.sqrt(np.mean ((Y - Y_pred )** 2))
    return rmse

In [78]:
import numpy as np

def r2(Y, Y_pred):
    """
    This function calculates the R Squared score.

    Input Arguments:
    Y: Array of actual (target) dependent variables
    Y_pred: Array of predicted dependent variables

    Output:
    r2: R Squared score
    """
    mean_y = np.mean(Y)

    # Total sum of squares
    ss_tot = np.sum((Y - mean_y) ** 2)

    # Residual sum of squares
    ss_res = np.sum((Y - Y_pred) ** 2)

    # R-squared calculation
    r2 = 1 - (ss_res / ss_tot)

    return r2


In [79]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Main Function
def main():
    # Step 1: Load the dataset
    data = pd.read_csv('/content/drive/MyDrive/Bishal AI Nasty/student.csv')

    # Step 2: Split the data into features (X) and target (Y)
    X = data[['Math', 'Reading']].values   # Features
    Y = data['Writing'].values             # Target

    # Step 3: Split the data into training and test sets (80% train, 20% test)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=42
    )

    # Step 4: Initialize weights, learning rate, and iterations
    W = np.zeros(X_train.shape[1])  # Initialize weights
    alpha = 0.00001                 # Learning rate
    iterations = 1000               # Number of iterations

    # Step 5: Perform Gradient Descent
    W_optimal, cost_history = gradient_descent(
        X_train, Y_train, W, alpha, iterations
    )

    # Step 6: Make predictions on the test set
    Y_pred = np.dot(X_test, W_optimal)

    # Step 7: Evaluate the model
    model_rmse = rmse(Y_test, Y_pred)
    model_r2 = r2(Y_test, Y_pred)

    # Step 8: Output the results
    print("Final Weights:", W_optimal)
    print("Cost History (First 10 iterations):", cost_history[:10])
    print("RMSE on Test Set:", model_rmse)
    print("R-Squared on Test Set:", model_r2)


# Execute the main function
if __name__ == "__main__":
    main()

Final Weights: [0.34811659 0.64614558]
Cost History (First 10 iterations): [np.float64(2013.165570783755), np.float64(1640.286832599692), np.float64(1337.0619994901585), np.float64(1090.4794892850578), np.float64(889.9583270083234), np.float64(726.8940993009545), np.float64(594.2897260808594), np.float64(486.4552052951635), np.float64(398.7634463599484), np.float64(327.4517147324688)]
RMSE on Test Set: 5.2798239764188635
R-Squared on Test Set: 0.8886354462786421
