In [None]:
import numpy as np
import pandas as pd


In [6]:

def cost_function(X, Y, W):
    m = X.shape[0]
    y_pred = np.dot(X, W)
    cost = (1 / (2 * m)) * np.sum((y_pred - Y) ** 2)
    return cost


In [16]:
def gradient_descent(X, Y, W, alpha, iterations):
    """
    Perform gradient descent to optimize the parameters of a linear regression model.
    Parameters:
    X (numpy.ndarray): Feature matrix (m x n).
    Y (numpy.ndarray): Target vector (m x 1).
    W (numpy.ndarray): Initial guess for parameters (n x 1).
    alpha (float): Learning rate.
    iterations (int): Number of iterations for gradient descent.
    Returns:
    tuple: A tuple containing the final optimized parameters (W_update) and the history of cost values
    .
    W_update (numpy.ndarray): Updated parameters (n x 1).
    cost_history (list): History of cost values over iterations.
    """
    # Initialize cost history
    cost_history = [0] * iterations
    # Number of samples
    m = len(Y)
    for iteration in range(iterations):
        # Step 1: Hypothesis Values
        Y_pred = np.dot(X,W)
        # Step 2: Difference between Hypothesis and Actual Y
        loss = Y_pred - Y
        # Step 3: Gradient Calculation
        dw = (1 / m) * np.dot(X.T, loss)
        # Step 4: Updating Values of W using Gradient
        W_update = W - alpha * dw
        # Step 5: New Cost Value
        cost = cost_function(X, Y, W_update)
        cost_history[iteration] = cost
    return W_update, cost_history

In [17]:
from google.colab import files
uploaded = files.upload()

Saving student.csv to student.csv


In [19]:
def rmse(Y, Y_pred):
    """
    This Function calculates the Root Mean Square Error
    """
    rmse = np.sqrt(np.mean((Y - Y_pred) ** 2))
    return rmse


In [20]:
def r2(Y, Y_pred):
    """
    This Function calculates the R Squared Error
    """
    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)
    ss_res = np.sum((Y - Y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2


In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split

def main():
    # Step 1: Load the dataset
    data = pd.read_csv("student.csv")

    # Step 2: Features and Target
    X = data[['Math', 'Reading']].values
    Y = data['Writing'].values

    # Step 3: Train-Test Split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=42
    )

    # Step 4: Initialize parameters
    W = np.zeros(X_train.shape[1])
    alpha = 0.00001
    iterations = 1000

    # Step 5: Train Model
    W_optimal, cost_history = gradient_descent(
        X_train, Y_train, W, alpha, iterations
    )

    # Step 6: Predictions
    Y_pred = np.dot(X_test, W_optimal)

    # Step 7: Evaluation
    model_rmse = rmse(Y_test, Y_pred)
    model_r2 = r2(Y_test, Y_pred)

    # Step 8: Results
    print("Final Weights:", W_optimal)
    print("Cost History (First 10):", cost_history[:10])
    print("RMSE on Test Set:", model_rmse)
    print("R2 Score on Test Set:", model_r2)

if __name__ == "__main__":
    main()


Final Weights: [0.04797833 0.05020199]
Cost History (First 10): [np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755), np.float64(2013.165570783755)]
RMSE on Test Set: 63.36563528655014
R2 Score on Test Set: -15.04041794561271
