In [11]:
import pandas as pd
import numpy as np

def train_test_split(X, y, test_size=0.3, random_state=42):
    """Randomly partition ``X`` and ``y`` into training and test subsets.

    Parameters
    ----------
    X : array-like exposing ``.shape`` and integer-array indexing
        Sample matrix; rows are samples.
    y : array-like supporting integer-array indexing
        Targets, one per row of ``X``.
    test_size : float, default 0.3
        Fraction of the samples (between 0 and 1) placed in the test subset.
        The test count is ``int(n_samples * test_size)`` (truncated).
    random_state : int or None, default 42
        Seed for the shuffle. The same seed always yields the same split.

    Returns
    -------
    X_train, X_test, y_train, y_test

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths or ``test_size`` is
        outside ``[0, 1]``.
    """
    n_samples = X.shape[0]
    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples "
            f"(got {n_samples} and {len(y)})"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be within [0, 1], got {test_size!r}")

    # A private RandomState produces the same permutation as the legacy
    # np.random.seed(...) + np.random.shuffle(...) pair, but does not reset
    # the global NumPy RNG that other cells may depend on.
    rng = np.random.RandomState(random_state)
    indices = np.arange(n_samples)
    rng.shuffle(indices)

    test_split_size = int(n_samples * test_size)
    test_indices = indices[:test_split_size]
    train_indices = indices[test_split_size:]

    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return X_train, X_test, y_train, y_test

In [10]:
def cost_function(X, Y, W):
    """Return half the mean squared error of the linear model ``X · W``.

    Parameters
    ----------
    X : feature matrix, one row per sample.
    Y : observed targets, one per row of ``X``.
    W : weight vector applied to the columns of ``X``.

    Returns
    -------
    The scalar ``sum((X · W - Y)^2) / (2 * len(Y))``.
    """
    sample_count = len(Y)
    residuals = np.dot(X, W) - Y
    squared_error_total = np.sum(residuals ** 2)
    return (1 / (2 * sample_count)) * squared_error_total

In [12]:
def gradient_descent(X, Y, W, alpha, iterations):
    """Fit linear-model weights with batch gradient descent.

    Parameters
    ----------
    X : feature matrix, one row per sample.
    Y : observed targets, one per row of ``X``.
    W : starting weight vector (not modified in place; a new array is
        produced at every step).
    alpha : learning rate that scales each gradient step.
    iterations : number of update steps to perform.

    Returns
    -------
    (W, cost_history) : the weights after the last step and a list holding
        the value of ``cost_function`` after each step.
    """
    sample_count = len(Y)
    history = []

    for _step in range(iterations):
        # Gradient of the half-MSE cost with respect to the weights.
        residuals = np.dot(X, W) - Y
        gradient = (1 / sample_count) * np.dot(X.T, residuals)

        # Rebind rather than update in place so the caller's array is untouched.
        W = W - alpha * gradient

        history.append(cost_function(X, Y, W))

    return W, history


In [13]:
def rmse(Y, Y_pred):
    """Return the root mean squared error between ``Y`` and ``Y_pred``."""
    residuals = Y - Y_pred
    mean_squared_error = np.mean(residuals ** 2)
    return np.sqrt(mean_squared_error)

In [14]:
def r2(Y, Y_pred):
    """Return the coefficient of determination (R²) of ``Y_pred`` against ``Y``.

    Computed as ``1 - ss_res / ss_tot`` where ``ss_res`` is the sum of
    squared residuals and ``ss_tot`` is the sum of squared deviations of
    ``Y`` from its mean.

    When ``Y`` is constant, ``ss_tot`` is zero and the ratio is undefined.
    Instead of dividing by zero (which yields NaN or -inf plus a
    RuntimeWarning), this returns 1.0 for a perfect prediction and 0.0
    otherwise, the same convention scikit-learn's ``r2_score`` uses by default.
    """
    Y = np.asarray(Y)
    Y_pred = np.asarray(Y_pred)

    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)
    ss_res = np.sum((Y - Y_pred) ** 2)

    if ss_tot == 0:
        # Constant target: no variance to explain.
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)

In [15]:
def rmse(Y, Y_pred):
    """Return the root mean squared error between ``Y`` and ``Y_pred``.

    Note: an identical ``rmse`` is already defined in an earlier cell; this
    later definition is the one in effect once this cell has run.
    """
    squared_errors = np.square(Y - Y_pred)
    return np.sqrt(np.mean(squared_errors))


def r2(Y, Y_pred):
    """Return the coefficient of determination (R²) of ``Y_pred`` against ``Y``.

    Computed as ``1 - ss_res / ss_tot`` where ``ss_res`` is the sum of
    squared residuals and ``ss_tot`` is the sum of squared deviations of
    ``Y`` from its mean.

    When ``Y`` is constant, ``ss_tot`` is zero and the ratio is undefined.
    Instead of dividing by zero (which yields NaN or -inf plus a
    RuntimeWarning), this returns 1.0 for a perfect prediction and 0.0
    otherwise, the same convention scikit-learn's ``r2_score`` uses by default.

    Note: an ``r2`` function is already defined in an earlier cell; this
    later definition is the one in effect once this cell has run.
    """
    Y = np.asarray(Y)
    Y_pred = np.asarray(Y_pred)

    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)
    ss_res = np.sum((Y - Y_pred) ** 2)

    if ss_tot == 0:
        # Constant target: no variance to explain.
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)


def main(
    csv_path='student.csv',
    feature_cols=('Math', 'Reading'),
    target_col='Writing',
    test_size=0.2,
    random_state=42,
    alpha=0.0001,
    iterations=1000,
):
    """Train and evaluate a gradient-descent linear regression on a CSV file.

    Every parameter defaults to the value that used to be hard-coded, so a
    bare ``main()`` call runs the same experiment as before.

    Parameters
    ----------
    csv_path : path of the CSV file to load.
    feature_cols : column names used as model inputs.
    target_col : column name of the value to predict.
    test_size : fraction of rows held out for evaluation.
    random_state : seed passed to ``train_test_split``.
    alpha : learning rate for ``gradient_descent``.
    iterations : number of gradient-descent steps.

    The model has one weight per feature column and no intercept term.
    Results (weights, first ten cost values, test RMSE and R²) are printed;
    nothing is returned.
    """
    # Step 1: Load dataset
    data = pd.read_csv(csv_path)

    # Step 2: Features and Target
    X = data[list(feature_cols)].values
    Y = data[target_col].values

    # Step 3: Train-test split
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )

    # Step 4: Initialize parameters (all weights start at zero)
    W = np.zeros(X_train.shape[1])

    # Step 5: Gradient Descent
    W_optimal, cost_history = gradient_descent(
        X_train, Y_train, W, alpha, iterations
    )

    # Step 6: Predictions on the held-out rows
    Y_pred = np.dot(X_test, W_optimal)

    # Step 7: Evaluation
    model_rmse = rmse(Y_test, Y_pred)
    model_r2 = r2(Y_test, Y_pred)

    # Step 8: Results
    print("Final Weights:", W_optimal)
    print("Cost History (First 10):", cost_history[:10])
    print("RMSE on Test Set:", model_rmse)
    print("R-Squared on Test Set:", model_r2)


# Entry point: run the pipeline only when this module's __name__ is
# "__main__", so nothing is executed if the code is merely imported.
if __name__ == "__main__":
    main()

Final Weights: [0.0894932  0.89504864]
Cost History (First 10): [np.float64(17.813797177522098), np.float64(16.983149024878305), np.float64(16.925140245010397), np.float64(16.867870818076216), np.float64(16.811093513105355), np.float64(16.754804026075387), np.float64(16.69899816573971), np.float64(16.64367177688582), np.float64(16.588820740001896), np.float64(16.53444097097003)]
RMSE on Test Set: 4.792607360540954
R-Squared on Test Set: 0.908240340333986
