In [1]:
import pandas as pd
import numpy as np
data = pd.read_csv("student.csv")

In [3]:
print("Top 5 rows of dataset:")
print(data.head())
print("\nBottom 5 rows of dataset:")
print(data.tail())

Top 5 rows of dataset:
   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62

Bottom 5 rows of dataset:
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72


In [5]:
print("Dataset Information:")
data.info()

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Math     1000 non-null   int64
 1   Reading  1000 non-null   int64
 2   Writing  1000 non-null   int64
dtypes: int64(3)
memory usage: 23.6 KB


In [8]:
print("Descriptive Statistics:")
print(data.describe())

Descriptive Statistics:
              Math      Reading      Writing
count  1000.000000  1000.000000  1000.000000
mean     67.290000    69.872000    68.616000
std      15.085008    14.657027    15.241287
min      13.000000    19.000000    14.000000
25%      58.000000    60.750000    58.000000
50%      68.000000    70.000000    69.500000
75%      78.000000    81.000000    79.000000
max     100.000000   100.000000   100.000000


In [15]:
# Split data into Features (X) and Label (Y)
X = data[['Math', 'Reading']].values   # Features: Math and Reading scores
Y = data['Writing'].values             # Label: Writing score

# Display first few rows of X and Y
print("\nFeature Matrix (X):")
print(X[:5])

print("\nLabel Vector (Y):")
print(Y[:5])



Feature Matrix (X):
[[48 68]
 [62 81]
 [79 80]
 [76 83]
 [59 64]]

Label Vector (Y):
[63 72 78 79 62]


In [18]:
import numpy as np
import pandas as pd

data = pd.read_csv("student.csv")

# Extract features (Math, Reading) and label (Writing)
features = data[['Math', 'Reading']].to_numpy()
labels = data['Writing'].to_numpy()

# Create matrices in required form
X = features.T
Y = labels
W = np.zeros(X.shape[0])

print("Feature Matrix X (d x n):")
print(X[:, :5])

print("\nWeight Vector W (d):")
print(W)

print("\nLabel Vector Y (n):")
print(Y[:5])

# Prediction rule (no bias term)
Y_pred = W @ X
print("\nPredicted Y (first 5):")
print(Y_pred[:5])


Feature Matrix X (d x n):
[[48 62 79 76 59]
 [68 81 80 83 64]]

Weight Vector W (d):
[0. 0.]

Label Vector Y (n):
[63 72 78 79 62]

Predicted Y (first 5):
[0. 0. 0. 0. 0.]


In [20]:
X = data[['Math', 'Reading']].values
Y = data['Writing'].values

# Shuffle indices for randomness
n = len(X)
indices = np.arange(n)
np.random.seed(42)          # reproducibility
np.random.shuffle(indices)

# 80-20 Split
train_size_80 = int(0.8 * n)
train_idx_80, test_idx_20 = indices[:train_size_80], indices[train_size_80:]

X_train_80, X_test_20 = X[train_idx_80], X[test_idx_20]
Y_train_80, Y_test_20 = Y[train_idx_80], Y[test_idx_20]

print("80-20 Split:")
print("Training set size:", X_train_80.shape[0])
print("Test set size:", X_test_20.shape[0])

# 70-30 Split
train_size_70 = int(0.7 * n)
train_idx_70, test_idx_30 = indices[:train_size_70], indices[train_size_70:]

X_train_70, X_test_30 = X[train_idx_70], X[test_idx_30]
Y_train_70, Y_test_30 = Y[train_idx_70], Y[test_idx_30]

print("\n70-30 Split:")
print("Training set size:", X_train_70.shape[0])
print("Test set size:", X_test_30.shape[0])


80-20 Split:
Training set size: 800
Test set size: 200

70-30 Split:
Training set size: 700
Test set size: 300


In [22]:
import numpy as np

# Define the cost function
def cost_function(X, Y, W):
    """
    Parameters:
    X: Feature Matrix (d x n)
    Y: Target Vector (n,)
    W: Weight Vector (d,)

    Returns:
    cost: Mean Squared Error (MSE) with 1/(2n) scaling
    """
    n = X.shape[1]                     # number of samples
    Y_pred = W @ X                     # hypothesis hθ(X) = W^T X
    errors = Y_pred - Y                # difference between prediction and actual
    cost = (1 / (2 * n)) * np.sum(errors ** 2)
    return cost
# Given matrices
X = np.array([[1, 3, 5],
              [2, 4, 6]])
Y = np.array([3, 7, 11])
W = np.array([1, 1])

# Compute cost
cost_value = cost_function(X, Y, W)
print("Computed Cost:", cost_value)


Computed Cost: 0.0


In [23]:
import numpy as np

def cost_function(X, Y, W):
    n = X.shape[0]
    Y_pred = X @ W
    errors = Y_pred - Y
    cost = (1 / (2 * n)) * np.sum(errors ** 2)
    return cost

# Test case
X_test = np.array([[1, 2],
                   [3, 4],
                   [5, 6]])
Y_test = np.array([3, 7, 11])
W_test = np.array([1, 1])
cost = cost_function(X_test, Y_test, W_test)

if cost == 0:
    print("Proceed Further")
else:
    print("Something went wrong: Reimplement the cost function")

print("Cost function output:", cost)


Proceed Further
Cost function output: 0.0


In [None]:
def gradient_descent(X, Y, alpha=0.01, epochs=1000):
    """
    Perform gradient descent to learn weights W.
    Assumes no bias term (b = 0).

    Parameters:
    X: Feature matrix (m x d)
    Y: Target vector (m,)
    alpha: Learning rate
    epochs: Number of iterations

    Returns:
    W: Learned weight vector (d,)
    """
    m, d = X.shape
    W = np.zeros(d)   # initialize weights

    for _ in range(epochs):
        Y_pred = X @ W                 # predictions
        errors = Y_pred - Y            # loss
        gradient = (1/m) * (X.T @ errors)  # derivative wrt W
        W -= alpha * gradient          # update rule

    return W

# Example run with the test dataset
W_learned = gradient_descent(X_test, Y_test, alpha=0.01, epochs=1000)
print("Learned Weights:", W_learned)


In [24]:
import numpy as np

def gradient_descent(X, Y, alpha=0.01, epochs=1000):
    m, d = X.shape          # m = samples, d = features
    W = np.zeros(d)         # start with weights = 0

    for _ in range(epochs): # repeat many times
        Y_pred = X @ W      # predict values
        loss = Y_pred - Y   # error
        gradient = (1/m) * (X.T @ loss)  # slope
        W = W - alpha * gradient         # update weights

    return W

X_test = np.array([[1, 2],
                   [3, 4],
                   [5, 6]])   # features
Y_test = np.array([3, 7, 11]) # targets

W_learned = gradient_descent(X_test, Y_test, alpha=0.01, epochs=1000)
print("Learned Weights:", W_learned)


Learned Weights: [0.9463076  1.04238709]


In [28]:
import numpy as np

# Cost function (from To-Do-4)
def cost_function(X, Y, W):
    m = len(Y)
    Y_pred = X @ W
    errors = Y_pred - Y
    cost = (1 / (2 * m)) * np.sum(errors ** 2)
    return cost

# Gradient Descent Implementation
def gradient_descent(X, Y, W, alpha, iterations):
    """
    Perform gradient descent to optimize the parameters of a linear regression model.

    Parameters:
    X (numpy.ndarray): Feature matrix (m x n)
    Y (numpy.ndarray): Target vector (m,)
    W (numpy.ndarray): Initial guess for parameters (n,)
    alpha (float): Learning rate
    iterations (int): Number of iterations

    Returns:
    W_update (numpy.ndarray): Updated parameters (n,)
    cost_history (list): History of cost values over iterations
    """
    cost_history = [0] * iterations
    m = len(Y)

    for iteration in range(iterations):
        # Step 1: Hypothesis Values
        Y_pred = X @ W

        # Step 2: Difference between Hypothesis and Actual Y
        loss = Y_pred - Y

        # Step 3: Gradient Calculation
        dw = (1 / m) * (X.T @ loss)

        # Step 4: Updating Values of W using Gradient
        W = W - alpha * dw
        W_update = W

        # Step 5: New Cost Value
        cost = cost_function(X, Y, W_update)
        cost_history[iteration] = cost

    return W_update, cost_history


In [30]:
import numpy as np

# Cost function
def cost_function(X, Y, W):
    m = len(Y)
    Y_pred = X @ W
    errors = Y_pred - Y
    cost = (1 / (2 * m)) * np.sum(errors ** 2)
    return cost

# Gradient Descent
def gradient_descent(X, Y, W, alpha, iterations):
    cost_history = [0] * iterations
    m = len(Y)

    for iteration in range(iterations):
        # Step 1: Hypothesis
        Y_pred = X @ W

        # Step 2: Loss
        loss = Y_pred - Y

        # Step 3: Gradient
        dw = (1 / m) * (X.T @ loss)

        # Step 4: Update weights
        W = W - alpha * dw
        W_update = W

        # Step 5: Cost
        cost = cost_function(X, Y, W_update)
        cost_history[iteration] = cost

    return W_update, cost_history

# Test Case
np.random.seed(0)  # reproducibility
X = np.random.rand(100, 3)   # 100 samples, 3 features
Y = np.random.rand(100)      # targets
W = np.random.rand(3)        # initial weights

alpha = 0.01
iterations = 1000

final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)

print("Final Parameters:", final_params)
print("Final Cost:", cost_history[-1])


Final Parameters: [0.20551667 0.54295081 0.10388027]
Final Cost: 0.05435492255484332


In [31]:
import numpy as np

def rmse(Y, Y_pred):
    """
    Root Mean Square Error (RMSE)

    Parameters:
    Y (numpy.ndarray): Actual target values
    Y_pred (numpy.ndarray): Predicted values

    Returns:
    float: RMSE value
    """
    n = len(Y)
    return np.sqrt(np.sum((Y - Y_pred) ** 2) / n)

np.random.seed(0)
X = np.random.rand(100, 3)   # 100 samples, 3 features
Y = np.random.rand(100)      # targets
W = np.random.rand(3)        # initial weights

alpha = 0.01
iterations = 1000
final_params, cost_history = gradient_descent(X, Y, W, alpha, iterations)

# Predictions with learned weights
Y_pred = X @ final_params

# Evaluate RMSE
rmse_value = rmse(Y, Y_pred)
print("RMSE:", rmse_value)


RMSE: 0.32971176064812524


In [34]:
def rmse(Y, Y_pred):
    """
    Calculates how far off the predictions are from the actual values on average.

    Parameters:
    Y (array): Actual target values (what really happened)
    Y_pred (array): Predicted values from the model

    Returns:
    float: RMSE value — lower means better predictions
    """
    n = len(Y)
    error = Y - Y_pred
    rmse = np.sqrt(np.sum(error**2) / n)
    return rmse
# test
Y = np.array([3, 7, 11])
Y_pred = np.array([2.8, 7.2, 10.9])
print("RMSE:", rmse(Y, Y_pred))

RMSE: 0.17320508075688779


In [37]:
import numpy as np

def r2_score(Y, Y_pred):
    """
    Calculate R-squared (coefficient of determination) to check how well
    the regression model explains the variation in the actual data.

    R² tells us the proportion of the variance in the target values (Y)
    that can be explained by the predictions (Y_pred).
    - R² = 1 means perfect fit (predictions match actual values exactly).
    - R² = 0 means the model does not explain any variation.
    - Negative R² means the model performs worse than just predicting the mean.

    Parameters:
    Y (array): Actual target values (what really happened)
    Y_pred (array): Predicted values from the model

    Returns:
    float: R² value (closer to 1 means better fit)
    """
    # Total variation in actual values (how far each value is from the mean)
    sst = np.sum((Y - np.mean(Y))**2)

    # Residual variation (how far each value is from its prediction)
    ssr = np.sum((Y - Y_pred)**2)

    # R² formula: 1 - (unexplained variation / total variation)
    r2 = 1 - (ssr / sst)
    return r2


In [39]:
import numpy as np

# Model Evaluation - R²
def r2(Y, Y_pred):
    """
    Calculates how well the model's predictions match the actual values.

    This function returns the R-squared score, which tells us how much of the variation
    in the actual data is explained by the model. A score close to 1 means a good fit.

    Parameters:
    Y (array): Actual target values
    Y_pred (array): Predicted values from the model

    Returns:
    float: R² score (closer to 1 means better fit)
    """
    mean_y = np.mean(Y)                      # average of actual values

    # Total variation in actual values (from the mean)
    ss_tot = np.sum((Y - mean_y) ** 2)

    # Unexplained variation (from predictions)
    ss_res = np.sum((Y - Y_pred) ** 2)

    # R² formula
    r2 = 1 - (ss_res / ss_tot)
    return r2

# test
Y = np.array([3, 7, 11])
Y_pred = np.array([2.8, 7.2, 10.9])
print("R² Score:", r2(Y, Y_pred))

R² Score: 0.9971875


In [41]:
import numpy as np

# Cost function
def cost_function(X, Y, W):
    m = len(Y)
    Y_pred = X @ W
    errors = Y_pred - Y
    return (1 / (2 * m)) * np.sum(errors ** 2)

# Gradient Descent
def gradient_descent(X, Y, W, alpha, iterations):
    cost_history = []
    m = len(Y)

    for _ in range(iterations):
        Y_pred = X @ W              # predictions
        loss = Y_pred - Y           # error
        dw = (1 / m) * (X.T @ loss) # slope
        W = W - alpha * dw          # update weights
        cost_history.append(cost_function(X, Y, W))
    return W, cost_history

# RMSE
def rmse(Y, Y_pred):
    n = len(Y)
    return np.sqrt(np.sum((Y - Y_pred) ** 2) / n)

# R²
def r2(Y, Y_pred):
    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)   # total variation
    ss_res = np.sum((Y - Y_pred) ** 2)   # error variation
    return 1 - (ss_res / ss_tot)

# Main Function
def run_linear_regression():
    np.random.seed(0)
    X = np.random.rand(100, 3)   # 100 samples, 3 features
    Y = np.random.rand(100)      # actual values
    W_init = np.random.rand(3)   # starting weights

    # Set learning rate and iterations
    alpha = 0.01
    iterations = 1000

    # Train model
    final_W, cost_history = gradient_descent(X, Y, W_init, alpha, iterations)

    # Predictions
    Y_pred = X @ final_W

    # Evaluate model
    print("Final Weights:", final_W)
    print("Final Cost:", cost_history[-1])
    print("RMSE:", rmse(Y, Y_pred))
    print("R² Score:", r2(Y, Y_pred))

run_linear_regression()


Final Weights: [0.20551667 0.54295081 0.10388027]
Final Cost: 0.05435492255484332
RMSE: 0.32971176064812524
R² Score: -0.34175367492079367


In [43]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Gradient Descent function
def gradient_descent(X, Y, W, alpha, iterations):
    cost_history = []
    m = len(Y)

    for _ in range(iterations):
        Y_pred = X @ W              # predictions
        loss = Y_pred - Y           # error
        dw = (1 / m) * (X.T @ loss) # slope
        W = W - alpha * dw          # update weights
        cost = (1 / (2 * m)) * np.sum(loss ** 2)  # cost function
        cost_history.append(cost)
    return W, cost_history

# RMSE function
def rmse(Y, Y_pred):
    n = len(Y)
    return np.sqrt(np.sum((Y - Y_pred) ** 2) / n)

# R² function
def r2(Y, Y_pred):
    mean_y = np.mean(Y)
    ss_tot = np.sum((Y - mean_y) ** 2)   # total variation
    ss_res = np.sum((Y - Y_pred) ** 2)   # error variation
    return 1 - (ss_res / ss_tot)

# Main Function
def main():
    data = pd.read_csv("student.csv")
    # Prepare features (X) and target (Y)
    X = data[["Math", "Reading"]].values   # inputs: Math & Reading marks
    Y = data["Writing"].values

    # Split into training (80%) and test (20%)
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=42
    )

    # Initialize weights and hyperparameters
    W = np.zeros(X_train.shape[1])   # start with zeros
    alpha = 0.00001                  # learning rate
    iterations = 1000                # number of steps

    # Train model using Gradient Descent
    W_optimal, cost_history = gradient_descent(X_train, Y_train, W, alpha, iterations)

    # Make predictions on test set
    Y_pred = np.dot(X_test, W_optimal)

    # Evaluate model
    print("Final Weights:", W_optimal)
    print("Cost History (First 10):", cost_history[:10])
    print("RMSE on Test Set:", rmse(Y_test, Y_pred))
    print("R² on Test Set:", r2(Y_test, Y_pred))

if __name__ == "__main__":
    main()


Final Weights: [0.34811659 0.64614558]
Cost History (First 10): [np.float64(2471.69875), np.float64(2013.165570783755), np.float64(1640.286832599692), np.float64(1337.0619994901588), np.float64(1090.479489285058), np.float64(889.9583270083235), np.float64(726.8940993009545), np.float64(594.2897260808594), np.float64(486.4552052951634), np.float64(398.7634463599482)]
RMSE on Test Set: 5.2798239764188635
R² on Test Set: 0.8886354462786421
