In [None]:
import pandas as pd
import numpy as np

In [None]:
# 1. Load the student marks CSV (relative path, resolved against the working directory)
df = pd.read_csv(filepath_or_buffer='student.csv')
print("Dataset loaded successfully")

Dataset loaded successfully


In [None]:
# 2. Preview both ends of the dataset (head/tail return five rows by default)
for heading, preview in (
    ("\nTop 5 rows:", df.head()),
    ("\nBottom 5 rows:", df.tail()),
):
    print(heading)
    print(preview)



Top 5 rows:
   Math  Reading  Writing
0    48       68       63
1    62       81       72
2    79       80       78
3    76       83       79
4    59       64       62

Bottom 5 rows:
     Math  Reading  Writing
995    72       74       70
996    73       86       90
997    89       87       94
998    83       82       78
999    66       66       72


In [None]:
# 3. Print dataset information
# df.info() writes its summary (index range, per-column non-null counts and
# dtypes, memory usage) directly to stdout and returns None, which is why it
# is called bare instead of being wrapped in print().
print("\nDataset Information:")
df.info()


Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   Math     1000 non-null   int64
 1   Reading  1000 non-null   int64
 2   Writing  1000 non-null   int64
dtypes: int64(3)
memory usage: 23.6 KB


In [None]:
# 4. Per-column summary statistics (count, mean, std, min, quartiles, max)
summary_stats = df.describe()
print("\nDescriptive Statistics:")
print(summary_stats)


Descriptive Statistics:
              Math      Reading      Writing
count  1000.000000  1000.000000  1000.000000
mean     67.290000    69.872000    68.616000
std      15.085008    14.657027    15.241287
min      13.000000    19.000000    14.000000
25%      58.000000    60.750000    58.000000
50%      68.000000    70.000000    69.500000
75%      78.000000    81.000000    79.000000
max     100.000000   100.000000   100.000000


In [None]:
# 5. Split data into Feature (X) and Label (Y)
# Inspect the column labels first so the positional split that follows can be
# checked against them
column_names = list(df.columns)
print("\nColumn names in dataset:")
print(column_names)



Column names in dataset:
['Math', 'Reading', 'Writing']


In [None]:
# Select columns by position: the first two (Math, Reading) are the features,
# the third (Writing) is the label
feature_positions = [0, 1]
label_position = 2
X = df.iloc[:, feature_positions]
y = df.iloc[:, label_position]

print("\nFeature matrix (X) shape:", X.shape)
print("Label vector (y) shape:", y.shape)

# Preview the first rows of the features and of the label
for heading, preview in (
    ("\nFeatures (X):", X.head()),
    ("\nLabel (y):", y.head()),
):
    print(heading)
    print(preview)


Feature matrix (X) shape: (1000, 2)
Label vector (y) shape: (1000,)

Features (X):
   Math  Reading
0    48       68
1    62       81
2    79       80
3    76       83
4    59       64

Label (y):
0    63
1    72
2    78
3    79
4    62
Name: Writing, dtype: int64


In [None]:
# To-Do 2: Build the X, Y and W matrices for the bias-free model Y = W^T X

# Features as a (d x n) array: one row per feature, one column per sample
X_matrix = X.T.to_numpy()

# Targets as an (n x 1) column vector
Y_matrix = y.to_numpy().reshape(-1, 1)

# Weights as a (d x 1) column vector, all zeros to start with
d = X_matrix.shape[0]  # number of features
W_matrix = np.zeros((d, 1))

# State the modelling assumption before showing the matrices
print(
    "Assumption: No bias or intercept term in the model",
    "Model equation: Y = W^T X",
    sep="\n",
)

# Dimension summary
print(
    "\nMatrix dimensions:",
    f"X matrix: {X_matrix.shape} (d x n)",
    f"W matrix: {W_matrix.shape} (d x 1)",
    f"Y matrix: {Y_matrix.shape} (n x 1)",
    sep="\n",
)
print(f"\nwhere d = {d} features, n = {X_matrix.shape[1]} samples")

print("\nCreated matrices:")

# Weight vector in full (it only has d entries)
print(
    "\nW matrix (weight vector):",
    "W =",
    W_matrix,
    f"Shape: {W_matrix.shape}, W ∈ R^d where d = {d}",
    sep="\n",
)

# Feature matrix: only the first five samples (columns) are shown
print(
    "\nX matrix (feature matrix):",
    "X =",
    X_matrix[:, :5],
    f"Shape: {X_matrix.shape}, X ∈ R^(d×n) where d = {d}, n = {X_matrix.shape[1]}",
    sep="\n",
)

# Target vector: only the first five values are shown
print(
    "\nY matrix (target vector):",
    "Y =",
    Y_matrix[:5],
    f"Shape: {Y_matrix.shape}, Y ∈ R^n where n = {Y_matrix.shape[0]}",
    sep="\n",
)

print(
    "\nNote: The feature matrix X does not include a column of 1s,",
    "as it assumes the absence of a bias term in the model.",
    sep="\n",
)

Assumption: No bias or intercept term in the model
Model equation: Y = W^T X

Matrix dimensions:
X matrix: (2, 1000) (d x n)
W matrix: (2, 1) (d x 1)
Y matrix: (1000, 1) (n x 1)

where d = 2 features, n = 1000 samples

Created matrices:

W matrix (weight vector):
W =
[[0.]
 [0.]]
Shape: (2, 1), W ∈ R^d where d = 2

X matrix (feature matrix):
X =
[[48 62 79 76 59]
 [68 81 80 83 64]]
Shape: (2, 1000), X ∈ R^(d×n) where d = 2, n = 1000

Y matrix (target vector):
Y =
[[63]
 [72]
 [78]
 [79]
 [62]]
Shape: (1000, 1), Y ∈ R^n where n = 1000

Note: The feature matrix X does not include a column of 1s,
as it assumes the absence of a bias term in the model.


In [None]:
# To-Do 3: Split dataset into training and test sets

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("Dataset split completed (80-20 split)")

# Shapes of each partition
print("\nTraining set:")
print(f"X_train shape: {X_train.shape}", f"y_train shape: {y_train.shape}", sep="\n")

print("\nTest set:")
print(f"X_test shape: {X_test.shape}", f"y_test shape: {y_test.shape}", sep="\n")

# Each partition's share of the full dataset
total_rows = len(X)
print(f"\nTotal samples: {total_rows}")
for label, part in (("Training", X_train), ("Test", X_test)):
    print(f"{label} samples: {len(part)} ({len(part)/total_rows*100:.1f}%)")

Dataset split completed (80-20 split)

Training set:
X_train shape: (800, 2)
y_train shape: (800,)

Test set:
X_test shape: (200, 2)
y_test shape: (200,)

Total samples: 1000
Training samples: 800 (80.0%)
Test samples: 200 (20.0%)


In [None]:
# To-Do 4: Building a Cost Function

import numpy as np

def cost_function(X, Y, W):
    """
    Compute the Mean Square Error of the bias-free linear model Y_pred = X . W.

    Parameters:
    X: Feature Matrix (n x d) or (d x n); array-like, must be 2-D.
       The orientation is resolved against the number of weights d, so it is
       handled correctly even when there are fewer samples than features.
       A square (d x d) matrix is read as (n x d).
    Y: Target Matrix (n,) or (n x 1)
    W: Weight Matrix (d,) or (d x 1)

    Output:
    cost: mean square error, (1/n) * sum((Y - Y_pred)^2)

    Raises:
    ValueError: if X is not 2-D, or neither axis of X has length d.
    """
    X = np.asarray(X)
    W = np.asarray(W)
    Y = np.asarray(Y)

    # Work with column vectors so Y - Y_pred stays element-wise instead of
    # broadcasting a (n,) vector against a (n x 1) one.
    if W.ndim == 1:
        W = W.reshape(-1, 1)

    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)

    if X.ndim != 2:
        raise ValueError(f"X must be a 2-D matrix, got {X.ndim} dimension(s)")

    # Decide the orientation from the weight count rather than from which
    # axis is shorter: comparing axis lengths misreads an (n x d) matrix
    # whenever n < d.
    d = W.shape[0]
    if X.shape[1] != d:
        if X.shape[0] != d:
            raise ValueError(
                f"X has shape {X.shape}; neither axis matches the {d} weights in W"
            )
        # X is (d x n), transpose it to (n x d)
        X = X.T

    # Number of samples
    n = X.shape[0]

    # Predictions: Y_pred = X * W
    Y_pred = np.dot(X, W)

    # Mean Square Error: MSE = (1/n) * sum((Y - Y_pred)^2)
    cost = (1 / n) * np.sum((Y - Y_pred) ** 2)

    return cost

print("Cost function defined successfully")
print("\nFunction: cost_function(X, Y, W)")
print("Calculates Mean Square Error (MSE)")

Cost function defined successfully

Function: cost_function(X, Y, W)
Calculates Mean Square Error (MSE)


In [None]:
# To-Do 5: Testing the Cost Function

import numpy as np

# Toy problem that the weights fit exactly (1*1 + 1*2 = 3, 3 + 4 = 7,
# 5 + 6 = 11), so the cost must be zero.
# The arrays carry a `toy_` prefix so this cell does not overwrite the X_test
# produced by the train/test split cell.
toy_X = np.array([[1, 2], [3, 4], [5, 6]])
toy_Y = np.array([3, 7, 11])
toy_W = np.array([1, 1])

cost = cost_function(toy_X, toy_Y, toy_W)

# The cost is a float, so compare with a tolerance rather than with ==
if np.isclose(cost, 0):
    print("Proceed Further")
else:
    print("something went wrong: Reimplement a cost function")

# Reuse the value computed above instead of evaluating the cost a second time
print("Cost function output:", cost)

Proceed Further
Cost function output: 0.0


In [None]:
# To-Do 6: Implement Gradient Descent

import numpy as np

def gradient_descent(X, Y, W, alpha, iterations):
    """
    Perform gradient descent to optimize the parameters of a linear regression model.

    Each iteration predicts with the current weights, computes
    dw = X^T (X.W - Y) / m and steps W <- W - alpha * dw, then records the
    cost of the updated weights via cost_function.

    Parameters:
    X (numpy.ndarray): Feature matrix (m x n).
    Y (numpy.ndarray): Target vector, (m,) or (m x 1). It is reshaped to match
        the shape of the predictions np.dot(X, W) when the two differ.
    W (numpy.ndarray): Initial guess for parameters, (n,) or (n x 1).
    alpha (float): Learning rate.
    iterations (int): Number of iterations for gradient descent. Zero (or a
        negative value) performs no update.

    Returns:
    tuple: A tuple containing the final optimized parameters and the history of cost values.
    W (numpy.ndarray): Updated parameters, same shape as the W passed in
        (the initial W itself when no iteration runs).
    cost_history (list): Cost after each iteration; empty when no iteration runs.

    Raises:
    ValueError: if Y cannot be reshaped to the shape of the predictions.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    W = np.asarray(W)

    # np.dot(X, W) is (m,) for a 1-D W and (m x k) for a 2-D W. Bring Y to
    # that shape once, up front: subtracting a (m,) vector from a (m x 1)
    # column (or the reverse) would broadcast to an (m x m) matrix.
    prediction_shape = (X.shape[0],) + W.shape[1:]
    if Y.shape != prediction_shape:
        Y = Y.reshape(prediction_shape)

    # Cost recorded after every update
    cost_history = []

    # Number of samples
    m = len(Y)

    for _ in range(iterations):
        # Step 1: Hypothesis Values
        Y_pred = np.dot(X, W)

        # Step 2: Difference between Hypothesis and Actual Y
        loss = Y_pred - Y

        # Step 3: Gradient Calculation
        # NOTE: this is half the gradient of the (1/n) * sum(...) cost that
        # cost_function reports; the constant factor only rescales alpha.
        dw = (1 / m) * np.dot(X.T, loss)

        # Step 4: Updating Values of W using Gradient
        W = W - alpha * dw

        # Step 5: New Cost Value
        cost_history.append(cost_function(X, Y, W))

    # Returning W (not a loop-local name) keeps the zero-iteration case valid
    return W, cost_history

print("Gradient descent function defined successfully")

Gradient descent function defined successfully


In [None]:
# To-Do 7: Test Gradient Descent function

import numpy as np

# Generate random test data.
# The arrays get their own names so this cell does not overwrite the feature
# frame X built from the dataset in the earlier cells.
np.random.seed(0)  # For reproducibility
X_rand = np.random.rand(100, 3)  # 100 samples, 3 features
Y_rand = np.random.rand(100)
W_init = np.random.rand(3)  # Initial guess for parameters

# Set hyperparameters
alpha = 0.01
iterations = 1000

# Test the gradient_descent function
final_params, cost_history = gradient_descent(X_rand, Y_rand, W_init, alpha, iterations)

# Sanity check: the optimisation must have reduced the cost
assert len(cost_history) == iterations
assert cost_history[-1] < cost_history[0], "gradient descent did not reduce the cost"

# Print the final parameters and a slice of the cost history; dumping all
# 1000 values floods the cell output without adding information
print("Final Parameters:", final_params)
print("Cost History (First 10 iterations):", cost_history[:10])
print("Final Cost:", cost_history[-1])

Final Parameters: [0.20551667 0.54295081 0.10388027]
Cost History: [np.float64(0.21422394189320307), np.float64(0.21269761199879803), np.float64(0.21119652631361235), np.float64(0.20972025896641117), np.float64(0.2082683912857068), np.float64(0.2068405116780125), np.float64(0.2054362155081552), np.float64(0.2040551049816124), np.float64(0.20269678902883861), np.float64(0.2013608831915474), np.float64(0.2000470095109174), np.float64(0.19875479641768753), np.float64(0.19748387862411218), np.float64(0.19623389701774197), np.float64(0.1950044985570019), np.float64(0.1937953361685344), np.float64(0.19260606864627902), np.float64(0.19143636055225827), np.float64(0.1902858821190413), np.float64(0.18915430915385684), np.float64(0.18804132294432793), np.float64(0.18694661016580033), np.float64(0.18586986279023826), np.float64(0.18481077799666035), np.float64(0.18376905808309085), np.float64(0.182744410379999), np.float64(0.18173654716520246), np.float64(0.18074518558021005), np.float64(0.179770

In [None]:
# To-Do 8: Implementation of RMSE

import numpy as np

def rmse(Y, Y_pred):
    """
    This Function calculates the Root Mean Square Error.

    Both inputs are flattened before being compared, so a (n,) vector and a
    (n x 1) column are matched element by element instead of being broadcast
    against each other into an (n x n) matrix.

    Input Arguments:
    Y: Array of actual(Target) Dependent Variables.
    Y_pred: Array of predicted Dependent Variables.

    Output Arguments:
    rmse: Root Mean Square Error, sqrt(mean((Y - Y_pred)^2)).
    """
    actual = np.asarray(Y).ravel()
    predicted = np.asarray(Y_pred).ravel()
    return np.sqrt(np.mean((actual - predicted) ** 2))

print("RMSE function defined successfully")
print("\nFunction: rmse(Y, Y_pred)")
print("Calculates Root Mean Square Error")

RMSE function defined successfully

Function: rmse(Y, Y_pred)
Calculates Root Mean Square Error


In [None]:
# To-Do 9: Implementation of R-Squared Error

import numpy as np

def r2(Y, Y_pred):
    """
    This Function calculates the R Squared score (coefficient of determination).

    Both inputs are flattened before being compared, so a (n,) vector and a
    (n x 1) column are matched element by element instead of being broadcast
    against each other into an (n x n) matrix.

    Input Arguments:
    Y: Array of actual(Target) Dependent Variables.
    Y_pred: Array of predicted Dependent Variables.

    Output Arguments:
    r2: 1 - SS_res / SS_tot. When Y is constant (SS_tot == 0) the ratio is
        undefined; 1.0 is returned for perfect predictions and 0.0 otherwise.
    """
    actual = np.asarray(Y).ravel()
    predicted = np.asarray(Y_pred).ravel()

    # Total variation of the targets around their mean
    ss_tot = np.sum((actual - np.mean(actual)) ** 2)
    # Variation left unexplained by the predictions
    ss_res = np.sum((actual - predicted) ** 2)

    # A constant target makes SS_tot zero; avoid dividing by it
    if actual.size > 0 and ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0

    return 1 - (ss_res / ss_tot)

print("R-Squared function defined successfully")
print("\nFunction: r2(Y, Y_pred)")
print("Calculates R-Squared (coefficient of determination)")

R-Squared function defined successfully

Function: r2(Y, Y_pred)
Calculates R-Squared (coefficient of determination)


In [None]:
# To-Do 10: Compiling everything together

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def main(csv_path='student.csv', alpha=0.00001, iterations=1000,
         test_size=0.2, random_state=42):
    """
    Train and evaluate the bias-free linear regression end to end.

    Loads the CSV, uses its first two columns as features and its third as
    the target, splits into train and test sets, fits the weights with
    gradient_descent starting from zeros, and prints the final weights, the
    first ten cost values, and RMSE / R-Squared on the test set.

    Parameters:
    csv_path (str): Path of the CSV file to load.
    alpha (float): Learning rate passed to gradient_descent.
    iterations (int): Number of gradient descent iterations.
    test_size (float): Fraction of rows held out for testing.
    random_state (int): Seed passed to train_test_split so the split is repeatable.

    Returns:
    None. Results are printed.
    """
    # Step 1: Load the dataset
    data = pd.read_csv(csv_path)

    # Step 2: Split the data into features (X) and target (Y)
    X = data.iloc[:, [0, 1]].values  # Features: Math and Reading marks
    Y = data.iloc[:, 2].values  # Target: Writing marks

    # Step 3: Split the data into training and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )

    # Step 4: Initialize weights (W) to zeros, one per feature column
    W = np.zeros(X_train.shape[1])

    # Step 5: Perform Gradient Descent
    # NOTE(review): the default alpha is tiny, presumably because the features
    # are unscaled marks; a much larger step is likely to diverge - confirm
    # before raising it.
    W_optimal, cost_history = gradient_descent(X_train, Y_train, W, alpha, iterations)

    # Step 6: Make predictions on the test set
    Y_pred = np.dot(X_test, W_optimal)

    # Step 7: Evaluate the model using RMSE and R-Squared
    model_rmse = rmse(Y_test, Y_pred)
    model_r2 = r2(Y_test, Y_pred)

    # Step 8: Output the results
    print("Final Weights:", W_optimal)
    print("Cost History (First 10 iterations):", cost_history[:10])
    print("RMSE on Test Set:", model_rmse)
    print("R-Squared on Test Set:", model_r2)

# Execute the main function
if __name__ == "__main__":
    main()

Final Weights: [0.34811659 0.64614558]
Cost History (First 10 iterations): [np.float64(4026.33114156751), np.float64(3280.573665199384), np.float64(2674.1239989803175), np.float64(2180.9589785701155), np.float64(1779.9166540166468), np.float64(1453.788198601909), np.float64(1188.5794521617188), np.float64(972.910410590327), np.float64(797.5268927198968), np.float64(654.9034294649376)]
RMSE on Test Set: 5.2798239764188635
R-Squared on Test Set: 0.8886354462786421
