<a href="https://colab.research.google.com/github/priyansuapk/ml-lab-exam/blob/main/linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Load the California Housing dataset (features in `data.data`, targets in `data.target`)
data = fetch_california_housing()
X, y = data.data, data.target

# Prepend a constant column of ones so the model can learn an intercept (bias) weight
X = np.column_stack((np.ones(X.shape[0]), X))

# Train-test split: hold out 20% of the rows, with a fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Linear Regression from scratch (Normal Equation)
class LinearRegressionScratch:
    """Ordinary least-squares linear regression built directly on NumPy.

    The model does not add an intercept on its own: a bias term is learned
    only if the caller includes a constant column (e.g. all ones) in ``X``.

    Attributes:
        coef_: Fitted weight vector, one entry per column of ``X``;
            ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        # Fitted weights; remains None until fit() runs.
        self.coef_ = None

    def fit(self, X, y):
        """Estimate the weights that minimise ``||X @ coef - y||^2``.

        The solution satisfies the normal equations ``X.T @ X @ coef = X.T @ y``
        but is obtained with ``np.linalg.lstsq`` rather than by explicitly
        inverting ``X.T @ X``. Explicit inversion fails (or silently loses
        precision) when ``X.T @ X`` is singular or ill-conditioned, e.g. with
        duplicated or collinear feature columns; ``lstsq`` returns the
        minimum-norm least-squares solution in that case.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like of targets with n_samples entries along axis 0.

        Returns:
            The fitted estimator itself, so calls can be chained.
        """
        # rcond=None selects NumPy's machine-precision based cutoff for
        # treating small singular values as zero.
        coef, _residuals, _rank, _singular_values = np.linalg.lstsq(X, y, rcond=None)
        self.coef_ = coef
        return self

    def predict(self, X):
        """Return predictions ``X @ coef_`` for the rows of ``X``.

        ``fit`` must have been called first; ``X`` must have the same number
        of columns as the matrix used for fitting.
        """
        return X @ self.coef_

# Fit the from-scratch model on the training split
model = LinearRegressionScratch()
model.fit(X_train, y_train)

# Predict targets for the held-out test split
y_pred = model.predict(X_test)

# Evaluation metrics (all computed on the test split)

# Prediction errors, shared by the error-based metrics below
residuals = y_test - y_pred

# Mean Absolute Error (MAE)
MAE = np.abs(residuals).mean()

# Mean Squared Error (MSE)
MSE = (residuals ** 2).mean()

# R-squared (R²): 1 - (residual sum of squares / total sum of squares)
ss_residual = (residuals ** 2).sum()
ss_total = ((y_test - y_test.mean()) ** 2).sum()
R2 = 1 - (ss_residual / ss_total)

# Adjusted R-squared: penalises R² for the number of features used
n = len(X_test)  # number of test samples
p = X_test.shape[1] - 1  # number of features (the intercept column is not counted)
adj_R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1)

# Report every metric on its own line
print(
    f"Mean Absolute Error (MAE): {MAE}",
    f"Mean Squared Error (MSE): {MSE}",
    f"R-squared (R²): {R2}",
    f"Adjusted R-squared: {adj_R2}",
    sep="\n",
)


Mean Absolute Error (MAE): 0.5332001304929304
Mean Squared Error (MSE): 0.5558915986959396
R-squared (R²): 0.5757877060319202
Adjusted R-squared: 0.5749637928608242
