In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("boston.csv")
print(df.head())

      CRIM    ZN  INDUS  CHAS    NOX     RM   AGE     DIS  RAD    TAX  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296.0   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242.0   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242.0   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222.0   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222.0   

   PTRATIO       B  LSTAT  MEDV  
0     15.3  396.90   4.98  24.0  
1     17.8  396.90   9.14  21.6  
2     17.8  392.83   4.03  34.7  
3     18.7  394.63   2.94  33.4  
4     18.7  396.90   5.33  36.2  


In [2]:
X = df.drop(columns=["MEDV"]).values
y = df["MEDV"].values.reshape(-1, 1) 
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
class LinearRegressionScratch:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def initialize_parameters(self, n_features):
        self.weights = np.zeros((n_features, 1))
        self.bias = 0

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias

    def compute_loss(self, y_true, y_pred):
        return np.mean((y_true - y_pred) ** 2) 

    def train(self, X, y):
        m, n_features = X.shape
        self.initialize_parameters(n_features)

        for epoch in range(self.epochs):
            y_pred = self.predict(X)
            loss = self.compute_loss(y, y_pred)
            dw = (-2/m) * np.dot(X.T, (y - y_pred))
            db = (-2/m) * np.sum(y - y_pred)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

    def evaluate(self, X, y):
        y_pred = self.predict(X)
        mse = self.compute_loss(y, y_pred)
        rmse = np.sqrt(mse)
        mae = np.mean(np.abs(y - y_pred))
        return {"MSE": mse, "RMSE": rmse, "MAE": mae}

    def predict_single(self, input_features):
        input_features = np.array(input_features).reshape(1, -1)
        input_features = scaler.transform(input_features)  
        return self.predict(input_features)[0, 0]

In [4]:
model = LinearRegressionScratch(learning_rate=0.01, epochs=1000)
model.train(X_train, y_train)
metrics = model.evaluate(X_test, y_test)
print("Model Performance:", metrics)

Epoch 0, Loss: 606.5554
Epoch 100, Loss: 32.3744
Epoch 200, Loss: 22.7718
Epoch 300, Loss: 22.2111
Epoch 400, Loss: 21.9988
Epoch 500, Loss: 21.8795
Epoch 600, Loss: 21.8072
Epoch 700, Loss: 21.7609
Epoch 800, Loss: 21.7298
Epoch 900, Loss: 21.7080
Model Performance: {'MSE': 24.710996767050904, 'RMSE': 4.971015667552347, 'MAE': 3.2094516410676195}


In [9]:
sample = X_test[23] 
prediction = model.predict_single(sample)
print(prediction)

26.80683883482739


In [8]:
def r2_score(y_actual, y_pred):
    ss_res = np.sum((y_actual - y_pred) ** 2)
    ss_tot = np.sum((y_actual - np.mean(y_actual)) ** 2) 
    return 1 - (ss_res / ss_tot)
y_pred_test = model.predict(X_test) 
r2 = r2_score(y_test, y_pred_test)
print(f"R² Score: {r2:.4f}")

R² Score: 0.6630
