In [212]:
class LinearRegression():
    """Single-feature linear regression trained with batch gradient descent.

    The model is ``y_hat = w * x + b`` and is fitted by minimising the mean
    squared error over the training samples.

    Attributes:
        lr: Step size used for every gradient-descent update.
        iterations: Number of full passes over the training data per ``fit``.
        w: Current slope.
        b: Current intercept.
        cost_history: Mean squared error recorded once per iteration. It is
            never cleared, so calling ``fit`` again appends to it and resumes
            from the current ``w`` and ``b``.
    """

    def __init__(self, iterations = 1000, learning_rate = 0.01):
        self.lr = learning_rate
        self.iterations = iterations
        self.w = 0.0 #Slope
        self.b = 0.0 #Intercept
        self.cost_history = []

    def fit(self, X_train_scaled, y_train):
        """Run ``self.iterations`` gradient-descent steps on the given data.

        Args:
            X_train_scaled: Feature values, either 1-D of length ``n`` or 2-D
                of shape ``(n, k)``. Only column 0 of a 2-D input is used.
            y_train: Target values with ``n`` elements (any shape that
                flattens to length ``n``).

        Raises:
            ValueError: If the input is empty, has an unsupported number of
                dimensions, or the feature and target lengths differ.
        """
        # Converting to plain arrays makes indexing positional even when a
        # pandas Series/DataFrame with a shuffled index is passed in.
        features = np.asarray(X_train_scaled, dtype=float)
        targets = np.asarray(y_train, dtype=float).ravel()

        if features.ndim == 1:
            x = features
        elif features.ndim == 2:
            x = features[:, 0]
        else:
            raise ValueError(
                f"X_train_scaled must be 1-D or 2-D, got {features.ndim} dimensions"
            )

        n = x.shape[0]
        if n == 0:
            raise ValueError("Cannot fit on an empty training set")
        if targets.shape[0] != n:
            raise ValueError(
                f"X_train_scaled has {n} samples but y_train has {targets.shape[0]}"
            )

        for iteration in range(self.iterations):
            # 1-2. Predictions and errors for every point at once
            errors = self.w * x + self.b - targets

            # 3-5. Averaged gradients of the MSE with respect to w and b
            gradient_w = (2 / n) * np.dot(errors, x)
            gradient_b = (2 / n) * errors.sum()

            # 7. Cost is measured with the parameters *before* this update
            avg_cost = float(np.dot(errors, errors) / n)

            # 6. Update parameters (Gradient Descent)
            self.w = float(self.w - self.lr * gradient_w)
            self.b = float(self.b - self.lr * gradient_b)

            self.cost_history.append(avg_cost)

            # 8. Print progress
            if iteration % 100 == 0:
                print(f"Iteration {iteration}: Cost = {avg_cost:.2f}, w={self.w:.2f}, b={self.b:.2f}")

    def predict(self, X_train_scaled):
        """Return ``w * x + b`` for every element of the input.

        Args:
            X_train_scaled: Array-like of feature values.

        Returns:
            A NumPy array with the same shape as the input, e.g. ``(n, 1)``
            for an ``(n, 1)`` input and ``(n,)`` for a 1-D input.
        """
        features = np.asarray(X_train_scaled, dtype=float)
        return self.w * features + self.b
     

In [214]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

In [216]:
# Load the salary dataset; the file is parsed as CSV regardless of its ".xls" suffix.
df = pd.read_csv(filepath_or_buffer="data/Salary_Data.csv.xls")

In [218]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,YearsExperience,Salary
0,1.1,39343.0
1,1.3,46205.0
2,1.5,37731.0
3,2.0,43525.0
4,2.2,39891.0


In [220]:
# Choose the independent (feature) and dependent (target) variables.
# Columns are selected by name rather than by position so that a leading
# index-like column (such as "Unnamed: 0") can never be picked up as the feature.
X = df["YearsExperience"].to_numpy()
y = df["Salary"].to_numpy()

In [222]:
# Show the feature and target shapes, one per line.
print(X.shape, y.shape, sep="\n")

(30,)
(30,)


In [224]:
# Turn X into a single-column 2-D array (n_samples, 1), the layout the
# scaler and the train/test split below are given.
X = np.reshape(X, (-1, 1))
print(X.shape)

(30, 1)


In [226]:
# Train/test split: hold out 20% of the rows; the fixed random_state keeps
# the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [228]:
# Report the shapes of the four split arrays, with a blank line between X and y.
print(
    f"X_train shape: {X_train.shape}",
    f"X_test shape: {X_test.shape}",
    "",
    f"y_train shape: {y_train.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

X_train shape: (24, 1)
X_test shape: (6, 1)

y_train shape: (24,)
y_test shape: (6,)


In [230]:
# Scaling
# Separate scalers are kept for the feature and the target so that predictions
# can later be mapped back to salary units with scaler_y.inverse_transform.
from sklearn.preprocessing import StandardScaler
scaler_X = StandardScaler()
scaler_y = StandardScaler()

# Fit the scalers on the training split only.
X_train_scaled = scaler_X.fit_transform(X_train)
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1,1)).flatten()

# Reuse the already-fitted scalers on the test split (transform, not fit_transform),
# so no test-set statistics leak into the preprocessing.
X_test_scaled = scaler_X.transform(X_test)
y_test_scaled = scaler_y.transform(y_test.reshape(-1,1)).flatten()

In [233]:
# Create the model and train it on the standardised training data.
print("Training custom Linear Regression model.. ")
model = LinearRegression(learning_rate=0.01, iterations=1000)
model.fit(X_train_scaled, y_train_scaled)

Training custom Linear Regression model.. 
Iteration 0: Cost = 1.00, w=0.02, b=0.00
Iteration 100: Cost = 0.05, w=0.85, b=0.00
Iteration 200: Cost = 0.03, w=0.97, b=-0.00
Iteration 300: Cost = 0.03, w=0.98, b=-0.00
Iteration 400: Cost = 0.03, w=0.98, b=-0.00
Iteration 500: Cost = 0.03, w=0.98, b=-0.00
Iteration 600: Cost = 0.03, w=0.98, b=-0.00
Iteration 700: Cost = 0.03, w=0.98, b=-0.00
Iteration 800: Cost = 0.03, w=0.98, b=-0.00
Iteration 900: Cost = 0.03, w=0.98, b=-0.00


In [237]:
# Predict on the standardised test features.
y_pred_scaled = model.predict(X_test_scaled)

# Map the predictions back to the original target units; inverse_transform
# is handed a single-column 2-D array, and the result is flattened to 1-D.
y_pred_original = scaler_y.inverse_transform(
    np.reshape(y_pred_scaled, (-1, 1))
).flatten()

In [253]:
# Evaluate the predictions against the unscaled test targets.
from sklearn.metrics import mean_absolute_error, r2_score

mse = np.mean(np.square(y_test - y_pred_original))
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred_original)
r2 = r2_score(y_test, y_pred_original)

# w and b are reported as stored on the model, i.e. in standardised units.
print(
    f"MSE: {mse:,.2f}",
    f"MAE: {mae:,.2f}",
    f"RMSE:{rmse:,.2f}",
    f"R²:  {r2:.2f}\n",
    f"final parameters: w = {model.w:.3f}, b = {model.b:.3f}",
    sep="\n",
)


MSE: 56,137,509.81
MAE: 6,802.78
RMSE:7,492.50
R²:  0.89

final parameters: w = 0.983, b = -0.000
