In [72]:
from typing_extensions import runtime
import numpy as np

class LinearRegression:
  """Simple (single-feature) ordinary least-squares linear regression.

  Fits ``y = intercept + slope * x`` with the closed-form solution
  ``slope = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))**2)``.

  Attributes:
    slope: Fitted slope, or None until ``fit`` has been called.
    intercept: Fitted intercept, or None until ``fit`` has been called.
  """

  def __init__(self):
    self.slope= None
    self.intercept= None

  @staticmethod
  def _as_1d(values, name):
    """Return ``values`` as a flat float array.

    Accepts 1-D input or a single row/column (e.g. shape ``(n, 1)``).

    Raises:
      ValueError: If the input is empty or has more than one feature.
    """
    arr = np.asarray(values, dtype=float)
    if arr.size == 0:
      raise ValueError(f"{name} must contain at least one value.")
    # A genuine 2-D matrix would otherwise be flattened into nonsense.
    if arr.ndim > 1 and arr.size != max(arr.shape):
      raise ValueError(f"{name} must be one-dimensional or a single column.")
    return arr.ravel()

  def fit(self, X, y):
    """Estimate ``slope`` and ``intercept`` from paired samples.

    Args:
      X: Feature values; 1-D array-like or a single column.
      y: Target values; same number of elements as ``X``.

    Raises:
      ValueError: If the inputs are empty, differ in length, or ``X`` has
        zero variance (the slope is undefined).
    """
    X= self._as_1d(X, "X")
    y= self._as_1d(y, "y")

    # Without this check an (n, 1) X and (n,) y would broadcast to (n, n)
    # and silently produce wrong coefficients.
    if X.size != y.size:
      raise ValueError(
        f"X and y must have the same number of samples, got {X.size} and {y.size}."
      )

    X_mean= np.mean(X)
    y_mean= np.mean(y)

    num= np.sum((X - X_mean) * (y - y_mean))
    deno= np.sum((X - X_mean)**2)

    if deno == 0:
      raise ValueError("All the values of X are likely to be the same.")

    self.slope= num / deno
    self.intercept= y_mean - (self.slope * X_mean)
    print("Model Trained")
    print("Slope and Intercept: ",self.slope, self.intercept)

  def predict(self, X):
    """Return ``intercept + slope * X`` element-wise, preserving X's shape.

    Raises:
      RuntimeError: If the model has not been fitted yet.
    """
    # Check the fitted state first so an untrained model fails fast,
    # before any work is done on the input.
    if self.slope is None or self.intercept is None:
      raise RuntimeError("Model has not been trained yet.")
    X= np.asarray(X)
    return self.intercept + (X * self.slope)

  def score(self, X, y):
    """Return the coefficient of determination (R-squared) on ``X``, ``y``.

    When ``y`` is constant the total sum of squares is zero and R-squared is
    mathematically undefined; in that case 1.0 is returned for a perfect
    fit and 0.0 otherwise, instead of nan/-inf from a division by zero.

    Raises:
      RuntimeError: If the model has not been fitted yet.
      ValueError: If the inputs are empty or differ in length.
    """
    y_true = self._as_1d(y, "y")
    y_predicted = self._as_1d(self.predict(X), "X")
    if y_true.size != y_predicted.size:
      raise ValueError(
        f"X and y must have the same number of samples, "
        f"got {y_predicted.size} and {y_true.size}."
      )

    tss = np.sum((y_true - np.mean(y_true))**2)
    rss = np.sum((y_true - y_predicted)**2)

    if tss == 0:
      return 1.0 if rss == 0 else 0.0

    r_squared = 1 - (rss / tss)
    return r_squared

In [73]:
import pandas as pd
# Load the salary data; the mean salary is the cell's displayed value.
salary_df = pd.read_csv('Salary_dataset.csv')
np.mean(np.array(salary_df['Salary']))

np.float64(76004.0)

In [74]:
from sklearn.model_selection import train_test_split

# Pull the single feature and the target out of the frame as NumPy arrays.
X, y = (np.array(salary_df[column]) for column in ('YearsExperience', 'Salary'))

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  test_size=0.2,
  random_state=42,
)

# Fit the regressor on the training portion only.
model = LinearRegression()
model.fit(X_train, y_train)

Model Trained
Slope and Intercept:  9423.815323030978 24380.201479473697


In [75]:
# Evaluate on the held-out split: R-squared plus the point predictions.
score = model.score(X_test, y_test)
predictions = model.predict(X_test)

print(
  f"Actual values:    {y_test}",
  f"Predicted values: {predictions.round(2)}",
  f"Model R-squared Score: {score:.4f} ({score:.2%})",
  sep="\n",
)

Actual values:    [112636.  67939. 113813.  83089.  64446.  57190.]
Predicted values: [115791.21  71499.28 102597.87  75268.8   55478.79  60190.7 ]
Model R-squared Score: 0.9024 (90.24%)
