# Univariate Linear Regression

In [1]:
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [9]:
# Training data as raw rows of [feature, target]. The column names are attached
# when these rows are wrapped in a DataFrame in the following cell.
df = [
    [35.0, 179.0],
    [42.0, 200.0],
    [50.0, 221.0],
    [60.0, 263.0],
    [67.0, 280.0],
    [75.0, 314.0],
    [80.0, 327.0],
    [90.0, 360.0],
    [95.0, 377.0],
    [100.0, 391.0],
    [110.0, 425.0],
    [120.0, 462.0],
    [130.0, 493.0],
    [140.0, 521.0],
    [150.0, 552.0],
    [160.0, 582.0],
    [175.0, 631.0],
    [190.0, 675.0],
    [210.0, 740.0],
    [230.0, 804.0]
]


In [10]:
# Wrap the raw rows in a labelled DataFrame. Note that this rebinds `df`
# from a plain list of rows to a DataFrame.
df = pd.DataFrame(df, columns=['feature', 'target'])

In [11]:
# Preview the first rows to confirm both columns were loaded and labelled.
print(df.head())

   feature  target
0     35.0   179.0
1     42.0   200.0
2     50.0   221.0
3     60.0   263.0
4     67.0   280.0


In [12]:
# Sanity check on the table dimensions, reported as (rows, columns).
print(df.shape)

(20, 2)


In [13]:
# Separate the model input (X) from the value being predicted (y).
X, y = df['feature'], df['target']

In [14]:
# Peek at both series to confirm the split picked up the intended columns.
print(X.head())
print(y.head())

0    35.0
1    42.0
2    50.0
3    60.0
4    67.0
Name: feature, dtype: float64
0    179.0
1    200.0
2    221.0
3    263.0
4    280.0
Name: target, dtype: float64


In [16]:
# Summary statistics of the feature. ddof=0 requests the population standard
# deviation (divide by n) instead of pandas' default sample estimate (n - 1).
X_mean, X_min, X_max = X.mean(), X.min(), X.max()
X_std = X.std(ddof=0)

# The same statistics for the target.
y_mean, y_min, y_max = y.mean(), y.min(), y.max()
y_std = y.std(ddof=0)

# One line per series, in the order: mean, min, max, std.
print(f"{X_mean:.2f} {X_min:.2f} {X_max:.2f} {X_std:.2f}")
print(f"{y_mean:.2f} {y_min:.2f} {y_max:.2f} {y_std:.2f}")

115.45 35.00 230.00 55.22
439.85 179.00 804.00 177.34


In [17]:
# Gradient-descent hyperparameters: the step size applied to each gradient
# (alpha) and the number of update iterations (epochs).
alpha = 0.01
epochs = 1000

In [18]:
# Standardize the feature (z-score): centre on the mean, scale by the
# population standard deviation computed above.
X_norm = (X - X_mean) / X_std

In [19]:
# Inspect the standardized feature; the smallest inputs map to negative z-scores.
print(X_norm.head())

0   -1.456972
1   -1.330200
2   -1.185317
3   -1.004215
4   -0.877443
Name: feature, dtype: float64


In [28]:
def gradient_descent(X, y, alpha=0.01, epochs=1000):
    """Fit the line ``y ~= theta0 + theta1 * X`` with batch gradient descent.

    Both parameters start at zero and are updated simultaneously on every
    epoch using the gradients of the halved mean squared error:

        grad0 = mean(y_hat - y)
        grad1 = mean((y_hat - y) * X)

    Parameters
    ----------
    X, y : 1-D array-like of numbers (list, NumPy array, pandas Series, ...)
        Feature values and matching targets. They are converted to float
        NumPy arrays, so elements are paired by position rather than by
        pandas index label.
    alpha : float, default 0.01
        Step size applied to each gradient.
    epochs : int, default 1000
        Number of full-batch update iterations.

    Returns
    -------
    tuple[float, float]
        ``(theta0, theta1)``: the fitted intercept and slope.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different shapes, or if they are empty.
    """
    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float)

    if features.shape != targets.shape:
        raise ValueError(
            f"X and y must have the same shape, got {features.shape} and {targets.shape}"
        )
    if features.size == 0:
        raise ValueError("X and y must contain at least one sample")

    theta0, theta1 = 0.0, 0.0

    for _ in range(epochs):
        residuals = theta0 + theta1 * features - targets

        # Compute both gradients from the same residuals before touching
        # either parameter, so the update is simultaneous.
        grad0 = residuals.mean()
        grad1 = (residuals * features).mean()

        theta0 -= alpha * grad0
        theta1 -= alpha * grad1

    return float(theta0), float(theta1)

In [29]:
# Pass the configured hyperparameters explicitly so that editing `alpha` or
# `epochs` actually changes the fit, rather than being silently ignored in
# favour of the function's default arguments.
theta0, theta1 = gradient_descent(X_norm, y, alpha=alpha, epochs=epochs)

In [30]:
# Show the fitted intercept (theta0) and slope (theta1). The slope applies to
# the standardized feature, not to the raw feature values.
print(theta0, theta1)

439.83101112682647 177.29315298210062


In [31]:
def mse(X, y, params=None):
    """Return the halved mean squared error of a linear model on ``(X, y)``.

    The value computed is

        sum((theta0 + theta1 * X - y) ** 2) / (2 * n)

    i.e. half of the conventional mean squared error (note the ``2 * n``
    denominator).

    Parameters
    ----------
    X, y : 1-D array-like of numbers
        Feature values and matching targets. They are converted to float
        NumPy arrays, so elements are paired by position rather than by
        pandas index label.
    params : tuple (theta0, theta1), optional
        Intercept and slope to evaluate. When omitted, the module-level
        ``theta0`` and ``theta1`` are used, which keeps existing
        ``mse(X, y)`` calls working unchanged.

    Returns
    -------
    float
        The halved mean squared error.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different shapes, or if they are empty.
    """
    # Fall back to the notebook-level fitted parameters only when the caller
    # does not supply any explicitly.
    intercept, slope = (theta0, theta1) if params is None else params

    features = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float)

    if features.shape != targets.shape:
        raise ValueError(
            f"X and y must have the same shape, got {features.shape} and {targets.shape}"
        )
    if features.size == 0:
        raise ValueError("X and y must contain at least one sample")

    residuals = intercept + slope * features - targets
    return float(np.sum(residuals ** 2) / (2 * features.size))

In [32]:
# Evaluate the fitted line on the same data it was trained on, so this measures
# training error only.
error = mse(X_norm, y)

In [33]:
# Report the cost. mse() divides by 2n, so this is half the conventional
# mean squared error.
print(error)

7.57582866937003


In [38]:
def predict(val):
    """Predict the target for a raw (un-normalized) feature value.

    ``val`` is first standardized with the module-level ``X_mean`` and
    ``X_std``, then the line defined by the module-level ``theta0``
    (intercept) and ``theta1`` (slope) is evaluated at that z-score.
    """
    z_score = (val - X_mean) / X_std
    return theta0 + (theta1 * z_score)

In [39]:
# Example predictions for raw feature values. 150 also appears in the training
# rows (target 552.0), so the first line doubles as a spot check of the fit.
print(predict(150))
print(predict(200))

550.7651054617813
711.3064865543353


# Multivariate Linear Regression