In [1]:
import numpy as np
import pandas as pd

#### Linear Regression Simple Form

In [2]:
# Assume we have only three features
xi = [453, 11, 86]

# Bias term, the baseline prediction value when all inputs are zero
w0 = 7.17

# Assume we have weights for all three features
w = [0.01, 0.04, 0.002]


def linear_regression(xi):
    """A simple linear regression function

    Args:
        xi (list): List of numeric values representing an instance in a dataset.

    Returns:
        float: Prediction value.
    """
    n = len(xi)
    
    pred = w0
    w = [0.01, 0.04, 0.002]
    for j in range(n):
        pred = pred + w[j] * xi[j]
    return pred

In [3]:
linear_regression(xi)

12.312

#### Linear Regression Vector Form

In [4]:
def dot(xi, w):
    """A dot function, which will multiply two vectors.

    Args:
        xi (list): List of features.
        w (list): List of corresponding weights.

    Returns:
        float: Prediction value.
    """
    n = len(xi)
    
    res = 0.0
    
    for j in range(n):
        res = res + xi[j] * w[j]
    
    return res

In [5]:
w_new = [w0] + w

In [6]:
w_new

[7.17, 0.01, 0.04, 0.002]

In [7]:
def linear_regression(xi):
    """A linear regression function which will use our custom dot function.

    Args:
        xi (list): Feature vector.

    Returns:
        float: Prediction value.
    """
    xi = [1] + xi
    return dot(xi, w_new)

In [8]:
linear_regression(xi)

12.312

##### Let's write it for multiple instances

In [9]:
w0 = 7.17
w = [0.01, 0.04, 0.002]
w_new = [w0] + w

In [10]:
# Assume there are three instances
x1  = [1, 148, 24, 1385]
x2  = [1, 132, 25, 2031]
x3 = [1, 453, 11, 86]

X = [x1, x2, x3]
X = np.array(X)
X

array([[   1,  148,   24, 1385],
       [   1,  132,   25, 2031],
       [   1,  453,   11,   86]])

In [11]:
def linear_regression(X):
    """Linear Regression using numpy matrix dot product.

    Args:
        X (np.array): Input feature matrix.

    Returns:
        np.array: Prediction values. 
    """
    return X.dot(w_new)

In [12]:
linear_regression(X)

array([12.38 , 13.552, 12.312])

#### Training a linear regression model (calculating weights based in training data)

In [13]:
def train_linear_regression(X, y):
    pass

In [14]:
def train_linear_regression(X, y):
    """Linear regression function which will calculate weights based on,
    input features and label.

    Args:
        X (np.array): Feature matrix.
        y (list or np.array): Labels.

    Returns:
        tuple(float, np.array): Bias and Weights.
    """
    ones = np.ones(X.shape[0])
    X = np.column_stack([ones, X])

    XTX = X.T.dot(X)
    XTX_inv = np.linalg.inv(XTX)
    w_full = XTX_inv.dot(X.T).dot(y)
    
    return w_full[0], w_full[1:]

In [15]:
from sklearn.datasets import load_diabetes

In [16]:
data_bunch = load_diabetes(as_frame=True)
df = data_bunch["data"]
df["target"] = data_bunch["target"]

In [17]:
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [18]:
w_trained_0, w_trained = train_linear_regression(data_bunch["data"].to_numpy(), data_bunch["target"].to_numpy())

In [19]:
w_trained_0

np.float64(-4.146127885462647e-13)

In [20]:
w_trained

array([ 6.74305056e-12, -3.69482223e-13, -1.36162193e-11, -2.30615527e-12,
        4.31583658e-11, -2.91464630e-11, -2.42224019e-11, -1.36530787e-11,
       -9.94049287e-12, -8.11795076e-13,  1.00000000e+00])

In [21]:
y_pred = w_trained_0 + data_bunch["data"].to_numpy().dot(w_trained)

In [22]:
y_pred[0]

np.float64(150.99999999999943)

In [23]:
y_pred[1]

np.float64(75.0000000000003)

#### Linear Regression using scikit-learn

In [24]:
from sklearn.linear_model import LinearRegression

In [25]:
lm = LinearRegression()

In [26]:
lm.fit(data_bunch["data"].to_numpy(), data_bunch["target"].to_numpy())

In [27]:
y_pred_sk = lm.predict(data_bunch["data"].to_numpy())

In [28]:
y_pred_sk[0]

np.float64(151.0)

In [29]:
y_pred_sk[1]

np.float64(74.99999999999999)