In [87]:
import numpy as np
from linear_regression import LinearRegression
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [88]:
class LinearRegression:
    """Ordinary least-squares linear regression solved in closed form.

    The model is ``yhat = X @ w + b``. ``fit`` estimates ``w`` and ``b`` from
    the normal equations, ``predict`` applies them to new features and
    ``evaluate`` additionally reports the mean squared error.

    Attributes:
    -----------
        w : numpy array or None
            Feature weights; ``None`` until ``fit`` has been run.
        b : numpy array, scalar or None
            Intercept; ``None`` until ``fit`` has been run.
        yhat : numpy array or None
            Predictions from the most recent ``evaluate`` call.
        mse : numpy array, scalar or None
            Mean squared error from the most recent ``evaluate`` call.
    """
    def __init__(self):
        # Store the state on the instance (not in locals) so that the
        # "fit must be run first" checks in predict/evaluate can see it.
        self.w = None        # Weights
        self.b = None        # Intercept
        self.yhat = None     # Model predictions
        self.mse = None      # Mean squared error

    def fit(self, X, y):
        """
        Calculates the w and b parameters for two given matrices.

        Parameters:
        -----------
            X : numpy array
                A numpy array of dimensions N x p containing the dataset features.

            y : numpy array
                A numpy array of dimensions N x 1 (or N) containing the labels.

        Raises:
        -------
            TypeError
                If X or y is not a numpy array.
            ValueError
                If X is not 2-dimensional or X and y have a different number of rows.
            numpy.linalg.LinAlgError
                If the normal-equation matrix is singular.
        """
        # Check that the given parameters are of type ndarray.
        if not isinstance(X, np.ndarray) or not isinstance(y, np.ndarray):
            raise TypeError('X and y must be numpy arrays.')

        if X.ndim != 2:
            raise ValueError(f'X must be 2-dimensional (N x p), got {X.ndim} dimension(s).')

        # Check that the shapes are compatible for the matrix products below.
        if y.ndim == 0 or X.shape[0] != y.shape[0]:
            raise ValueError(
                f'X and y must have the same number of rows, got {X.shape} and {y.shape}.'
            )

        # Append a column of ones so the intercept is estimated as the last weight.
        X_aug = np.hstack((X, np.ones((X.shape[0], 1))))

        # Solve the normal equations (X^T X) params = X^T y directly instead of
        # forming the explicit inverse, which is less accurate numerically.
        params = np.linalg.solve(np.dot(X_aug.T, X_aug), np.dot(X_aug.T, y))

        # Split the solution into the weights and the intercept (last entry).
        self.w = params[:-1]
        self.b = params[-1]

    def predict(self, X):
        """
        Make predictions on new X features. 'fit' function must be run first
        so that w and b parameters are calculated.

        Parameters:
        -----------
            X : numpy array
                Features with the same number of columns as used in 'fit'.

        Returns:
        --------
            numpy array with the predictions X @ w + b.

        Raises:
        -------
            ValueError
                If the model has not been fitted yet.
            TypeError
                If X is not a numpy array.
        """
        if self.w is None or self.b is None:
            raise ValueError('w and/or b are None. "fit" function must be run first.')

        if not isinstance(X, np.ndarray):
            raise TypeError('X must be a numpy array.')

        # np.dot is required here: this is a matrix product of the features
        # with the weights, not an element-wise multiplication.
        return np.dot(X, self.w) + self.b

    def evaluate(self, X, y):
        """
        Predict on X and compute the mean squared error against y.

        The results are also stored in the ``yhat`` and ``mse`` attributes.

        Parameters:
        -----------
            X : numpy array
                Features to predict on.

            y : numpy array
                True labels, with the same shape as the predictions.

        Returns:
        --------
            (yhat, mse) : the predictions and their mean squared error.

        Raises:
        -------
            ValueError
                If the model has not been fitted, X is empty, or y does not
                have the same shape as the predictions.
        """
        if self.w is None or self.b is None:
            raise ValueError('w and/or b are None. "fit" function must be run first.')

        yhat = self.predict(X)
        y = np.asarray(y)

        # Without this check a (N, 1) vs (N,) mismatch would silently
        # broadcast to an N x N residual matrix and give a meaningless "MSE".
        if yhat.shape != y.shape:
            raise ValueError(
                f'y has shape {y.shape} but the predictions have shape {yhat.shape}.'
            )

        if len(X) == 0:
            raise ValueError('Cannot compute the MSE of an empty dataset.')

        residual = yhat - y
        self.yhat = yhat
        # The dot product of the residual with itself is the sum of squared errors.
        self.mse = 1 / len(X) * np.dot(residual.T, residual)

        return self.yhat, self.mse

    def print_vars(self):
        """Print the fitted parameters and the last evaluation results."""
        print(f'W: {self.w}')
        print(f'b: {self.b}')
        print(f'yhat: {self.yhat}')
        print(f'MSE: {self.mse}')


In [89]:
# Toy training set sampled from the line y = 2x + 1, which the model should recover.
x_train = np.arange(2, 6).reshape(-1, 1)
y_train = 2 * x_train + 1

In [90]:
# Fit a model on the toy data; the expected solution is w = 2, b = 1.
lr2 = LinearRegression()
lr2.fit(x_train, y_train)

In [91]:
# Two held-out points that lie on the same line y = 2x + 1.
x_test = np.arange(6, 8).reshape(-1, 1)
y_test = 2 * x_test + 1

In [92]:
# Predict on the held-out toy points and compute the mean squared error.
yhat, mse = lr2.evaluate(x_test, y_test)

In [93]:
# NOTE(review): evaluate() already calls predict() and returns these same
# predictions, so this call recomputes values the previous cell produced.
yhat = lr2.predict(x_test)

In [94]:
# Show the toy-set predictions.
yhat

array([[13.],
       [15.]])

In [95]:
# Show the predictions alongside the MSE returned by evaluate().
yhat, mse

(array([[13.],
        [15.]]),
 array([[1.57772181e-28]]))

In [96]:
# Print the fitted weights and intercept plus the stored predictions and MSE.
lr2.print_vars()

W: [[2.]]
b: [1.]
yhat: [[13.]
 [15.]]
MSE: [[1.57772181e-28]]


In [97]:
# Fresh model instance, fitted on the California housing data further down.
lr = LinearRegression()

In [98]:
# Load the California housing data as (features, target) arrays,
# downloading it into data/ when it is not already there.
X, y = fetch_california_housing(
    data_home='data/',
    download_if_missing=True,
    return_X_y=True,
)

In [99]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [106]:
# Sanity check: row counts of the train/test features and labels.
tuple(len(part) for part in (X_train, X_test, y_train, y_test))

(14448, 6192, 14448, 6192)

In [100]:
# Fit the model on the training split.
lr.fit(X_train, y_train)

In [101]:
# Predict on the held-out test split and display the predictions.
yhat = lr.predict(X_test)
yhat

array([0.72604907, 1.76743383, 2.71092161, ..., 2.07465531, 1.57371395,
       1.82744133])

In [102]:
# evaluate() repeats the prediction and also returns the test-set MSE; display both.
yhat, mse = lr.evaluate(X_test, y_test)
yhat, mse

(array([0.72604907, 1.76743383, 2.71092161, ..., 2.07465531, 1.57371395,
        1.82744133]),
 0.5305677824770385)

In [103]:
# Compare the predictions with the true targets.
# Only a short preview is printed: the test split has thousands of rows and
# printing every one of them floods the notebook output.
PREVIEW_ROWS = 20
for pred, true in zip(yhat[:PREVIEW_ROWS], y_test[:PREVIEW_ROWS]):
    print(f'Model prediction: {pred} | Actual: {true}')

# Regression outputs are floats, so exact equality (np.equal) is practically
# never true; compare within a tolerance instead. Kept as a list with one
# flag per test row.
is_equal_result = list(np.isclose(yhat, y_test))

Model prediction: 0.7260490726149982 | Actuall: 0.477
Model prediction: 1.7674338262315814 | Actuall: 0.458
Model prediction: 2.7109216111081693 | Actuall: 5.00001
Model prediction: 2.8351472701890117 | Actuall: 2.186
Model prediction: 2.606958065441212 | Actuall: 2.78
Model prediction: 2.0107385611444926 | Actuall: 1.587
Model prediction: 2.6406738645218297 | Actuall: 1.982
Model prediction: 2.167061608337711 | Actuall: 1.575
Model prediction: 2.740120557303598 | Actuall: 3.4
Model prediction: 3.9036152554894628 | Actuall: 4.466
Model prediction: 0.9444186089870215 | Actuall: 1.232
Model prediction: 1.9012370716029636 | Actuall: 2.539
Model prediction: 1.7635338673914518 | Actuall: 2.151
Model prediction: 2.2528041910462306 | Actuall: 2.205
Model prediction: 2.5379897344644746 | Actuall: 2.198
Model prediction: 1.9221752938484258 | Actuall: 1.362
Model prediction: 2.387973566920465 | Actuall: 1.784
Model prediction: 2.0122883060842085 | Actuall: 1.875
Model prediction: 2.2241117222878