In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.preprocessing import StandardScaler as scaler
from sklearn.model_selection import train_test_split as tts

---
# **Linear regression model**

In [2]:
class LinearRegression():
    """Linear regression fitted with full-batch gradient descent.

    The model is ``y_hat = X @ weights + bias``. Each iteration moves the
    parameters against the gradient of the halved mean squared error,
    computed over every training row.

    Parameters
    ----------
    lr : float, optional
        Step size applied to each gradient update.
    n_iterations : int, optional
        Number of full-batch gradient steps performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has run.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has run.
    """

    def __init__(self, lr = 0.01, n_iterations = 100):
        self.lr = lr
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Feature matrix. Anything ``np.asarray`` can turn into a 2-D
            float array is accepted.
        y : array-like with n_samples elements
            Targets. A 1-D vector or a single-column/row 2-D array is
            accepted and flattened.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or ``y`` does not hold exactly
            one value per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features), got %d-D" % X.ndim
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.size != n_samples:
            raise ValueError(
                "y has %d values but X has %d rows" % (y.size, n_samples)
            )
        # Flatten so a column vector cannot broadcast the residual to (n, n).
        y = y.reshape(n_samples)

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iterations):
            # The residual is shared by both gradients; compute it once.
            residual = np.dot(X, self.weights) + self.bias - y

            dw = 1/n_samples*np.dot(X.T, residual)
            db = 1/n_samples*np.sum(residual)

            self.weights -= self.lr*dw
            self.bias -= self.lr*db

    def predict(self, X):
        """Return ``X @ weights + bias`` for the given feature rows.

        Raises
        ------
        RuntimeError
            If called before ``fit`` has set the parameters.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("LinearRegression.predict called before fit")
        return np.dot(X, self.weights) + self.bias

---
## **Usage Example**

In [3]:
def dataloader(path='../data/winequality-red.csv'):
    """Read the wine-quality CSV into a DataFrame.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. The default is the relative path this
        notebook has always read, so no-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The table exactly as parsed by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(path)

def preprocessor(df, target='quality', random_state=42):
    """Split a DataFrame into standardized features and a target vector.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; every column other than ``target`` is used as a feature.
    target : str, optional
        Name of the column to predict. Defaults to ``'quality'``.
    random_state : int, optional
        Seed passed to the row shuffle. Defaults to 42.

    Returns
    -------
    X : numpy.ndarray
        Feature matrix after standardization and shuffling.
    y : numpy.ndarray
        Target values, shuffled in step with ``X``.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``.
    """
    y = df[target].to_numpy()
    X = df.drop(columns=[target]).to_numpy()

    # NOTE(review): the scaler is fitted on every row, so if the caller later
    # holds out a test split, those rows have already influenced the
    # standardization statistics. Fit the scaler on the training rows only
    # when an unbiased test estimate matters.
    X = scaler().fit_transform(X)

    # Shuffle features and targets together so rows stay aligned.
    X, y = shuffle(X, y, random_state=random_state)

    return X, y

In [4]:
if __name__=='__main__':
    # Load the CSV and turn it into standardized, shuffled arrays.
    X, y = preprocessor(dataloader())

    print("Feature Shape:", X.shape)
    print("Target Shape:", y.shape)

    # Seeded hold-out split using the splitter's default proportions.
    train_X, test_X, train_y , test_y = tts(X, y, random_state=42)

    print("Number of examples in training set:", train_X.shape[0])
    print("Number of examples in test set:", test_X.shape[0])
    print()

    # Fit the gradient-descent model on the training rows only.
    lr = LinearRegression(lr = 0.1, n_iterations=100)
    lr.fit(train_X, train_y)

    train_pred = lr.predict(train_X)
    test_pred = lr.predict(test_X)

    # Report the same three metrics for each split; a blank line separates
    # the two reports but none follows the last one.
    reports = (
        ("Training Error:", train_y, train_pred),
        ("Testing Error:", test_y, test_pred),
    )
    for position, (heading, actual, predicted) in enumerate(reports):
        if position:
            print()
        squared_error = mse(actual, predicted)
        print(heading)
        print('MSE:', squared_error)
        print('RMSE:', np.sqrt(squared_error))
        print('MAE:', mae(actual, predicted))

Feature Shape: (1599, 11)
Target Shape: (1599,)
Number of examples in training set: 1199
Number of examples in test set: 400

Training Error:
MSE: 0.41797268794431747
RMSE: 0.6465080726056849
MAE: 0.4966631106333683

Testing Error:
MSE: 0.41830771675530815
RMSE: 0.6467671271449317
MAE: 0.5071089775338561
