In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score

import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("infor_Data.csv")

In [3]:
# Splitting the DataFrame into training and testing sets with a 70-30 split ratio
# and specifying a random seed for reproducibility
train, test = train_test_split(df, test_size = 0.3, random_state=40)

# Split the data again to simplify the problem with curr practice
x_train = train.iloc[:, :-1] # All columns except the last one
y_train = train.iloc[:, -1]  # The last column

x_test = test.iloc[:, :-1]
y_test = test.iloc[:, -1]
print(x_test.shape)

(2411, 9)


In [4]:
# Create Linear Regression model
model = LinearRegression()

# Fit model to data
model.fit(x_train, y_train)

# Prediction
y_pred = model.predict(x_test)
y_pred = y_pred.reshape(-1, 1)
print(y_pred)


[[220.70635726]
 [176.78106211]
 [208.00422326]
 ...
 [243.69894981]
 [189.44562637]
 [ 91.24957496]]


In [15]:
import numpy as np
class Multiple_LinearRegression:
    def __init__(self, learning_rate = 0.0000001, n_iter = 1000):
        """
        This is a numpy array that :
        each column is correspond to each feature
        each row is corresponf to each observation
        """
        self.weights = None
        self.bias = None
        self.learning_rate = learning_rate
        self.n_iter = n_iter

    def compute(self, features, labels):
        """
        features (obs, fea)
        labels (obs, 1)
        """
        n_observation, n_feature = features.shape # [obs, fea]
        self.weights = np.ones(n_feature).reshape(-1, 1) # (fea, 1)
        self.bias = 0
                
        for iter in range(self.n_iter):
            # Calculate the predicted values
            labels_pred = np.dot(features, self.weights) + self.bias # (obs, 1)
            labels = labels.reshape(n_observation, 1)
                        
            # Compute the gradients
            dw = np.dot(features.T, (labels_pred - labels)) / n_observation # (fea, 1)
            db = np.sum(labels_pred - labels) / n_observation 
            
            # Update weights and bias
            self.weights -= dw * self.learning_rate # Error cause wrong shape, expect (fea, 1) but return (fea, obs)
            self.bias -= db * self.learning_rate

    def fit(self, features, labels):
        # Handle input parameters
        # For features
        features = features.values
        # For lable
        labels = labels.values
         
        self.compute(features, labels)
    
    """
    The @property decorator in Python is used to define "getter" methods for class attributes. 
    It allows you to access an attribute like a method, but without using parentheses.
    """
    @property
    def coef(self):
        return self.weights
    
    @property
    def intercept(self):
        return self.bias
    
    def predict(self, features):
        features = features.values
        return np.dot(features, self.weights) + self.bias
    
    def R_Squared(label_true, label_pred):
        """
        Compute R-squared (coefficient of determination) for a linear regression model.
        
        Parameters:
        - label_true: Array-like, true values of the dependent variable.
        - label_pred: Array-like, predicted values of the dependent variable from the model.

        Returns:
        - R-squared value, a float between 0 and 1.
        """
        # Calculate the mean of the true values
        mean_label_true = np.mean(label_true)
        # Calculate the total sum of squares 
        ss_fit = np.sum((label_pred - mean_label_true)**2)
        # Calculate the residual sum of squares
        ss_mean = np.sum((label_true - mean_label_true)**2)
        # Calculate R-squared
        r_squared = 1 - (ss_fit/ss_mean)

        return r_squared
    
    def MSE(self, label_true, label_pred):
        """
        Compute the Mean Squared Error (MSE) for a linear regression model.

        Parameters:
        - label_true: Array-like, true values of the dependent variable.
        - label_pred: Array-like, predicted values of the dependent variable from the model.

        Returns:
        - Mean Squared Error (MSE), a non-negative float.
        """
        label_true = label_true.values
        mse = np.mean((label_pred - label_true)**2)

        return mse
    
    def RMSE(self, label_true, label_pred):
        """
        Compute the Root Mean Squared Error (RMSE) for a linear regression model.

        Parameters:
        - label_true: Array-like, true values of the dependent variable.
        - label_pred: Array-like, predicted values of the dependent variable from the model.

        Returns:
        - Root Mean Squared Error (RMSE), a non-negative float.
        """
        rmse = np.sqrt(self.MSE(label_true, label_pred))

        return rmse

    def MAE(self, label_true, label_pred):
        """
        Compute the Mean Absolute Error (MAE) for a linear regression model.

        Parameters:
        - label_true: Array-like, true values of the dependent variable.
        - label_pred: Array-like, predicted values of the dependent variable from the model.

        Returns:
        - Mean Absolute Error (MAE), a non-negative float.
        """
        mae = np.mean(np.abs(label_pred - label_true))

        return mae
    


In [16]:
"""Manual"""
model_Manual = Multiple_LinearRegression()

# Compute coeficient
model_Manual.fit(x_train, y_train)


# Predicttion
y_predManual = model_Manual.predict(x_test)

print(y_predManual)


[[218.08690473]
 [206.64906286]
 [178.69563439]
 ...
 [183.2193274 ]
 [214.7314077 ]
 [149.24244768]]


In [17]:
print(y_test.shape)

(2411,)


In [18]:
print(model_Manual.MSE(y_test, y_predManual))

4225.478259640543
