<a href="https://colab.research.google.com/github/susan291-gifs/SussieAssignment/blob/main/LinearRegression_asspgnment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

### Problem 1

In [None]:
class ScratchLinearRegression():
    """
    Scratch implementation of linear regression trained by batch gradient descent.

    Parameters
    ----------
    num_iter : int
      Number of iterations
    lr : float
      Learning rate
    no_bias : bool
      True if no bias term is included
    verbose : bool
      True to output the learning process

    Attributes
    ----------
    self.coef_ : ndarray, shape (n_features,) when no_bias is True,
                 otherwise (n_features + 1,) with the bias weight first
      Parameters (created by fit)
    self.loss : ndarray, shape (self.iter,)
      Record losses on training data
    self.val_loss : ndarray, shape (self.iter,)
      Record loss on validation data (stays zero when no validation data is given)
    """

    def __init__(self, num_iter, lr, no_bias, verbose):
        self.iter = num_iter
        self.lr = lr
        self.no_bias = no_bias
        self.verbose = verbose
        self.loss = np.zeros(self.iter)
        self.val_loss = np.zeros(self.iter)

    def _design_matrix(self, X):
        """
        Return X with a leading column of ones unless no_bias is set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
          Raw features (without a bias column)

        Returns
        -------
        ndarray, shape (n_samples, n_features) or (n_samples, n_features + 1)
        """
        X = np.asarray(X)
        if self.no_bias:
            return X
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def _linear_hypothesis(self, X):
        """
        Compute the linear hypothesis function for raw features.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
          Raw features; the bias column is added here when no_bias is False

        Returns
        -------
        h_theta : ndarray, shape (n_samples,)
          Estimated result by the linear hypothesis function
        """
        return np.dot(self._design_matrix(X), self.coef_)

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Learn linear regression. If validation data is entered, the loss for it
        is also calculated for each iteration.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Features of training data
        y : array-like, shape (n_samples, ) or (n_samples, 1)
            Correct answer value of training data
        X_val : array-like, shape (n_samples, n_features)
            Features of verification data
        y_val : array-like, shape (n_samples, ) or (n_samples, 1)
            Correct value of verification data
        """
        X = np.asarray(X)
        # Flatten the target so a column vector cannot broadcast against the
        # 1-D prediction into an (n, n) error matrix.
        y = np.asarray(y).ravel()

        if self.verbose:
            print(f'Training on {X.shape[0]} samples')

        # Build the design matrix exactly once. The previous code added the
        # bias column here and again inside _linear_hypothesis, which made the
        # matrix one column wider than coef_ and crashed np.dot.
        X_design = self._design_matrix(X)

        has_val = X_val is not None and y_val is not None
        if has_val:
            X_val_design = self._design_matrix(X_val)
            y_val = np.asarray(y_val).ravel()

        self.coef_ = np.random.randn(X_design.shape[1])

        for i in range(self.iter):
            error = np.dot(X_design, self.coef_) - y

            gradient = np.dot(X_design.T, error) / X_design.shape[0]

            self.coef_ -= self.lr * gradient

            # The training loss uses the error measured before this step's update.
            self.loss[i] = np.mean(error ** 2)

            if has_val:
                # The validation loss is evaluated with the freshly updated parameters.
                error_val = np.dot(X_val_design, self.coef_) - y_val

                self.val_loss[i] = np.mean(error_val ** 2)

            if self.verbose:
                print(f'Iteration {i+1}, Training Loss: {self.loss[i]}')
                if has_val:
                    print(f'Iteration {i+1}, Validation Loss: {self.val_loss[i]}')

    def predict(self, X):
        """
        Estimate using linear regression.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            sample

        Returns
        -------
        ndarray, shape (n_samples,)
            Estimated result by linear regression
        """
        return self._linear_hypothesis(X)

### Problem 2

In [None]:
class ScratchLinearRegression():
    """
    Scratch implementation of linear regression trained by batch gradient descent.

    Parameters
    ----------
    num_iter : int
      Number of iterations
    lr : float
      Learning rate
    no_bias : bool
      True if no bias term is included
    verbose : bool
      True to output the learning process

    Attributes
    ----------
    self.coef_ : ndarray, shape (n_features,) when no_bias is True,
                 otherwise (n_features + 1,) with the bias weight first
      Parameters (created by fit)
    self.loss : ndarray, shape (self.iter,)
      Record losses on training data
    self.val_loss : ndarray, shape (self.iter,)
      Record loss on validation data (stays zero when no validation data is given)
    """

    def __init__(self, num_iter, lr, no_bias, verbose):
        self.iter = num_iter
        self.lr = lr
        self.no_bias = no_bias
        self.verbose = verbose
        self.loss = np.zeros(self.iter)
        self.val_loss = np.zeros(self.iter)

    def _design_matrix(self, X):
        """
        Return X with a leading column of ones unless no_bias is set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
          Raw features (without a bias column)

        Returns
        -------
        ndarray, shape (n_samples, n_features) or (n_samples, n_features + 1)
        """
        X = np.asarray(X)
        if self.no_bias:
            return X
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def _linear_hypothesis(self, X):
        """
        Compute the linear hypothesis function for raw features.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
          Raw features; the bias column is added here when no_bias is False

        Returns
        -------
        h_theta : ndarray, shape (n_samples,)
          Estimated result by the linear hypothesis function
        """
        return np.dot(self._design_matrix(X), self.coef_)

    def _gradient_descent(self, X, error):
        """
        Update parameters in place with one gradient-descent step.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_params)
          Design matrix whose column count matches self.coef_
          (i.e. it already contains the bias column when one is used)
        error : ndarray, shape (n_samples,)
          Error between predicted and actual values
        """
        gradient = np.dot(X.T, error) / X.shape[0]

        self.coef_ -= self.lr * gradient

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Learn linear regression. If validation data is entered, the loss for it
        is also calculated for each iteration.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Features of training data
        y : array-like, shape (n_samples, ) or (n_samples, 1)
            Correct answer value of training data
        X_val : array-like, shape (n_samples, n_features)
            Features of verification data
        y_val : array-like, shape (n_samples, ) or (n_samples, 1)
            Correct value of verification data
        """
        X = np.asarray(X)
        # Flatten the target so a column vector cannot broadcast against the
        # 1-D prediction into an (n, n) error matrix.
        y = np.asarray(y).ravel()

        if self.verbose:
            print(f'Training on {X.shape[0]} samples')

        # Build the design matrix exactly once. The previous code added the
        # bias column here and again inside _linear_hypothesis, which made the
        # matrix one column wider than coef_ and crashed np.dot.
        X_design = self._design_matrix(X)

        has_val = X_val is not None and y_val is not None
        if has_val:
            X_val_design = self._design_matrix(X_val)
            y_val = np.asarray(y_val).ravel()

        self.coef_ = np.random.randn(X_design.shape[1])

        for i in range(self.iter):
            error = np.dot(X_design, self.coef_) - y

            self._gradient_descent(X_design, error)

            # The training loss uses the error measured before this step's update.
            self.loss[i] = np.mean(error ** 2)

            if has_val:
                # The validation loss is evaluated with the freshly updated parameters.
                error_val = np.dot(X_val_design, self.coef_) - y_val

                self.val_loss[i] = np.mean(error_val ** 2)

            if self.verbose:
                print(f'Iteration {i+1}, Training Loss: {self.loss[i]}')
                if has_val:
                    print(f'Iteration {i+1}, Validation Loss: {self.val_loss[i]}')

    def predict(self, X):
        """
        Estimate using linear regression.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            sample

        Returns
        -------
        ndarray, shape (n_samples,)
            Estimated result by linear regression
        """
        # The original body was cut off mid-identifier and returned nothing.
        return self._linear_hypothesis(X)


### Problem 3

In [None]:
class ScratchLinearRegression():
    """
    Scratch implementation of linear regression trained by batch gradient descent.

    Parameters
    ----------
    num_iter : int
      Number of iterations
    lr : float
      Learning rate
    no_bias : bool
      True if no bias term is included
    verbose : bool
      True to output the learning process

    Attributes
    ----------
    self.coef_ : ndarray, shape (n_features,) when no_bias is True,
                 otherwise (n_features + 1,) with the bias weight first
      Parameters (created by fit)
    self.loss : ndarray, shape (self.iter,)
      Record losses on training data
    self.val_loss : ndarray, shape (self.iter,)
      Record loss on validation data (stays zero when no validation data is given)
    """

    def __init__(self, num_iter, lr, no_bias, verbose):
        self.iter = num_iter
        self.lr = lr
        self.no_bias = no_bias
        self.verbose = verbose
        self.loss = np.zeros(self.iter)
        self.val_loss = np.zeros(self.iter)

    def _design_matrix(self, X):
        """
        Return X with a leading column of ones unless no_bias is set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
          Raw features (without a bias column)

        Returns
        -------
        ndarray, shape (n_samples, n_features) or (n_samples, n_features + 1)
        """
        X = np.asarray(X)
        if self.no_bias:
            return X
        return np.hstack((np.ones((X.shape[0], 1)), X))

    def _linear_hypothesis(self, X):
        """
        Compute the linear hypothesis function for raw features.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
          Raw features; the bias column is added here when no_bias is False

        Returns
        -------
        h_theta : ndarray, shape (n_samples,)
          Estimated result by the linear hypothesis function
        """
        return np.dot(self._design_matrix(X), self.coef_)

    def _gradient_descent(self, X, error):
        """
        Update parameters in place with one gradient-descent step.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_params)
          Design matrix whose column count matches self.coef_
          (i.e. it already contains the bias column when one is used)
        error : ndarray, shape (n_samples,)
          Error between predicted and actual values
        """
        gradient = np.dot(X.T, error) / X.shape[0]

        self.coef_ -= self.lr * gradient

    def fit(self, X, y, X_val=None, y_val=None):
        """
        Learn linear regression. If validation data is entered, the loss for it
        is also calculated for each iteration.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Features of training data
        y : array-like, shape (n_samples, ) or (n_samples, 1)
            Correct answer value of training data
        X_val : array-like, shape (n_samples, n_features)
            Features of verification data
        y_val : array-like, shape (n_samples, ) or (n_samples, 1)
            Correct value of verification data
        """
        X = np.asarray(X)
        # Flatten the target so a column vector cannot broadcast against the
        # 1-D prediction into an (n, n) error matrix.
        y = np.asarray(y).ravel()

        if self.verbose:
            print(f'Training on {X.shape[0]} samples')

        # Build the design matrix exactly once. The previous code added the
        # bias column here and again inside _linear_hypothesis, which made the
        # matrix one column wider than coef_ and crashed np.dot.
        X_design = self._design_matrix(X)

        has_val = X_val is not None and y_val is not None
        if has_val:
            X_val_design = self._design_matrix(X_val)
            y_val = np.asarray(y_val).ravel()

        self.coef_ = np.random.randn(X_design.shape[1])

        for i in range(self.iter):
            error = np.dot(X_design, self.coef_) - y

            self._gradient_descent(X_design, error)

            # The training loss uses the error measured before this step's update.
            self.loss[i] = np.mean(error ** 2)

            if has_val:
                # The validation loss is evaluated with the freshly updated parameters.
                error_val = np.dot(X_val_design, self.coef_) - y_val

                self.val_loss[i] = np.mean(error_val ** 2)

            if self.verbose:
                print(f'Iteration {i+1}, Training Loss: {self.loss[i]}')
                if has_val:
                    print(f'Iteration {i+1}, Validation Loss: {self.val_loss[i]}')

    def predict(self, X):
        """
        Estimate using linear regression.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            sample

        Returns
        -------
        h_theta : ndarray, shape (n_samples,)
            Estimated result by linear regression
        """
        return self._linear_hypothesis(X)


### Problem 4

In [None]:
def MSE(y_pred, y):
    """
    Mean squared error between estimates and correct values.

    Parameters
    ----------
    y_pred : ndarray, shape (n_samples,)
      Estimated value
    y : ndarray, shape (n_samples,)
      Correct answer value

    Returns
    ----------
    mse : float (NumPy scalar)
      Average of the squared element-wise differences
    """
    squared_residuals = np.square(y_pred - y)
    return np.mean(squared_residuals)


### Problem 5

In [None]:
def _compute_cost(self, X, y):
    """
    Compute the cost function for linear regression.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
        Features of training data.
    y : numpy.ndarray, shape (n_samples,)
        Correct answer value of training data.

    Returns
    -------
    cost : numpy.float
        Value of the cost function.
    """

    m = len(y)


    h_theta = self._linear_hypothesis(X)


    cost = np.sum((h_theta - y) ** 2) / (2 * m)

    return cost


### Problem 6

In [None]:
# Load the training table from the mounted Google Drive folder.
train = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/train.csv')

# Two explanatory columns and the target column, as plain NumPy arrays.
X = train[['GrLivArea', 'YearBuilt']].values
y = train['SalePrice'].values

# Standardize features and target with SEPARATE scalers. Re-fitting a single
# scaler on y would overwrite the feature statistics, leaving `scaler` unable
# to transform new feature rows. With `y_scaler`, predictions can also be
# mapped back to the original target units via `y_scaler.inverse_transform`.
# NOTE(review): both scalers are fitted on the full table before the split,
# so validation rows contribute to the scaling statistics.
scaler = StandardScaler()
y_scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()

# Hold out 20% of the rows for validation; the seed is fixed for reproducibility.
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

In [None]:
class ScratchLinearRegression():
    """
    Minimal gradient-descent linear regression without a bias term.

    Parameters
    ----------
    num_iter : int
      Number of gradient-descent iterations
    lr : float
      Learning rate

    Attributes
    ----------
    self.theta : ndarray, shape (n_features,)
      Weights, created by fit and initialised to zero
    """

    def __init__(self, num_iter=1000, lr=0.01):
        self.lr = lr
        self.num_iter = num_iter

    def _linear_hypothesis(self, X):
        """Return the product of X and the current weights."""
        prediction = np.dot(X, self.theta)
        return prediction

    def fit(self, X, y):
        """
        Run num_iter full-batch gradient-descent steps starting from zero weights.

        Parameters
        ----------
        X : ndarray, shape (n_samples, n_features)
            Features of training data
        y : ndarray, shape (n_samples, )
            Correct answer value of training data
        """
        n_features = X.shape[1]
        self.theta = np.zeros(n_features)
        n_samples = len(y)

        step = 0
        while step < self.num_iter:
            residual = self._linear_hypothesis(X) - y
            # Average gradient of the squared error over the batch.
            self.theta -= self.lr * (np.dot(X.T, residual) / n_samples)
            step += 1

    def predict(self, X):
        """Return predictions for X using the fitted weights."""
        return self._linear_hypothesis(X)

# Train the scratch model and score it on the validation split.
scratch_lr = ScratchLinearRegression()
scratch_lr.fit(X_train, y_train)

y_pred_scratch = scratch_lr.predict(X_val)

mse_scratch = mean_squared_error(y_val, y_pred_scratch)

# Train the scikit-learn reference model on the same split. `mse_sklearn` was
# printed below without ever being computed, which raised NameError.
# Note: LinearRegression fits an intercept by default, while the scratch class
# in this cell has no bias term, so the two scores need not match exactly.
sklearn_lr = LinearRegression()
sklearn_lr.fit(X_train, y_train)

y_pred_sklearn = sklearn_lr.predict(X_val)

mse_sklearn = mean_squared_error(y_val, y_pred_sklearn)

print("Mean Squared Error (Scratch):", mse_scratch)
print("Mean Squared Error Comparison:")
print("Scratch Implementation:", mse_scratch)
print("Scikit-learn Implementation:", mse_sklearn)

Mean Squared Error (Scratch): 0.39560888091490665
Mean Squared Error Comparison:
Scratch Implementation: 0.39560888091490665
Scikit-learn Implementation: 0.39569344443628673


### Problem 7

In [None]:
def plot_learning_curve(train_loss, val_loss):
    """
    Draw training and validation loss against the iteration number.

    Parameters
    ----------
    train_loss : ndarray
        Training loss values recorded during training.
    val_loss : ndarray
        Validation loss values recorded during training.
    """
    # The x axis is 1-based and its length follows train_loss.
    steps = np.arange(1, len(train_loss) + 1)

    curves = (
        (train_loss, 'Training Loss', 'blue'),
        (val_loss, 'Validation Loss', 'orange'),
    )

    plt.figure(figsize=(10, 6))
    for values, curve_label, curve_color in curves:
        plt.plot(steps, values, label=curve_label, color=curve_color)
    plt.title('Learning Curve')
    plt.xlabel('Iterations')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.show()