In [18]:
# Import necessary libraries
import numpy as np  # NumPy is a library for numerical operations
import matplotlib.pyplot as plt  # Matplotlib is a library for data visualization

In [21]:
class LinearRegCustom:
    """
    A custom implementation of simple (single-feature) linear regression
    trained with batch gradient descent.

    The fitted model is ``y_hat = X * coef_ + intercept_``.

    Parameters:
    - lr (float): Learning rate for gradient descent.
    - n_iters (int): Number of gradient-descent iterations run by ``fit``.

    Attributes:
    - coef_ (float): The coefficient (slope) of the linear relationship. Set by ``fit``.
    - intercept_ (float): The intercept term (bias) in the linear model. Set by ``fit``.
    """

    # Constructor
    def __init__(self, lr=0.1, n_iters=500):
        """
        Initialize the LinearRegCustom class.

        Parameters:
        - lr (float): Learning rate for gradient descent.
        - n_iters (int): Number of gradient-descent iterations performed by ``fit``.
        """
        self.lr = lr
        self.n_iters = n_iters

    # Training function, i.e., fit
    def fit(self, X, y):
        """
        Fit the model to the training data.

        Parameters:
        - X (numpy.ndarray): Feature variable (independent variable) values,
          either 1-D with shape (n,) or a column with shape (n, 1).
        - y (numpy.ndarray): Target variable (dependent variable) values.

        Returns:
        - errors (list): Mean squared error after each iteration during training.

        Raises:
        - ValueError: If X and y do not contain the same number of samples.
        """
        X = np.asarray(X)
        if X.ndim <= 1:
            # A 1-D X would broadcast against the (n, 1) target into an (n, n)
            # matrix and silently corrupt the gradients, so force a column.
            X = X.reshape(-1, 1)
        self._X = X  # leading underscore: treated as private by convention
        self._y = np.asarray(y).reshape(-1, 1)  # column shape, aligned with _X

        if self._X.shape[0] != self._y.shape[0]:
            raise ValueError(
                "X and y must contain the same number of samples: "
                f"got {self._X.shape[0]} and {self._y.shape[0]}"
            )

        # Initialize coefficient and intercept at random values in [0, 1)
        self.coef_ = np.random.random()
        self.intercept_ = np.random.random()

        errors = []  # Error after each gradient-descent step

        # Perform gradient descent for the configured number of iterations
        for _ in range(self.n_iters):
            self.gradient_descend()
            errors.append(self.error())
        return errors

    def gradient_descend(self):
        """
        Update coefficient and intercept with one gradient-descent step.
        """
        d_coef, d_intercept = self.gradient()
        self.coef_ -= d_coef * self.lr
        self.intercept_ -= d_intercept * self.lr

    def gradient(self):
        """
        Compute the gradient of the cost function with respect to the
        coefficient and the intercept.

        The values returned are the partial derivatives of half the mean
        squared error over the stored training data.

        Returns:
        - d_coef (float): Partial derivative with respect to the coefficient.
        - d_intercept (float): Partial derivative with respect to the intercept.
        """
        residual = self.predict(self._X) - self._y

        d_coef = (residual * self._X).mean()
        d_intercept = residual.mean()

        return d_coef, d_intercept

    def predict(self, X):
        """
        Predict target variable values based on feature variable values.

        Parameters:
        - X (numpy.ndarray): Feature variable (independent variable) values.

        Returns:
        - yh (numpy.ndarray): Predicted target variable values, same shape as X.
        """
        return X * self.coef_ + self.intercept_

    def error(self):
        """
        Calculate the mean squared error (MSE) of the model's predictions
        on the stored training data.

        Returns:
        - error (float): Mean squared error.
        """
        return ((self.predict(self._X) - self._y) ** 2).mean()


In [22]:
# Import the necessary libraries
from sklearn.datasets import fetch_california_housing
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [23]:
# Load the California Housing dataset through scikit-learn.
# The returned object exposes the feature matrix as `.data` and the target
# values as `.target`; both are read in the next cell.
# NOTE(review): presumably downloads and caches the data on first use — confirm.
california_housing = fetch_california_housing()

In [24]:
# Pull the feature matrix (X) and the target values (y) out of the dataset object
X, y = california_housing.data, california_housing.target

In [25]:
# Hold out 33% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [26]:
# Train scikit-learn's LinearRegression on the training split.
# The fit call is left as the last expression of the cell, so its return value
# is what the notebook displays for this cell.
model = LinearRegression()
model.fit(X_train, y_train)

In [27]:
# Predict for the first test sample only: X_test[:1] is a one-row slice, so
# `predictions` holds a single value (read as predictions[0] in the next cell)
predictions = model.predict(X_test[:1])

In [28]:
# Report the prediction for the first test sample, followed by that sample's feature row
print("Predicted value for the first test sample:", predictions[0])
print("Features for the first test sample:", X_test[:1], sep="\n")

Predicted value for the first test sample: 0.7256346156569862
Features for the first test sample:
[[ 1.68120000e+00  2.50000000e+01  4.19220056e+00  1.02228412e+00
   1.39200000e+03  3.87743733e+00  3.60600000e+01 -1.19010000e+02]]
