In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [2]:
class LinearRegressionMultiFeature:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Gradient Descent
        for _ in range(self.n_iters):
            y_predicted = np.dot(X, self.weights) + self.bias

            # Calculate gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        return np.dot(X, self.weights) + self.bias


In [3]:
# Load the California Housing Dataset
data = fetch_california_housing()
X, y = data.data, data.target

In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [24]:
# Train the custom Linear Regression model
lr_multi = LinearRegressionMultiFeature(learning_rate=0.02, n_iters=1000)
lr_multi.fit(X_train, y_train)


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  self.weights -= self.learning_rate * dw
  self.bias -= self.learning_rate * db


In [30]:
X_test

array([[   1.6812    ,   25.        ,    4.19220056, ...,    3.87743733,
          36.06      , -119.01      ],
       [   2.5313    ,   30.        ,    5.03938356, ...,    2.67979452,
          35.14      , -119.46      ],
       [   3.4801    ,   52.        ,    3.97715472, ...,    1.36033229,
          37.8       , -122.44      ],
       ...,
       [   9.2298    ,   25.        ,    7.23767606, ...,    2.79049296,
          37.31      , -122.05      ],
       [   2.785     ,   36.        ,    5.28902954, ...,    2.58860759,
          36.77      , -119.76      ],
       [   3.5521    ,   17.        ,    3.98883929, ...,    3.72991071,
          34.22      , -118.37      ]])

In [26]:
# Make predictions
y_pred = lr_multi.predict(X_test)

In [28]:
y_pred

array([nan, nan, nan, ..., nan, nan, nan])

In [7]:
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print("Weights:", lr_multi.weights)
print("Bias:", lr_multi.bias)
print(f"Mean Squared Error: {mse:.4f}")

ValueError: Input contains NaN.

In [None]:
from sklearn.linear_model import LinearRegression
R_learning = LinearRegression()
R_learning.fit(X_train, y_train)

In [None]:
prediction = R_learning.predict(X_test)

In [None]:
# Evaluate the model
mse = mean_squared_error(y_test, prediction)
print("Weights:", R_learning.coef_)
print("Bias:", R_learning.intercept_)
print(f"Mean Squared Error: {mse:.4f}")

In [None]:

# Visualization 1: Predicted vs Actual values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, color='blue', alpha=0.5)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linestyle='--')
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")
plt.title("Actual vs Predicted Values")
plt.show()

In [None]:
# Visualization 2: Residual Plot (Error distribution)
residuals = y_pred - y_test
plt.figure(figsize=(10, 6))
plt.scatter(y_pred, residuals, color='green', alpha=0.5)
plt.hlines(y=0, xmin=min(y_pred), xmax=max(y_pred), colors='red', linewidth=2)
plt.xlabel("Predicted Values")
plt.ylabel("Residuals (Error)")
plt.title("Residual Plot")
plt.show()