In [152]:
from typing import Optional
from abc import ABC, abstractmethod

import numpy as np

# LinearRegression Interface

In [153]:
class LinearRegression(ABC):
    """
    Abstract Base Class for Linear Regression Models.
    Defines the core methods to be implemented by derived classes.

    Attributes:
        X: Design matrix as passed in; replaced by an intercept-augmented copy once
            `_add_constant` has run.
        y: Response variable as passed in.
        coefficients: Estimated coefficients, None until a subclass `fit` sets them.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray) -> None:
        """
        Initialize the regression model.

        Parameters:
            X (np.ndarray): Design matrix (n_samples x n_features).
            y (np.ndarray): Response variable (n_samples x 1).
        """

        self.X = X
        self.y = y
        self.coefficients: Optional[np.ndarray] = None

        # The exact array object produced by `_add_constant`, kept so that a second
        # call can recognise (by identity) that `self.X` is already augmented.
        self._X_with_constant: Optional[np.ndarray] = None

    @abstractmethod
    def fit(self) -> None:
        """
        Compute the regression coefficients. Must be implemented by subclasses.
        """
        raise NotImplementedError

    @abstractmethod
    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict response values for new data.

        Parameters:
            X (np.ndarray): New data for prediction.

        Returns:
            np.ndarray: Predicted values.
        """
        raise NotImplementedError

    def _add_constant(self) -> None:
        """
        Add a constant column to the design matrix for the intercept term.

        A column of ones is prepended to `self.X`. The call is idempotent: if `self.X`
        is still the very array produced by a previous call, nothing happens, so
        calling `fit` repeatedly does not stack several intercept columns. Assigning
        a new array to `self.X` re-enables the augmentation for that new array.
        """

        # `getattr` keeps this safe for subclasses that never ran this __init__.
        already_augmented = getattr(self, "_X_with_constant", None)
        if already_augmented is not None and self.X is already_augmented:
            return

        self.X = np.hstack((np.ones((self.X.shape[0], 1)), self.X))
        self._X_with_constant = self.X

In [149]:
class GLS(LinearRegression):
    """
    Generalized Least Squares Regression Model.
    Handles heteroskedasticity and autocorrelation in residuals by incorporating the residual covariance matrix (omega).
    """

    def __init__(
        self,
        X: np.ndarray,
        y: np.ndarray,
        omega: Optional[np.ndarray] = None
    ) -> None:
        """
        Initialize the GLS model.

        Parameters:
            X (np.ndarray): Design matrix (n_samples x n_features).
            y (np.ndarray): Response variable (n_samples x 1).
            omega (Optional[np.ndarray]): Covariance matrix of residuals (n_samples x n_samples). Defaults to None (OLS case).
        """

        super().__init__(X=X, y=y)

        # Without a supplied covariance, an identity matrix reduces the GLS formula to OLS.
        self.omega = omega if omega is not None else np.eye(len(y))

    def fit(self) -> None:
        """
        Fit the GLS model using the generalized normal equation: (X'W^(-1)X)^(-1)X'W^(-1)y.

        Side effects: `self.X` gains a leading column of ones (intercept) and
        `self.coefficients` is set to the solution.
        """

        self._add_constant()

        # Obtain W^(-1)X and W^(-1)y from one linear solve on the stacked right-hand
        # side [X | y]: omega is factorised only once and its inverse is never formed
        # explicitly, which is more accurate than multiplying by inv(omega).
        n_params = self.X.shape[1]
        whitened = np.linalg.solve(self.omega, np.column_stack((self.X, self.y)))
        omega_inv_X = whitened[:, :n_params]
        omega_inv_y = whitened[:, n_params:]

        # column_stack promoted a 1-D response to a column; restore its rank so the
        # coefficients keep the same dimensionality as the response that was given.
        if np.ndim(self.y) == 1:
            omega_inv_y = omega_inv_y[:, 0]

        # Solve the GLS normal equations
        self.coefficients = np.linalg.solve(self.X.T @ omega_inv_X, self.X.T @ omega_inv_y)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict response values for new data.

        Parameters:
            X (np.ndarray): New data for prediction. If it has exactly one column fewer
                than the fitted design matrix, an intercept column of ones is prepended.

        Returns:
            np.ndarray: Predicted values.

        Raises:
            RuntimeError: If called before `fit`.
        """

        if self.coefficients is None:
            raise RuntimeError("The model must be fitted before calling predict.")

        if X.shape[1] + 1 == self.X.shape[1]:
            X = np.hstack((np.ones((X.shape[0], 1)), X))

        return X @ self.coefficients

In [150]:
# Synthetic data: standard-normal features and responses, drawn independently for the
# training and the test split.
y_train = np.random.randn(1_000, 1)
X_train = np.random.randn(1_000, 10)

y_test = np.random.randn(1_000, 1)
X_test = np.random.randn(1_000, 10)

# Fit on the training split only; the test response must not be seen during fitting.
gls: GLS = GLS(X=X_train, y=y_train)
gls.fit()

y_hat = gls.predict(X=X_test)

# Sample variance of the out-of-sample residuals (displayed as the cell output).
np.var(y_hat - y_test, ddof=1)

1.0146527197187467

# OLS LinearRegression Implementation 

In [151]:
class OLS(GLS):
    """
    Ordinary Least Squares Regression Model.
    Specialises GLS by always forwarding `omega=None` to the parent constructor;
    `fit` and `predict` are inherited from GLS without modification.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray) -> None:
        """
        Build an OLS model from a design matrix and a response.

        Parameters:
            X (np.ndarray): Design matrix (n_samples x n_features).
            y (np.ndarray): Response variable (n_samples x 1).
        """

        # No residual covariance is supplied: the parent is asked for its default.
        super().__init__(X, y, omega=None)

Test

In [134]:
# Synthetic data: standard-normal features and responses, drawn independently for the
# training and the test split.
y_train = np.random.randn(1_000, 1)
X_train = np.random.randn(1_000, 10)

y_test = np.random.randn(1_000, 1)
X_test = np.random.randn(1_000, 10)

# Fit on the training split only; the test response must not be seen during fitting.
ols: OLS = OLS(X=X_train, y=y_train)
ols.fit()

y_hat = ols.predict(X_test)

# Sample variance of the out-of-sample residuals (displayed as the cell output).
np.var(y_hat - y_test, ddof=1)

1.0263146477395808

# GLS LinearRegression Implementation

In [None]:
class GLS(LinearRegression):
    """
    Generalized Least Squares Regression Model.
    Extends the LinearRegression interface to handle heteroskedasticity and
    autocorrelation through the residual covariance matrix (omega).
    """

    def __init__(
        self,
        X: np.ndarray,
        y: np.ndarray,
        omega: Optional[np.ndarray] = None
    ) -> None:
        """
        Initialize the GLS model.

        Parameters:
            X (np.ndarray): Design matrix (n_samples x n_features).
            y (np.ndarray): Response variable (n_samples x 1).
            omega (Optional[np.ndarray]): Covariance matrix of residuals (n_samples x n_samples).
                Defaults to None, in which case an identity matrix is used (OLS case).
        """
        super().__init__(X, y)

        # Without a supplied covariance, an identity matrix reduces the GLS formula to OLS.
        self.omega = omega if omega is not None else np.eye(len(y))

    def fit(self) -> None:
        """
        Fit the GLS model by solving the generalized normal equation
        (X'W^(-1)X) b = X'W^(-1)y.

        Side effects: `self.X` gains a leading column of ones (intercept) and
        `self.coefficients` is set to the solution.
        """
        self._add_constant()  # Add intercept term

        # One solve on the stacked right-hand side [X | y] yields both W^(-1)X and
        # W^(-1)y without ever forming the explicit inverse of omega.
        n_params = self.X.shape[1]
        whitened = np.linalg.solve(self.omega, np.column_stack((self.X, self.y)))
        omega_inv_X = whitened[:, :n_params]
        omega_inv_y = whitened[:, n_params:]

        # column_stack promoted a 1-D response to a column; restore its rank so the
        # coefficients keep the same dimensionality as the response that was given.
        if np.ndim(self.y) == 1:
            omega_inv_y = omega_inv_y[:, 0]

        XTWX = self.X.T @ omega_inv_X
        XTWy = self.X.T @ omega_inv_y

        # Solve the normal equations directly instead of multiplying by inv(XTWX).
        self.coefficients = np.linalg.solve(XTWX, XTWy)

    def predict(self, X_new: np.ndarray) -> np.ndarray:
        """
        Predict response values for new data.

        Parameters:
            X_new (np.ndarray): New data for prediction. If it has exactly one column
                fewer than the fitted design matrix, an intercept column of ones is
                prepended.

        Returns:
            np.ndarray: Predicted values.

        Raises:
            RuntimeError: If called before `fit`.
        """
        if self.coefficients is None:
            raise RuntimeError("The model must be fitted before calling predict.")

        if X_new.shape[1] + 1 == self.X.shape[1]:
            X_new = np.hstack((np.ones((X_new.shape[0], 1)), X_new))
        return X_new @ self.coefficients

class PanelRegression:
    """
    Panel Regression Model for cross-sectional and time-series data.
    Runs one independent GLS fit per time period and stores the resulting
    coefficient vectors side by side, one column per period.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray, omega: np.ndarray):
        """
        Store the panel data; nothing is estimated until `fit` is called.

        Parameters:
            X (np.ndarray): Design matrix (n_entities x n_features x n_time_periods).
            y (np.ndarray): Response variable (n_entities x n_time_periods).
            omega (np.ndarray): Covariance matrix of residuals (n_entities x n_entities).
        """
        self.X = X
        self.y = y
        self.omega = omega
        # Filled by fit(): shape (n_features + 1, n_time_periods)
        self.coefficients = None

    def fit(self):
        """
        Estimate a separate GLS coefficient vector for every time period.

        The same `omega` is passed to each per-period GLS model.
        """
        _, feature_count, period_count = self.X.shape

        # One column per period; the extra row accommodates the intercept
        self.coefficients = np.zeros((feature_count + 1, period_count))

        for period in range(period_count):
            period_model = GLS(self.X[:, :, period], self.y[:, period], self.omega)
            period_model.fit()
            self.coefficients[:, period] = period_model.coefficients.flatten()

    def predict(self, X_new: np.ndarray) -> np.ndarray:
        """
        Predict response values for new data across all time periods.

        Each period's slice of `X_new` is multiplied by that period's coefficient
        column. A column of ones is prepended to the slice when it has exactly one
        column fewer than there are coefficient rows.

        Parameters:
            X_new (np.ndarray): New data for prediction (n_entities x n_features x n_time_periods).

        Returns:
            np.ndarray: Predicted values (n_entities x n_time_periods).
        """
        entity_count, _, period_count = X_new.shape
        predictions = np.zeros((entity_count, period_count))

        for period in range(period_count):
            features = X_new[:, :, period]

            # Supply the intercept column if the caller left it out
            if features.shape[1] + 1 == self.coefficients.shape[0]:
                intercept = np.ones((features.shape[0], 1))
                features = np.hstack((intercept, features))

            predictions[:, period] = features @ self.coefficients[:, period]

        return predictions
