In [358]:
import matplotlib.pyplot as plt
import numpy as np 
import seaborn as sns

from sklearn.model_selection import train_test_split
from scipy.optimize import minimize

from sklearn.metrics import mean_squared_error

In [471]:
class LocalRegression:

    def __init__(self, k_neighbors: int = 5) -> None:
        self.k_neighbors = k_neighbors
    
    def fit(self, X_train, y_train) -> None:
        assert X_train.shape[0] == y_train.shape[0], 'X and y are not equal in dim (0)'
        assert X_train.ndim > 1, 'X should be at least 2-dim'
        self.X_train = X_train
        self.y_train = y_train
        self.fraction = self.k_neighbors / self.X_train.shape[0]

    def predict(self, X_test):
        predictions = []
        
        for x_0 in X_test:
            distances = np.linalg.norm(self.X_train - x_0, axis=1)
            sample_size = int(self.fraction * len(self.X_train))
            k_nearest_idx = np.argsort(distances)[:sample_size]
            
            k_neighbors_X = self.X_train[k_nearest_idx]
            k_neighbors_y = self.y_train[k_nearest_idx]
            
            epsilon = 1e-8
            dk = distances[k_nearest_idx][-1]  
            
            weights = (1 - distances[k_nearest_idx] / (dk + epsilon)) ** 2
            regression_params = self._weighted_least_squares(k_neighbors_X, k_neighbors_y, weights)
            
            prediction = np.dot(regression_params, x_0)
            predictions.append(prediction)

        return np.array(predictions)

    def _weighted_least_squares(self, X, y, weights):
        criterion = lambda params: np.sum(weights * np.square(y - np.dot(params, X.T)))
        initial_params = np.random.random(X.shape[1])
        result = minimize(criterion, initial_params, method='L-BFGS-B', options={'maxiter': 100000})
        return result.x

In [472]:
np.random.seed(42)


num_samples = 100
bias_term = np.ones((num_samples, 1))
X_without_bias = np.random.rand(num_samples, 4)
X = np.concatenate([bias_term, X_without_bias], axis=1)
epsilon = np.random.normal(0, 0.1, num_samples)  
beta_0, beta_1, beta_2, beta_3, beta_4  = 1, 4, 5, -4, 10
y = beta_0 + beta_1 * X[:, 0] ** 2 + beta_2 * X[:, 1] ** 3 + beta_3 * X[:, 2] ** 4 + beta_4 * X[:, 3] ** 4 + epsilon  

In [475]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42, shuffle=True)

In [479]:
localReg = LocalRegression(k_neighbors=10)
localReg.fit(X_train, y_train)

In [480]:
preds = localReg.predict(X_test)

In [481]:
test_mse = mean_squared_error(y_test, preds)
test_mse

3.407612725396322