# In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

def _interpolate(values, base_idx, neighbor_idx, weights):
    """Return points at fraction `weights` along base -> neighbor, row by row."""
    start = values[base_idx]
    step = values[neighbor_idx] - start
    if np.issubdtype(values.dtype, np.floating):
        # Keep reduced-precision float inputs (e.g. float32) in their own dtype
        # instead of silently promoting them to float64.
        weights = weights.astype(values.dtype, copy=False)
    # Trailing singleton axes let one weight per sample scale a whole row.
    weights = weights.reshape((-1,) + (1,) * (step.ndim - 1))
    return start + weights * step


def smote_regression(X, y, N, k):
    """
    SMOTE-style oversampling for regression.

    Each synthetic sample is drawn uniformly at random on the segment between
    an original sample and one of its k nearest neighbors; the target is
    interpolated with the same weight as the features.

    Parameters:
    - X: np.array, input features of shape (n_samples, n_features)
    - y: np.array, target variable of shape (n_samples,)
    - N: int, desired number of synthetic samples (>= 0)
    - k: int, number of nearest neighbors (>= 1), not counting the sample
      itself; silently capped at n_samples - 1

    Returns:
    - X_augmented: np.array, the original rows of X followed by N synthetic rows
    - y_augmented: np.array, the original targets followed by N synthetic targets

    Raises:
    - ValueError: if X is not 2-D, X and y differ in length, N < 0, k < 1, or
      N > 0 while fewer than two samples are available.

    Notes:
    - Randomness comes from the global NumPy RNG; call np.random.seed for
      reproducible output.
    - (sample, neighbor) pairs are drawn without replacement while N fits in
      the pool of n_samples * k pairs, and with replacement beyond that, so
      exactly N synthetic samples are always produced.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2:
        raise ValueError("X must be 2-D with shape (n_samples, n_features)")
    n_samples = len(X)
    if len(y) != n_samples:
        raise ValueError("X and y must contain the same number of samples")
    if N < 0:
        raise ValueError("N must be non-negative")
    if k < 1:
        raise ValueError("k must be at least 1")

    if N == 0:
        # Nothing to synthesize; empty index arrays flow through the common
        # path below so the output dtype matches the N > 0 case.
        base_idx = neighbor_idx = np.empty(0, dtype=np.intp)
    else:
        if n_samples < 2:
            raise ValueError("at least two samples are required to interpolate")
        n_neighbors = min(k, n_samples - 1)

        # Calling kneighbors() without a query makes scikit-learn return the
        # neighbors of every fitted point *excluding the point itself*, so we
        # get n_neighbors genuine neighbors per sample in one batched query.
        nn = NearestNeighbors(n_neighbors=n_neighbors)
        nn.fit(X)
        neighbors = nn.kneighbors(return_distance=False)

        # Flat pair id p encodes (sample p // n_neighbors, neighbor slot
        # p % n_neighbors), matching the row-major layout of `neighbors`.
        n_pairs = n_samples * n_neighbors
        picks = np.random.choice(n_pairs, N, replace=N > n_pairs)
        base_idx = picks // n_neighbors
        neighbor_idx = neighbors.ravel()[picks]

    # One interpolation weight per synthetic sample, shared by X and y so the
    # synthetic target stays consistent with the synthetic features.
    weights = np.random.rand(len(base_idx))
    synthetic_X = _interpolate(X, base_idx, neighbor_idx, weights)
    synthetic_y = _interpolate(y, base_idx, neighbor_idx, weights)

    # Combine original and synthetic samples
    X_augmented = np.vstack([X, synthetic_X])
    y_augmented = np.hstack([y, synthetic_y])

    return X_augmented, y_augmented
