# Supervised Learning with the kNN Based Moving Least Squares Method

In [91]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import make_blobs
import numpy as np

In [92]:
from sklearn.datasets import load_breast_cancer

# Load scikit-learn's bundled breast cancer dataset; the returned object
# exposes `data`, `target`, `feature_names` and `target_names` attributes.
dataset = load_breast_cancer()

In [93]:
def make_forge():
    """Build the small, deterministic two-class "forge" toy dataset.

    Thirty points are drawn from two blobs with a fixed seed; two of them
    are then relabelled as class 0 and four others are discarded.

    Returns
    -------
    tuple
        ``(X, y)`` holding the 26 remaining feature rows and their labels.
    """
    features, labels = make_blobs(centers=2, random_state=4, n_samples=30)
    # Hand-picked tweak: force these two samples into class 0 ...
    labels[[7, 27]] = 0
    # ... and remove these four samples from the dataset altogether.
    dropped = [0, 1, 5, 26]
    return np.delete(features, dropped, axis=0), np.delete(labels, dropped)

In [94]:
# Quick look at what the loaded breast cancer dataset object contains.
print("dataset.keys():\n", dataset.keys())
print("Shape of data (X):", dataset.data.shape)
print("Shape of target (y):", dataset.target.shape)
print("Feature names:", dataset.feature_names)
print("Target names:", dataset.target_names)

dataset.keys():
 dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
Shape of data (X): (569, 30)
Shape of terget (y): (569,)
Feature names: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Target names: ['malignant' 'benign']


In [95]:
# All experiments below run on the small synthetic forge dataset; the breast
# cancer dataset loaded earlier is only inspected, not used for fitting.
X, y = make_forge()
# Fixed seed so the train/test partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [96]:
# Display the full feature matrix of the forge dataset.
X

array([[ 9.96346605,  4.59676542],
       [11.0329545 , -0.16816717],
       [11.54155807,  5.21116083],
       [ 8.69289001,  1.54322016],
       [ 8.1062269 ,  4.28695977],
       [ 8.30988863,  4.80623966],
       [11.93027136,  4.64866327],
       [ 9.67284681, -0.20283165],
       [ 8.34810316,  5.13415623],
       [ 8.67494727,  4.47573059],
       [ 9.17748385,  5.09283177],
       [10.24028948,  2.45544401],
       [ 8.68937095,  1.48709629],
       [ 8.92229526, -0.63993225],
       [ 9.49123469,  4.33224792],
       [ 9.25694192,  5.13284858],
       [ 7.99815287,  4.8525051 ],
       [ 8.18378052,  1.29564214],
       [ 8.7337095 ,  2.49162431],
       [ 9.32298256,  5.09840649],
       [10.06393839,  0.99078055],
       [ 9.50048972, -0.26430318],
       [ 8.34468785,  1.63824349],
       [ 9.50169345,  1.93824624],
       [ 9.15072323,  5.49832246],
       [11.563957  ,  1.3389402 ]])

## Prediction with the kNN Regressor

In [97]:
from sklearn.neighbors import KNeighborsRegressor

# instantiate the model and set the number of neighbors to consider to 4
reg = KNeighborsRegressor(n_neighbors=4)
# fit the model using the training data and training targets
reg.fit(X_train, y_train)
# predict the targets of the held-out test samples
y_pred = reg.predict(X_test)
print("Test set predictions:\n", y_pred)

Test set predictions:
 [1.   0.   1.   0.   0.75 0.   0.  ]


In [98]:
# Coefficient of determination of the kNN regressor on the held-out samples.
r2_knn = r2_score(y_test, y_pred)
print("Test set R^2: {:.2f}".format(r2_knn))

Test set R^2: 0.38


In [99]:
# Echo the kNN predictions as the cell's output (same values as printed above).
y_pred

array([1.  , 0.  , 1.  , 0.  , 0.75, 0.  , 0.  ])

## Prediction with the kNN based MLS Approximator

In [100]:
from sigmaepsilon.math.approx import MLSApproximator

# Build the approximator from the training samples with k=4
# (presumably the neighbor count, matching n_neighbors=4 used for the
# kNN regressor -- confirm against the sigmaepsilon documentation).
approximator = MLSApproximator(X_train, y_train, k=4)
# Evaluate at the test points; y_pred is rebound to the MLS predictions.
y_pred = approximator.approximate(X_test)
# NOTE(review): the recorded output is identical to the kNN regressor's;
# presumably the default settings reduce to a plain neighbor mean -- confirm.
print("Test set predictions:\n", y_pred)

Test set predictions:
 [1.   0.   1.   0.   0.75 0.   0.  ]


In [101]:
# Coefficient of determination of the MLS approximator on the held-out samples.
r2_mls = r2_score(y_test, y_pred)
print("Test set R^2: {:.2f}".format(r2_mls))

Test set R^2: 0.38


In [102]:
# Echo the MLS predictions as the cell's output (same values as printed above).
y_pred

array([1.  , 0.  , 1.  , 0.  , 0.75, 0.  , 0.  ])

## Prediction with cubic weight function

In [110]:
from sigmaepsilon.math.approx import moving_least_squares, CubicWeightFunction

# Cubic weight function configured with a 2D core and a 2D support domain.
# NOTE(review): core=[0, 0] lies far from the forge samples (first feature is
# roughly in [8, 12]); presumably moving_least_squares re-centres the weight
# at each query point -- confirm against the sigmaepsilon documentation.
w = CubicWeightFunction(core=[0.0, 0.0], supportdomain=[1.0, 1.0])
# Build a callable approximant from the training samples.
approx = moving_least_squares(X_train, y_train, deg=2, order=2, w=w)

# Evaluate the approximant one test point at a time, keeping only the first
# item of each result (presumably the function value, with the remaining
# items being derivatives since order=2 -- TODO confirm).
y_pred = np.array([approx(x)[0] for x in X_test]).flatten()

In [111]:
# Coefficient of determination of the cubic-weight MLS fit on the held-out
# samples; a negative score means it does worse than always predicting the
# mean of y_test.
r2_cubic = r2_score(y_test, y_pred)
print("Test set R^2: {:.2f}".format(r2_cubic))

Test set R^2: -0.46


In [112]:
# Show the raw cubic-weight MLS predictions for the test samples.
y_pred

array([ 0.375     ,  0.        ,  1.17407227,  0.        , -0.04366246,
        0.        ,  0.        ])