# Supervised Learning with the kNN Based Moving Least Squares Method

In [78]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [79]:
from sklearn.datasets import load_breast_cancer

dataset = load_breast_cancer()

In [80]:
print("dataset.keys():\n", dataset.keys())
print("Shape of data (X):", dataset.data.shape)
print("Shape of terget (y):", dataset.target.shape)
print("Feature names:", dataset.feature_names)
print("Target names:", dataset.target_names)

dataset.keys():
 dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
Shape of data (X): (569, 30)
Shape of terget (y): (569,)
Feature names: ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Target names: ['malignant' 'benign']


In [81]:
X, y = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

## Prediction with the kNN Regressor

In [82]:
from sklearn.neighbors import KNeighborsRegressor

# instantiate the model and set the number of neighbors to consider to 3
reg = KNeighborsRegressor(n_neighbors=4)
# fit the model using the training data and training targets
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
print("Test set predictions:\n", y_pred)

Test set predictions:
 [0.25 1.   1.   0.25 1.   1.   1.   1.   1.   1.   0.5  0.75 1.   0.25
 0.25 0.   1.   0.   0.   0.   0.25 0.25 1.   1.   0.   0.75 1.   1.
 1.   0.   1.   0.   1.   0.   1.   0.25 1.   0.   1.   0.   0.25 1.
 0.   1.   0.25 0.   1.   1.   1.   0.   0.   1.   0.   1.   1.   1.
 1.   1.   1.   0.   0.   0.   1.   1.   0.   0.75 0.   0.   0.   1.
 0.75 0.   1.   1.   0.   1.   1.   1.   1.   1.   0.   0.   0.   1.
 0.   1.   1.   1.   0.   0.   1.   0.   0.75 0.5  1.   1.   0.   1.
 1.   1.   1.   1.   1.   1.   0.   1.   0.   0.5  0.   0.25 1.   0.
 0.   0.75 1.   0.75 0.5  1.   1.   1.   1.   1.   1.   1.   0.   1.
 0.75 1.   1.   1.   0.   1.   1.   1.   1.   1.   1.   0.25 0.   1.
 1.   1.   0.  ]


In [83]:
print("Test set R^2: {:.2f}".format(r2_score(y_test, y_pred)))

Test set R^2: 0.75


In [84]:
y_pred.shape

(143,)

## Prediction with the kNN based MLS Approximator

In [85]:
from sigmaepsilon.math.approx import MLSApproximator

approximator = MLSApproximator(X_train, y_train, k=4)
y_pred = approximator.approximate(X_test)
print("Test set predictions:\n", y_pred)

Test set predictions:
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [86]:
print("Test set R^2: {:.2f}".format(r2_score(y_test, y_pred)))

Test set R^2: -1.70


In [87]:
y_pred.shape

(143,)

In [88]:
X_test.shape

(143, 30)