In [0]:
import matplotlib.pyplot as plt
import autograd.numpy as np
import autograd

from sklearn.model_selection import train_test_split
from scipy.integrate import solve_ivp
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.datasets import load_boston
from sklearn.metrics import r2_score
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from scipy.linalg import expm
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

# Load the dataset

In [0]:
# Boston housing data: feature matrix X and regression target y.
# NOTE(review): load_boston is not available in recent scikit-learn releases —
# confirm the pinned version before a fresh Restart & Run All.
boston = load_boston()
X = boston.data
y = boston.target

# Run baselines

In [0]:
def calc_score(gen_model, n_iter=100):
  """Summarise held-out R^2 of a model over repeated random train/test splits.

  Every iteration draws a new random split of the notebook-level ``X`` and
  ``y``, fits a fresh model on the training part and scores it on the test part.

  Args:
    gen_model: Zero-argument callable returning a new unfitted estimator that
      provides ``fit(X, y)`` and ``predict(X)``.
    n_iter: Number of random splits to evaluate.

  Returns:
    Tuple ``(q25, median, q75)`` of the collected R^2 scores.
  """
  r2_scores = []
  for _ in range(n_iter):
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    # Fit the scaler on the training rows only, so the held-out rows do not
    # influence the preprocessing (previously it was fitted on all of X).
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
    model = gen_model()
    # Do not rely on fit() returning the estimator.
    model.fit(scaler.transform(X_train), y_train)
    y_pred = model.predict(scaler.transform(X_test))
    r2_scores.append(r2_score(y_test, y_pred))
  return np.quantile(r2_scores, 0.25), np.median(r2_scores), np.quantile(r2_scores, 0.75)

In [4]:
# The estimator class itself serves as the zero-argument factory.
calc_score(LinearRegression)

(0.6737992258706963, 0.717742885905629, 0.7425129480542201)

In [5]:
# The estimator class itself serves as the zero-argument factory.
calc_score(Ridge)

(0.6816715411428667, 0.7119669782104736, 0.7455065392763538)

In [6]:
# The estimator class itself serves as the zero-argument factory.
calc_score(KernelRidge)

(0.6578001821386379, 0.6838283738496159, 0.7201450287008737)

In [7]:
# NOTE(review): this repeats the Ridge baseline from an earlier cell; SVR is
# imported but never used, so another model may have been intended — confirm.
calc_score(lambda: Ridge())

(0.6876811741007891, 0.7272240440190129, 0.7547293167396051)

In [8]:
# The estimator class itself serves as the zero-argument factory.
calc_score(RandomForestRegressor)

(0.8337409585833049, 0.8730538610235477, 0.894126937995729)

In [9]:
def gen_model():
  """Build a fresh pipeline: min-max scaling followed by a 2-nearest-neighbour regressor."""
  steps = [
      ('scaler', MinMaxScaler()),
      ('knn', KNeighborsRegressor(n_neighbors=2)),
  ]
  return Pipeline(steps=steps)

calc_score(gen_model)


(0.7035971593808577, 0.7609061665406363, 0.7888953186685794)

# Models (the linear one is only a sanity check; the NN is used for training below)


In [0]:
class ModelF:
  """Base class for a parametric function ``f(x, param)``.

  Subclasses implement ``f``. The base class stores the current parameters and
  provides evaluation (``apply``) and the gradient with respect to the
  parameters (``grad``).
  """

  def __init__(self, param):
    # Current parameters; apply() and grad() evaluate at this value.
    self.param = param

  def f(self, x, param):
    """Evaluate the function at input ``x`` for parameters ``param``.

    Must be overridden. Raising here instead of silently returning ``None``
    makes a missing override fail at the call site.
    """
    raise NotImplementedError("subclasses of ModelF must implement f(x, param)")

  def apply(self, x):
    """Return ``f(x, self.param)``."""
    return self.f(x, self.param)

  def grad(self, x):
    """Return the gradient of ``f(x, param)`` w.r.t. ``param`` at ``self.param``."""
    def cur_f(param):
      # Fix the input x so that autograd differentiates in param only.
      return self.f(x, param)
    return autograd.grad(cur_f)(self.param)

In [0]:
class LinearF(ModelF):
  """Affine model: dot product of the input, extended by a constant 1, with the parameters."""

  def __init__(self, x_dim):
    self.x_dim = x_dim
    # One weight per input feature plus a bias, rescaled to unit Euclidean norm.
    weights = np.random.randn(x_dim + 1)
    weights = weights / np.linalg.norm(weights)
    super().__init__(weights)

  def f(self, x, param):
    """Return ``[x, 1] . param``."""
    augmented = np.concatenate([x, [1]])
    return np.dot(augmented, param)


In [0]:
# Number of input features (columns of X).
dim = X.shape[1]
linear_f = LinearF(dim)

In [13]:
# Sanity check on the first sample: input, model output and current parameters.
print(X[0], linear_f.apply(X[0]), linear_f.param, sep="\n")

# Gradient of the model output with respect to its parameters at that sample.
d_param = linear_f.grad(X[0])
print(d_param)

[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00]
28.752760827946183
[ 0.29871499 -0.02710037 -0.37296436 -0.2688844  -0.45728984  0.18251882
  0.31629371  0.16386581  0.16446996 -0.24773481  0.02230476  0.20516306
 -0.06243172 -0.4409114 ]
[6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00 1.000e+00]


In [0]:
class NN(ModelF):
  """One-hidden-layer ReLU network with a scalar output, stored as one flat parameter vector."""

  def __init__(self, x_dim, hidden_size=100):
    # The first layer has one extra input row for the constant 1 appended in f().
    self.layer1_shape = (x_dim + 1, hidden_size)
    self.layer2_shape = (hidden_size, 1)

    # Draw each layer from a standard normal, divide by sqrt(sum(shape) + 1)
    # and flatten; the layers are drawn in order (first, then second).
    flat_layers = []
    for shape in (self.layer1_shape, self.layer2_shape):
      scale = np.sqrt(np.sum(shape) + 1.)
      flat_layers.append((np.random.randn(*shape) / scale).reshape(-1))

    super().__init__(np.concatenate(flat_layers))

  def param_to_layers(self, param):
    """Split a flat parameter vector into the two weight matrices."""
    n1 = np.prod(self.layer1_shape)
    n2 = np.prod(self.layer2_shape)
    assert len(param) == n1 + n2

    first = param[:n1].reshape(self.layer1_shape)
    second = param[n1:].reshape(self.layer2_shape)
    return first, second

  def f(self, x, param):
    """Forward pass: append 1 to ``x``, linear layer, ReLU, linear layer, sum."""
    layer1, layer2 = self.param_to_layers(param)
    with_bias = np.concatenate([x, [1]])
    hidden = np.maximum(np.dot(with_bias, layer1), 0)  # relu
    return np.sum(np.dot(hidden, layer2))

# Compute sin(a) and cos(a) via the matrix exponential

In [15]:
# 30 degrees expressed in radians.
a = 30 / 180 * np.pi
# Matrix exponential of a * [[0, -1], [1, 0]]; the displayed result has
# cos(a) on the diagonal and -sin(a) / sin(a) off the diagonal.
expm(a * np.asarray([[0, -1], [1, 0]]))


array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]])

# Train a regressor

In [0]:
class FunctionEstimator:
  """Regressor that predicts through the gradient kernel of a parametric model.

  With ``G`` the matrix of parameter gradients of ``model_f`` on the training
  inputs and ``Theta_0 = G G^T``, ``fit`` stores

      right_vector = Theta_0^{-1} ((I - exp(-lr * Theta_0)) y + exp(-lr * Theta_0) f0)

  where ``f0`` are the model outputs on the training inputs. ``predict`` then
  returns ``grad(x) . G^T . right_vector`` for each input ``x``.

  Args:
    model_f: Object exposing ``apply(x)`` and ``grad(x)``.
    learning_rate: Scalar multiplying ``Theta_0`` inside the matrix exponential.
  """

  def __init__(self, model_f, learning_rate):
    self.model_f = model_f
    self.lr = learning_rate
    # Both are set by fit(); while right_vector is None, predict() returns the
    # raw model outputs.
    self.right_vector = None
    self.grads = None

  def fit(self, X, y):
    """Compute and store the training gradients and the coefficient vector.

    Args:
      X: Sequence of training inputs.
      y: Training targets, one per input.

    Returns:
      ``self``, so that ``fit(...).predict(...)`` can be chained.

    Raises:
      numpy.linalg.LinAlgError: If ``Theta_0`` is singular.
    """
    # Reset first so that predict() below yields the raw model outputs f0.
    self.right_vector = None

    n = len(X)

    grads = self._grads(X)
    Theta_0 = np.dot(grads, grads.T)
    f0 = self.predict(X)

    exp_term = expm(-self.lr * Theta_0)
    target = np.dot(np.eye(n) - exp_term, y) + np.dot(exp_term, f0)
    # One linear solve replaces the two products with an explicit inverse.
    right_vector = np.linalg.solve(Theta_0, target)

    self.grads = grads
    self.right_vector = right_vector
    return self

  def predict(self, X):
    """Return one prediction per input in ``X`` as an array."""
    def predict_one(x):
      if self.right_vector is None:
        # Not fitted (or fit in progress): fall back to the raw model output.
        return self.model_f.apply(x)
      return np.dot(self._kernel_value(x), self.right_vector)

    return np.asarray([predict_one(x) for x in X])

  def _kernel_value(self, x):
    """Dot products of the gradient at ``x`` with every stored training gradient."""
    grad_x = self.model_f.grad(x)
    return np.dot(grad_x, self.grads.T)

  def _grads(self, X):
    """Stack the parameter gradients of the model for every input in ``X``."""
    return np.asarray([self.model_f.grad(x) for x in X])

In [17]:
dim = len(X[0])

# Ten independent runs: fresh random network and fresh random split each time.
for _ in range(10):
  model = FunctionEstimator(NN(dim), learning_rate=1.)
  X_train, X_test, y_train, y_test = train_test_split(X, y)
  # Fit the scaler on the training rows only, so the held-out rows do not
  # influence the preprocessing (previously it was fitted on all of X).
  scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
  X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

  model.fit(X_train, y_train)
  test_score = r2_score(y_test, model.predict(X_test))
  train_score  = r2_score(y_train, model.predict(X_train))
  print(f"test_score {test_score} train_score {train_score}")
  print()

test_score 0.8612083349489982 train_score 0.89407660928637

test_score 0.6657240192655545 train_score 0.9120359617312257

test_score 0.7677696529139749 train_score 0.9015817008335495

test_score 0.7829043393223697 train_score 0.9239348489240387

test_score 0.7625958530181464 train_score 0.9073785590272501

test_score 0.8676199008853263 train_score 0.9132523839818919

test_score 0.8218143929997738 train_score 0.9142279977303175

test_score 0.8788995157644584 train_score 0.8953653401787621

test_score 0.8360874766570472 train_score 0.9165525146583021

test_score 0.7797886262832017 train_score 0.9274265275008722

