# 

# Quarteroni - SciML: Chapter 3.2.1-3.2.4

In [None]:
# Install the third-party packages into the running kernel's environment
# (`%pip` targets the interpreter the kernel is using).
%pip install numpy matplotlib polars scipy scikit-learn

## Least squares regression

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import scipy.linalg
import scipy.special

### Experience (E)

In [None]:
# Download the house-prices CSV and parse it into a polars DataFrame.
HOUSE_PRICES_URL = (
    "https://raw.githubusercontent.com/ywchiu/riii/refs/heads/master/data/house-prices.csv"
)
df = pl.read_csv(HOUSE_PRICES_URL)
df

In [None]:
# Input (feature) column names and the output (target) column name.
# Multi-feature alternative, kept for reference:
#   x_labels = ["SqFt", "Bedrooms", "Bathrooms"]
x_labels, y_label = ["SqFt"], "Price"

### Candidate model

In [None]:
def f(x):
    """Linear candidate model: return ``m * x + q``.

    ``m`` and ``q`` are not parameters; they are looked up as notebook-level
    globals each time the function is called, so they must have been assigned
    (by the training step) before the first call.

    Parameters
    ----------
    x : scalar or array-like supporting ``*`` and ``+``
        Input value(s) at which to evaluate the model.

    Returns
    -------
    Same kind as ``m * x + q`` for the given ``x``.
    """
    return m * x + q

### Training (least squares)

In [None]:
# Column sums that make up the entries of the normal equations.
feature = pl.col(*x_labels)
target = pl.col(y_label)


def _total(expr):
    """Sum `expr` over the rows of `df` and return row 0 of the result as a NumPy array."""
    return df.select(expr.sum()).to_numpy()[0]


x2_sum = _total(feature.pow(2))      # sum of x_i^2
x_sum = _total(feature)              # sum of x_i
x_y_sum = _total(feature * target)   # sum of x_i * y_i
y_sum = _total(target)               # sum of y_i
N = [len(df)]                        # number of samples, wrapped in a list so it can be stacked

In [None]:
# Assemble the linear system A @ [m, q] = b from the precomputed sums.
top_row = np.hstack([x2_sum, x_sum])
bottom_row = np.hstack([x_sum, N])
A = np.array([top_row, bottom_row])
b = np.hstack([x_y_sum, y_sum])
print(A, b)

In [None]:
# Solve the linear system A @ [m, q] = b; the two solution components are
# unpacked into the model coefficients m and q used by f.
m, q = scipy.linalg.solve(A, b)

### Measure performance

In [None]:
# Evaluate the fitted model on the training inputs and fetch the targets.
x = df[x_labels].to_numpy()  # selection by a list of labels -> 2-D array
y = df[y_label].to_numpy()   # selection by a single label -> 1-D array
# Flatten the predictions so they have the same 1-D shape as `y`. Left as
# (N, 1), an expression such as `y - y_pred` would broadcast against the (N,)
# targets into an (N, N) matrix instead of N element-wise residuals.
y_pred = f(x).ravel()

In [None]:
# Compare model predictions with the observed targets on the same axes.
fig, ax = plt.subplots()
ax.scatter(x, y_pred, label="Model prediction")
ax.scatter(x, y, label="Observed data")
# Label the figure so it can be read on its own when skimming the notebook.
ax.set(
    xlabel=x_labels[0],
    ylabel=y_label,
    title="Least-squares fit vs. observations",
)
ax.legend()
plt.show()

In [None]:
# Mean squared error of the predictions.
# Flatten both arrays so the subtraction is element-wise: mixing an (N,) array
# with an (N, 1) array would broadcast to an (N, N) matrix of all pairwise
# differences and give a wrong (inflated) error.
residuals = np.ravel(y) - np.ravel(y_pred)
MSE = np.mean(residuals**2)
print(MSE)

In [None]:
# Root mean squared error: the square root of MSE, which brings the error
# back to the units of the target rather than its square.
RMSE = np.sqrt(MSE)
print(RMSE)

## Machine learning models

We assume there is a relation between the input and output data.

$$
f : x \mapsto y
$$

We need a rich enough hypothesis space.


What is wrong with, e.g., polynomials or Fourier series (which we know are dense in $C(\Omega)$)?

1D: 

$$
f(x_1) = a_0 + a_1x_1 + a_2x_1^2 + \cdots
$$

2D (same degree polynomial):


$$
f(x_1, x_2) = a_{00} + a_{10}x_1 + a_{01}x_2 + a_{20}x_1^2 + a_{02}x_2^2   + a_{11}x_1x_2 + \cdots
$$


The number of parameters (coefficients, including the constant term) of a polynomial of degree at most $n$ in $k$ variables is

$$
\binom{k + n}{n}
$$

In [None]:
# Number of coefficients of a polynomial of degree at most n in k variables,
# i.e. binom(k + n, n) with the constant term included. For k = n = 2 this is
# 6, matching the six coefficients a_00, a_10, a_01, a_20, a_02, a_11 of the
# 2D example.
k = 2
n = 2
n_params = scipy.special.binom(k + n, n)
n_params

In [None]:
# Same count, binom(k + n, n), for 100 variables and degree 100: the number of
# polynomial coefficients explodes combinatorially. The result is a float of
# order 1e58, so subtracting 1 from it would have no effect on the value.
k = 100
n = 100
n_params = scipy.special.binom(k + n, n)
n_params

For neural networks (NNs) we also have the [universal approximation theorem](https://www.deep-mind.org/2023/03/26/the-universal-approximation-theorem/#Universal_Approximation_Theorem).
With NNs we can get away with fewer parameters.