# 15. Multiple Regression

## Imports

In [1]:
import datetime
import random
import sys
from typing import Callable, TypeVar

import numpy as np
import plotly.graph_objects as go
import tqdm
from sklearn.datasets import make_regression

In [2]:
# Importing functions from another notebook with supress graph rendering

_print = print
globals()["print"] = lambda *args, **kwargs: None

_show = go.Figure.show
go.Figure.show = lambda *args, **kwargs: None

_stdout = sys.stdout
_stderr = sys.stderr


class SupressTqdm(object):
    def write(self, x):
        pass


sys.stdout = SupressTqdm()
sys.stderr = SupressTqdm()

%run -i ../04_linear_algebra/main.ipynb
%run -i ../05_statistics/main.ipynb
%run -i ../06_probability/main.ipynb
%run -i ../08_gradient_descent/main.ipynb
%run -i ../14_simple_linear_regression/main.ipynb

globals()["print"] = _print
go.Figure.show = _show
sys.stdout = _stdout
sys.stderr = _stderr

In [3]:
random.seed(0)

## Model

In [4]:
def predict(x: Vector, beta: Vector) -> float:
    return dot(x, beta)

## Fitting Model

In [5]:
def error(x: Vector, y: float, beta: Vector) -> float:
    return predict(x, beta) - y

In [6]:
def squared_error(x: Vector, y: float, beta: Vector) -> float:
    return error(x, y, beta)**2

In [7]:
x = [1, 2, 3]
y = 30
beta = [4, 4, 4]

assert error(x, y, beta) == -6
assert squared_error(x, y, beta) == 36

In [8]:
def sqerror_gradient(x: Vector, y: float, beta: Vector) -> Vector:
    err = error(x, y, beta)
    return [2 * err * x_i for x_i in x]


assert sqerror_gradient(x, y, beta) == [-12, -24, -36]

In [9]:
def least_squares_fit(xs: list[Vector], ys: Vector, learning_rate: float = 0.001, num_steps: int = 1000, batch_size: int = 1) -> Vector:
    guess = [random.random() for _ in xs[0]]

    for _ in tqdm.trange(num_steps, desc="[+] Least squares fitting: ", bar_format="{desc}|{bar:50}|"):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_gradient(x, y, guess) for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess

In [10]:
learning_rate = 0.001

In [11]:
X, y = make_regression(n_samples=100, n_features=3, noise=42, random_state=0)

In [12]:
X = np.insert(X, 0, 1.0, axis=1)

In [13]:
beta = least_squares_fit(X, y, learning_rate, 5000, 25)

[+] Least squares fitting: |██████████████████████████████████████████████████|


In [14]:
print(f"[+] Coefficients: {[round(b, 4) for b in beta]}")

[+] Coefficients: [np.float64(-10.0923), np.float64(41.4454), np.float64(13.9702), np.float64(58.0754)]


## Quality of Fitting

In [15]:
def multiple_r_squared(xs: list[Vector], ys: Vector, beta: Vector) -> float:
    sum_of_squared_errors = sum(error(x, y, beta)**2 for x, y in zip(xs, ys))
    return 1.0 - sum_of_squared_errors / total_sum_of_squares(ys)

In [16]:
print(f"[+] R2-score: {multiple_r_squared(X, y, beta):.4f}")

[+] R2-score: 0.7609


## Samples Reproduction

In [17]:
x = TypeVar("X")
stat = TypeVar("Stat")

In [18]:
def bootstrap_sample(data: list[x]) -> list[x]:
    return [random.choice(data) for _ in data]

In [19]:
def bootstrap_statistic(data: list[x], stats_fn: Callable[[list[x]], stat], num_samples: int) -> list[stat]:
    return [stats_fn(bootstrap_sample(data)) for _ in range(101)]

In [20]:
close_to_100 = [99.5 + random.random() for _ in range(101)]

In [21]:
far_from_100 = ([99.5 + random.random()] + [random.random() for _ in range(50)] + [200 + random.random() for _ in range(50)])

In [22]:
medians_close = bootstrap_statistic(close_to_100, median, 100)

assert standard_deviation(medians_close) < 1

In [23]:
medians_far = bootstrap_statistic(far_from_100, median, 100)

assert standard_deviation(medians_far) > 90

## Standard Errors of Regression Coefficients

In [24]:
def estimate_sample_beta(pairs: list[tuple[Vector, float]]) -> Vector:
    x_sample = [x for x, _ in pairs]
    y_sapmle = [y for _, y in pairs]
    beta = least_squares_fit(x_sample, y_sapmle, learning_rate, 5000, 25)
    print(f"[+] Bootstrap selection: {[round(b, 4) for b in beta]}")
    return beta

In [25]:
bootstrap_betas = bootstrap_statistic(list(zip(X, y)), estimate_sample_beta, 100)

[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-2.7188), np.float64(41.4377), np.float64(13.4025), np.float64(59.0156)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.7463), np.float64(42.3037), np.float64(15.6438), np.float64(56.4657)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-6.9171), np.float64(39.9924), np.float64(13.5681), np.float64(64.0523)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-6.4749), np.float64(40.02), np.float64(8.0039), np.float64(58.9258)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-5.62), np.float64(41.096), np.float64(7.0504), np.float64(56.4893)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-16.2884), np.float64(51.7653), np.float64(7.7193), np.float64(62.5027)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(3.506), np.float64(38.7795), np.float64(23.2955), np.float64(55.1777)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-15.0909), np.float64(48.4148), np.float64(13.7523), np.float64(57.5836)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.9605), np.float64(46.4848), np.float64(14.0479), np.float64(55.8431)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.0055), np.float64(41.8034), np.float64(18.2931), np.float64(58.5875)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.3416), np.float64(40.981), np.float64(5.2176), np.float64(63.5615)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.1416), np.float64(42.5831), np.float64(15.5124), np.float64(50.8931)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-5.8816), np.float64(35.6085), np.float64(16.5242), np.float64(60.9064)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.2982), np.float64(44.1726), np.float64(14.3263), np.float64(61.0714)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-19.4245), np.float64(49.4279), np.float64(13.9504), np.float64(61.114)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-15.132), np.float64(46.5109), np.float64(15.2934), np.float64(61.0367)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-8.1008), np.float64(44.8722), np.float64(17.6984), np.float64(58.1619)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.2328), np.float64(45.2951), np.float64(15.4793), np.float64(61.2301)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.3605), np.float64(40.1889), np.float64(9.9829), np.float64(61.3937)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.226), np.float64(41.9874), np.float64(13.4888), np.float64(62.0385)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.2817), np.float64(40.7917), np.float64(22.8085), np.float64(57.7441)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.6747), np.float64(42.1321), np.float64(7.2691), np.float64(50.8493)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-23.7311), np.float64(46.3607), np.float64(7.4488), np.float64(56.4677)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-15.8497), np.float64(37.0244), np.float64(8.7125), np.float64(60.9608)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.5764), np.float64(45.1946), np.float64(5.8754), np.float64(58.4819)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-8.6041), np.float64(43.6625), np.float64(17.3629), np.float64(58.3257)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-3.642), np.float64(41.1779), np.float64(10.1427), np.float64(57.7034)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-6.9832), np.float64(38.9156), np.float64(18.8344), np.float64(57.3058)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-15.1353), np.float64(43.1864), np.float64(8.3974), np.float64(57.5556)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.259), np.float64(34.967), np.float64(22.5823), np.float64(56.3476)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.5705), np.float64(44.9087), np.float64(13.5597), np.float64(60.8349)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-2.597), np.float64(42.3567), np.float64(11.7955), np.float64(55.1277)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.8189), np.float64(36.4045), np.float64(10.8443), np.float64(62.8556)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.7345), np.float64(39.31), np.float64(19.0893), np.float64(59.0377)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-14.7512), np.float64(39.0327), np.float64(16.5502), np.float64(60.1092)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-5.9258), np.float64(40.5055), np.float64(14.2679), np.float64(63.538)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.3488), np.float64(38.9724), np.float64(16.295), np.float64(61.9328)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.3936), np.float64(40.4386), np.float64(5.707), np.float64(57.3447)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.8969), np.float64(31.6253), np.float64(22.0649), np.float64(52.6728)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-8.6781), np.float64(43.796), np.float64(13.3528), np.float64(58.8387)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-14.601), np.float64(39.2684), np.float64(9.4846), np.float64(59.6591)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-19.8461), np.float64(37.7437), np.float64(13.6127), np.float64(59.5095)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.8946), np.float64(30.5719), np.float64(10.3508), np.float64(54.786)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.5109), np.float64(37.3125), np.float64(7.2957), np.float64(59.7586)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-16.3314), np.float64(46.6444), np.float64(22.1039), np.float64(55.3587)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-2.3019), np.float64(38.5043), np.float64(20.376), np.float64(58.1776)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.4704), np.float64(36.8258), np.float64(6.9711), np.float64(55.8163)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-5.6912), np.float64(38.1296), np.float64(13.0162), np.float64(55.0112)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.7458), np.float64(45.8178), np.float64(11.0112), np.float64(64.7516)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-8.3067), np.float64(36.8514), np.float64(8.9808), np.float64(56.5162)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.9676), np.float64(43.6766), np.float64(18.3149), np.float64(56.7248)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.0841), np.float64(38.2216), np.float64(17.6837), np.float64(56.8)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-8.761), np.float64(40.1135), np.float64(11.6554), np.float64(57.9953)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.1211), np.float64(40.7537), np.float64(8.9145), np.float64(57.6471)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.7336), np.float64(50.567), np.float64(9.3777), np.float64(61.9845)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-15.0393), np.float64(41.781), np.float64(6.6602), np.float64(62.1127)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.2911), np.float64(44.1438), np.float64(14.5326), np.float64(57.2207)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.0887), np.float64(47.5018), np.float64(17.3476), np.float64(65.7226)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.4671), np.float64(39.7263), np.float64(17.9575), np.float64(53.1531)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-8.9267), np.float64(40.5222), np.float64(17.7544), np.float64(61.851)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.2393), np.float64(39.4784), np.float64(4.7324), np.float64(58.645)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.755), np.float64(40.8605), np.float64(11.9093), np.float64(59.7508)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-14.1095), np.float64(37.5575), np.float64(19.1912), np.float64(55.2712)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.16), np.float64(46.525), np.float64(6.99), np.float64(57.5146)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-5.9208), np.float64(39.1371), np.float64(9.1543), np.float64(57.9909)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.3158), np.float64(40.3926), np.float64(19.1208), np.float64(58.6522)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-13.9891), np.float64(41.5751), np.float64(13.1257), np.float64(59.5136)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.6997), np.float64(49.7952), np.float64(13.451), np.float64(55.9218)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.9313), np.float64(41.0767), np.float64(5.2139), np.float64(57.7132)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-14.8287), np.float64(40.5024), np.float64(6.1211), np.float64(59.7046)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.0214), np.float64(42.1085), np.float64(3.9454), np.float64(57.5827)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.0337), np.float64(44.3839), np.float64(10.5169), np.float64(53.3063)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-15.0749), np.float64(43.9018), np.float64(14.5533), np.float64(66.7663)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.7672), np.float64(44.1504), np.float64(8.5931), np.float64(52.6963)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.3552), np.float64(36.9794), np.float64(10.8116), np.float64(60.7961)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.5968), np.float64(38.7978), np.float64(13.9166), np.float64(59.7239)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.1411), np.float64(40.7341), np.float64(13.2193), np.float64(60.2124)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-8.9232), np.float64(41.0104), np.float64(19.1606), np.float64(63.043)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-5.5377), np.float64(42.1524), np.float64(18.9364), np.float64(56.0009)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.3338), np.float64(41.0276), np.float64(7.3798), np.float64(54.5167)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-13.9204), np.float64(47.5774), np.float64(20.8052), np.float64(69.0214)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.8913), np.float64(41.0153), np.float64(10.7689), np.float64(62.3034)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-14.9728), np.float64(43.1839), np.float64(13.3018), np.float64(57.7406)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-12.1078), np.float64(47.9815), np.float64(14.9104), np.float64(61.082)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.8601), np.float64(38.0053), np.float64(21.4352), np.float64(59.3674)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-16.9241), np.float64(38.4035), np.float64(11.5885), np.float64(57.1873)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.3279), np.float64(52.1461), np.float64(10.0752), np.float64(60.9891)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-4.9717), np.float64(43.0638), np.float64(11.9859), np.float64(53.1013)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-1.2521), np.float64(46.6175), np.float64(6.9289), np.float64(57.6208)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-3.6229), np.float64(37.5771), np.float64(10.6112), np.float64(56.3456)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.6418), np.float64(42.8339), np.float64(11.4887), np.float64(60.0928)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-3.2748), np.float64(39.2478), np.float64(2.159), np.float64(58.2065)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-9.4564), np.float64(46.8484), np.float64(23.2302), np.float64(56.9975)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-15.7414), np.float64(44.701), np.float64(9.5349), np.float64(62.7967)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-6.5109), np.float64(44.7068), np.float64(19.9743), np.float64(58.4532)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-10.4191), np.float64(39.8402), np.float64(13.7294), np.float64(53.1657)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-7.7486), np.float64(43.1639), np.float64(13.3472), np.float64(56.9092)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-11.2982), np.float64(39.9621), np.float64(12.795), np.float64(63.0498)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-5.4053), np.float64(36.3863), np.float64(17.0522), np.float64(53.4014)]


[+] Least squares fitting: |██████████████████████████████████████████████████|


[+] Bootstrap selection: [np.float64(-13.0803), np.float64(39.3814), np.float64(11.421), np.float64(58.3909)]


[+] Least squares fitting: |██████████████████████████████████████████████████|

[+] Bootstrap selection: [np.float64(-9.714), np.float64(36.0215), np.float64(17.4381), np.float64(57.2069)]





In [26]:
bootstrap_standard_errors = [standard_deviation([beta[i] for beta in bootstrap_betas]) for i in range(4)]

In [27]:
print(f"[+] Standard deviations: {[round(error, 4) for error in bootstrap_standard_errors]}")

[+] Standard deviations: [4.1829, 3.9911, 4.9428, 3.3145]


In [28]:
def p_value(beta_hat_j: float, sigma_hat_j: float) -> float:
    if beta_hat_j > 0:
        return 2 * (1 - normal_cdf(beta_hat_j / sigma_hat_j))
    else:
        return 2 * normal_cdf(beta_hat_j / sigma_hat_j)

In [29]:
print(f"[+] P-value: {p_value(0, 4):.4f}")

[+] P-value: 1.0000


## Regularization

#### Ridge

In [30]:
def ridge_penalty(beta: Vector, alpha: float) -> float:
    return alpha * dot(beta[1:], beta[1:])

In [31]:
def squared_error_ridge(x: Vector, y: float, beta: Vector, alpha: float) -> float:
    return error(x, y, beta)**2 + ridge_penalty(beta, alpha)

In [32]:
def ridge_penalty_gradient(beta: Vector, alpha: float) -> Vector:
    return [0.0] + [2 * alpha * beta_j for beta_j in beta[1:]]

In [33]:
def sqerror_ridge_gradient(x: Vector, y: Vector, beta: Vector, alpha: float) -> Vector:
    return add(sqerror_gradient(x, y, beta), ridge_penalty_gradient(beta, alpha))

In [34]:
def least_squares_fit_ridge(xs: list[Vector], ys: Vector, alpha: float, learning_rate: float, num_steps: int, batch_size: int = 1) -> Vector:
    guess = [random.random() for _ in xs[0]]

    for i in range(num_steps):
        for start in range(0, len(xs), batch_size):
            batch_xs = xs[start:start + batch_size]
            batch_ys = ys[start:start + batch_size]

            gradient = vector_mean([sqerror_ridge_gradient(x, y, guess, alpha) for x, y in zip(batch_xs, batch_ys)])
            guess = gradient_step(guess, gradient, -learning_rate)

    return guess

In [35]:
beta_0 = least_squares_fit_ridge(X, y, 0.0, learning_rate, 5000, 25)

print(f"[+] Ridge coefficients with alpha 0: {[round(b, 4) for b in beta_0]}")

[+] Ridge coefficients with alpha 0: [np.float64(-10.0923), np.float64(41.4454), np.float64(13.9702), np.float64(58.0754)]


In [36]:
beta_0_1 = least_squares_fit_ridge(X, y, 0.1, learning_rate, 5000, 25)

print(f"[+] Ridge coefficients with alpha 0.1: {[round(b, 4) for b in beta_0_1]}")

[+] Ridge coefficients with alpha 0.1: [np.float64(-9.2208), np.float64(37.1664), np.float64(13.1095), np.float64(52.8417)]


In [37]:
beta_1 = least_squares_fit_ridge(X, y, 1, learning_rate, 5000, 25)

print(f"[+] Ridge coefficients with alpha 1: {[round(b, 4) for b in beta_1]}")

[+] Ridge coefficients with alpha 1: [np.float64(-5.5593), np.float64(19.1888), np.float64(8.2177), np.float64(29.2771)]


In [38]:
beta_10 = least_squares_fit_ridge(X, y, 10, learning_rate, 5000, 25)

print(f"[+] Ridge coefficients with alpha 10: {[round(b, 4) for b in beta_10]}")

[+] Ridge coefficients with alpha 10: [np.float64(-2.2852), np.float64(3.2667), np.float64(1.6908), np.float64(5.3794)]


#### Lasso

In [39]:
def lasso_penalty(beta: Vector, alpha: float) -> float:
    return alpha * sum(abs(beta_i) for beta_i in beta[1:])