### Multiple Regression
<br>
a - alpha<br>
b - beta<br>
i - ith user<br>
e - error term<br>

Equation - $y_{i}$ = $a_{}$ + $b_{1}$$x_{i1}$ + $b_{2}$$x_{i2}$ + ... + $b_{k}$$x_{ik}$ + $e_{i}$

beta  = [alpha, beta_1, beta_2,..., beta_k]<br>
x_i = [1, x_i1, x_i2,..., x_ik]<br>
<br>

In [29]:
inputs = [[123,123,243],[234,455,578],[454,565,900],[705,456,890]]

In [30]:
from typing import List
from scratch.linear_algebra import dot, Vector

def predict(x:Vector, beta: Vector) -> float:
    return dot(x,beta)

def error(x:Vector, y:float, beta:Vector) -> float:
    return predict(x,beta) - y

def squared_error(x:Vector, y:float, beta:Vector) -> float:
    return error(x,y,beta) ** 2

x = [1,2,3]
y = 30
beta = [4,4,4]

assert error(x,y,beta) == -6
assert squared_error(x,y,beta) == 36

In [31]:
def sqerror_gradient(x:Vector, y:float, beta:Vector) -> Vector:
    err = error(x,y,beta)
    return [2*err*x_i for x_i in x]

assert sqerror_gradient(x,y,beta) == [-12,-24,-36]

In [32]:
import random
import tqdm
from scratch.linear_algebra import vector_mean
from scratch.gradient_descent import gradient_step

In [33]:
def least_squares_fit(xs:List[Vector],
                     ys:List[float],
                     learning_rate: float=0.001,
                     num_steps: int = 1000,
                     batch_size: int = 1) -> Vector:
    guess = [random.random() for _ in xs[0]]
    for _ in tqdm.trange(num_steps, desc='least squares fit'):
        for start in range(0, len(x), batch_size):
            batch_xs = xs[start:start+batch_size]
            batch_ys = ys[start:start+batch_size]
            gradient = vector_mean([ sqerror_gradient(x,y,guess)
                                      for x,y in zip(batch_xs,batch_ys)])
            guess = gradient_step(guess,gradient,-learning_rate)
    return guess

In [34]:
from scratch.statistics import daily_minutes_good
from scratch.gradient_descent import gradient_step

random.seed(0)
learning_rate = 0.001
beta = least_squares_fit(inputs,daily_minutes_good,learning_rate,5000,25)
# ERROR ( no 'inputs' variable defined )

least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65964.98it/s]


In [38]:
inputs = [[123,123,243],[234,455,578],[454,565,900],[705,456,890]]
# inputs = [123,123,243,234,455,578,454,565,900,705,456,890]
from scratch.simple_linear_regression import total_sum_of_squares
def multiple_r_squared(xs:List[Vector], ys:Vector, beta:Vector) -> float:
    sum_of_squared_errors = sum(error(x,y,beta**2)
                               for x,y in zip(xs,ys))
    return 1.0 - sum_of_squared_errors/ total_sum_of_squares(ys)
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta) < 0.68
# ERROR ( no 'inputs' variable defined )

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

<b>Digression: The Bootstrap</b>

In [39]:
from typing import TypeVar, Callable
X = TypeVar('X')
Stat = TypeVar('Stat')

def bootstrap_sample(data:List[X]) -> List[X]:
    return [random.choice(data) for _ in data]

def bootstrap_statistics(data:List[X],
                        stats_fn: Callable[[List[X]],Stat],
                        num_samples: int) -> List[Stat]:
    return [stats_fn(bootstrap_sample(data)) for _ in range(num_samples)]
                 


In [40]:
close_to_100 = [99.5 + random.random() for _ in range(101)]

far_from_100 = ([99.5 + random.random()] +
                [random.random() for _ in range(50)] +
                [200 + random.random() for _ in range(50)])

In [41]:
from scratch.statistics import median, standard_deviation
median_close = bootstrap_statistics(close_to_100,median,100)
median_far = bootstrap_statistics(far_from_100,median,100)
print(median_close)
print(median_far)

[100.07969501074561, 100.08761706417543, 100.08980118353116, 100.09628686158311, 100.09628686158311, 100.04869930383559, 100.04744091132842, 100.08980118353116, 100.05126724609055, 100.08338203945503, 100.16024537862239, 100.05126724609055, 100.09628686158311, 100.07565101416489, 100.1108869734438, 100.05126724609055, 100.08980118353116, 100.13014734041147, 100.09628686158311, 100.04059992494805, 100.08980118353116, 100.07969501074561, 100.1108869734438, 100.16024537862239, 100.11277317986861, 100.08761706417543, 100.07565101416489, 100.04028360697032, 100.1127831050407, 100.11277317986861, 100.06751074062068, 100.08980118353116, 100.11836899667533, 100.08980118353116, 100.11836899667533, 100.09628686158311, 100.11836899667533, 100.11836899667533, 100.06751074062068, 100.07565101416489, 100.13014734041147, 100.01127472136861, 100.09628686158311, 100.07565101416489, 100.09628686158311, 100.1127831050407, 100.08761706417543, 100.00794064252058, 100.07565101416489, 100.08338203945503, 100

In [42]:
from typing import Tuple
import datetime

def estimate_sample_beta(pairs:List[Tuple[Vector,float]]):
    x_sample = [x for x, _ in pairs]
    y_sample = [y for _, y in pairs]
    beta  = least_squares_fit(x_sample,y_sample,learning_rate,5000,25)
    print("bootstrap sample",beta)
    return beta

random.seed(0)
bootstrap_betas = bootstrap_statistics(list(zip(inputs, daily_minutes_good)),
estimate_sample_beta,
100)
# ERROR ( no 'inputs' variable defined )

least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62695.69it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64248.22it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65109.33it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 57601.56it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65090.54it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61503.67it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64247.24it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65940.09it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65081.24it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64241.93it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65967.05it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64274.22it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64078.02it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62642.13it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 59682.12it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61868.00it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62640.45it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65877.53it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61116.33it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61873.29it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61159.82it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62666.84it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 58956.62it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64298.46it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65958.55it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65939.05it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 52225.51it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62667.21it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65083.47it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 59680.59it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62667.03it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 59706.24it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62668.15it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61893.75it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]



least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62667.77it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 63436.07it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65085.08it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]



least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62644.56it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61892.65it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 58266.04it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]



least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61891.74it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 54474.59it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 60376.98it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 45981.81it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65080.84it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]



least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 52773.68it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65082.66it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64248.62it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64248.62it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65081.65it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64247.83it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]



least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61870.37it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64314.82it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 60375.41it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 66846.19it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65939.67it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 63435.68it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 66816.16it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64275.60it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 56309.36it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 56970.18it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61172.85it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61872.20it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 67719.75it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 56331.44it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 55685.57it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 63461.79it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64246.45it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65095.79it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62641.01it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61874.39it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 55092.38it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 66842.78it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 63462.56it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64277.37it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 66845.76it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61113.13it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65931.17it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]



least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65081.24it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65081.04it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61690.56it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65936.15it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61138.61it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62667.59it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61894.48it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64274.81it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62667.77it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 60402.54it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64275.79it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64274.41it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 64274.22it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 62641.76it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 61867.27it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 59683.65it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 58957.95it/s]
least squares fit:   0%|                                                                      | 0/5000 [00:00<?, ?it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]


least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 59658.35it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 49150.58it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65109.13it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]



least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65967.68it/s]
least squares fit: 100%|████████████████████████████████████████████████████████| 5000/5000 [00:00<00:00, 65081.04it/s]

bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]
bootstrap sample [nan, nan, nan]





In [43]:
bootstrap_standard_errors = [
                    standard_deviation([beta[i] for beta in bootstrap_betas])
for i in range(4)]
print(bootstrap_standard_errors)
# ERROR ( no 'inputs' variable defined )

IndexError: list index out of range

In [None]:
from scratch.probability import normal_cdf

def p_value(beta_hat_j: float, sigma_hat_j:float) -> float:
    if beta_hat_j > 0:
        return 2 * (1 - normal_cdf(beta_hat_j/sigma_hat_j))
    else:
        return 2 * normal_cdf(beta_hat_j/sigma_hat_j)

In [None]:
assert p_value(30.58, 1.27) < 0.001 # constant term
assert p_value(0.972, 0.103) < 0.001 # num_friends

<b>Regularization</b>

In [44]:
def ridge_penalty(beta:Vector, alpha:float)->float:
    return alpha*dot(beta[1:],beta[1:])

In [45]:
def squared_error_ridge(x: Vector,
                        y: float,
                        beta: Vector,
                        alpha: float) -> float:
    return error(x, y, beta) ** 2 + ridge_penalty(beta, alpha)



from scratch.linear_algebra import add

def ridge_penalty_gradient(beta: Vector, alpha: float) -> Vector:
    return [0.] + [2 * alpha * beta_j for beta_j in beta[1:]]

def sqerror_ridge_gradient(x: Vector,
                            y: float,
                            beta: Vector,
                            alpha: float) -> Vector:            
    return add(sqerror_gradient(x, y, beta),
                ridge_penalty_gradient(beta, alpha))


def least_squares_fit_ridge(xs:List[Vector],
                     ys:List[float],
                     learning_rate: float=0.001,
                     num_steps: int = 1000,
                     batch_size: int = 1) -> Vector:
    guess = [random.random() for _ in xs[0]]
    for _ in tqdm.trange(num_steps, desc='least squares fit'):
        for start in range(0, len(x), batch_size):
            batch_xs = xs[start:start+batch_size]
            batch_ys = ys[start:start+batch_size]
            gradient = vector_mean([ sqerror_ridge_gradient(x,y,guess)
                                      for x,y in zip(batch_xs,batch_ys)])
            guess = gradient_step(guess,gradient,-learning_rate)
    return guess

In [46]:
random.seed(0)
beta_0 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.0, # alpha
learning_rate, 5000, 25)


# [30.51, 0.97, -1.85, 0.91]
assert 5 < dot(beta_0[1:], beta_0[1:]) < 6
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0) < 0.69
# ERROR ( no 'inputs' variable defined )
beta_0_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 0.1, # alpha
learning_rate, 5000, 25)
# [30.8, 0.95, -1.83, 0.54]
assert 4 < dot(beta_0_1[1:], beta_0_1[1:]) < 5
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_0_1) < 0.69
beta_1 = least_squares_fit_ridge(inputs, daily_minutes_good, 1, # alpha
learning_rate, 5000, 25)
# [30.6, 0.90, -1.68, 0.10]
assert 3 < dot(beta_1[1:], beta_1[1:]) < 4
assert 0.67 < multiple_r_squared(inputs, daily_minutes_good, beta_1) < 0.69
beta_10 = least_squares_fit_ridge(inputs, daily_minutes_good,10, # alpha
learning_rate, 5000, 25)
# [28.3, 0.67, -0.90, -0.01]
assert 1 < dot(beta_10[1:], beta_10[1:]) < 2
assert 0.5 < multiple_r_squared(inputs, daily_minutes_good, beta_10) < 0.6

TypeError: least_squares_fit_ridge() takes from 2 to 5 positional arguments but 6 were given

In [47]:
def lasso_penalty(beta, alpha):
    return alpha * sum(abs(beta_i) for beta_i in beta[1:])