### Fitting the Model

In [9]:
import random
from scratch.multiple_regression import inputs, sqerror_gradient, least_squares_fit
from scratch.statistics import daily_minutes_good # imports the data

Data looks like this:

In [10]:
# Example of a single input row, one entry per regression feature;
# the cell output simply echoes the list.
[1,    # constant term
 49,   # number of friends
 4,    # work hours per day
 0]    # doesn't have PhD

[1, 49, 4, 0]

In [11]:
# A single worked example: one input vector, its target, and a candidate beta.
x, y, beta = [1, 2, 3], 30, [4, 4, 4]

# The displayed result, [-12, -24, -36], equals 2 * (dot(x, beta) - y) * x_i:
# the prediction is 4 + 8 + 12 = 24, so the error is 24 - 30 = -6.
sqerror_gradient(x, y, beta)

[-12, -24, -36]

In [12]:
learning_rate = 0.001

# Fix the seed right before fitting so that any randomness used by the fit
# is repeatable from run to run.
random.seed(0)

# The progress bar in the output shows 5000 iterations; 25 is presumably the
# minibatch size -- confirm against scratch.multiple_regression.
beta = least_squares_fit(inputs, daily_minutes_good, learning_rate, 5000, 25)

least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2348.68it/s]


In [13]:
# Display the coefficients produced by least_squares_fit in the previous cell.
beta

[30.514795945185586, 0.9748274277323267, -1.8506912934343662, 0.91407780744768]

### Goodness of Fit

In [16]:
from scratch.multiple_regression import multiple_r_squared

# R-squared of the fitted model must land strictly between 0.67 and 0.68.
assert 0.68 > multiple_r_squared(inputs, daily_minutes_good, beta) > 0.67

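Compare this with `rsq = 0.33` for the simple (single-variable) linear model. Keep in mind, though, that adding variables to a regression can never decrease R-squared, so a higher value on its own does not prove that the extra variables are meaningful.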

### The Bootstrap

In [19]:
from scratch.statistics import median, standard_deviation
from scratch.multiple_regression import bootstrap_statistic

# Dataset 1: 101 values, each drawn uniformly from [99.5, 100.5).
close_to_100 = [random.random() + 99.5 for _ in range(101)]

# Dataset 2: also 101 values, but only the first sits near 100; the next 50
# are drawn from [0, 1) and the final 50 from [200, 201).
# The pieces are generated in the same order as they appear in the list, so
# the sequence of random draws is unchanged.
far_from_100 = [99.5 + random.random()]
far_from_100 += [random.random() for _ in range(50)]
far_from_100 += [random.random() + 200 for _ in range(50)]

The medians of both datasets are very close to 100, but the standard deviations of their bootstrapped medians are very different. We can use `bootstrap_statistic` to see this with only 10 bootstrap resamples of each dataset:

In [24]:
# Median of each of 10 bootstrap resamples, for both datasets.
medians_close = bootstrap_statistic(close_to_100, median, 10)
medians_far = bootstrap_statistic(far_from_100, median, 10)

# Spread of the resampled medians: [close dataset, far dataset].
print([standard_deviation(resampled_medians)
       for resampled_medians in (medians_close, medians_far)])

[0.07905788237281329, 98.99742355111857]


In [33]:
# Same experiment run a second time: the resamples are drawn at random, so
# the printed standard deviations differ slightly from the previous run.
medians_close, medians_far = (
    bootstrap_statistic(dataset, median, 10)
    for dataset in (close_to_100, far_from_100)
)
print([standard_deviation(medians_close), standard_deviation(medians_far)])

[0.0787539991050684, 102.8422228106884]


### Standard Errors of Regression Coefficients

In [42]:
from scratch.linear_algebra import Vector
from typing import List, Tuple

def estimate_sample_beta(pairs: List[Tuple[Vector, float]]):
    """Fit regression coefficients to one sample of (x, y) pairs.

    Splits `pairs` into parallel lists of inputs and targets, fits them with
    `least_squares_fit` using the notebook-level `learning_rate` (and the
    same 5000 / 25 settings as the main fit), prints the fitted coefficients,
    and returns them.
    """
    sample_inputs = []
    sample_targets = []
    for features, target in pairs:
        sample_inputs.append(features)
        sample_targets.append(target)

    fitted = least_squares_fit(sample_inputs, sample_targets,
                               learning_rate, 5000, 25)
    # Progress trace: one line per bootstrap sample.
    print("bootstrap sample", fitted)
    return fitted

# Fixed seed so that you get the same results as me.
random.seed(0)

# Refit the model on 100 bootstrap resamples of the (input, target) pairs.
# This will take a couple of minutes!
bootstrap_betas = bootstrap_statistic(
    [(features, target) for features, target in zip(inputs, daily_minutes_good)],
    estimate_sample_beta,
    100,
)

least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2342.65it/s]
least squares fit:   4%|▍         | 211/5000 [00:00<00:02, 2105.70it/s]

bootstrap sample [30.49402029547432, 1.0393791030498776, -1.9516851948558502, 0.7483721251697333]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2288.29it/s]
least squares fit:   4%|▍         | 219/5000 [00:00<00:02, 2187.69it/s]

bootstrap sample [30.149963287526045, 1.0005300432763113, -2.0650380122822543, 3.177179854834797]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2303.72it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2249.46it/s]

bootstrap sample [29.202826897693722, 1.0017089956376213, -1.5294248424787367, 0.9528580285760854]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2381.66it/s]
least squares fit:   4%|▍         | 222/5000 [00:00<00:02, 2215.72it/s]

bootstrap sample [31.29481217471851, 0.959264729494101, -1.9120875473727545, 0.039471107599519425]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2378.70it/s]
least squares fit:   5%|▍         | 230/5000 [00:00<00:02, 2298.57it/s]

bootstrap sample [32.124144227949955, 0.8569794405277468, -1.9936770520754086, 1.0416943131373024]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2370.47it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2242.47it/s]

bootstrap sample [31.8691994453096, 0.7748022870492418, -2.0087625702876446, -1.2407036547656678]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2327.98it/s]
least squares fit:   4%|▍         | 224/5000 [00:00<00:02, 2232.01it/s]

bootstrap sample [31.08119759650208, 0.998386254386918, -1.9833984114987815, 0.9567646217580389]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2383.02it/s]
least squares fit:   4%|▍         | 221/5000 [00:00<00:02, 2209.63it/s]

bootstrap sample [29.254530450577782, 0.9763387220017684, -1.7430339427043595, 1.9944240584590935]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2364.37it/s]
least squares fit:   5%|▍         | 230/5000 [00:00<00:02, 2293.92it/s]

bootstrap sample [31.649174199331632, 0.9389340937491032, -1.9733848473304205, -0.15249287969349437]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2368.52it/s]
least squares fit:   5%|▍         | 228/5000 [00:00<00:02, 2272.34it/s]

bootstrap sample [30.040109260720964, 1.0531247386421572, -1.7694878560354388, 1.302971911084249]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2352.89it/s]
least squares fit:   4%|▍         | 220/5000 [00:00<00:02, 2194.61it/s]

bootstrap sample [29.066927054721297, 1.2792640005590372, -1.937339904947856, 0.9183668519320846]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2365.44it/s]
least squares fit:   4%|▍         | 213/5000 [00:00<00:02, 2122.94it/s]

bootstrap sample [31.740476303331718, 0.9538879291586574, -2.0689725879612477, 1.4785830120835612]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2326.67it/s]
least squares fit:   4%|▍         | 222/5000 [00:00<00:02, 2216.87it/s]

bootstrap sample [29.46654084062671, 0.9837739845117637, -1.9915052407093472, 3.150029950640157]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2342.82it/s]
least squares fit:   4%|▍         | 212/5000 [00:00<00:02, 2117.07it/s]

bootstrap sample [30.97515705531374, 0.9420086669374396, -2.0367671746636065, 0.6323599067111714]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2318.87it/s]
least squares fit:   4%|▍         | 224/5000 [00:00<00:02, 2231.09it/s]

bootstrap sample [31.478778128163995, 0.8623617407485805, -1.8798782324632368, -0.11949170941208796]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2247.32it/s]
least squares fit:   5%|▍         | 228/5000 [00:00<00:02, 2278.44it/s]

bootstrap sample [33.87286992682308, 0.8824018752321863, -1.8978803929581156, -1.0333647107478692]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2370.87it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2262.84it/s]

bootstrap sample [29.272206898314987, 1.0899411603739348, -1.8911943299601002, 3.162677841885805]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2350.81it/s]
least squares fit:   4%|▍         | 221/5000 [00:00<00:02, 2206.27it/s]

bootstrap sample [30.83577809561691, 1.0242186355671827, -1.9209251081222494, 1.3383795133620962]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2352.02it/s]
least squares fit:   4%|▍         | 222/5000 [00:00<00:02, 2218.30it/s]

bootstrap sample [28.211162672015906, 1.458352440392638, -1.70241171517105, 0.94520401518726]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2291.27it/s]
least squares fit:   4%|▍         | 190/5000 [00:00<00:02, 1894.05it/s]

bootstrap sample [29.93552336056075, 0.9470529669956465, -1.8491245571618218, 0.8573641103651921]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2239.21it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2240.74it/s]

bootstrap sample [30.636052325886002, 0.9966176913889679, -1.8308401560119625, 0.13862673979220685]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2360.98it/s]
least squares fit:   4%|▍         | 215/5000 [00:00<00:02, 2148.19it/s]

bootstrap sample [30.855945311129382, 0.9925731301194982, -1.834813509355548, 1.9711641797749935]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2268.24it/s]
least squares fit:   4%|▍         | 220/5000 [00:00<00:02, 2197.80it/s]

bootstrap sample [29.77226728370608, 1.0493381798575807, -1.6999309651266667, 0.9221651877575128]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2285.96it/s]
least squares fit:   4%|▍         | 220/5000 [00:00<00:02, 2191.70it/s]

bootstrap sample [28.78470035821775, 0.9629668755117143, -1.7818333154132011, 1.905170320676074]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2319.30it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2261.85it/s]

bootstrap sample [31.769457992268443, 0.9040180814550004, -1.867677593282121, -0.7957987643064021]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2297.74it/s]
least squares fit:   5%|▍         | 231/5000 [00:00<00:02, 2305.59it/s]

bootstrap sample [30.06836087625883, 0.9237365767889174, -1.7326788050658604, 1.9044381512517516]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2350.22it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2265.31it/s]

bootstrap sample [29.248924522774857, 1.0251706036709467, -1.6396068581125103, 1.7875039505127974]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2294.86it/s]
least squares fit:   9%|▉         | 448/5000 [00:00<00:02, 2255.49it/s]

bootstrap sample [26.160390418551746, 1.3566609275406472, -1.880731098382104, 3.884946816272215]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2295.22it/s]
least squares fit:   5%|▍         | 231/5000 [00:00<00:02, 2302.82it/s]

bootstrap sample [31.9708823034869, 0.8717159490168253, -1.8037586194211703, -0.23788897755135452]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2348.16it/s]
least squares fit:   5%|▍         | 226/5000 [00:00<00:02, 2252.40it/s]

bootstrap sample [30.580903591168788, 0.9610711598856186, -1.8984859248085817, -0.0023187395782722853]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2288.43it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2243.27it/s]

bootstrap sample [31.433330253362577, 0.8768141821390377, -1.7328584033279486, -0.10210988051437449]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2304.23it/s]
least squares fit:   5%|▍         | 226/5000 [00:00<00:02, 2258.01it/s]

bootstrap sample [30.984236860566945, 1.036149466142919, -2.2200016449095226, 1.0886749895563579]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2346.59it/s]
least squares fit:   4%|▍         | 223/5000 [00:00<00:02, 2226.00it/s]

bootstrap sample [29.28237674477942, 1.0858388836439414, -1.74280602847473, 1.4397328297413239]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2306.04it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2246.71it/s]

bootstrap sample [30.65460047430859, 0.9454408039075628, -1.7320071301269266, -0.1485862182089119]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2284.89it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2241.41it/s]

bootstrap sample [29.118139496835955, 0.893808801696634, -1.9153563192896768, 2.0598345811489462]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2089.44it/s]
least squares fit:   4%|▍         | 223/5000 [00:00<00:02, 2225.79it/s]

bootstrap sample [29.95488463195938, 0.9940567914003665, -1.7605085370056377, 1.6096257131696945]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2253.01it/s]
least squares fit:   5%|▍         | 228/5000 [00:00<00:02, 2272.87it/s]

bootstrap sample [31.000164842544855, 0.962368315956188, -1.9115208623969564, 0.7473190835230143]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2185.65it/s]
least squares fit:   4%|▍         | 190/5000 [00:00<00:02, 1893.45it/s]

bootstrap sample [30.825204744897878, 0.8912590208026672, -1.770469093607492, 0.7459655949536621]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2280.37it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2264.64it/s]

bootstrap sample [29.366136812382027, 1.012558241060829, -1.6182773155952548, 1.017025703754098]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2284.01it/s]
least squares fit:   5%|▍         | 226/5000 [00:00<00:02, 2252.05it/s]

bootstrap sample [29.94295970143512, 1.0167217566773747, -1.5621167917565122, -0.1030904763985423]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2311.06it/s]
least squares fit:   5%|▍         | 228/5000 [00:00<00:02, 2274.44it/s]

bootstrap sample [29.962898858207343, 1.0652251821283687, -1.926924147663584, 2.0385736378519947]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2164.19it/s]
least squares fit:   4%|▍         | 196/5000 [00:00<00:02, 1956.22it/s]

bootstrap sample [30.525302041791818, 0.9658944102293198, -1.8870631894489638, 0.5367690208128764]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2295.01it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2247.54it/s]

bootstrap sample [30.6785320563259, 1.0139828545599132, -1.7817299670979692, 1.6026393229652947]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2383.69it/s]
least squares fit:   5%|▍         | 226/5000 [00:00<00:02, 2246.91it/s]

bootstrap sample [30.09073747870904, 1.0047123547747132, -1.9560265455918162, 2.75259429615735]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2326.88it/s]
least squares fit:   4%|▍         | 216/5000 [00:00<00:02, 2155.84it/s]

bootstrap sample [30.937038893678913, 0.9670590611928079, -2.1124811600264293, 0.3258045605146989]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2192.52it/s]
least squares fit:   4%|▍         | 202/5000 [00:00<00:02, 2013.10it/s]

bootstrap sample [28.78930855900356, 1.1730115746597942, -1.7835138640623003, 3.2623158308236095]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2280.85it/s]
least squares fit:   5%|▍         | 234/5000 [00:00<00:02, 2332.17it/s]

bootstrap sample [31.14754912309552, 0.9326436111603991, -1.7707952504307622, -1.099359064604309]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2280.20it/s]
least squares fit:   5%|▍         | 231/5000 [00:00<00:02, 2303.15it/s]

bootstrap sample [31.813727613195653, 0.9683784085384612, -2.019078886892217, 0.7501206686148623]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2365.38it/s]
least squares fit:   5%|▍         | 231/5000 [00:00<00:02, 2309.03it/s]

bootstrap sample [30.223398353230184, 0.9373764744862103, -1.5323607166675373, -0.01469994075209993]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2337.77it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2242.50it/s]

bootstrap sample [28.01777500444891, 1.091598878794665, -1.6190191022832499, 2.397154344588124]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2338.96it/s]
least squares fit:   9%|▉         | 462/5000 [00:00<00:01, 2316.62it/s]

bootstrap sample [29.342668886496593, 0.9815156932180105, -1.9184777914462317, 1.5482939749639446]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2355.98it/s]
least squares fit:   4%|▍         | 222/5000 [00:00<00:02, 2214.95it/s]

bootstrap sample [32.53937166649288, 1.060883971208886, -2.2704689582768722, 0.3681597653761555]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2342.15it/s]
least squares fit:   4%|▍         | 218/5000 [00:00<00:02, 2179.62it/s]

bootstrap sample [30.106198499206915, 0.9657134612613777, -1.7191529436530641, -0.6267619207221298]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2331.80it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2268.06it/s]

bootstrap sample [29.993282359977137, 0.9757399392816419, -1.9767875486880904, 2.048669364846268]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2350.71it/s]
least squares fit:   4%|▍         | 216/5000 [00:00<00:02, 2157.99it/s]

bootstrap sample [30.571136409586924, 1.066488813531558, -1.6618835177744289, -0.19985556821698586]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2330.47it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2243.60it/s]

bootstrap sample [30.9490097252882, 0.9597396222139453, -1.9214823753987709, 1.25885503487694]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2345.19it/s]
least squares fit:   4%|▍         | 224/5000 [00:00<00:02, 2232.87it/s]

bootstrap sample [31.887007554673076, 0.9506671496957437, -2.152653973374404, 1.6869486505999165]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2362.10it/s]
least squares fit:   4%|▍         | 225/5000 [00:00<00:02, 2246.91it/s]

bootstrap sample [29.081704350215187, 1.0495038787355986, -1.6920009023683746, 3.609080049949202]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2347.27it/s]
least squares fit:   4%|▍         | 218/5000 [00:00<00:02, 2174.35it/s]

bootstrap sample [31.479546830562978, 1.1296437640969312, -1.8930013630375897, 0.2328971438009536]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2328.93it/s]
least squares fit:   5%|▍         | 226/5000 [00:00<00:02, 2259.80it/s]

bootstrap sample [30.610973912805395, 1.0065894319911013, -1.836243246680104, 0.4499397217455247]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2347.71it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2266.68it/s]

bootstrap sample [31.80927695488258, 0.9821469730488941, -2.007959621103926, -0.2411398745050231]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2344.22it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2261.39it/s]

bootstrap sample [31.024210851804416, 0.9515774062029452, -1.9408222914617927, 0.6442854716394794]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2363.81it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2267.53it/s]

bootstrap sample [28.908141235990033, 1.0556273838810308, -1.7935754991375803, 2.082266951237433]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2368.59it/s]
least squares fit:   5%|▍         | 226/5000 [00:00<00:02, 2256.68it/s]

bootstrap sample [30.025383087071763, 0.9490311032868943, -1.8905462953821093, 1.614968102502849]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2378.14it/s]
least squares fit:   5%|▍         | 234/5000 [00:00<00:02, 2332.36it/s]

bootstrap sample [31.344911606937217, 0.9596230552550087, -2.084944019182774, 1.0635864768954955]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2396.77it/s]
least squares fit:   5%|▍         | 230/5000 [00:00<00:02, 2291.30it/s]

bootstrap sample [30.88785665879763, 0.9739691303740718, -1.750496781109518, -2.0086684580110616]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2372.76it/s]
least squares fit:   9%|▉         | 460/5000 [00:00<00:01, 2310.01it/s]

bootstrap sample [30.524172097277887, 0.9468432200060536, -1.7489583214704674, -0.42947540813439916]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2329.50it/s]
least squares fit:   4%|▎         | 181/5000 [00:00<00:02, 1805.35it/s]

bootstrap sample [33.73887281461898, 0.8342998931764716, -2.0056583070815233, -1.0048943591784738]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 1935.94it/s]
least squares fit:   4%|▍         | 218/5000 [00:00<00:02, 2176.32it/s]

bootstrap sample [29.04731144829789, 0.9737448743420717, -1.7622553843049413, 0.9744871197165679]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2216.65it/s]
least squares fit:   4%|▎         | 181/5000 [00:00<00:02, 1805.28it/s]

bootstrap sample [30.849086975523765, 1.1142041012783979, -2.055393538038613, 1.8606960468590918]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 1939.37it/s]
least squares fit:   5%|▍         | 244/5000 [00:00<00:01, 2431.12it/s]

bootstrap sample [31.20227902410509, 1.0148203879553739, -1.831139817867853, -0.12803605188562736]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2379.69it/s]
least squares fit:   5%|▍         | 238/5000 [00:00<00:02, 2371.01it/s]

bootstrap sample [30.44951242588852, 0.9188875408835141, -1.6623667661150159, 0.41561209518705605]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2407.36it/s]
least squares fit:   5%|▍         | 246/5000 [00:00<00:01, 2452.29it/s]

bootstrap sample [30.93743647937649, 0.9178249912706591, -1.91788395405578, 0.8027340312172657]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2429.55it/s]
least squares fit:   5%|▍         | 245/5000 [00:00<00:01, 2446.13it/s]

bootstrap sample [33.07304817934109, 0.7669188362229076, -1.8621104803815107, -0.5344373694611129]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2390.25it/s]
least squares fit:   5%|▍         | 241/5000 [00:00<00:01, 2407.99it/s]

bootstrap sample [30.98035452936738, 0.9608047189289309, -1.8571138381579286, 1.2456516010877]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2418.24it/s]
least squares fit:   5%|▍         | 243/5000 [00:00<00:01, 2422.04it/s]

bootstrap sample [29.890049201061156, 0.9320508621300003, -1.815157140889288, 1.6197634219660293]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2426.48it/s]
least squares fit:   5%|▍         | 244/5000 [00:00<00:01, 2430.86it/s]

bootstrap sample [32.7497073906742, 0.8163410438179741, -1.6727937223778233, -1.627203273138944]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2434.36it/s]
least squares fit:   5%|▍         | 244/5000 [00:00<00:01, 2438.21it/s]

bootstrap sample [32.23550207094589, 0.9915112587422378, -2.201685593411146, 0.659721525694611]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2422.75it/s]
least squares fit:   5%|▍         | 243/5000 [00:00<00:01, 2426.06it/s]

bootstrap sample [30.238346353105722, 0.9812068545490507, -1.9183149068660714, 2.4252389104819785]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2411.92it/s]
least squares fit:   5%|▍         | 241/5000 [00:00<00:01, 2406.15it/s]

bootstrap sample [30.574120085079127, 0.9174840515163696, -1.791824539551342, 0.9221993996446399]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2400.42it/s]
least squares fit:   5%|▍         | 227/5000 [00:00<00:02, 2269.05it/s]

bootstrap sample [30.200588272490595, 0.9290781608340558, -1.5128386060160508, -0.27164281164191667]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2297.05it/s]
least squares fit:   5%|▍         | 239/5000 [00:00<00:01, 2384.35it/s]

bootstrap sample [30.568001921567824, 1.0423323558239714, -2.0539328282484295, 2.070512986336468]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2413.19it/s]
least squares fit:   5%|▍         | 242/5000 [00:00<00:01, 2414.71it/s]

bootstrap sample [32.24170594033224, 0.928943846288939, -1.9597146432475416, -0.3283270089441192]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2412.03it/s]
least squares fit:   5%|▍         | 238/5000 [00:00<00:02, 2371.27it/s]

bootstrap sample [32.867472630955, 1.0159010210188608, -2.0279568468137548, -0.5177147877542921]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2116.70it/s]
least squares fit:   2%|▏         | 94/5000 [00:00<00:05, 928.98it/s]

bootstrap sample [29.215869116992934, 1.0071212080144287, -1.9567505776484149, 3.7248516336467894]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2229.23it/s]
least squares fit:   5%|▍         | 244/5000 [00:00<00:01, 2438.83it/s]

bootstrap sample [29.731620363957564, 1.0022351904608418, -1.6056750069107464, 0.38365805637548667]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2416.60it/s]
least squares fit:   5%|▍         | 240/5000 [00:00<00:01, 2396.05it/s]

bootstrap sample [32.67347841435598, 0.8824434637692973, -1.9909101579029314, 0.04871947146702888]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2404.35it/s]
least squares fit:   5%|▍         | 241/5000 [00:00<00:01, 2406.86it/s]

bootstrap sample [29.15775553756214, 1.0683351454601346, -1.7096121511993116, 3.2616854857255317]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2358.78it/s]
least squares fit:   5%|▍         | 243/5000 [00:00<00:01, 2422.79it/s]

bootstrap sample [30.488240564960755, 1.0353317712496077, -1.9149562223503453, 2.595089595245561]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2376.99it/s]
least squares fit:   5%|▍         | 238/5000 [00:00<00:02, 2373.54it/s]

bootstrap sample [31.498485256154574, 0.865173722648589, -1.9003285713857743, -0.4448014961070426]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2398.73it/s]
least squares fit:   5%|▍         | 240/5000 [00:00<00:01, 2396.85it/s]

bootstrap sample [28.568637146495437, 0.9377084816305519, -1.6697079214548882, 2.0378528186926]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2403.66it/s]
least squares fit:   5%|▍         | 244/5000 [00:00<00:01, 2435.91it/s]

bootstrap sample [30.8880890003392, 0.9480046573855904, -1.9409732963472783, -0.38053847722698325]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2430.40it/s]
least squares fit:   5%|▍         | 244/5000 [00:00<00:01, 2434.43it/s]

bootstrap sample [30.449302174408764, 1.121851483678596, -1.9621516796699556, 2.244341597832513]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2440.00it/s]
least squares fit:   5%|▍         | 244/5000 [00:00<00:01, 2435.58it/s]

bootstrap sample [30.266204204539516, 1.006867325680496, -2.1198992898486466, 0.5362851256019128]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2434.06it/s]
least squares fit:   5%|▍         | 245/5000 [00:00<00:01, 2449.85it/s]

bootstrap sample [29.33031812613639, 1.0424517245684064, -1.8849226826885934, 2.2650387817258566]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2176.05it/s]
least squares fit:   5%|▍         | 226/5000 [00:00<00:02, 2248.27it/s]

bootstrap sample [31.777389538816966, 0.8928310423632744, -1.9269578522157438, 0.048635890062917325]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2238.14it/s]
least squares fit:   4%|▍         | 216/5000 [00:00<00:02, 2150.60it/s]

bootstrap sample [28.291072745509133, 1.1873361941623277, -1.8546687169062575, 2.6390276558088757]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2249.64it/s]
least squares fit:   5%|▍         | 243/5000 [00:00<00:01, 2428.83it/s]

bootstrap sample [31.725525991297584, 0.8939775539447468, -1.843559060469148, -0.6224324630864045]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2408.52it/s]
least squares fit:   5%|▍         | 246/5000 [00:00<00:01, 2448.97it/s]

bootstrap sample [30.2731194689144, 0.8005769229528958, -1.6991234036996576, 0.9748341305369917]


least squares fit: 100%|██████████| 5000/5000 [00:02<00:00, 2400.85it/s]

bootstrap sample [31.75696506368952, 1.0790800487199688, -2.0880894078200054, 1.6420943383461737]





In [43]:
# One standard error per coefficient: the standard deviation of that
# coefficient's estimates across all bootstrap fits.
# zip(*bootstrap_betas) transposes the list of beta vectors, so the number of
# coefficients is taken from the data instead of being hard-coded as 4.
bootstrap_standard_errors = [
    standard_deviation(list(coefficient_estimates))
    for coefficient_estimates in zip(*bootstrap_betas)]

print(bootstrap_standard_errors)

[1.2715078186272781, 0.10318410116073963, 0.15510591689663628, 1.2490975248051257]


#### Using P-value to determine coefficient estimation meaningfulness

In [44]:
from scratch.multiple_regression import p_value

In [45]:
# Compute one p-value per coefficient from the estimates and bootstrap
# standard errors this notebook actually produced. The previous hand-copied
# literals (30.58, 0.972, -1.865, 0.923) did not match the fitted `beta`
# shown in the In [13] output.
coefficient_p_values = [
    p_value(coefficient, standard_error)
    for coefficient, standard_error in zip(beta, bootstrap_standard_errors)]
constant_p, num_friends_p, work_hours_p, phd_p = coefficient_p_values

assert constant_p    < 0.001  # constant term
assert num_friends_p < 0.001  # num_friends
assert work_hours_p  < 0.001  # work_hours
assert phd_p         > 0.4    # phd

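The p-value for PhD is large (greater than 0.4), so its coefficient is not significantly different from zero; the estimated PhD coefficient is likely due to random chance rather than a meaningful effect.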

Look into the F-test for more complex datasets.

### Regularization