keywords | title | nb_path | layout |
---|---|---|---|
fastai |
Principal Component Analysis from scratch - preparations |
_notebooks/2020-10-24-pca-preparations.ipynb |
notebook |
{% raw %}
From the Data Science from Scratch book.
import math as m import random import pandas as pd import numpy as npimport altair as alt
</div>
{% raw %}
from typing import List

# Type alias used by the vector helpers in this notebook: a vector is a
# list of floats.
Vector = List[float]
</div>
{% raw %}
def add(vector1: Vector, vector2: Vector) -> Vector:
    """Return the element-wise sum of two vectors of equal length.

    Raises:
        AssertionError: If the two vectors differ in length.
    """
    assert len(vector1) == len(vector2)
    summed = []
    for left, right in zip(vector1, vector2):
        summed.append(left + right)
    return summed
</div>
{% raw %}
def subtract(vector1: Vector, vector2: Vector) -> Vector:
    """Return the element-wise difference ``vector1 - vector2``.

    Raises:
        AssertionError: If the two vectors differ in length.
    """
    assert len(vector1) == len(vector2)
    difference = []
    for left, right in zip(vector1, vector2):
        difference.append(left - right)
    return difference
</div>
{% raw %}
def vector_sum(vectors: List[Vector]) -> Vector:
    """Sum a list of vectors component-wise.

    Args:
        vectors: Non-empty list of vectors that all have the same length.

    Returns:
        A list whose i-th entry is the sum of the i-th entries of every
        input vector.

    Raises:
        AssertionError: If ``vectors`` is empty or the vectors differ in
            length.
    """
    assert vectors
    vector_length = len(vectors[0])
    assert all(len(v) == vector_length for v in vectors)
    # zip(*vectors) groups the i-th components together, so each output
    # entry is accumulated in one pass (starting from 0, as before)
    # without allocating a new running list for every input vector.
    return [sum(components) for components in zip(*vectors)]
</div>
{% raw %}
def scalar_multiply(c: float, vector: Vector) -> Vector:
    """Return a new list with every component of ``vector`` multiplied by ``c``."""
    scaled = []
    for component in vector:
        scaled.append(c * component)
    return scaled
</div>
{% raw %}
def vector_mean(vector: Vector) -> float:
    """Return the arithmetic mean of the components of ``vector``.

    The previous body returned the input scaled by ``1/n`` (a list) and
    never summed the components, which contradicted both the function name
    and the declared ``float`` return type.

    Args:
        vector: Non-empty sequence of numbers.

    Returns:
        The sum of the components divided by their count.

    Raises:
        ZeroDivisionError: If ``vector`` is empty.
    """
    return sum(vector) / len(vector)
</div>
{% raw %}
def dot(vector1: Vector, vector2: Vector) -> float:
    """Return the dot product: the sum of the pairwise component products.

    Raises:
        AssertionError: If the two vectors differ in length.
    """
    assert len(vector1) == len(vector2)
    total = 0
    for left, right in zip(vector1, vector2):
        total = total + left * right
    return total
</div>
{% raw %}
def sum_of_squares(v: Vector) -> float:
    """Return the sum of the squared components of ``v``.

    Computed as the dot product of ``v`` with itself, so the result is a
    scalar (the return annotation previously said ``Vector``).
    """
    return dot(v, v)
</div>
{% raw %}
def magnitude(v: Vector) -> float:
    """Return the square root of ``sum_of_squares(v)``.

    The result is a scalar (the return annotation previously said
    ``Vector``).
    """
    return m.sqrt(sum_of_squares(v))
</div>
{% raw %}
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Move ``v`` along ``gradient`` by ``step_size``.

    The step is ``gradient`` scaled by ``step_size``; the result is ``v``
    plus that step, computed element-wise.
    """
    return add(v, scalar_multiply(step_size, gradient))
</div>
# Random line parameters. In the code visible here they are only printed;
# the sample points below are drawn independently of them.
intercept = random.randint(-30, 30)
coefficient = random.uniform(-1, 1)

# Number of sample points. Both coordinate arrays use it so the sample
# size is defined in exactly one place.
n = 30
# np.random.randint excludes the upper bound, hence the "+ 1".
xs = np.random.randint(-50, 10 + 1, n)
ys = np.random.randint(-20, 50 + 1, n)
df = pd.DataFrame({'x': xs, 'y': ys})

print(intercept, coefficient)

# Scatter plot of the raw points; as the cell's last expression it is what
# the notebook renders.
alt.Chart(df).mark_point().encode(
    alt.X('x:Q'),
    alt.Y('y:Q'),
    alt.Tooltip(['x', 'y'])
)
</div>
-10 0.9679420748641416
function loadScript(lib) {
return new Promise(function(resolve, reject) {
var s = document.createElement('script');
s.src = paths[lib];
s.async = true;
s.onload = () => resolve(paths[lib]);
s.onerror = () => reject(`Error loading script: ${paths[lib]}`);
document.getElementsByTagName("head")[0].appendChild(s);
});
}
function showError(err) {
outputDiv.innerHTML = `<div class="error" style="color:red;">${err}</div>`;
throw err;
}
function displayChart(vegaEmbed) {
vegaEmbed(outputDiv, spec, embedOpt)
.catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));
}
if(typeof define === "function" && define.amd) {
requirejs.config({paths});
require(["vega-embed"], displayChart, err => showError(`Error loading script: ${err.message}`));
} else if (typeof vegaEmbed === "function") {
displayChart(vegaEmbed);
} else {
loadScript("vega")
.then(() => loadScript("vega-lite"))
.then(() => loadScript("vega-embed"))
.catch(showError)
.then(() => displayChart(vegaEmbed));
}
})({"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "data": {"name": "data-253ffb03c5c3da6161e4f4bd338d5909"}, "mark": "point", "encoding": {"tooltip": [{"type": "quantitative", "field": "x"}, {"type": "quantitative", "field": "y"}], "x": {"type": "quantitative", "field": "x"}, "y": {"type": "quantitative", "field": "y"}}, "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", "datasets": {"data-253ffb03c5c3da6161e4f4bd338d5909": [{"x": -34, "y": 0}, {"x": -17, "y": 40}, {"x": -11, "y": 35}, {"x": -36, "y": 49}, {"x": -32, "y": 4}, {"x": -3, "y": 25}, {"x": -19, "y": 43}, {"x": 6, "y": -8}, {"x": -11, "y": 7}, {"x": -36, "y": 8}, {"x": -34, "y": 29}, {"x": -35, "y": 0}, {"x": -16, "y": 21}, {"x": -10, "y": 15}, {"x": -8, "y": 44}, {"x": -7, "y": 7}, {"x": -47, "y": 49}, {"x": 7, "y": 35}, {"x": -16, "y": 40}, {"x": -27, "y": 35}, {"x": -44, "y": -14}, {"x": -30, "y": 12}, {"x": -24, "y": 40}, {"x": -1, "y": 31}, {"x": -29, "y": -2}, {"x": -28, "y": 31}, {"x": -32, "y": -9}, {"x": -49, "y": -7}, {"x": -41, "y": -19}, {"x": -13, "y": 20}]}}, {"mode": "vega-lite"}); </script>
def de_mean(data: List[Vector]) -> List[Vector]:
    """Center every vector in ``data`` on zero by subtracting its own mean.

    Each vector is coerced with ``np.asarray`` first, so plain Python lists
    work as well as NumPy arrays (subtracting a scalar from a list raises
    ``TypeError``).

    Args:
        data: Sequence of numeric vectors; each is centered independently.

    Returns:
        A list with one NumPy array per input vector, holding that
        vector's components minus the vector's mean.
    """
    return [np.asarray(vector) - np.mean(vector) for vector in data]
</div>
{% raw %}
# Center both coordinate arrays, then plot the centered points.
xs_demean, ys_demean = de_mean([xs, ys])

df = pd.DataFrame({'x': xs_demean, 'y': ys_demean})

# Last expression of the cell: the chart the notebook renders.
alt.Chart(df).mark_point().encode(
    alt.X('x:Q'),
    alt.Y('y:Q'),
    alt.Tooltip(['x', 'y'])
)
</div>
function loadScript(lib) {
return new Promise(function(resolve, reject) {
var s = document.createElement('script');
s.src = paths[lib];
s.async = true;
s.onload = () => resolve(paths[lib]);
s.onerror = () => reject(`Error loading script: ${paths[lib]}`);
document.getElementsByTagName("head")[0].appendChild(s);
});
}
function showError(err) {
outputDiv.innerHTML = `<div class="error" style="color:red;">${err}</div>`;
throw err;
}
function displayChart(vegaEmbed) {
vegaEmbed(outputDiv, spec, embedOpt)
.catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));
}
if(typeof define === "function" && define.amd) {
requirejs.config({paths});
require(["vega-embed"], displayChart, err => showError(`Error loading script: ${err.message}`));
} else if (typeof vegaEmbed === "function") {
displayChart(vegaEmbed);
} else {
loadScript("vega")
.then(() => loadScript("vega-lite"))
.then(() => loadScript("vega-embed"))
.catch(showError)
.then(() => displayChart(vegaEmbed));
}
})({"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "data": {"name": "data-4e839f0369987c0fa0c1e6c372f962c7"}, "mark": "point", "encoding": {"tooltip": [{"type": "quantitative", "field": "x"}, {"type": "quantitative", "field": "y"}], "x": {"type": "quantitative", "field": "x"}, "y": {"type": "quantitative", "field": "y"}}, "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", "datasets": {"data-4e839f0369987c0fa0c1e6c372f962c7": [{"x": -11.433333333333334, "y": -18.7}, {"x": 5.566666666666666, "y": 21.3}, {"x": 11.566666666666666, "y": 16.3}, {"x": -13.433333333333334, "y": 30.3}, {"x": -9.433333333333334, "y": -14.7}, {"x": 19.566666666666666, "y": 6.300000000000001}, {"x": 3.5666666666666664, "y": 24.3}, {"x": 28.566666666666666, "y": -26.7}, {"x": 11.566666666666666, "y": -11.7}, {"x": -13.433333333333334, "y": -10.7}, {"x": -11.433333333333334, "y": 10.3}, {"x": -12.433333333333334, "y": -18.7}, {"x": 6.566666666666666, "y": 2.3000000000000007}, {"x": 12.566666666666666, "y": -3.6999999999999993}, {"x": 14.566666666666666, "y": 25.3}, {"x": 15.566666666666666, "y": -11.7}, {"x": -24.433333333333334, "y": 30.3}, {"x": 29.566666666666666, "y": 16.3}, {"x": 6.566666666666666, "y": 21.3}, {"x": -4.433333333333334, "y": 16.3}, {"x": -21.433333333333334, "y": -32.7}, {"x": -7.433333333333334, "y": -6.699999999999999}, {"x": -1.4333333333333336, "y": 21.3}, {"x": 21.566666666666666, "y": 12.3}, {"x": -6.433333333333334, "y": -20.7}, {"x": -5.433333333333334, "y": 12.3}, {"x": -9.433333333333334, "y": -27.7}, {"x": -26.433333333333334, "y": -25.7}, {"x": -18.433333333333334, "y": -37.7}, {"x": 9.566666666666666, "y": 1.3000000000000007}]}}, {"mode": "vega-lite"}); </script>
def direction(w: Vector) -> Vector:
    """Return ``w`` with every component divided by ``magnitude(w)``."""
    length = magnitude(w)
    rescaled = []
    for component in w:
        rescaled.append(component / length)
    return rescaled


# Cell output: the rescaled version of the raw xs values.
direction(xs)
</div>
[-0.22863117335525085, -0.11431558667762542, -0.07396890902669881, -0.24208006590555972, -0.21518228080494198, -0.02017333882546331, -0.1277644792279343, 0.04034667765092662, -0.07396890902669881, -0.24208006590555972, -0.22863117335525085, -0.2353556196304053, -0.10759114040247099, -0.06724446275154437, -0.053795570201235494, -0.04707112392608106, -0.31604897493225853, 0.04707112392608106, -0.10759114040247099, -0.1815600494291698, -0.29587563610679524, -0.2017333882546331, -0.1613867106037065, -0.006724446275154437, -0.19500894197947868, -0.18828449570432423, -0.21518228080494198, -0.32949786748256743, -0.27570229728133194, -0.08741780157700768]
{% raw %}
# Rescale each centered coordinate array by its own magnitude.
xs_dir = direction(xs_demean)
ys_dir = direction(ys_demean)

df = pd.DataFrame({'x': xs_dir, 'y': ys_dir})

# Last expression of the cell: the chart the notebook renders.
alt.Chart(df).mark_point().encode(
    alt.X('x:Q'),
    alt.Y('y:Q'),
    alt.Tooltip(['x', 'y'])
)
</div>
function loadScript(lib) {
return new Promise(function(resolve, reject) {
var s = document.createElement('script');
s.src = paths[lib];
s.async = true;
s.onload = () => resolve(paths[lib]);
s.onerror = () => reject(`Error loading script: ${paths[lib]}`);
document.getElementsByTagName("head")[0].appendChild(s);
});
}
function showError(err) {
outputDiv.innerHTML = `<div class="error" style="color:red;">${err}</div>`;
throw err;
}
function displayChart(vegaEmbed) {
vegaEmbed(outputDiv, spec, embedOpt)
.catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));
}
if(typeof define === "function" && define.amd) {
requirejs.config({paths});
require(["vega-embed"], displayChart, err => showError(`Error loading script: ${err.message}`));
} else if (typeof vegaEmbed === "function") {
displayChart(vegaEmbed);
} else {
loadScript("vega")
.then(() => loadScript("vega-lite"))
.then(() => loadScript("vega-embed"))
.catch(showError)
.then(() => displayChart(vegaEmbed));
}
})({"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "data": {"name": "data-4cbdaba46d25db68017e0689aa4af2c4"}, "mark": "point", "encoding": {"tooltip": [{"type": "quantitative", "field": "x"}, {"type": "quantitative", "field": "y"}], "x": {"type": "quantitative", "field": "x"}, "y": {"type": "quantitative", "field": "y"}}, "$schema": "https://vega.github.io/schema/vega-lite/v4.8.1.json", "datasets": {"data-4cbdaba46d25db68017e0689aa4af2c4": [{"x": -0.1382701487741325, "y": -0.1699838452782193}, {"x": 0.06732103453434438, "y": 0.19361796280353322}, {"x": 0.13988262864321857, "y": 0.14816773679331416}, {"x": -0.16245734681042387, "y": 0.2754283696219275}, {"x": -0.11408295073784108, "y": -0.13362366447004403}, {"x": 0.23663142078838414, "y": 0.05726728477287603}, {"x": 0.04313383649805298, "y": 0.22088809840966467}, {"x": 0.3454738119516954, "y": -0.24270420689456979}, {"x": 0.13988262864321857, "y": -0.1063535288639126}, {"x": -0.16245734681042387, "y": -0.0972634836618688}, {"x": -0.1382701487741325, "y": 0.09362746558105128}, {"x": -0.15036374779227818, "y": -0.1699838452782193}, {"x": 0.07941463355249008, "y": 0.020907103964700777}, {"x": 0.15197622766136426, "y": -0.0336331672475621}, {"x": 0.17616342569765567, "y": 0.22997814361170846}, {"x": 0.18825702471580136, "y": -0.1063535288639126}, {"x": -0.29548693601002657, "y": 0.2754283696219275}, {"x": 0.3575674109698411, "y": 0.14816773679331416}, {"x": 0.07941463355249008, "y": 0.19361796280353322}, {"x": -0.0536149556471126, "y": 0.14816773679331416}, {"x": -0.25920613895558947, "y": -0.2972444781068327}, {"x": -0.08989575270154969, "y": -0.06090330285369354}, {"x": -0.0173341585926755, "y": 0.19361796280353322}, {"x": 0.26081861882467555, "y": 0.1118075559851389}, {"x": -0.07780215368340399, "y": -0.1881639356823069}, {"x": -0.0657085546652583, "y": 0.1118075559851389}, {"x": -0.11408295073784108, "y": -0.2517942520966136}, {"x": -0.31967413404631795, "y": -0.233614161692526}, {"x": 
-0.22292534190115237, "y": -0.3426947041170518}, {"x": 0.11569543060692716, "y": 0.011817058762656964}]}}, {"mode": "vega-lite"}); </script>