docs/L26ApplicationsOfSVD.html~



<!DOCTYPE html>


<html lang="en" data-content_root="" >

  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />

    <title>Applications of the SVD &#8212; Linear Algebra, Geometry, and Computation</title>
  
  
  <script data-cfasync="false">
    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
    document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
  </script>
  
  <!-- Loaded before other Sphinx assets -->
  <link href="_static/styles/theme.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="_static/styles/bootstrap.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="_static/styles/pydata-sphinx-theme.css?digest=5b4479735964841361fd" rel="stylesheet" />

  
  <link href="_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=5b4479735964841361fd" rel="stylesheet" />
  <link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" />

    <link rel="stylesheet" type="text/css" href="_static/pygments.css" />
    <link rel="stylesheet" href="_static/styles/sphinx-book-theme.css?digest=14f4ca6b54d191a8c7657f6c759bf11a5fb86285" type="text/css" />
    <link rel="stylesheet" type="text/css" href="_static/togglebutton.css" />
    <link rel="stylesheet" type="text/css" href="_static/copybutton.css" />
    <link rel="stylesheet" type="text/css" href="_static/mystnb.4510f1fc1dee50b3e5859aac5469c37c29e427902b24a333a5f9fcb2f0b3ac41.css" />
    <link rel="stylesheet" type="text/css" href="_static/sphinx-thebe.css" />
    <link rel="stylesheet" type="text/css" href="_static/design-style.4045f2051d55cab465a707391d5b2007.min.css" />
  
  <!-- Pre-loaded scripts that we'll load fully later -->
  <link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=5b4479735964841361fd" />
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd" />
  <script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=5b4479735964841361fd"></script>

    <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
    <script src="_static/jquery.js"></script>
    <script src="_static/underscore.js"></script>
    <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
    <script src="_static/doctools.js"></script>
    <script src="_static/clipboard.min.js"></script>
    <script src="_static/copybutton.js"></script>
    <script src="_static/scripts/sphinx-book-theme.js?digest=5a5c038af52cf7bc1a1ec88eea08e6366ee68824"></script>
    <script>let toggleHintShow = 'Click to show';</script>
    <script>let toggleHintHide = 'Click to hide';</script>
    <script>let toggleOpenOnPrint = 'true';</script>
    <script src="_static/togglebutton.js"></script>
    <script>var togglebuttonSelector = '.toggle, .admonition.dropdown';</script>
    <script src="_static/design-tabs.js"></script>
    <script>const THEBE_JS_URL = "https://unpkg.com/thebe@0.8.2/lib/index.js"
const thebe_selector = ".thebe,.cell"
const thebe_selector_input = "pre"
const thebe_selector_output = ".output, .cell_output"
</script>
    <script async="async" src="_static/sphinx-thebe.js"></script>
    <script>window.MathJax = {"options": {"processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
    <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
    <script>DOCUMENTATION_OPTIONS.pagename = 'L26ApplicationsOfSVD';</script>
    <link rel="shortcut icon" href="_static/DiagramAR-icon.png"/>
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" />
    <link rel="prev" title="The Singular Value Decomposition" href="L25SVD.html" />
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <meta name="docsearch:language" content="en"/>
  </head>
  
  
  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">

  
  <a class="skip-link" href="#main-content">Skip to main content</a>
  
  <div id="pst-scroll-pixel-helper"></div>

  
  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
    <i class="fa-solid fa-arrow-up"></i>
    Back to top
  </button>

  
  <input type="checkbox"
          class="sidebar-toggle"
          name="__primary"
          id="__primary"/>
  <label class="overlay overlay-primary" for="__primary"></label>
  
  <input type="checkbox"
          class="sidebar-toggle"
          name="__secondary"
          id="__secondary"/>
  <label class="overlay overlay-secondary" for="__secondary"></label>
  
  <div class="search-button__wrapper">
    <div class="search-button__overlay"></div>
    <div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
      action="search.html"
      method="get">
  <i class="fa-solid fa-magnifying-glass"></i>
  <input type="search"
         class="form-control"
         name="q"
         id="search-input"
         placeholder="Search this book..."
         aria-label="Search this book..."
         autocomplete="off"
         autocorrect="off"
         autocapitalize="off"
         spellcheck="false"/>
  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
  </div>
  
    <nav class="bd-header navbar navbar-expand-lg bd-navbar">
    </nav>
  
  <div class="bd-container">
    <div class="bd-container__inner bd-page-width">
      
      <div class="bd-sidebar-primary bd-sidebar">
        

  <div class="sidebar-header-items sidebar-primary__section">
    
    
  </div>
  
    <div class="sidebar-primary-items__start sidebar-primary__section">
        <div class="sidebar-primary-item">

  
<a class="navbar-brand logo" href="landing-page.html">
  
  
    <img src="_static/DiagramAR-icon.png" class="logo__image only-light" alt="Linear Algebra, Geometry, and Computation - Home"/>
    <script>document.write(`<img src="_static/DiagramAR-icon.png" class="logo__image only-dark" alt="Linear Algebra, Geometry, and Computation - Home"/>`);</script>
  
  
</a></div>
        <div class="sidebar-primary-item"><nav class="bd-links" id="bd-docs-nav" aria-label="Main">
    <div class="bd-toc-item navbar-nav active">
        
        <ul class="nav bd-sidenav bd-sidenav__home-link">
            <li class="toctree-l1">
                <a class="reference internal" href="landing-page.html">
                    Preface
                </a>
            </li>
        </ul>
        <ul class="current nav bd-sidenav">
<li class="toctree-l1"><a class="reference internal" href="L01LinearEquations.html">Linear Equations</a></li>
<li class="toctree-l1"><a class="reference internal" href="L02Numerics.html">(Getting Serious About) Numbers</a></li>
<li class="toctree-l1"><a class="reference internal" href="L03RowReductions.html">Gaussian Elimination</a></li>
<li class="toctree-l1"><a class="reference internal" href="L04VectorEquations.html">Vector Equations</a></li>
<li class="toctree-l1"><a class="reference internal" href="L05Axb.html"><span class="math notranslate nohighlight">\(A{\bf x} = {\bf b}\)</span></a></li>
<li class="toctree-l1"><a class="reference internal" href="L06LinearIndependence.html">Linear Independence</a></li>
<li class="toctree-l1"><a class="reference internal" href="L07LinearTransformations.html">Linear Transformations</a></li>
<li class="toctree-l1"><a class="reference internal" href="L08MatrixofLinearTranformation.html">The Matrix of a Linear Transformation</a></li>
<li class="toctree-l1"><a class="reference internal" href="L09MatrixOperations.html">Matrix Algebra</a></li>
<li class="toctree-l1"><a class="reference internal" href="L10MatrixInverse.html">The Inverse of a Matrix</a></li>
<li class="toctree-l1"><a class="reference internal" href="L11MarkovChains.html">Markov Chains</a></li>
<li class="toctree-l1"><a class="reference internal" href="L12MatrixFactorizations.html">Matrix Factorizations</a></li>
<li class="toctree-l1"><a class="reference internal" href="L13ComputerGraphics.html">Computer Graphics</a></li>
<li class="toctree-l1"><a class="reference internal" href="L14Subspaces.html">Subspaces</a></li>
<li class="toctree-l1"><a class="reference internal" href="L15DimensionRank.html">Dimension and Rank</a></li>
<li class="toctree-l1"><a class="reference internal" href="L16Eigenvectors.html">Eigenvectors and Eigenvalues</a></li>
<li class="toctree-l1"><a class="reference internal" href="L17CharacteristicEqn.html">The Characteristic Equation</a></li>
<li class="toctree-l1"><a class="reference internal" href="L18Diagonalization.html">Diagonalization</a></li>
<li class="toctree-l1"><a class="reference internal" href="L19PageRank.html">PageRank</a></li>
<li class="toctree-l1"><a class="reference internal" href="L20Orthogonality.html">Analytic Geometry in <span class="math notranslate nohighlight">\(\mathbb{R}^n\)</span></a></li>
<li class="toctree-l1"><a class="reference internal" href="L21OrthogonalSets.html">Orthogonal Sets and Projection</a></li>
<li class="toctree-l1"><a class="reference internal" href="L22LeastSquares.html">Least Squares</a></li>
<li class="toctree-l1"><a class="reference internal" href="L23LinearModels.html">Linear Models</a></li>
<li class="toctree-l1"><a class="reference internal" href="L24SymmetricMatrices.html">Symmetric Matrices</a></li>
<li class="toctree-l1"><a class="reference internal" href="L25SVD.html">The Singular Value Decomposition</a></li>
<li class="toctree-l1 current active"><a class="current reference internal" href="#">Applications of the SVD</a></li>
</ul>

    </div>
</nav></div>
    </div>
  
  
  <div class="sidebar-primary-items__end sidebar-primary__section">
  </div>
  
  <div id="rtd-footer-container"></div>


      </div>
      
      <main id="main-content" class="bd-main">
        
        
<div class="sbt-scroll-pixel-helper"></div>

          <div class="bd-content">
            <div class="bd-article-container">
              
              <div class="bd-header-article">
<div class="header-article-items header-article__inner">
  
    <div class="header-article-items__start">
      
        <div class="header-article-item"><label class="sidebar-toggle primary-toggle btn btn-sm" for="__primary" title="Toggle primary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
  <span class="fa-solid fa-bars"></span>
</label></div>
      
    </div>
  
  
    <div class="header-article-items__end">
      
        <div class="header-article-item">

<div class="article-header-buttons">


<a href="https://github.com/mcrovella/CS132-Geometric-Algorithms" target="_blank"
   class="btn btn-sm btn-source-repository-button"
   title="Source repository"
   data-bs-placement="bottom" data-bs-toggle="tooltip"
>
  

<span class="btn__icon-container">
  <i class="fab fa-github"></i>
  </span>

</a>


<div class="dropdown dropdown-download-buttons">
  <button class="btn dropdown-toggle" type="button" data-bs-toggle="dropdown" aria-expanded="false" aria-label="Download this page">
    <i class="fas fa-download"></i>
  </button>
  <ul class="dropdown-menu">
      
      
      <li><a href="_sources/L26ApplicationsOfSVD.ipynb" target="_blank"
   class="btn btn-sm btn-download-source-button dropdown-item"
   title="Download source file"
   data-bs-placement="left" data-bs-toggle="tooltip"
>
  

<span class="btn__icon-container">
  <i class="fas fa-file"></i>
  </span>
<span class="btn__text-container">.ipynb</span>
</a>
</li>
      
      
      <li>
<button onclick="window.print()"
  class="btn btn-sm btn-download-pdf-button dropdown-item"
  title="Print to PDF"
  data-bs-placement="left" data-bs-toggle="tooltip"
>
  

<span class="btn__icon-container">
  <i class="fas fa-file-pdf"></i>
  </span>
<span class="btn__text-container">.pdf</span>
</button>
</li>
      
  </ul>
</div>


<button onclick="toggleFullScreen()"
  class="btn btn-sm btn-fullscreen-button"
  title="Fullscreen mode"
  data-bs-placement="bottom" data-bs-toggle="tooltip"
>
  

<span class="btn__icon-container">
  <i class="fas fa-expand"></i>
  </span>

</button>


<script>
document.write(`
  <button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
    <span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
    <span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
  </button>
`);
</script>


<script>
document.write(`
  <button class="btn btn-sm navbar-btn search-button search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
  </button>
`);
</script>
<label class="sidebar-toggle secondary-toggle btn btn-sm" for="__secondary" title="Toggle secondary sidebar" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <span class="fa-solid fa-list"></span>
</label>
</div></div>
      
    </div>
  
</div>
</div>
              
              
<div id="jb-print-docs-body" class="onlyprint">
    <h1>Applications of the SVD</h1>
    <!-- Table of contents -->
    <div id="print-main-content">
        <div id="jb-print-toc">
            
            <div>
                <h2> Contents </h2>
            </div>
            <nav aria-label="Page">
                <ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#recap-of-svd">Recap of SVD</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#approximating-a-matrix">Approximating a Matrix</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#signal-compression">Signal Compression</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dimensionality-reduction">Dimensionality Reduction</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#principal-component-analysis">Principal Component Analysis</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#visualization-using-pca">Visualization using PCA</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#wrapup">Wrapup</a></li>
</ul>
</li>
</ul>
            </nav>
        </div>
    </div>
</div>

              
<div id="searchbox"></div>
                <article class="bd-article" role="main">
                  
  <aside class="margin sidebar">
<p class="sidebar-title"></p>
<p>By Marcel Duchamp (1887-1968) - Philadelphia Museum of Art, PD-US, <a class="reference external" href="https://en.wikipedia.org/w/index.php?curid=3922548">https://en.wikipedia.org/w/index.php?curid=3922548</a></p>
</aside>
<section class="tex2jax_ignore mathjax_ignore" id="applications-of-the-svd">
<h1>Applications of the SVD<a class="headerlink" href="#applications-of-the-svd" title="Permalink to this heading">#</a></h1>
<!-- image credit: https://en.wikipedia.org/w/index.php?curid=3922548 -->
<center>
<a class="reference internal image-reference" href="_images/Duchamp_-_Nude_Descending_a_Staircase.jpg"><img alt="Nude Descending a Staircase, a painting by Marcel Duchamp" src="_images/Duchamp_-_Nude_Descending_a_Staircase.jpg" style="width: 400px;" /></a>
</center>
<p>In “Nude Descending a Staircase” Marcel Duchamp captures a four-dimensional object on a two-dimensional canvas.  Accomplishing this without losing essential information is called <em>dimensionality reduction.</em></p>
<!-- image credit: https://en.wikipedia.org/wiki/Swiss_Army_knife -->
<center>
<a class="reference internal image-reference" href="_images/440px-Wenger_EvoGrip_S17.JPG"><img alt="A Swiss Army knife" src="_images/440px-Wenger_EvoGrip_S17.JPG" style="width: 440px;" /></a>
</center>
<blockquote>
<div><p>The Singular Value Decomposition is the <strong>“Swiss Army Knife”</strong> and the <strong>“Rolls Royce”</strong> of matrix decompositions.</p>
</div></blockquote>
<p>– Diane O’Leary</p>
<p>Today we will concern ourselves with the “Swiss Army Knife” aspect of the SVD.</p>
<p>Our focus today will be on applications to data analysis.</p>
<p>So today we will be thinking of matrices as <strong>data</strong>.</p>
<p>(Rather than thinking of matrices as linear operators.)</p>
<!-- image credit: https://xkcd.com/2610/ -->
<center>
<a class="reference internal image-reference" href="https://imgs.xkcd.com/comics/assigning_numbers.png"><img alt="xkcd comic 2610: Assigning Numbers" src="https://imgs.xkcd.com/comics/assigning_numbers.png" style="width: 450px;" /></a>
</center><p>As a specific example, here is a typical data matrix.   This matrix could be the result of measuring a collection of data objects, and noting a set of features for each object.</p>
<div class="math notranslate nohighlight">
\[\begin{split}{\mbox{$m$ data objects}}\left\{\begin{array}{c}\;\\\;\\\;\\\;\\\;\end{array}\right.\;\;\overbrace{\left[\begin{array}{ccccc}
\begin{array}{c}a_{11}\\\vdots\\a_{i1}\\\vdots\\a_{m1}\end{array}&amp;
\begin{array}{c}\dots\\\ddots\\\dots\\\ddots\\\dots\end{array}&amp;
\begin{array}{c}a_{1j}\\\vdots\\a_{ij}\\\vdots\\a_{mj}\end{array}&amp;
\begin{array}{c}\dots\\\ddots\\\dots\\\ddots\\\dots\end{array}&amp;
\begin{array}{c}a_{1n}\\\vdots\\a_{in}\\\vdots\\a_{mn}\end{array}
\end{array}\right]}^{\mbox{$n$ features}}\end{split}\]</div>
<p>For example, rows could be people, and columns could be movie ratings.</p>
<p>Or rows could be documents, and columns could be words within the documents.</p>
<p>To start discussing the set of tools that SVD provides for analyzing data, let’s remind ourselves what the SVD is.</p>
<section id="recap-of-svd">
<h2>Recap of SVD<a class="headerlink" href="#recap-of-svd" title="Permalink to this heading">#</a></h2>
<p>Today we’ll work exclusively with the reduced SVD.</p>
<p>Here it is again, for the case where <span class="math notranslate nohighlight">\(A\)</span> is <span class="math notranslate nohighlight">\(m \times n\)</span>, and <span class="math notranslate nohighlight">\(A\)</span> has rank <span class="math notranslate nohighlight">\(r\)</span>.</p>
<p>In that case, the reduced SVD looks like this, with singular values on the diagonal of <span class="math notranslate nohighlight">\(\Sigma\)</span>:</p>
<p><span class="math notranslate nohighlight">\(m\left\{\begin{array}{c}\;\\\;\\\;\\\;\\\;\end{array}\right.\;\;\overbrace{\left[\begin{array}{cccc}\begin{array}{c}\vdots\\\vdots\\{\bf a_1}\\\vdots\\\vdots\end{array}&amp;\begin{array}{c}\vdots\\\vdots\\{\bf a_2}\\\vdots\\\vdots\end{array}&amp;\dots&amp;\begin{array}{c}\vdots\\\vdots\\{\bf a_n}\\\vdots\\\vdots\end{array}\\\end{array}\right]}^{\Large n} =
\overbrace{\left[\begin{array}{ccc}\vdots&amp;&amp;\vdots\\\vdots&amp;&amp;\vdots\\\mathbf{u}_1&amp;\cdots&amp;\mathbf{u}_r\\\vdots&amp;&amp;\vdots\\\vdots&amp;&amp;\vdots\end{array}\right]}^{\large r}
\times
\left[\begin{array}{ccc}\sigma_1&amp; &amp;\\&amp;\ddots&amp;\\&amp;&amp;\sigma_r\end{array}\right]
\times
\left[\begin{array}{ccccc}\dots&amp;\dots&amp;\mathbf{v}_1&amp;\dots&amp;\dots\\&amp;&amp;\vdots&amp;&amp;\\\dots&amp;\dots&amp;\mathbf{v}_r&amp;\dots&amp;\dots\end{array}\right]\)</span></p>
<div class="math notranslate nohighlight">
\[\Large\overset{m\,\times\, n}{A^{\vphantom{T}}} = \overset{m\,\times\, r}{U^{\vphantom{T}}}\;\;\overset{r\,\times\, r}{\Sigma^{\vphantom{T}}}\;\;\overset{r\,\times\, n}{V^T}\]</div>
<p>Note that for the reduced version, both <span class="math notranslate nohighlight">\(U\)</span> and <span class="math notranslate nohighlight">\(V\)</span> have orthonormal columns.    This means that:</p>
<div class="math notranslate nohighlight">
\[ U^TU = I \]</div>
<p>and</p>
<div class="math notranslate nohighlight">
\[ V^TV = I. \]</div>
<p>(However, <span class="math notranslate nohighlight">\(U\)</span> and <span class="math notranslate nohighlight">\(V\)</span> are not square in this version, so they are not orthogonal matrices.)</p>
<p>Recall as well that the route to the SVD started by asking, “What unit vector <span class="math notranslate nohighlight">\(\mathbf{x}\)</span> maximizes <span class="math notranslate nohighlight">\(\Vert A\mathbf{x}\Vert\)</span>?”</p>
<p>We found that the answer is <span class="math notranslate nohighlight">\(\mathbf{v_1}\)</span>, the first row of <span class="math notranslate nohighlight">\(V^T\)</span>.</p>
<p>You should be able to see that the SVD of <span class="math notranslate nohighlight">\(A^T\)</span> is <span class="math notranslate nohighlight">\(V\Sigma U^T\)</span>.</p>
<p>So, we can make the corresponding observation that the unit vector that maximizes <span class="math notranslate nohighlight">\(\Vert A^T\mathbf{x}\Vert\)</span> is <span class="math notranslate nohighlight">\(\mathbf{u_1}\)</span>, the first column of <span class="math notranslate nohighlight">\(U\)</span>.</p>
</section>
<section id="approximating-a-matrix">
<h2>Approximating a Matrix<a class="headerlink" href="#approximating-a-matrix" title="Permalink to this heading">#</a></h2>
<p>To understand the power of SVD for analyzing data, it helps to think of it as a tool for <strong>approximating one matrix by another, simpler, matrix.</strong></p>
<p>To talk about when one matrix <strong>approximates</strong> another, we need a “length” for matrices.</p>
<p>We will use the <strong>Frobenius norm</strong>.</p>
<p>The Frobenius norm is just the usual vector norm, treating the matrix as if it were a vector.</p>
<p>In other words, the definition of the Frobenius norm of <span class="math notranslate nohighlight">\(A\)</span>, denoted <span class="math notranslate nohighlight">\(\Vert A\Vert_F\)</span>, is:</p>
<div class="math notranslate nohighlight">
\[\Vert A\Vert_F = \sqrt{\sum a_{ij}^2}.\]</div>
<p>The approximations we’ll discuss are <strong>low-rank</strong> approximations.</p>
<p>Recall that the rank of a matrix <span class="math notranslate nohighlight">\(A\)</span> is the largest number of linearly independent columns of <span class="math notranslate nohighlight">\(A\)</span>.</p>
<p>Or, equivalently, the dimension of <span class="math notranslate nohighlight">\(\operatorname{Col} A\)</span>.</p>
<p>Let’s define the <strong>rank-<span class="math notranslate nohighlight">\(k\)</span> approximation</strong> to <span class="math notranslate nohighlight">\(A\)</span>:</p>
<p>When <span class="math notranslate nohighlight">\(k &lt; \operatorname{Rank}A\)</span>, the rank-<span class="math notranslate nohighlight">\(k\)</span> approximation to <span class="math notranslate nohighlight">\(A\)</span> is the closest rank-<span class="math notranslate nohighlight">\(k\)</span> matrix to <span class="math notranslate nohighlight">\(A\)</span>, i.e.,</p>
<div class="math notranslate nohighlight">
\[A^{(k)} =\arg \min_{\operatorname{Rank}B = k} \Vert A-B\Vert_F.\]</div>
<p>Why is a rank-<span class="math notranslate nohighlight">\(k\)</span> approximation valuable?</p>
<p>The reason is that a rank-<span class="math notranslate nohighlight">\(k\)</span> matrix may take up <strong>much</strong> less space than the original <span class="math notranslate nohighlight">\(A\)</span>.</p>
<p><span class="math notranslate nohighlight">\(m\left\{\begin{array}{c}\;\\\;\\\;\\\;\\\;\end{array}\right.\;\;\overbrace{\left[\begin{array}{cccc}\begin{array}{c}\vdots\\\vdots\\{\bf a_1}\\\vdots\\\vdots\end{array}&amp;\begin{array}{c}\vdots\\\vdots\\{\bf a_2}\\\vdots\\\vdots\end{array}&amp;\dots&amp;\begin{array}{c}\vdots\\\vdots\\{\bf a_n}\\\vdots\\\vdots\end{array}\\\end{array}\right]}^{\large n} =
\overbrace{\left[\begin{array}{ccc}\vdots&amp;&amp;\vdots\\\vdots&amp;&amp;\vdots\\\sigma_1\mathbf{u}_1&amp;\cdots&amp;\sigma_k\mathbf{u}_k\\\vdots&amp;&amp;\vdots\\\vdots&amp;&amp;\vdots\end{array}\right]}^{\large k}
\times
\left[\begin{array}{ccccc}\dots&amp;\dots&amp;\mathbf{v}_1&amp;\dots&amp;\dots\\&amp;&amp;\vdots&amp;&amp;\\\dots&amp;\dots&amp;\mathbf{v}_k&amp;\dots&amp;\dots\end{array}\right]\)</span></p>
<p>The rank-<span class="math notranslate nohighlight">\(k\)</span> approximation takes up space <span class="math notranslate nohighlight">\((m+n)k\)</span> while <span class="math notranslate nohighlight">\(A\)</span> itself takes space <span class="math notranslate nohighlight">\(mn\)</span>.</p>
<p>For example, if <span class="math notranslate nohighlight">\(k=10\)</span> and <span class="math notranslate nohighlight">\(m = n = 1000\)</span>, then the rank-<span class="math notranslate nohighlight">\(k\)</span> approximation takes space <span class="math notranslate nohighlight">\(20000/1000000 = 2\%\)</span> of <span class="math notranslate nohighlight">\(A\)</span>.</p>
<aside class="margin sidebar">
<p class="sidebar-title"></p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>The fact that the SVD finds the <em>best</em> rank-<span class="math notranslate nohighlight">\(k\)</span> approximation to any matrix is called the Eckart-Young-Mirsky Theorem.  You can find a proof of the theorem <a class="reference external" href="https://en.wikipedia.org/wiki/Low-rank_approximation">here</a>.  In fact it is true for the Frobenius norm (the norm we are using here) as well as another matrix norm, the <em>spectral</em> norm.</p>
<p>More good resources on how to understand SVD as a data approximation method are <a class="reference external" href="https://www.jeremykun.com/2016/04/18/singular-value-decomposition-part-1-perspectives-on-linear-algebra/">here</a> and <a class="reference external" href="https://liorpachter.wordpress.com/2014/05/26/what-is-principal-component-analysis/">here</a>.</p>
</div>
</aside>
<p>The key to using the SVD for matrix approximation is as follows:</p>
<center><font color = "blue"><b>The best rank-<em>k</em> approximation to any matrix can be found via the SVD.</b></font></center><p>In fact, for an <span class="math notranslate nohighlight">\(m\times n\)</span> matrix <span class="math notranslate nohighlight">\(A\)</span>, the SVD does two things:</p>
<ol class="arabic simple">
<li><p>It gives the best rank-<span class="math notranslate nohighlight">\(k\)</span> approximation to <span class="math notranslate nohighlight">\(A\)</span> for <strong>every</strong> <span class="math notranslate nohighlight">\(k\)</span> up to the rank of <span class="math notranslate nohighlight">\(A\)</span>.</p></li>
<li><p>It gives the <strong>distance</strong> of the best rank-<span class="math notranslate nohighlight">\(k\)</span> approximation <span class="math notranslate nohighlight">\(A^{(k)}\)</span> from <span class="math notranslate nohighlight">\(A\)</span> for each <span class="math notranslate nohighlight">\(k\)</span>.</p></li>
</ol>
<p>When we say “best”, we mean in terms of Frobenius norm <span class="math notranslate nohighlight">\(\Vert A-A^{(k)}\Vert_F\)</span>,</p>
<p>and by distance we mean the same quantity, <span class="math notranslate nohighlight">\(\Vert A-A^{(k)}\Vert_F\)</span>.</p>
<p>How do we use SVD to find the best rank-<span class="math notranslate nohighlight">\(k\)</span> approximation to <span class="math notranslate nohighlight">\(A\)</span>?</p>
<p>Conceptually, we “throw away” the portions of the SVD having the smallest singular values.</p>
<p>More specifically: in terms of the singular value decomposition,</p>
<div class="math notranslate nohighlight">
\[ A = U\Sigma V^T, \]</div>
<p>the best rank-<span class="math notranslate nohighlight">\(k\)</span> approximation to <span class="math notranslate nohighlight">\(A\)</span> is formed by taking</p>
<ul class="simple">
<li><p><span class="math notranslate nohighlight">\(U' =\)</span> the <span class="math notranslate nohighlight">\(k\)</span> leftmost columns of <span class="math notranslate nohighlight">\(U\)</span>,</p></li>
<li><p><span class="math notranslate nohighlight">\(\Sigma ' =\)</span> the <span class="math notranslate nohighlight">\(k\times k\)</span> upper left submatrix of <span class="math notranslate nohighlight">\(\Sigma\)</span>, and</p></li>
<li><p><span class="math notranslate nohighlight">\((V')^T=\)</span> the <span class="math notranslate nohighlight">\(k\)</span> upper rows of <span class="math notranslate nohighlight">\(V^T\)</span>,</p></li>
</ul>
<p>and constructing</p>
<div class="math notranslate nohighlight">
\[A^{(k)} = U'\Sigma'(V')^T.\]</div>
<p>The distance (in Frobenius norm) of the best rank-<span class="math notranslate nohighlight">\(k\)</span> approximation <span class="math notranslate nohighlight">\(A^{(k)}\)</span> from <span class="math notranslate nohighlight">\(A\)</span> is equal to <span class="math notranslate nohighlight">\(\sqrt{\sum_{i=k+1}^r\sigma^2_i}\)</span>.</p>
<p>Notice that this quantity is summing over the singular values <strong>beyond</strong> <span class="math notranslate nohighlight">\(k\)</span>.</p>
<p>What this means is that if, beyond some <span class="math notranslate nohighlight">\(k\)</span>, all of the singular values are small, then <strong><span class="math notranslate nohighlight">\(A\)</span> can be closely approximated by a rank-<span class="math notranslate nohighlight">\(k\)</span> matrix.</strong></p>
</section>
<section id="signal-compression">
<h2>Signal Compression<a class="headerlink" href="#signal-compression" title="Permalink to this heading">#</a></h2>
<p>When working with measurement data, i.e., measurements of real-world objects, we find that data is often <strong>approximately low-rank.</strong></p>
<p>In other words, a matrix of measurements can often be well approximated by a low-rank matrix.</p>
<p>Classic examples include</p>
<ul class="simple">
<li><p>measurements of human abilities – e.g., psychology</p></li>
<li><p>measurements of human preferences – e.g., movie ratings, social networks</p></li>
<li><p>images, movies, sound recordings</p></li>
<li><p>genomics, biological data</p></li>
<li><p>medical records</p></li>
<li><p>text documents</p></li>
</ul>
<p>For example, here is a photo.</p>
<p>We can think of this as a <span class="math notranslate nohighlight">\(512\times 512\)</span> matrix <span class="math notranslate nohighlight">\(A\)</span> whose entries are grayscale values (numbers between 0 and 1).</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/89703f22d6bbe854bedbecad191348133c55e34d9a599caab93c810e37ee756f.png" src="_images/89703f22d6bbe854bedbecad191348133c55e34d9a599caab93c810e37ee756f.png" />
</div>
</div>
<p>Let’s look at the singular values of this matrix.</p>
<p>We compute <span class="math notranslate nohighlight">\(A = U\Sigma V^T\)</span> and look at the values on the diagonal of <span class="math notranslate nohighlight">\(\Sigma\)</span>.</p>
<p>This is often called the matrix’s “spectrum.”</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/d9088bc4f281b551a9981a85f2a957c45f26b6221b1a2bf9124809c5a878ef02.png" src="_images/d9088bc4f281b551a9981a85f2a957c45f26b6221b1a2bf9124809c5a878ef02.png" />
</div>
</div>
<p>What is this telling us?</p>
<p>Most of the singular values of <span class="math notranslate nohighlight">\(A\)</span> are quite small.</p>
<p>Only the first few singular values are large – up to, say, <span class="math notranslate nohighlight">\(k\)</span> = 40.</p>
<p>Remember that the error we get when we use a rank-<span class="math notranslate nohighlight">\(k\)</span> approximation is</p>
<div class="math notranslate nohighlight">
\[\Vert A-A^{(k)}\Vert_F = \sqrt{\sum_{i=k+1}^r\sigma^2_i}.\]</div>
<p>So we can use the singular values of <span class="math notranslate nohighlight">\(A\)</span> to compute the relative error over a range of possible approximations <span class="math notranslate nohighlight">\(A^{(k)}\)</span>.</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/eac3dbf0bd6ce42934489bc25f32bbf65a86a9a5b2381fbe800a00a42e2f57e6.png" src="_images/eac3dbf0bd6ce42934489bc25f32bbf65a86a9a5b2381fbe800a00a42e2f57e6.png" />
</div>
</div>
<p>This matrix <span class="math notranslate nohighlight">\(A\)</span> has rank 512.</p>
<p>But the error when we approximate <span class="math notranslate nohighlight">\(A\)</span> by a rank 40 matrix is only around 10%.</p>
<p>We say that the <strong>effective</strong> rank of <span class="math notranslate nohighlight">\(A\)</span> is low (perhaps 40).</p>
<p>Let’s find the closest rank-40 matrix to <span class="math notranslate nohighlight">\(A\)</span> and view it.</p>
<p>We can do this quite easily using the SVD.</p>
<p>We simply construct our approximation of <span class="math notranslate nohighlight">\(A\)</span> using only the first 40 columns of <span class="math notranslate nohighlight">\(U\)</span>, the 40 largest singular values in <span class="math notranslate nohighlight">\(\Sigma\)</span>, and the top 40 rows of <span class="math notranslate nohighlight">\(V^T\)</span>.</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/9716ac61f956d6c0c03197b180fba210891e34f14d98d075a2007b981266ffc9.png" src="_images/9716ac61f956d6c0c03197b180fba210891e34f14d98d075a2007b981266ffc9.png" />
</div>
</div>
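<p>Note that the rank-40 boat requires storing only 40 columns of <span class="math notranslate nohighlight">\(U\)</span>, 40 singular values, and 40 rows of <span class="math notranslate nohighlight">\(V^T\)</span> – that is, <span class="math notranslate nohighlight">\(40\cdot(512+1+512)/512^2 \approx\)</span> <strong>16% of the space of the original image!</strong></p>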
<p>This general principle is what makes image, video, and sound compression effective.</p>
<p>When you</p>
<ul class="simple">
<li><p>watch HDTV, or</p></li>
<li><p>listen to an MP3, or</p></li>
<li><p>look at a JPEG image,</p></li>
</ul>
<p>these signals have been compressed using the fact that they are <strong>effectively low-rank</strong> matrices.</p>
<p>As you can see from the example of the boat image, it is often possible to compress such signals enormously, leading to an immense savings of storage space and transmission bandwidth.</p>
<p>In fact the entire premise of the show “Silicon Valley” is based on this fact :)</p>
</section>
<section id="dimensionality-reduction">
<h2>Dimensionality Reduction<a class="headerlink" href="#dimensionality-reduction" title="Permalink to this heading">#</a></h2>
<p>Another way to think about what we just did is “dimensionality reduction”.</p>
<p>Consider this common situation:</p>
<p><span class="math notranslate nohighlight">\({\mbox{$m$ objects}}\left\{\begin{array}{c}\;\\\;\\\;\\\;\\\;\end{array}\right.\;\;\overbrace{\left[\begin{array}{ccccc}
\begin{array}{c}a_{11}\\\vdots\\a_{i1}\\\vdots\\a_{m1}\end{array}&amp;
\begin{array}{c}\dots\\\ddots\\\dots\\\ddots\\\dots\end{array}&amp;
\begin{array}{c}a_{1j}\\\vdots\\a_{ij}\\\vdots\\a_{mj}\end{array}&amp;
\begin{array}{c}\dots\\\ddots\\\dots\\\ddots\\\dots\end{array}&amp;
\begin{array}{c}a_{1n}\\\vdots\\a_{in}\\\vdots\\a_{mn}\end{array}
\end{array}\right]}^{\mbox{$n$ features}} \approx
\overbrace{\left[\begin{array}{ccc}\vdots&amp;&amp;\vdots\\\vdots&amp;&amp;\vdots\\\mathbf{u}_1&amp;\cdots&amp;\mathbf{u}_k\\\vdots&amp;&amp;\vdots\\\vdots&amp;&amp;\vdots\end{array}\right]}^{\large k}
\times
\left[\begin{array}{ccc}\sigma_1&amp; &amp;\\&amp;\ddots&amp;\\&amp;&amp;\sigma_k\end{array}\right]
\times
\left[\begin{array}{ccccc}\dots&amp;\dots&amp;\mathbf{v}_1&amp;\dots&amp;\dots\\&amp;&amp;\vdots&amp;&amp;\\\dots&amp;\dots&amp;\mathbf{v}_k&amp;\dots&amp;\dots\end{array}\right]\)</span></p>
<p>The <span class="math notranslate nohighlight">\(U\)</span> matrix has a row for each data object.</p>
<p>Notice that the original data objects had <span class="math notranslate nohighlight">\(n\)</span> features, but each row of <span class="math notranslate nohighlight">\(U\)</span> only has <span class="math notranslate nohighlight">\(k\)</span> entries.</p>
<p>Despite that, a row of <span class="math notranslate nohighlight">\(U\)</span> can still provide most of the information in the corresponding row of <span class="math notranslate nohighlight">\(A\)</span>.</p>
<p>(To see that, note that we can approximately recover the original row by simply multiplying the row of <span class="math notranslate nohighlight">\(U\)</span> by <span class="math notranslate nohighlight">\(\Sigma V^T\)</span>).</p>
<p>So we have <strong>reduced the dimension</strong> of our data objects – from <span class="math notranslate nohighlight">\(n\)</span> down to <span class="math notranslate nohighlight">\(k\)</span> – without losing much of the information they contain.</p>
</section>
<section id="principal-component-analysis">
<h2>Principal Component Analysis<a class="headerlink" href="#principal-component-analysis" title="Permalink to this heading">#</a></h2>
<p>This kind of dimensionality reduction can be done in an <strong>optimal</strong> way.</p>
<p>The method for doing it is called <strong>Principal Component Analysis</strong> (or PCA).</p>
<p>What does <strong>optimal</strong> mean in this context?</p>
<p>Here we use a statistical criterion: a dimensionality reduction that captures the maximum <strong>variance</strong> in the data.</p>
<p>Here is a classic example.</p>
<p>Consider the points below, which live in <span class="math notranslate nohighlight">\(\mathbb{R}^2\)</span>.</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/825c74345d625e47d1d829447088b5112ee4dc5936c706fe67c25a0acc08561b.png" src="_images/825c74345d625e47d1d829447088b5112ee4dc5936c706fe67c25a0acc08561b.png" />
</div>
</div>
<p>Now, although the points are in <span class="math notranslate nohighlight">\(\mathbb{R}^2\)</span>, they seem to have low effective rank.</p>
<p>That is, it might not be a bad approximation to replace each point by a point in a 1-dimensional space, that is, along a line.</p>
<p>What line should we choose? We will choose the line such that the <strong>sum of the squared distances of the points to the line is minimized.</strong></p>
<p>The points, projected on this line, will capture the maximum variance in the data (because the remaining errors are minimized).</p>
<p>What would happen if we used SVD at this point, and kept only the rank-1 approximation to the data?</p>
<p>This would be the 1-D <strong>subspace</strong> that approximates the data best in Frobenius norm.</p>
<p>However the variance in the data is defined with respect to the data mean, so we need to mean-center the data first, before using SVD.</p>
<p>That is, without mean centering, SVD finds the best 1-D subspace, not the best line through the data (which might not pass through the origin).</p>
<p>So to capture the best line through the data, we first move the data points to the origin:</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/76140e2abdb60552fc086dadd52eb9a673c185fc07e889b9ce4a1f3c139d0caf.png" src="_images/76140e2abdb60552fc086dadd52eb9a673c185fc07e889b9ce4a1f3c139d0caf.png" />
</div>
</div>
<p>Now we use SVD to construct the best 1-D approximation of the mean-centered data:</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/ecdbde14d94a98e8c21e39b0f9123115c39d1cb20b9003cabccad421deffe243.png" src="_images/ecdbde14d94a98e8c21e39b0f9123115c39d1cb20b9003cabccad421deffe243.png" />
</div>
</div>
<p>This method is called <strong>Principal Component Analysis.</strong></p>
<p>In summary, PCA consists of:</p>
<ol class="arabic simple">
<li><p>Mean center the data, and</p></li>
<li><p>Reduce the dimension of the mean-centered data via SVD.</p></li>
</ol>
<p>It winds up constructing the <strong>best low dimensional approximation of the data</strong> in terms of variance.</p>
<p>This is equivalent to projecting the data onto the subspace that captures the maximum variance in the data.</p>
<p>That is, each point is replaced by a point in <span class="math notranslate nohighlight">\(k\)</span>-dimensional space such that the total error (the sum of squared distances between points and their replacements) is minimized.</p>
</section>
<section id="visualization-using-pca">
<h2>Visualization using PCA<a class="headerlink" href="#visualization-using-pca" title="Permalink to this heading">#</a></h2>
<p>I’ll now show an extended example to give you a sense of the power of PCA.</p>
<p>Let’s analyze some really high-dimensional data: <strong>documents.</strong></p>
<p>A common way to represent documents is using the bag-of-words model.</p>
<p>In this matrix, rows are documents, columns are words, and entries count how many times a word appears in a document.</p>
<p>This is called a <em>document-term matrix.</em></p>
<div class="math notranslate nohighlight">
\[\begin{split}{\mbox{$m$ documents}}\left\{\begin{array}{c}\;\\\;\\\;\\\;\\\;\end{array}\right.\;\;\overbrace{\left[\begin{array}{ccccc}
\begin{array}{c}a_{11}\\\vdots\\a_{i1}\\\vdots\\a_{m1}\end{array}&amp;
\begin{array}{c}\dots\\\ddots\\\dots\\\ddots\\\dots\end{array}&amp;
\begin{array}{c}a_{1j}\\\vdots\\a_{ij}\\\vdots\\a_{mj}\end{array}&amp;
\begin{array}{c}\dots\\\ddots\\\dots\\\ddots\\\dots\end{array}&amp;
\begin{array}{c}a_{1n}\\\vdots\\a_{in}\\\vdots\\a_{mn}\end{array}
\end{array}\right]}^{\mbox{$n$ terms}}\end{split}\]</div>
<p>We are touching on a broad topic, called Latent Semantic Analysis, which is essentially the application of linear algebra to document analysis.</p>
<p>You can learn about Latent Semantic Analysis in other courses in data science or natural language processing.</p>
<p>Our text documents are going to be posts from certain discussion forums called “newsgroups”.</p>
<p>We will collect posts from three groups:
<code class="docutils literal notranslate"><span class="pre">comp.os.ms-windows.misc</span></code>, <code class="docutils literal notranslate"><span class="pre">sci.space</span></code>, and <code class="docutils literal notranslate"><span class="pre">rec.sport.baseball</span></code>.</p>
<p>I am going to skip over some details.  However, all the code is in this notebook, so you can explore it on your own if you like.</p>
<div class="cell docutils container">
<div class="cell_input docutils container">
<div class="highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">sklearn.datasets</span> <span class="kn">import</span> <span class="n">fetch_20newsgroups</span>
<span class="n">categories</span> <span class="o">=</span> <span class="p">[</span><span class="s1">&#39;comp.os.ms-windows.misc&#39;</span><span class="p">,</span> <span class="s1">&#39;sci.space&#39;</span><span class="p">,</span> <span class="s1">&#39;rec.sport.baseball&#39;</span><span class="p">]</span>
<span class="n">news_data</span> <span class="o">=</span> <span class="n">fetch_20newsgroups</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="s1">&#39;train&#39;</span><span class="p">,</span> <span class="n">categories</span><span class="o">=</span><span class="n">categories</span><span class="p">)</span>
<span class="kn">from</span> <span class="nn">sklearn.feature_extraction.text</span> <span class="kn">import</span> <span class="n">TfidfVectorizer</span>
<span class="n">vectorizer</span> <span class="o">=</span> <span class="n">TfidfVectorizer</span><span class="p">(</span><span class="n">stop_words</span><span class="o">=</span><span class="s1">&#39;english&#39;</span><span class="p">,</span> <span class="n">min_df</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">max_df</span><span class="o">=</span><span class="mf">0.8</span><span class="p">)</span>
<span class="n">dtm</span> <span class="o">=</span> <span class="n">vectorizer</span><span class="o">.</span><span class="n">fit_transform</span><span class="p">(</span><span class="n">news_data</span><span class="o">.</span><span class="n">data</span><span class="p">)</span><span class="o">.</span><span class="n">todense</span><span class="p">()</span>
</pre></div>
</div>
</div>
</div>
<div class="cell docutils container">
<div class="cell_input docutils container">
<div class="highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="nb">print</span><span class="p">(</span><span class="s1">&#39;The size of our document-term matrix is </span><span class="si">{}</span><span class="s1">&#39;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">dtm</span><span class="o">.</span><span class="n">shape</span><span class="p">))</span>
</pre></div>
</div>
</div>
<div class="cell_output docutils container">
<div class="output stream highlight-myst-ansi notranslate"><div class="highlight"><pre><span></span>The size of our document-term matrix is (1781, 9409)
</pre></div>
</div>
</div>
</div>
<p>So we have 1781 documents, and there are 9409 different words that are contained in the documents.</p>
<p>We can think of each document as a vector in 9409-dimensional space.</p>
<p>Let us apply PCA to the document-term matrix.</p>
<p>First, we mean center the data.</p>
<div class="cell docutils container">
<div class="cell_input docutils container">
<div class="highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">centered_dtm</span> <span class="o">=</span> <span class="n">dtm</span> <span class="o">-</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">dtm</span><span class="p">,</span> <span class="n">axis</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<p>Now we compute the SVD of the mean-centered data:</p>
<div class="cell docutils container">
<div class="cell_input docutils container">
<div class="highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">u</span><span class="p">,</span> <span class="n">s</span><span class="p">,</span> <span class="n">vt</span> <span class="o">=</span> <span class="n">np</span><span class="o">.</span><span class="n">linalg</span><span class="o">.</span><span class="n">svd</span><span class="p">(</span><span class="n">centered_dtm</span><span class="p">)</span>
</pre></div>
</div>
</div>
</div>
<p>Now, we use PCA to visualize the set of documents.</p>
<p>Our visualization will be in two dimensions.</p>
<p>This is pretty extreme …</p>
<p>– we are taking points in 9409-dimensional space and projecting them into a subspace of only two dimensions!</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/b1f5cfd534336fa3bd32e45bd6d81731b8c91951a5dea68175f58acfa479ddc7.png" src="_images/b1f5cfd534336fa3bd32e45bd6d81731b8c91951a5dea68175f58acfa479ddc7.png" />
</div>
</div>
<p>This visualization shows that our collection of documents has considerable internal structure.</p>
<p>In particular, based on word frequency, it appears that there are three general groups of documents.</p>
<p>As you might guess, this is because the discussion topics of the document sets are different:</p>
<div class="cell tag_remove-input docutils container">
<div class="cell_output docutils container">
<img alt="_images/8c81f94a909f8ff6b04622ca10f5013d171a0944b2816cdfef124ee73e419d07.png" src="_images/8c81f94a909f8ff6b04622ca10f5013d171a0944b2816cdfef124ee73e419d07.png" />
</div>
</div>
<p><strong>In summary:</strong></p>
<ul class="simple">
<li><p>Data often arrives in high dimension</p>
<ul>
<li><p>for example, the documents above are points in <span class="math notranslate nohighlight">\(\mathbb{R}^{9409}\)</span></p></li>
</ul>
</li>
<li><p>However, the <strong>structure</strong> in data can be relatively low-dimensional</p>
<ul>
<li><p>in our example, we can see structure in just two dimensions!</p></li>
</ul>
</li>
<li><p>PCA allows us to find the low-dimensional structure in data</p>
<ul>
<li><p>in a way that is optimal in some sense</p></li>
</ul>
</li>
</ul>
<p>For this reason, PCA is a <strong>very</strong> commonly used tool in data analysis.</p>
<section id="wrapup">
<h3>Wrapup<a class="headerlink" href="#wrapup" title="Permalink to this heading">#</a></h3>
<center>
<a class="reference internal image-reference" href="_images/in-conclusion.jpg"><img alt="Figure" src="_images/in-conclusion.jpg" style="width: 750px;" /></a>
</center><p>We have reached the end!</p>
<p>Of course, this is not really the end … more like the beginning.</p>
<p>If we had more time, we’d talk about how linear algebra informs the study of graphs, the methods of machine learning, data mining, and many more topics.</p>
<p>So this is just where we have to stop.</p>
<blockquote>
<div><p>As long as Algebra and Geometry have been separated, their progress has been slow and their usages limited; but when these two sciences were reunited, they lent each other mutual strength and walked together with a rapid step towards perfection.</p>
<p>— Count Joseph-Louis de Lagrange</p>
</div></blockquote>
<p>We have looked at the richness of linear algebra from many angles.</p>
<p>We have seen that the simple linear system <span class="math notranslate nohighlight">\(A\mathbf{x} = \mathbf{b}\)</span> leads to a whole collection of interesting questions, questions that have unfolded step by step over the course of the semester.</p>
<p>But we have also seen that we can extract the idea of a matrix out of a linear system, and consider it as an object in its own right.</p>
<p>Considered on their own, matrices can be seen as linear operators, giving us tools for computer graphics and the solution of dynamical systems and linear equations.</p>
<p>We have also seen that matrices can be seen as data objects, whose linear algebraic properties expose useful facts about the data.</p>
<p>There are many courses you can go on to from here, which will rely on your understanding of linear algebra:</p>
<ul class="simple">
<li><p>CS 365 Foundations of Data Science</p></li>
<li><p>CS 440 Intro to Artificial Intelligence</p></li>
<li><p>CS 480 Intro to Computer Graphics</p></li>
<li><p>CS 505 Intro to Natural Language Processing</p></li>
<li><p>CS 506 Tools for Data Science</p></li>
<li><p>CS 507 Intro to Optimization in ML</p></li>
<li><p>CS 523 Deep Learning</p></li>
<li><p>CS 530 Advanced Algorithms</p></li>
<li><p>CS 531 Advanced Optimization Algorithms</p></li>
<li><p>CS 533 Spectral Methods</p></li>
<li><p>CS 542 Machine Learning</p></li>
<li><p>CS 565 Algorithmic Data Mining</p></li>
<li><p>CS 581 Computational Fabrication</p></li>
<li><p>CS 583 Audio Computation</p></li>
</ul>
<p>In each of these you will use and build on your knowledge of linear algebra.</p>
<p>Enjoy!</p>
</section>
</section>
</section>

    <script type="text/x-thebe-config">
    {
        requestKernel: true,
        binderOptions: {
            repo: "binder-examples/jupyter-stacks-datascience",
            ref: "master",
        },
        codeMirrorConfig: {
            theme: "abcdef",
            mode: "python"
        },
        kernelOptions: {
            name: "python3",
            path: "./."
        },
        predefinedOutput: true
    }
    </script>
    <script>kernelName = 'python3'</script>

                </article>
              

                <footer class="prev-next-footer">
                  
<div class="prev-next-area">
    <a class="left-prev"
       href="L25SVD.html"
       title="previous page">
      <i class="fa-solid fa-angle-left"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title">The Singular Value Decomposition</p>
      </div>
    </a>
</div>
                </footer>
              
            </div>
            
            
                <div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">

  <div class="sidebar-secondary-item">
  <div class="page-toc tocsection onthispage">
    <i class="fa-solid fa-list"></i> Contents
  </div>
  <nav class="bd-toc-nav page-toc">
    <ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#recap-of-svd">Recap of SVD</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#approximating-a-matrix">Approximating a Matrix</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#signal-compression">Signal Compression</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#dimensionality-reduction">Dimensionality Reduction</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#principal-component-analysis">Principal Component Analysis</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#visualization-using-pca">Visualization using PCA</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#wrapup">Wrapup</a></li>
</ul>
</li>
</ul>
  </nav></div>

</div></div>
              
            
          </div>
          <footer class="bd-footer-content">
            
<div class="bd-footer-content__inner container">
  
  <div class="footer-item">
    
<p class="component-author">
By Mark Crovella
</p>

  </div>
  
  <div class="footer-item">
    

  <p class="copyright">
    
      © Copyright 2020-2024.
      <br/>
    
  </p>

  </div>
  
  <div class="footer-item">
    
  </div>
  
  <div class="footer-item">
    
  </div>
  
</div>
          </footer>
        

      </main>
    </div>
  </div>
  
  <!-- Scripts loaded after <body> so the DOM is not blocked -->
  <script src="_static/scripts/bootstrap.js?digest=5b4479735964841361fd"></script>
<script src="_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd"></script>

  <footer class="bd-footer">
  </footer>
  </body>
</html>