|
| 1 | +<!DOCTYPE html> |
| 2 | +<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| 3 | +<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| 4 | +<head> |
| 5 | + <meta charset="utf-8"> |
| 6 | + <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| 7 | + <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| 8 | + |
| 9 | + |
| 10 | + <link rel="shortcut icon" href="../favicon.ico"> |
| 11 | + |
| 12 | + <title>Guide to Benchmarking - Documentation - The Tensor Algebra Compiler (TACO)</title> |
| 13 | + <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'> |
| 14 | + |
| 15 | + <link rel="stylesheet" href="../css/theme.css" type="text/css" /> |
| 16 | + <link rel="stylesheet" href="../css/theme_extra.css" type="text/css" /> |
| 17 | + <link rel="stylesheet" href="../css/highlight.css"> |
| 18 | + <link href="../extra.css" rel="stylesheet"> |
| 19 | + |
| 20 | + <script> |
| 21 | + // Current page data |
| 22 | + var mkdocs_page_name = "Guide to Benchmarking"; |
| 23 | + var mkdocs_page_input_path = "benchmarking.md"; |
| 24 | + var mkdocs_page_url = "/benchmarking/index.html"; |
| 25 | + </script> |
| 26 | + |
| 27 | + <script src="../js/jquery-2.1.1.min.js"></script> |
| 28 | + <script src="../js/modernizr-2.8.3.min.js"></script> |
| 29 | + <script type="text/javascript" src="../js/highlight.pack.js"></script> |
| 30 | + |
| 31 | +</head> |
| 32 | + |
| 33 | +<body class="wy-body-for-nav" role="document"> |
| 34 | + |
| 35 | + <div class="wy-grid-for-nav"> |
| 36 | + |
| 37 | + |
| 38 | + <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav"> |
| 39 | + <div class="wy-side-nav-search"> |
| 40 | + <a href="http://tensor-compiler.org" class="icon icon-home"> Tensor Algebra Compiler (TACO)</a> |
| 41 | + <div role="search"> |
| 42 | + <form id ="rtd-search-form" class="wy-form" action="../search.html" method="get"> |
| 43 | + <input type="text" name="q" placeholder="Search docs" /> |
| 44 | + </form> |
| 45 | +</div> |
| 46 | + </div> |
| 47 | + |
| 48 | + <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| 49 | + <ul class="current"> |
| 50 | + |
| 51 | + |
| 52 | + <li class="toctree-l1"> |
| 53 | + |
| 54 | + <a class="" href="../index.html">Home</a> |
| 55 | + </li> |
| 56 | + |
| 57 | + <li class="toctree-l1"> |
| 58 | + |
| 59 | + <span class="caption-text">Python Library</span> |
| 60 | + <ul class="subnav"> |
| 61 | + <li class=""> |
| 62 | + |
| 63 | + <a class="" href="../pytensors/index.html">Defining Tensors</a> |
| 64 | + </li> |
| 65 | + <li class=""> |
| 66 | + |
| 67 | + <a class="" href="../pycomputations/index.html">Computing on Tensors</a> |
| 68 | + </li> |
| 69 | + </ul> |
| 70 | + </li> |
| 71 | + |
| 72 | + <li class="toctree-l1"> |
| 73 | + |
| 74 | + <span class="caption-text">Example Applications</span> |
| 75 | + <ul class="subnav"> |
| 76 | + <li class=""> |
| 77 | + |
| 78 | + <a class="" href="../scientific_computing/index.html">Scientific Computing: SpMV</a> |
| 79 | + </li> |
| 80 | + <li class=""> |
| 81 | + |
| 82 | + <a class="" href="../data_analytics/index.html">Data Analytics: MTTKRP</a> |
| 83 | + </li> |
| 84 | + <li class=""> |
| 85 | + |
| 86 | + <a class="" href="../machine_learning/index.html">Machine Learning: SDDMM</a> |
| 87 | + </li> |
| 88 | + </ul> |
| 89 | + </li> |
| 90 | + |
| 91 | + <li class="toctree-l1"> |
| 92 | + |
| 93 | + <a class="" href="../optimization/index.html">Strategies for Optimization</a> |
| 94 | + </li> |
| 95 | + |
| 96 | + <li class="toctree-l1 current"> |
| 97 | + |
| 98 | + <a class="current" href="index.html">Guide to Benchmarking</a> |
| 99 | + <ul class="subnav"> |
| 100 | + |
| 101 | + </ul> |
| 102 | + </li> |
| 103 | + |
| 104 | + <li class="toctree-l1"> |
| 105 | + |
| 106 | + <span class="caption-text">C++ Library</span> |
| 107 | + <ul class="subnav"> |
| 108 | + <li class=""> |
| 109 | + |
| 110 | + <a class="" href="../tensors/index.html">Defining Tensors</a> |
| 111 | + </li> |
| 112 | + <li class=""> |
| 113 | + |
| 114 | + <a class="" href="../computations/index.html">Computing on Tensors</a> |
| 115 | + </li> |
| 116 | + </ul> |
| 117 | + </li> |
| 118 | + |
| 119 | + </ul> |
| 120 | + </div> |
| 121 | + |
| 122 | + </nav> |
| 123 | + |
| 124 | + <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| 125 | + |
| 126 | + |
| 127 | + <nav class="wy-nav-top" role="navigation" aria-label="top navigation"> |
| 128 | + <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| 129 | + <a href="http://tensor-compiler.org"> Tensor Algebra Compiler (TACO)</a> |
| 130 | + </nav> |
| 131 | + |
| 132 | + |
| 133 | + <div class="wy-nav-content"> |
| 134 | + <div class="rst-content"> |
| 135 | + <div role="navigation" aria-label="breadcrumbs navigation"> |
| 136 | + <ul class="wy-breadcrumbs"> |
| 137 | + <li><a href="../index.html">Docs</a> »</li> |
| 138 | + |
| 139 | + |
| 140 | + |
| 141 | + <li>Guide to Benchmarking</li> |
| 142 | + <li class="wy-breadcrumbs-aside"> |
| 143 | + |
| 144 | + </li> |
| 145 | + </ul> |
| 146 | + <hr/> |
| 147 | +</div> |
| 148 | + <div role="main"> |
| 149 | + <div class="section"> |
| 150 | + |
| 151 | + <p>The performance of Python applications that use TACO can be measured using |
| 152 | +Python's built-in <code>time.perf_counter</code> function with minimal changes to the |
| 153 | +applications. As an example, we can benchmark the performance of the |
| 154 | +scientific computing application shown <a href="../scientific_computing/index.html">here</a> as |
| 155 | +follows:</p> |
| 156 | +<pre><code class="python">import pytaco as pt |
| 157 | +from pytaco import compressed, dense |
| 158 | +import numpy as np |
| 159 | +import time |
| 160 | + |
| 161 | +csr = pt.format([dense, compressed]) |
| 162 | +dv = pt.format([dense]) |
| 163 | + |
| 164 | +A = pt.read("pwtk.mtx", csr) |
| 165 | +x = pt.from_array(np.random.uniform(size=A.shape[1])) |
| 166 | +z = pt.from_array(np.random.uniform(size=A.shape[0])) |
| 167 | +y = pt.tensor([A.shape[0]], dv) |
| 168 | + |
| 169 | +i, j = pt.get_index_vars(2) |
| 170 | +y[i] = A[i, j] * x[j] + z[i] |
| 171 | + |
| 172 | +# Tell TACO to generate code to perform the SpMV computation |
| 173 | +y.compile() |
| 174 | + |
| 175 | +# Benchmark the actual SpMV computation |
| 176 | +start = time.perf_counter() |
| 177 | +y.compute() |
| 178 | +end = time.perf_counter() |
| 179 | + |
| 180 | +print("Execution time: {0} seconds".format(end - start)) |
| 181 | +</code></pre> |
| 182 | + |
| 183 | +<p>In order to accurately measure TACO's computational performance, <strong>only the |
| 184 | +time it takes to actually perform a computation should be measured. The time |
| 185 | +it takes to generate code under the hood for performing that computation should |
| 186 | +not be measured</strong>, since this overhead can be quite variable but can often be |
| 187 | +amortized in practice. By default though, TACO will only generate and compile |
| 188 | +code it needs for performing a computation immediately before it has to |
| 189 | +actually perform the computation. As the example above demonstrates, by |
| 190 | +manually calling the result tensor's <code>compile</code> method, we can tell TACO to |
| 191 | +generate code needed for performing the computation before benchmarking starts, |
| 192 | +letting us measure only the performance of the computation itself.</p> |
| 193 | +<p><strong>The time it takes to construct the initial input tensors should also not be |
| 194 | +measured</strong>, since again this overhead can often be amortized in practice. By |
| 195 | +default, <code>pytaco.read</code> and functions for converting NumPy arrays and SciPy |
| 196 | +matrices to TACO tensors return fully constructed tensors. If you add nonzero |
| 197 | +elements to an input tensor by calling <code>insert</code> though, then <code>pack</code> must also |
| 198 | +be explicitly invoked before any benchmarking is done:</p> |
| 199 | +<pre><code class="python">import pytaco as pt |
| 200 | +from pytaco import compressed, dense |
| 201 | +import numpy as np |
| 202 | +import random |
| 203 | +import time |
| 204 | + |
| 205 | +csr = pt.format([dense, compressed]) |
| 206 | +dv = pt.format([dense]) |
| 207 | + |
| 208 | +A = pt.read("pwtk.mtx", csr) |
| 209 | +x = pt.tensor([A.shape[1]], dv) |
| 210 | +z = pt.tensor([A.shape[0]], dv) |
| 211 | +y = pt.tensor([A.shape[0]], dv) |
| 212 | + |
| 213 | +# Insert random values into x and z and pack them into dense arrays |
| 214 | +for k in range(A.shape[1]): |
| 215 | + x.insert([k], random.random()) |
| 216 | +x.pack() |
| 217 | +for k in range(A.shape[0]): |
| 218 | + z.insert([k], random.random()) |
| 219 | +z.pack() |
| 220 | + |
| 221 | +i, j = pt.get_index_vars(2) |
| 222 | +y[i] = A[i, j] * x[j] + z[i] |
| 223 | + |
| 224 | +y.compile() |
| 225 | + |
| 226 | +start = time.perf_counter() |
| 227 | +y.compute() |
| 228 | +end = time.perf_counter() |
| 229 | + |
| 230 | +print("Execution time: {0} seconds".format(end - start)) |
| 231 | +</code></pre> |
| 232 | + |
| 233 | +<p>TACO avoids regenerating code for performing the same computation, though, as |
| 234 | +long as the computation is redefined with the same index variables and with the |
| 235 | +same operand and result tensors. Thus, if your application executes the same |
| 236 | +computation many times in a loop and if the computation is executed on |
| 237 | +sufficiently large data sets, TACO will naturally amortize the overhead |
| 238 | +associated with generating code for performing the computation. In such |
| 239 | +scenarios, it is acceptable to include the initial code generation overhead |
| 240 | +in the performance measurement:</p> |
| 241 | +<pre><code class="python">import pytaco as pt |
| 242 | +from pytaco import compressed, dense |
| 243 | +import random |
| 244 | +import time |
| 245 | + |
| 246 | +csr = pt.format([dense, compressed]) |
| 247 | +dv = pt.format([dense]) |
| 248 | + |
| 249 | +A = pt.read("pwtk.mtx", csr) |
| 250 | +x = pt.tensor([A.shape[1]], dv) |
| 251 | +z = pt.tensor([A.shape[0]], dv) |
| 252 | +y = pt.tensor([A.shape[0]], dv) |
| 253 | + |
| 254 | +for k in range(A.shape[1]): |
| 255 | + x.insert([k], random.random()) |
| 256 | +x.pack() |
| 257 | +for k in range(A.shape[0]): |
| 258 | + z.insert([k], random.random()) |
| 259 | +z.pack() |
| 260 | + |
| 261 | +i, j = pt.get_index_vars(2) |
| 262 | + |
| 263 | +# Benchmark the iterative SpMV computation, including overhead for |
| 264 | +# generating code in the first iteration to perform the computation |
| 265 | +start = time.perf_counter() |
| 266 | +for k in range(1000): |
| 267 | + y[i] = A[i, j] * x[j] + z[i] |
| 268 | + y.evaluate() |
| 269 | + x[i] = y[i] |
| 270 | + x.evaluate() |
| 271 | +end = time.perf_counter() |
| 272 | + |
| 273 | +print("Execution time: {0} seconds".format(end - start)) |
| 274 | +</code></pre> |
| 275 | + |
| 276 | +<div class="admonition warning"> |
| 277 | +<p class="admonition-title">Warning</p> |
| 278 | +<p>In order to avoid regenerating code for performing a computation, the |
| 279 | +computation must be redefined with the exact same index variable <em>objects</em> |
| 280 | +and also with the exact same tensor objects for operands and result. In |
| 281 | +the example above, every loop iteration redefines the computation of <code>y</code> |
| 282 | +and <code>x</code> using the same tensor and index variable objects constructed outside |
| 283 | +the loop, so TACO will only generate code to compute <code>y</code> and <code>x</code> in the |
| 284 | +first iteration. If the index variables were constructed inside the loop |
| 285 | +though, TACO would regenerate code to compute <code>y</code> and <code>x</code> in every loop |
| 286 | +iteration, and the compilation overhead would not be amortized. </p> |
| 287 | +</div> |
| 288 | +<div class="admonition note"> |
| 289 | +<p class="admonition-title">Note</p> |
| 290 | +<p>As a rough rule of thumb, if a computation takes on the order of seconds or |
| 291 | +more in total to perform across all invocations with identical operands and |
| 292 | +result (and is always redefined with identical index variables), then it is |
| 293 | +acceptable to include the overhead associated with generating code for |
| 294 | +performing the computation in performance measurements.</p> |
| 295 | +</div> |
| 296 | + |
| 297 | + </div> |
| 298 | + </div> |
| 299 | + <footer> |
| 300 | + |
| 301 | + <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| 302 | + |
| 303 | + <a href="../tensors/index.html" class="btn btn-neutral float-right" title="Defining Tensors">Next <span class="icon icon-circle-arrow-right"></span></a> |
| 304 | + |
| 305 | + |
| 306 | + <a href="../optimization/index.html" class="btn btn-neutral" title="Strategies for Optimization"><span class="icon icon-circle-arrow-left"></span> Previous</a> |
| 307 | + |
| 308 | + </div> |
| 309 | + |
| 310 | + |
| 311 | + <hr/> |
| 312 | + |
| 313 | + <div role="contentinfo"> |
| 314 | + <!-- Copyright etc --> |
| 315 | + |
| 316 | + </div> |
| 317 | + <script> |
| 318 | + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| 319 | + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| 320 | + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| 321 | + })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); |
| 322 | + ga('create', 'UA-93058524-1', 'auto'); |
| 323 | + ga('send', 'pageview'); |
| 324 | + </script> |
| 325 | + |
| 326 | + <small>Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.</small> |
| 327 | +</footer> |
| 328 | + |
| 329 | + </div> |
| 330 | + </div> |
| 331 | + |
| 332 | + </section> |
| 333 | + |
| 334 | + </div> |
| 335 | + |
| 336 | + <div class="rst-versions" role="note" style="cursor: pointer"> |
| 337 | + <span class="rst-current-version" data-toggle="rst-current-version"> |
| 338 | + |
| 339 | + |
| 340 | + <span><a href="../optimization/index.html" style="color: #fcfcfc;">« Previous</a></span> |
| 341 | + |
| 342 | + |
| 343 | + <span style="margin-left: 15px"><a href="../tensors/index.html" style="color: #fcfcfc">Next »</a></span> |
| 344 | + |
| 345 | + </span> |
| 346 | +</div> |
| 347 | + <script>var base_url = '..';</script> |
| 348 | + <script src="../js/theme.js"></script> |
| 349 | + <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script> |
| 350 | + <script src="../search/require.js"></script> |
| 351 | + <script src="../search/search.js"></script> |
| 352 | + |
| 353 | +</body> |
| 354 | +</html> |
0 commit comments