# SciPy Paper Scalability Visualizations

In [1]:
# Make imports

import geocat.comp as gc
import intake

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import dask.distributed as dd
import dask.array as da

## Generate dummy data

In [2]:
# Seed the generator so every "Restart & Run All" benchmarks the same inputs.
SEED = 42
rng = np.random.default_rng(SEED)

# Number of samples in each 1-D input array.
n = 1_000_000

# Synthetic inputs for gc.relhum. Units below are presumed from the scaling
# factors (K, kg/kg, Pa) -- TODO confirm against the geocat.comp.relhum docs.
t_np = rng.uniform(250, 320, n)         # temperature: 250-320
w_np = rng.uniform(0, 20, n) / 1000     # mixing ratio: 0-20 scaled by 1e-3
p_np = rng.uniform(20, 1000, n) * 100   # pressure: 20-1000 scaled by 1e2

In [3]:
# Load the IPython extensions for line-level and memory profiling:
# line_profiler provides %lprun, memory_profiler provides %memit / %%memit.
# Both must be installed in the kernel's environment for these to succeed.
%load_ext line_profiler
%load_ext memory_profiler

In [4]:
# Time comparison
time_np = %timeit gc.relhum(t_np, w_np, p_np)

35 ms ± 566 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
prun_np = %prun gc.relhum(t_np, w_np, p_np)

 

         70 function calls (66 primitive calls) in 0.040 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.032    0.032    0.039    0.039 relhum.py:184(_relhum)
        2    0.004    0.002    0.004    0.002 _methods.py:87(_clip_dep_invoke_with_casting)
        1    0.002    0.002    0.002    0.002 {method 'astype' of 'numpy.ndarray' objects}
        1    0.001    0.001    0.040    0.040 <string>:1(<module>)
        4    0.000    0.000    0.000    0.000 _methods.py:72(_clip_dep_is_scalar_nan)
        1    0.000    0.000    0.040    0.040 {built-in method builtins.exec}
        8    0.000    0.000    0.000    0.000 {built-in method numpy.array}
        2    0.000    0.000    0.004    0.002 _methods.py:106(_clip)
        4    0.000    0.000    0.000    0.000 fromnumeric.py:3075(ndim)
        1    0.000    0.000    0.039    0.039 relhum.py:6(relhum)
        4    0.000    0.000    0.000    0.000 _methods.py:82(_clip

In [6]:
# Wrap each NumPy input in an xarray.DataArray (no dims/coords supplied, so
# xarray assigns its defaults) to benchmark relhum on xarray inputs.
t_xr, w_xr, p_xr = (xr.DataArray(values) for values in (t_np, w_np, p_np))

In [7]:
# Time comparison
time_xr = %timeit gc.relhum(t_xr, w_xr, p_xr)

261 ms ± 5.46 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
prun_xr = %prun gc.relhum(t_xr, w_xr, p_xr)

 

         16436 function calls (16266 primitive calls) in 0.285 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       22    0.101    0.005    0.101    0.005 {built-in method numpy.array}
       11    0.079    0.007    0.079    0.007 {built-in method _hashlib.openssl_sha1}
      6/3    0.018    0.003    0.035    0.012 {built-in method _operator.sub}
        2    0.017    0.009    0.017    0.009 {method 'tolist' of 'numpy.ndarray' objects}
        2    0.010    0.005    0.010    0.005 {method 'searchsorted' of 'numpy.ndarray' objects}
    39/29    0.009    0.000    0.134    0.005 {built-in method numpy.core._multiarray_umath.implement_array_function}
        3    0.008    0.003    0.008    0.003 _methods.py:87(_clip_dep_invoke_with_casting)
        2    0.005    0.003    0.106    0.053 function_base.py:1152(diff)
        1    0.005    0.005    0.283    0.283 relhum.py:258(_xrelhum)
        2    0.003    0.002    0.140    0.07

In [9]:
# Convert each DataArray to a chunked (dask-backed) one, letting dask pick the
# chunk size.
# NOTE(review): for ~8 MB float64 arrays, 'auto' likely produces a single
# chunk, which would not exercise any parallelism -- confirm via `.chunks`.
t_chunk, w_chunk, p_chunk = (
    arr.chunk(chunks='auto') for arr in (t_xr, w_xr, p_xr)
)

In [10]:
# Time comparison
time_chunk = %timeit gc.relhum(t_chunk, w_chunk, p_chunk)

13.4 ms ± 198 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [12]:
prun_chunk = %prun gc.relhum(t_chunk, w_chunk, p_chunk)

 

         25037 function calls (24687 primitive calls) in 0.021 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       59    0.001    0.000    0.001    0.000 {method 'copy' of 'dict' objects}
       16    0.001    0.000    0.001    0.000 tokenize.py:295(detect_encoding)
       23    0.001    0.000    0.013    0.001 blockwise.py:12(blockwise)
3360/3352    0.001    0.000    0.001    0.000 {built-in method builtins.isinstance}
      224    0.000    0.000    0.001    0.000 tokenize.py:429(_tokenize)
       85    0.000    0.000    0.001    0.000 utils.py:32(meta_from_array)
  670/627    0.000    0.000    0.001    0.000 functools.py:947(__get__)
       53    0.000    0.000    0.000    0.000 {method 'reduce' of 'numpy.ufunc' objects}
       18    0.000    0.000    0.014    0.001 core.py:4223(elemwise)
       23    0.000    0.000    0.001    0.000 blockwise.py:78(blockwise)
       22    0.000    0.000    0.001    0.000 core.py:3502(

In [13]:
prun_chun