# SciPy Paper Scalability Visualizations

In [1]:
# Make imports

import geocat.comp as gc
import intake

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import dask.distributed as dd
import dask.array as da

## Generate dummy data

In [2]:
n = 1000000
t_np = np.random.uniform(250, 320, n)
w_np = np.random.uniform(0, 20, n)/1000
p_np = np.random.uniform(20, 1000, n)*100

In [3]:
# Load line and memory profilers
%load_ext line_profiler
%load_ext memory_profiler

In [4]:
# Time comparison
time_np = %timeit -r 20 -n 20 - gc.relhum(t_np, w_np, p_np)

38.4 ms ± 2.13 ms per loop (mean ± std. dev. of 20 runs, 20 loops each)


In [5]:
prun_np = %prun gc.relhum(t_np, w_np, p_np)

 

         70 function calls (66 primitive calls) in 0.040 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.033    0.033    0.039    0.039 relhum.py:184(_relhum)
        2    0.004    0.002    0.004    0.002 _methods.py:87(_clip_dep_invoke_with_casting)
        1    0.002    0.002    0.002    0.002 {method 'astype' of 'numpy.ndarray' objects}
        1    0.001    0.001    0.040    0.040 <string>:1(<module>)
        4    0.000    0.000    0.000    0.000 _methods.py:72(_clip_dep_is_scalar_nan)
        1    0.000    0.000    0.040    0.040 {built-in method builtins.exec}
        8    0.000    0.000    0.000    0.000 {built-in method numpy.array}
        4    0.000    0.000    0.000    0.000 fromnumeric.py:3075(ndim)
        2    0.000    0.000    0.004    0.002 _methods.py:106(_clip)
        1    0.000    0.000    0.039    0.039 relhum.py:6(relhum)
        4    0.000    0.000    0.000    0.000 _methods.py:82(_clip

In [6]:
t_xr = xr.DataArray(t_np)
w_xr = xr.DataArray(w_np)
p_xr = xr.DataArray(p_np)

In [7]:
# Time comparison
time_xr = %timeit -r 20 -n 20 gc.relhum(t_xr, w_xr, p_xr)

261 ms ± 6.9 ms per loop (mean ± std. dev. of 20 runs, 20 loops each)


In [8]:
prun_xr = %prun gc.relhum(t_xr, w_xr, p_xr)

 

         16436 function calls (16266 primitive calls) in 0.276 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       22    0.106    0.005    0.106    0.005 {built-in method numpy.array}
       11    0.083    0.008    0.083    0.008 {built-in method _hashlib.openssl_sha1}
        2    0.017    0.009    0.017    0.009 {method 'tolist' of 'numpy.ndarray' objects}
        2    0.010    0.005    0.010    0.005 {method 'searchsorted' of 'numpy.ndarray' objects}
    39/29    0.009    0.000    0.135    0.005 {built-in method numpy.core._multiarray_umath.implement_array_function}
        1    0.006    0.006    0.006    0.006 {method 'astype' of 'numpy.ndarray' objects}
        2    0.006    0.003    0.112    0.056 function_base.py:1152(diff)
        1    0.005    0.005    0.274    0.274 relhum.py:258(_xrelhum)
        2    0.003    0.002    0.147    0.073 slicing.py:577(take)
        4    0.003    0.001    0.013    0.003 computatio

In [9]:
t_chunk = t_xr.chunk(chunks='auto')
w_chunk = w_xr.chunk(chunks='auto')
p_chunk = p_xr.chunk(chunks='auto')

In [10]:
# Time comparison
time_chunk = %timeit -r 20 -n 20 gc.relhum(t_chunk, w_chunk, p_chunk)

13.5 ms ± 370 µs per loop (mean ± std. dev. of 20 runs, 20 loops each)


In [11]:
prun_chunk = %prun gc.relhum(t_chunk, w_chunk, p_chunk)

 

         25029 function calls (24679 primitive calls) in 0.026 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.004    0.004    0.005    0.005 common.py:1437(astype)
       55    0.001    0.000    0.001    0.000 {method 'copy' of 'dict' objects}
       53    0.001    0.000    0.001    0.000 {method 'reduce' of 'numpy.ufunc' objects}
       48    0.001    0.000    0.001    0.000 inspect.py:2045(p)
3360/3352    0.001    0.000    0.001    0.000 {built-in method builtins.isinstance}
       23    0.001    0.000    0.013    0.001 blockwise.py:12(blockwise)
      224    0.001    0.000    0.001    0.000 tokenize.py:429(_tokenize)
       85    0.000    0.000    0.001    0.000 utils.py:32(meta_from_array)
  670/627    0.000    0.000    0.001    0.000 functools.py:947(__get__)
       18    0.000    0.000    0.015    0.001 core.py:4223(elemwise)
       23    0.000    0.000    0.001    0.000 blockwise.py:78(blockwise)
    

In [12]:
perf = np.loadtxt('./time.txt')

OSError: ./time.txt not found.