# Benchmarks

This document collects some benchmarks of code improvements.

In [1]:
import skgstat as skg
import numpy as np

In [2]:
# Load the 2-D field that all benchmarks below sample from.
an_field = np.loadtxt('./docs/data/aniso_x2.txt')

# Fixed seed so the sampled coordinates are the same on every run.
np.random.seed(42)
# 3000 random (row, column) index pairs; both columns are drawn from
# range(an_field.shape[0]).
c = np.random.randint(an_field.shape[0], size=(3000, 2))
# Field value at every sampled coordinate, looked up in a single
# integer-array indexing call.
v = an_field[c[:, 0], c[:, 1]]

an_field.shape

(100, 100)

## `Variogram._experimental`

Benchmark for removing the explicit loop from `Variogram._experimental` in version `0.3.6`.

### Prepare Data

In [3]:
# Small case: only the first 100 sampled points.
Vsm = skg.Variogram(c[:100], v[:100], n_lags=30)
# Large case: all sampled points.
Vlg = skg.Variogram(c, v, n_lags=30)

# Print the summary of both variograms, one after the other.
print(Vsm, Vlg, sep='\n')

spherical Variogram
-------------------
Estimator:         matheron
        Effective Range:   38.82
        Sill:              0.79
        Nugget:            0.00
        
spherical Variogram
-------------------
Estimator:         matheron
        Effective Range:   60.13
        Sill:              1.15
        Nugget:            0.00
        


### Define the old and new functions

In [4]:
from joblib import Parallel, delayed

def _new(self):
    """Compute the experimental variogram without an explicit loop.

    Maps ``self._estimator`` over the lag classes and collects the results
    with ``np.fromiter``.

    Parameters
    ----------
    self : object
        Must provide ``_estimator`` (a callable with a ``__name__``),
        ``distance`` and ``lag_classes()``. In this notebook it is a
        ``skgstat.Variogram``.

    Returns
    -------
    numpy.ndarray
        ``float64`` array holding one estimator value per lag class, in the
        order produced by ``self.lag_classes()``.
    """
    if self._estimator.__name__ == 'entropy':
        # The entropy estimator takes an extra ``bins`` argument: 50 evenly
        # spaced values spanning the observed distance range.
        bins = np.linspace(
            np.min(self.distance),
            np.max(self.distance),
            50
        )

        def mapper(lag_values):
            return self._estimator(lag_values, bins=bins)
    else:
        mapper = self._estimator

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the same dtype.
    return np.fromiter(map(mapper, self.lag_classes()), dtype=np.float64)

def _old(self):
    """Loop-based baseline for computing the experimental variogram.

    Evaluates ``self._estimator`` once per lag class inside an explicit
    Python loop and writes each result into a preallocated array. This is
    the reference implementation that the other variants in this notebook
    are timed against.

    Parameters
    ----------
    self : object
        Must provide ``bins``, ``distance``, ``_estimator`` and
        ``lag_classes()``. In this notebook it is a ``skgstat.Variogram``.

    Returns
    -------
    numpy.ndarray
        ``float64`` array of length ``len(self.bins)``; entry ``i`` holds the
        estimator value of the ``i``-th lag class.
    """
    # one output slot per entry of ``self.bins``, initialised to zero
    y = np.zeros(len(self.bins), dtype=np.float64)

    if self._estimator.__name__ == 'entropy':
        # the entropy estimator takes an extra ``bins`` argument: 50 evenly
        # spaced values spanning the observed distance range
        bins = np.linspace(
            np.min(self.distance),
            np.max(self.distance),
            50
        )
        # evaluate the estimator with the shared bins for every lag class
        for i, lag_values in enumerate(self.lag_classes()):
            y[i] = self._estimator(lag_values, bins=bins)

    # every other estimator is called with the lag values only
    else:
        for i, lag_values in enumerate(self.lag_classes()):
            y[i] = self._estimator(lag_values)

    # hand back a copy of the filled result array
    return y.copy()

def _parallel(self):
    """Compute the experimental variogram with one joblib task per lag class.

    Parameters
    ----------
    self : object
        Must provide ``_estimator`` (a callable with a ``__name__``),
        ``distance`` and ``lag_classes()``. In this notebook it is a
        ``skgstat.Variogram``.

    Returns
    -------
    list
        Estimator results, one per lag class, in the order produced by
        ``self.lag_classes()``.
    """
    # Bind the estimator to a local name so the wrapper below closes over
    # the callable only, not over the whole ``self`` object.
    estimator = self._estimator

    if estimator.__name__ == 'entropy':
        # The entropy estimator takes an extra ``bins`` argument: 50 evenly
        # spaced values spanning the observed distance range.
        bins = np.linspace(
            np.min(self.distance),
            np.max(self.distance),
            50
        )

        def model(lags):
            return estimator(lags, bins=bins)
    else:
        model = estimator

    # Dispatch ``model`` rather than the bare estimator, so the entropy
    # estimator receives its bins.
    return Parallel(n_jobs=8)(delayed(model)(lags) for lags in self.lag_classes())

### Results

In [5]:
# Use the 'cressie' estimator on both variograms for all timings below.
Vsm.set_estimator('cressie')
Vlg.set_estimator('cressie')

In [6]:
# Loop-based baseline on the small variogram (first 100 points).
%timeit _old(Vsm)

248 µs ± 8.05 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [7]:
# map/fromiter implementation on the small variogram (first 100 points).
%timeit _new(Vsm)

238 µs ± 1.37 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [8]:
# joblib implementation (n_jobs=8) on the small variogram (first 100 points).
%timeit _parallel(Vsm)

17.4 ms ± 3.82 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
# Loop-based baseline on the large variogram (all 3000 points).
%timeit _old(Vlg)

205 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
# map/fromiter implementation on the large variogram (all 3000 points).
%timeit _new(Vlg)

199 ms ± 1.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
# joblib implementation (n_jobs=8) on the large variogram (all 3000 points).
%timeit _parallel(Vlg)

385 ms ± 5.03 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


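The speed-up from removing the loop is small (about 3-4 % in both cases), but the code looks much better. Parallelization does not help here: with one task per lag class, the joblib version is slower than both serial versions for the small and the large variogram.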