In [None]:
import numpy as np
import xarray as xr
from dask.distributed import Client,LocalCluster
import matplotlib.pyplot as plt

In [None]:
# Folder holding the processed input files. Keep the trailing slash: file names
# are appended to this string by plain concatenation further down.
path_to_data = '/path/to/our/shared/datasets/folder/processed_data/'

# making linear regression go fast

method: using dask distributed scheduler, xr.polyfit, and xr.map_blocks

If you don't use the distributed scheduler (client, cluster) and rely on the default dask scheduler instead, the same computation takes 16 minutes.

Only remaining issue: I cannot, for the life of me, suppress the xarray polyfit rank warnings.

<br><br>
General rules for fast linear-regression computation that I've found in my testing:
1) Always use the dask distributed client and cluster.
2) Use built-in functions that do the math for you as much as possible.
3) Compute the regression in a way that can be vectorized; never compute a single cell at a time.

In [None]:
# Number of workers to start. There are 2 threads per core, so the most we
# can pick for LocalCluster is 2x40=80.
nworkers = 20

# A cluster where each thread is a separate process or "worker".
cluster = LocalCluster(threads_per_worker=1, n_workers=nworkers)

# Connect to the compute cluster.
client = Client(cluster)

# Wait up to 10s for the cluster to be fully ready; errors if it is not
# ready within 10s.
client.wait_for_workers(timeout=10, n_workers=nworkers)

# Last expression of the cell: print the client info.
client

In [None]:
# function to call with map_blocks
# operates on xarray chunks and returns xarray chunks
def polyfit_parallel(data_chunk,skipna):
    """Fit a degree-1 polynomial along ``time`` and return the linear coefficient.

    The ``time`` coordinate is replaced by its calendar year before fitting,
    so the fit is done against year numbers rather than raw datetimes.

    Parameters
    ----------
    data_chunk
        One block of the array being processed (as handed over by
        ``map_blocks``). Its ``time`` coordinate must be datetime-like,
        because ``.dt.year`` is taken from it.
    skipna
        Forwarded unchanged to ``polyfit`` as its ``skipna`` argument.

    Returns
    -------
    The ``polyfit_coefficients`` entry for ``degree=1`` with the ``degree``
    coordinate dropped.
    """
    # assign_coords returns a new object, so the caller's chunk is left
    # untouched and the function can safely be called again on the same array
    # (writing into data_chunk.coords would leave a non-datetime ``time``
    # behind and break the ``.dt`` access on a second call).
    yearly_chunk = data_chunk.assign_coords(time=data_chunk.time.dt.year)
    fit = yearly_chunk.polyfit('time',1,skipna=skipna)
    return fit.polyfit_coefficients.sel(degree=1).drop_vars('degree')

In [None]:
%%time
# lazy load wealth to chunked array (not in memory)
print('lazy load data')
# time=-1 keeps the whole time axis inside every chunk, so each block can be
# fitted on its own; space is tiled in 1000x1000 blocks.
# this chunking results in wealth with 80 chunks, each about 75MB in size, not in memory yet
wealth = xr.open_dataset(path_to_data+'wealth.nc',chunks={'time':-1,'y':1000,'x':1000}).wealth_pc

# set up a chunked array template that has the exact dims and coords as the output of function polyfit_parallel will have
print('create output template array')
template=wealth.isel(time=0).drop_vars(['time','spatial_ref'])
template.attrs={'standard_name':'wealth_pc trend'} # put whatever attributes you want

# do the parallel compute
print('execute')
# skipna has to be a real boolean: the string 'True' only behaved like True
# because any non-empty string is truthy, so 'False' would have enabled it too.
wealth_trend=wealth.map_blocks(polyfit_parallel,template=template,kwargs={'skipna':True}).compute()
wealth_trend

In [None]:
# Shut the client down now that the trend has been computed; wealth_trend was
# already produced by .compute() above, and the remaining cells only use it.
client.shutdown()

I would expect this to work with `apply_ufunc` as well, and it may be even faster, since that operates on numpy arrays instead of labeled xarray objects.

In [None]:
# Smallest and largest fitted trend value, as plain Python scalars: (min, max).
tuple(reducer().item() for reducer in (wealth_trend.min, wealth_trend.max))

In [None]:
# Quick-look plot of the computed trend array using xarray's default plot method.
wealth_trend.plot()

Is this the expected result? Most trend values are very close to zero.