### LD Matrix: Numba vs CUDA

Compare linkage disequilibrium (LD) matrix calculation times on 1000 Genomes (1KG) data between the CPU (Numba) and GPU (CUDA) backends.

In [1]:
from lib import api
import xarray as xr
import numpy as np
# Execute the shared paths script in this namespace; presumably this is what
# defines PLINK_1KG_PATH_02 used in the next cell — verify against paths.py.
%run ../nb/paths.py

In [None]:
# Dataset location used by the read cell below; the bare `path` expression
# echoes the value so the notebook records which dataset was benchmarked.
path = PLINK_1KG_PATH_02
path

In [None]:
%%time
ds = api.read_plink(path, chunks='auto', fam_sep=' ', bim_sep='\t', lock=False)
ds = ds.sel(ds.contig == 1)
ds

In [4]:
# Build the intervals consumed by the LD calculation below.
# The window is 1,000,000 in 'physical' units (a 1000 kbp window is the
# standard parameterization), with a target chunk size of 100,000.
intervals = api.axis_intervals(
    ds,
    window=1_000_000,
    unit='physical',
    target_chunk_size=100_000,
    backend='numba',
)

In [5]:
# Average chunk size: mean of (max_stop - min_start) over `intervals[1]`,
# truncated to an int, shown alongside the number of samples.
# `intervals[1]` presumably holds the per-chunk intervals — TODO confirm.
# The sample count is read from `ds.sizes`, since mapping-style lookup on
# `Dataset.dims` is deprecated in recent xarray releases.
chunk_intervals = intervals[1]
chunk_spans = chunk_intervals.sel(var='max_stop') - chunk_intervals.sel(var='min_start')
(int(chunk_spans.mean().values), ds.sizes['sample'])

(86922, 629)

In [6]:
%%time
ldm = api.ld_matrix(ds, intervals=intervals, threshold=0.2, backend='dask/numba', preallocate=False)
ldm.count().compute(scheduler='threads', num_workers=8, threads_per_worker=1)

CPU times: user 4h 20min 2s, sys: 45.3 s, total: 4h 20min 47s
Wall time: 35min 34s


i        371826613
j        371826613
value    371826613
dtype: int64

In [9]:
%%time
ldm = api.ld_matrix(ds, intervals=intervals, threshold=None, backend='dask/numba')
ldm.count().compute(scheduler='threads', num_workers=8, threads_per_worker=1)

CPU times: user 4h 8min 36s, sys: 1min 40s, total: 4h 10min 17s
Wall time: 33min 19s


i        13414996395
j        13414996395
value    13414996395
dtype: int64

In [19]:
%%time
ldm = api.ld_matrix(ds, intervals=intervals, threshold=None, backend='dask/cuda')
ldm.count().compute(scheduler='single-threaded')

CPU times: user 23min 35s, sys: 1min 34s, total: 25min 10s
Wall time: 25min 7s


i        13414996395
j        13414996395
value    13414996395
dtype: int64

In [7]:
%%time
ldm = api.ld_matrix(ds, intervals=intervals, threshold=0.2, backend='dask/cuda')
ldm.count().compute(scheduler='single-threaded')

CPU times: user 22min 10s, sys: 31.8 s, total: 22min 41s
Wall time: 22min 39s


i        371826627
j        371826627
value    371826627
dtype: int64