In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
from pathlib import Path
from sgkit_bgen import read_bgen
from bokeh.io import output_notebook, export_png
from dask.diagnostics import ResourceProfiler, ProgressBar
%load_ext autoreload
%autoreload 2
output_notebook()

In [2]:
import sys
# Put ../scripts (relative to the notebook's working directory) at the front of
# the import path so that the local `convert` module below can be imported.
sys.path.insert(0,'../scripts')
from convert import BGENPaths, Contig, load_bgen, load_bgen_samples, load_bgen_dosage, save_dataset

In [3]:
# Directory holding the chrXY BGEN, variant (mfi) and sample files.
# Defaults to the original local location; set the UKB_BGEN_DIR environment
# variable to point the notebook at a different copy of the data.
bgen_dir = Path(os.environ.get('UKB_BGEN_DIR', '/home/eczech/data/rs-ukb-local/bgen'))

# Paths are passed as plain strings, exactly as before.
paths = BGENPaths(
    bgen_path=str(bgen_dir / 'ukb_imp_chrXY_v3.bgen'),
    variants_path=str(bgen_dir / 'ukb_mfi_chrXY_v3.txt'),
    samples_path=str(bgen_dir / 'ukb59384_imp_chrXY_v3_s486331.sample'),
)

In [4]:
# Target size of a single dask chunk, in bytes.
# 512 MiB is used here; change the multiplier to 256 for 256 MiB chunks.
n_bytes = 512 * 1024 ** 2
# Size in bytes of one float32 value (the dosage array is loaded as float32).
bytes_per_value = 4
# Number of variants (rows) per chunk.
n_variants = 1024
# Samples (columns) per chunk so that
# n_variants * n_samples * bytes_per_value == n_bytes.
n_samples = (n_bytes // bytes_per_value) // n_variants
n_variants, n_samples

(1024, 131072)

In [5]:
# `paths` is already defined in the BGENPaths cell above with these same file
# locations, so it is reused here instead of being rebuilt a second time.
# Contig 'XY' is registered with index 25.
contig = Contig(index=25, name='XY')
# Load the BGEN data lazily, chunked as (n_variants, n_samples) per block.
ds = load_bgen(paths, contig, chunks=(n_variants, n_samples))
ds

Unnamed: 0,Array,Chunk
Bytes,89.32 GB,536.87 MB
Shape,"(45906, 486443)","(1024, 131072)"
Count,181 Tasks,180 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 89.32 GB 536.87 MB Shape (45906, 486443) (1024, 131072) Count 181 Tasks 180 Chunks Type float32 numpy.ndarray",486443  45906,

Unnamed: 0,Array,Chunk
Bytes,89.32 GB,536.87 MB
Shape,"(45906, 486443)","(1024, 131072)"
Count,181 Tasks,180 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,22.33 GB,134.22 MB
Shape,"(45906, 486443)","(1024, 131072)"
Count,361 Tasks,180 Chunks
Type,bool,numpy.ndarray
"Array Chunk Bytes 22.33 GB 134.22 MB Shape (45906, 486443) (1024, 131072) Count 361 Tasks 180 Chunks Type bool numpy.ndarray",486443  45906,

Unnamed: 0,Array,Chunk
Bytes,22.33 GB,134.22 MB
Shape,"(45906, 486443)","(1024, 131072)"
Count,361 Tasks,180 Chunks
Type,bool,numpy.ndarray


In [6]:
# Materialise a tiny corner of the dosage array (first 3 variants x first 10
# samples) as a quick sanity check that the BGEN data can be read.
ds['call_dosage'].isel(variants=slice(3), samples=slice(10)).compute()

In [7]:
%%time
with ProgressBar(), ResourceProfiler() as prof:
    m = ds['call_dosage'][:2500,:].mean(dim='samples').compute(scheduler='processes')

[########################################] | 100% Completed |  2min 18.6s
CPU times: user 43.3 s, sys: 1.11 s, total: 44.4 s
Wall time: 2min 19s


In [9]:
# Summary statistics of `m`, the per-variant mean dosage computed from the
# BGEN-backed dataset; used as the reference for the Zarr round-trip checks below.
pd.Series(m).describe()

count    2500.000000
mean        0.115621
std         0.320614
min         0.000021
25%         0.000411
50%         0.001762
75%         0.011835
max         1.912868
dtype: float64

In [10]:
# Positional subset: the first 2500 variants, used as a small test dataset.
# `variants` is a dimension without coordinates, so `isel` is used to make the
# positional, end-exclusive slice explicit instead of relying on `sel` falling
# back to positional indexing (which would turn label-based and end-inclusive
# if a `variants` coordinate were ever added).
dss = ds.isel(variants=slice(2500))
dss

Unnamed: 0,Array,Chunk
Bytes,4.86 GB,536.87 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,193 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.86 GB 536.87 MB Shape (2500, 486443) (1024, 131072) Count 193 Tasks 12 Chunks Type float32 numpy.ndarray",486443  2500,

Unnamed: 0,Array,Chunk
Bytes,4.86 GB,536.87 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,193 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.22 GB,134.22 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,373 Tasks,12 Chunks
Type,bool,numpy.ndarray
"Array Chunk Bytes 1.22 GB 134.22 MB Shape (2500, 486443) (1024, 131072) Count 373 Tasks 12 Chunks Type bool numpy.ndarray",486443  2500,

Unnamed: 0,Array,Chunk
Bytes,1.22 GB,134.22 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,373 Tasks,12 Chunks
Type,bool,numpy.ndarray


### Test Save Locally

In [13]:
%%time
with ProgressBar(), ResourceProfiler() as prof:
    save_dataset('/tmp/test.zarr', dss, contig, scheduler='processes', remote=False)

2020-08-18 23:50:48,652 | convert | INFO | Dataset for contig Contig(name='XY', index=25):
<xarray.Dataset>
Dimensions:               (samples: 486443, variants: 2500)
Dimensions without coordinates: samples, variants
Data variables:
    variant_id            (variants) object 'X:60014_T_C' ... 'X:293124_A_G'
    variant_rsid          (variants) object 'rs370048753' ... 'rs28621836'
    variant_position      (variants) int32 60014 60014 60017 ... 293106 293124
    variant_allele1_ref   (variants) object 'T' 'T' 'C' 'G' ... 'G' 'C' 'G' 'A'
    variant_allele2_alt   (variants) object 'C' 'G' 'T' 'C' ... 'C' 'T' 'A' 'G'
    variant_maf           (variants) float64 0.0003958 0.0005032 ... 0.0008407
    variant_minor_allele  (variants) object 'C' 'G' 'T' 'C' ... 'C' 'T' 'A' 'G'
    variant_info          (variants) float64 0.7276 0.6867 ... 0.3437 0.8284
    sample_id1            (samples) int32 4476413 3205773 ... 2850971 4315851
    sample_id2            (samples) int32 4476413 3205773 ...

In [14]:
# Build the plot of the resource profile held in `prof` (filled by the most
# recently executed profiled cell) and display it as the cell output.
plot = prof.visualize()
plot

In [16]:
# Save the profile plot as a PNG; the relative filename resolves against the
# notebook's current working directory.
# NOTE(review): bokeh's export_png needs selenium plus a browser driver installed — confirm in this environment.
export_png(plot, filename="bgen_to_zarr_XY_2500.png")

In [17]:
# Re-open the store written by the local save test to inspect what was persisted.
dsr = xr.open_zarr(store='/tmp/test.zarr')
dsr

Unnamed: 0,Array,Chunk
Bytes,4.86 GB,536.87 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.86 GB 536.87 MB Shape (2500, 486443) (1024, 131072) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",486443  2500,

Unnamed: 0,Array,Chunk
Bytes,4.86 GB,536.87 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.22 GB,134.22 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,bool,numpy.ndarray
"Array Chunk Bytes 1.22 GB 134.22 MB Shape (2500, 486443) (1024, 131072) Count 13 Tasks 12 Chunks Type bool numpy.ndarray",486443  2500,

Unnamed: 0,Array,Chunk
Bytes,1.22 GB,134.22 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,bool,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 1.95 MB 243.22 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type int32 numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 1.95 MB 243.22 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type int32 numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 3.89 MB 486.45 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type object numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 3.89 MB 486.45 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type object numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.00 kB,5.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 5.00 kB 5.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type int16 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,5.00 kB,5.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,40.00 kB,40.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,numpy.ndarray,
"Array Chunk Bytes 40.00 kB 40.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,40.00 kB,40.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,numpy.ndarray,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.00 kB,10.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 10.00 kB 10.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type int32 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,10.00 kB,10.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray


In [18]:
%%time
with ProgressBar(), ResourceProfiler() as prof:
    m = dsr['call_dosage'][:2500,:].mean(dim='samples').compute(scheduler='processes')

[########################################] | 100% Completed |  9.0s
CPU times: user 232 ms, sys: 146 ms, total: 377 ms
Wall time: 10.1 s


In [20]:
# Summary statistics of `m` as recomputed from the local Zarr copy; compare
# with the statistics obtained from the BGEN-backed dataset earlier.
pd.Series(m).describe()

count    2500.000000
mean        0.115621
std         0.320614
min         0.000021
25%         0.000411
50%         0.001762
75%         0.011835
max         1.912868
dtype: float64

### Test Save Remotely

In [25]:
%%time
with ProgressBar(), ResourceProfiler() as prof:
    save_dataset('gs://rs-ukb/prep-data/test.zarr', dss, contig, scheduler='processes', remote=True)

2020-08-19 00:02:10,341 | convert | INFO | Dataset for contig Contig(name='XY', index=25):
<xarray.Dataset>
Dimensions:               (samples: 486443, variants: 2500)
Dimensions without coordinates: samples, variants
Data variables:
    variant_id            (variants) object 'X:60014_T_C' ... 'X:293124_A_G'
    variant_rsid          (variants) object 'rs370048753' ... 'rs28621836'
    variant_position      (variants) int32 60014 60014 60017 ... 293106 293124
    variant_allele1_ref   (variants) object 'T' 'T' 'C' 'G' ... 'G' 'C' 'G' 'A'
    variant_allele2_alt   (variants) object 'C' 'G' 'T' 'C' ... 'C' 'T' 'A' 'G'
    variant_maf           (variants) float64 0.0003958 0.0005032 ... 0.0008407
    variant_minor_allele  (variants) object 'C' 'G' 'T' 'C' ... 'C' 'T' 'A' 'G'
    variant_info          (variants) float64 0.7276 0.6867 ... 0.3437 0.8284
    sample_id1            (samples) int32 4476413 3205773 ... 2850971 4315851
    sample_id2            (samples) int32 4476413 3205773 ...

In [26]:
import gcsfs

# Re-open the store written by the remote save test directly from GCS.
# The key-value mapping is obtained from the filesystem object via
# `get_mapper` rather than through the legacy `gcsfs.GCSMap(..., gcs=...)`
# helper; `check=False, create=False` keep the original behaviour of neither
# probing nor creating the target location.
gcs = gcsfs.GCSFileSystem()
store = gcs.get_mapper('gs://rs-ukb/prep-data/test.zarr', check=False, create=False)
dsr = xr.open_zarr(store)
dsr

Unnamed: 0,Array,Chunk
Bytes,4.86 GB,536.87 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 4.86 GB 536.87 MB Shape (2500, 486443) (1024, 131072) Count 13 Tasks 12 Chunks Type float32 numpy.ndarray",486443  2500,

Unnamed: 0,Array,Chunk
Bytes,4.86 GB,536.87 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.22 GB,134.22 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,bool,numpy.ndarray
"Array Chunk Bytes 1.22 GB 134.22 MB Shape (2500, 486443) (1024, 131072) Count 13 Tasks 12 Chunks Type bool numpy.ndarray",486443  2500,

Unnamed: 0,Array,Chunk
Bytes,1.22 GB,134.22 MB
Shape,"(2500, 486443)","(1024, 131072)"
Count,13 Tasks,12 Chunks
Type,bool,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 1.95 MB 243.22 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type int32 numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 1.95 MB 243.22 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type int32 numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,1.95 MB,243.22 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 3.89 MB 486.45 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type object numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 3.89 MB 486.45 kB Shape (486443,) (60806,) Count 9 Tasks 8 Chunks Type object numpy.ndarray",486443  1,

Unnamed: 0,Array,Chunk
Bytes,3.89 MB,486.45 kB
Shape,"(486443,)","(60806,)"
Count,9 Tasks,8 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,5.00 kB,5.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int16,numpy.ndarray
"Array Chunk Bytes 5.00 kB 5.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type int16 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,5.00 kB,5.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,40.00 kB,40.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,numpy.ndarray,
"Array Chunk Bytes 40.00 kB 40.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,40.00 kB,40.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,numpy.ndarray,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.00 kB,10.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 10.00 kB 10.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type int32 numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,10.00 kB,10.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 20.00 kB 20.00 kB Shape (2500,) (2500,) Count 2 Tasks 1 Chunks Type object numpy.ndarray",2500  1,

Unnamed: 0,Array,Chunk
Bytes,20.00 kB,20.00 kB
Shape,"(2500,)","(2500,)"
Count,2 Tasks,1 Chunks
Type,object,numpy.ndarray


In [27]:
%%time
with ProgressBar(), ResourceProfiler() as prof:
    m = dsr['call_dosage'][:2500,:].mean(dim='samples').compute(scheduler='processes')

[########################################] | 100% Completed | 10.1s
CPU times: user 220 ms, sys: 142 ms, total: 362 ms
Wall time: 11.1 s


In [28]:
# Summary statistics of `m` as recomputed from the GCS-backed Zarr store;
# compare with the statistics from the BGEN source and the local Zarr copy.
pd.Series(m).describe()

count    2500.000000
mean        0.115621
std         0.320614
min         0.000021
25%         0.000411
50%         0.001762
75%         0.011835
max         1.912868
dtype: float64