# Preload precipitation files

Since loading precipitation from the original COSMO GRIB files takes quite a long time, I will try preloading the fields once and storing them in a separate file. This should hopefully speed up my analysis.

In [6]:
# Imports
from helpers import *
from config import *
import bcolz
import numpy as np
import datetime as dt
from cosmo_utils.helpers import ddhhmmss
import timeit

In [8]:
# Let's try for one experiment for the deterministic forecasts first
# exp_id is appended to datadir to build the experiment's data directory
exp_id = 'DA_REF'
fclt = 24   # Forecast lead time in hours; one field is loaded per hour
# Passed to the loaders together with the lead time
# (presumably the forecast start as YYYYMMDDHHMMSS -- confirm against the loader)
date = '20160529000000'

In [10]:
# Read the hourly deterministic fields for lead times 1..fclt into one array,
# timing the whole read.
data_dir = datadir + exp_id
time_start = timeit.default_timer()
for idx in range(fclt):
    # ddhhmmss turns the lead time into the string the loader expects
    # (presumably the ddhhmmss part of the file name -- confirm in cosmo_utils)
    lead_str = ddhhmmss(dt.timedelta(hours=idx + 1))
    field = load_det(data_dir, date, lead_str, return_array=True)
    if idx == 0:
        # Size the output from the first field; later fields must match it
        prec = np.empty((fclt, field.shape[0], field.shape[1]))
    prec[idx] = field
time_stop = timeit.default_timer()
print('Time: %.2f s' % (time_stop - time_start))

initial path:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00010000_15
1h earlier:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00000000_15
initial path:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00020000_15
1h earlier:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00010000_15
initial path:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00030000_15
1h earlier:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00020000_15
initial path:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00040000_15
1h earlier:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529000000/det/lfff00030000_15
initial path:  /project/meteo/w2w/A6/S.Rasp/kenda_psp_data/data_forecast/DA_REF/20160529

Loading the data takes 8.6 s the first time; when it is loaded a second time it takes 4.8 s.

In [11]:
# Check the dimensions: the first axis has one entry per forecast hour,
# the other two come from the loaded field
prec.shape

(24, 461, 421)

In [12]:
# From fast.ai deeplearning course 1 in utils.py
def save_array(fname, arr):
    """Persist ``arr`` as a bcolz carray rooted at the directory ``fname``.

    The carray is created with ``mode='w'`` and flushed right away so that
    the data is written out to disk before the function returns.
    """
    bcolz.carray(arr, rootdir=fname, mode='w').flush()


def load_array(fname):
    """Read the bcolz container stored at ``fname`` fully into memory.

    The trailing ``[:]`` slices the whole on-disk container, so the caller
    receives its complete contents rather than a handle to the files.
    """
    on_disk = bcolz.open(fname)
    return on_disk[:]

In [13]:
# Write the loaded array to a bcolz directory in the tmp folder
fn = '/project/meteo/w2w/A6/S.Rasp/tmp/test.bc'
save_array(fn, prec)

In [16]:
# Disk usage of the saved bcolz directory (human-readable, one level deep)
!du -h -d1 /project/meteo/w2w/A6/S.Rasp/tmp/test.bc

3.3M	/project/meteo/w2w/A6/S.Rasp/tmp/test.bc/data
950K	/project/meteo/w2w/A6/S.Rasp/tmp/test.bc/meta
4.3M	/project/meteo/w2w/A6/S.Rasp/tmp/test.bc


In [18]:
# Time how long reading the saved bcolz copy back into memory takes
time_start = timeit.default_timer()
prec = load_array(fn)
time_stop = timeit.default_timer()
print('Time: %.2f s' % (time_stop - time_start))

Time: 0.17 s


Now let's do the same for an ensemble

In [19]:
# Switch to the ensemble experiment; fclt and date are reused from above
exp_id = 'DA_REF_ens'

In [21]:
# Read the hourly ensemble fields for lead times 1..fclt into one array,
# timing the whole read.
data_dir = datadir + exp_id
time_start = timeit.default_timer()
for idx in range(fclt):
    lead_str = ddhhmmss(dt.timedelta(hours=idx + 1))
    # load_ens's result is wrapped in np.array so it can be indexed by shape
    # (presumably one 2D field per ensemble member -- confirm in helpers)
    fields = np.array(load_ens(data_dir, date, lead_str, return_array=True))
    if idx == 0:
        # Size the output from the first hour; later hours must match it
        prec = np.empty((fclt, fields.shape[0], fields.shape[1], fields.shape[2]))
    prec[idx] = fields
time_stop = timeit.default_timer()
print('Time: %.2f s' % (time_stop - time_start))

read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:03.546789
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:04.124560
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:03.958489
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:03.943129
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:03.952523
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:04.052176
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:04.124520
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
reading took 0:00:04.058961
read to tmp-file /tmp/user/13098/tmp9okG27fieldobj_list_array_met-ws-720e07_10156.npy
re

In [22]:
# Check the dimensions: the first axis has one entry per forecast hour,
# the other three come from the array built from load_ens
prec.shape

(24, 20, 461, 421)

In [23]:
# Write the ensemble array to its own bcolz directory in the tmp folder
fn = '/project/meteo/w2w/A6/S.Rasp/tmp/test_ens.bc'
save_array(fn, prec)

In [24]:
# Disk usage of the saved ensemble bcolz directory (human-readable, one level deep)
!du -h -d1 /project/meteo/w2w/A6/S.Rasp/tmp/test_ens.bc

70M	/project/meteo/w2w/A6/S.Rasp/tmp/test_ens.bc/data
19M	/project/meteo/w2w/A6/S.Rasp/tmp/test_ens.bc/meta
88M	/project/meteo/w2w/A6/S.Rasp/tmp/test_ens.bc


In [25]:
# Time how long reading the ensemble bcolz copy back into memory takes
time_start = timeit.default_timer()
prec = load_array(fn)
time_stop = timeit.default_timer()
print('Time: %.2f s' % (time_stop - time_start))

Time: 1.18 s


Loading the ensemble drops from 99 s to about 1 s. That should speed things up quite considerably.

In [26]:
# For comparison, also store the same array in NumPy's own .npy format
np.save('/project/meteo/w2w/A6/S.Rasp/tmp/test_ens.npy', prec)

In [28]:
# Time how long reading the .npy copy back into memory takes
time_start = timeit.default_timer()
prec = np.load('/project/meteo/w2w/A6/S.Rasp/tmp/test_ens.npy')
time_stop = timeit.default_timer()
print('Time: %.2f s' % (time_stop - time_start))

Time: 0.27 s
