We can create an HDF5 store of all the data so that we can access it quickly later. Building it took about half an hour on my machine.

In [1]:
from datetime import datetime

import numpy as np
import pandas as pd

In [6]:
city = 'STLOUIS'
months = {4: 'April', 5: 'May', 6: 'June', 7: 'July', 8: 'August', 9: 'September'}

# Directory of the monthly CSV files. It depends only on the city, so it is
# built once rather than on every loop iteration.
# NOTE(review): the CSV file name below hard-codes the 'StLouis' prefix, so
# changing `city` alone is not enough to read another city's files.
path = '../../data/{c}/BOX/'.format(c=city)


def dateparse(Y, m, d, H, M):
    """Combine year, month, day, hour and minute fields into one datetime.

    Every field is passed through ``int`` first, so the string values read
    from the CSV are accepted.
    """
    return datetime(int(Y), int(m), int(d), int(H), int(M))


# The context manager closes the HDF5 file even when a CSV is missing or
# fails to parse part-way through the run.
with pd.HDFStore('{c}/store.h5'.format(c=city)) as store:
    for m in months:
        for y in range(2002, 2016):
            # The first five columns are merged into a single 'date_time'
            # column, which becomes the index; '-99' is read as NaN.
            df = pd.read_csv(path + 'StLouis_box_radar_{yyyy}_{mm:02d}.csv'.format(yyyy=y, mm=m),
                             header=None, sep=',', na_values='-99',
                             parse_dates={'date_time': [0, 1, 2, 3, 4]},
                             date_parser=dateparse, index_col=[0])
            # Relabel the data columns 0..19599 (19600 = 140 * 140;
            # presumably one column per grid cell -- TODO confirm).
            df.columns = range(0, 19600)
            # One table per month, keyed '<city>_<year>_<month>'.
            store['{c}_{yyyy}_{mm:02d}'.format(c=city, yyyy=y, mm=m)] = df

We can also store computed values that we expect to access often. This one takes a while to run too.

In [3]:
from radar import Radar

In [None]:
months = {4: 'April', 5: 'May', 6: 'June', 7: 'July', 8: 'August', 9: 'September'}
# Maps month number -> mean field for that month, averaged over all years.
monthly_mean = {}

for m in months:
    to_stack = []
    for y in range(2001, 2016):
        # NOTE(review): the city is spelled 'Charlotte' here but 'CHARLOTTE'
        # in the other cells, and the day of month is fixed at 4 --
        # presumably Radar loads the whole month containing `t`; confirm.
        c = Radar('Charlotte', t=datetime(y, m, 4), how='hdf5')
        # NaN-ignoring mean over axis 0 of the loaded box, scaled by 4
        # (presumably converting 15-minute values to an hourly rate, cf. the
        # make_rate=4 argument used in the peak-time cell -- TODO confirm).
        to_stack.append(np.nanmean(c.box, axis=0) * 4)
    # Average the per-year fields for this month, again ignoring NaNs.
    monthly_mean[m] = np.nanmean(np.stack(to_stack), axis=0)

In [None]:
# One row per month (indexed by month number), one column per element of the
# flattened monthly-mean field.
monthly_mean_df = pd.DataFrame([v.flatten() for v in monthly_mean.values()],
                               index=list(monthly_mean.keys()))

# NOTE(review): this writes 'computed.h5' in the working directory, whereas
# the peak-time cell writes to '{c}/computed.h5' -- confirm which is intended.
# The context manager closes the file even if the write fails.
with pd.HDFStore('computed.h5') as computed:
    computed['monthly_mean_df'] = monthly_mean_df

Once we have the store of all the data, it is quick to build a second, smaller store containing only the biggest events. Those events can then be loaded very quickly.

In [8]:
# Switch to the Charlotte data set: the cells below build their file paths
# ('{c}/...') from this name.
city = 'CHARLOTTE'

In [9]:
# Table of the top events for the selected city. The first two columns are
# parsed as dates (presumably the `t_begin` / `t_end` columns read by the
# later cells -- confirm against the CSV header).
events_csv = '{c}/TOP50_events.csv'.format(c=city)
TOP50 = pd.read_csv(events_csv, parse_dates=[0, 1])

In [10]:
# Copy the rows of each top event out of the full per-month store into a
# small dedicated store. Both files are opened once for the whole loop, and
# the context managers close them even if an event fails part-way through.
with pd.HDFStore('{c}/TOP50.h5'.format(c=city)) as top:
    with pd.HDFStore('{c}/store.h5'.format(c=city)) as store:
        for n in range(50):
            t_begin = TOP50.t_begin[n]
            t_end = TOP50.t_end[n]
            y = t_begin.year
            m = t_begin.month
            d = t_begin.day
            # The monthly table is selected from the event's start date only.
            # NOTE(review): an event that runs into the following month is
            # therefore cut off at the end of its first month -- confirm
            # that this is acceptable.
            fname = '{c}_{yyyy}_{mm:02d}'.format(c=city, yyyy=y, mm=m)
            df = store[fname]
            # Events are keyed by their start date; two events starting on
            # the same day would share a key and the later one would win.
            key = 'storm_{yyyy}_{mm:02d}_{dd:02d}'.format(yyyy=y, mm=m, dd=d)
            top[key] = df[t_begin:t_end]

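Compute the peak-time data: for each grid cell and each 15-minute slot of the day, count how many times each threshold (2, 10, 25, 50) is exceeded during the top 50 events.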

In [None]:
# Peak time of exceedance
#
# For every threshold, peak_time[thresh][i, iy, ix] counts the time steps of
# the top 50 events that fall exactly on the i-th 15-minute mark of the day
# and at which the box value at (iy, ix) is above the threshold.
thresholds = [2, 10, 25, 50]
peak_time = {thresh: np.zeros([96, 140, 140], dtype=np.int32)
             for thresh in thresholds}

for n in range(50):
    # The store path is formatted here like every other '{c}/...' path in
    # this notebook; formatting an already-expanded path is a no-op.
    # NOTE(review): confirm Radar does not expect the raw '{c}' template.
    c = Radar(city, TOP50.t_begin[n], how='hdf5',
              store='{c}/TOP50.h5'.format(c=city), make_rate=4)
    # Minutes since midnight for every time step of this event.
    minute_of_day = np.asarray(c.time.hour * 60 + c.time.minute)
    for thresh in thresholds:
        b = c.box > thresh
        for i, t in enumerate(range(0, 60 * 24, 15)):
            # Select the time steps that sit on this 15-minute mark and add
            # up the exceedances of all grid cells at once, instead of
            # looping over the 140 x 140 cells one by one in Python.
            in_slot = minute_of_day == t
            peak_time[thresh][i] += b[in_slot, :140, :140].sum(axis=0)

# NOTE(review): pd.Panel no longer exists in recent pandas releases; it is
# kept so that the stored objects stay readable by existing consumers.
with pd.HDFStore('{c}/computed.h5'.format(c=city)) as computed:
    for thresh in thresholds:
        p = pd.Panel(peak_time[thresh])
        computed['peak_time_TOP50_{thresh}'.format(thresh=thresh)] = p