## Imports

In [34]:
import pandas as pd

In [35]:
# Read the offshore observations; the first CSV column (time) is used as the index.
observations: pd.DataFrame = pd.read_csv("../data/Offshore_Observations.csv", index_col=0)
# The index is read as plain labels, so turn it into a DatetimeIndex.
observations.index = pd.to_datetime(observations.index)
# Predictor variables are all columns except the bookkeeping columns "horizon" and "is_origin",
# i.e. Index(['u10', 'v10', 'd2m', 't2m', 'msl', 'sp', 'speed']).
pred_vars = observations.columns.drop(["horizon", "is_origin"])
# Keep only the predictor columns and order the rows by time.
observations = observations[pred_vars].sort_index(level=0)

In [36]:
# Show the last row of the observations; the frame was sorted by its time index above,
# so this is the most recent observation.
# NOTE(review): in the recorded output `msl` (~291.5) is orders of magnitude away from `sp`
# (~102389) — possibly a data problem in this row; verify against the source CSV.
observations.iloc[-1]

u10           2.982716
v10          -5.717480
d2m         281.803680
t2m         289.077330
msl         291.514500
sp       102389.480000
speed         6.448734
Name: 2018-08-31 18:00:00, dtype: float64

In [37]:
# Load the ensemble forecasts from CSV.
ensembles: pd.DataFrame = pd.read_csv("../data/Offshore_Ensembles.csv")
# Convert the time column to datetime. `infer_datetime_format` is intentionally not passed:
# it is deprecated since pandas 2.0, where format inference is the default behaviour.
ensembles["time"] = pd.to_datetime(ensembles["time"])
# Build the (horizon, time, number) MultiIndex with set_index. The previous
# pivot(index=[...], columns=[]) call produced the same frame, but only because unstacking
# an empty list of levels happens to be a no-op.
ensembles = ensembles.set_index(["horizon", "time", "number"])
ensembles = ensembles[pred_vars]  # reduce columns to the predictor variables
# Sort by horizon first (irrelevant), then by date (relevant for iloc!), then by member number.
ensembles = ensembles.sort_index(level=[0, 1, 2])

In [38]:
# Display the prepared ensemble frame, indexed by (horizon, time, number);
# pandas truncates the rendered table to its first and last rows.
ensembles

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,u10,v10,d2m,t2m,msl,sp,speed
horizon,time,number,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,2017-02-01,1,-3.977708,7.919396,274.65344,277.35468,101832.234,101833.09,8.862223
0,2017-02-01,2,-2.662095,8.383109,274.98750,277.06024,101817.600,101818.65,8.795639
0,2017-02-01,3,-3.467212,8.559954,274.81345,277.03150,101851.420,101852.51,9.235495
0,2017-02-01,4,-3.173303,7.742434,274.82900,277.38577,101798.590,101799.03,8.367505
0,2017-02-01,5,-3.267270,7.927633,274.84552,277.43518,101814.110,101814.95,8.574521
...,...,...,...,...,...,...,...,...,...
24,2018-09-01,46,3.305267,-4.471345,283.36807,289.00464,102446.620,102447.15,5.560370
24,2018-09-01,47,3.569658,-4.180910,283.10547,288.84120,102460.940,102461.63,5.497496
24,2018-09-01,48,4.149755,-3.274074,282.53824,289.06512,102444.984,102445.64,5.285832
24,2018-09-01,49,3.501141,-3.465200,282.36310,289.06354,102411.970,102412.65,4.926012


In [39]:
# Forecast horizon to analyse; it is looked up in the outermost index level ("horizon").
horizon = 18
# Take the cross-section for that horizon; the horizon level is dropped, leaving (time, number).
# NOTE: this rebinds `ensembles`, so re-running this cell without first re-running the
# loading cell fails, because the horizon level no longer exists.
ensembles = ensembles.xs(horizon, level=0)

In [40]:
# Display the ensembles for the selected horizon; the index is now (time, number).
ensembles

Unnamed: 0_level_0,Unnamed: 1_level_0,u10,v10,d2m,t2m,msl,sp,speed
time,number,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-02-01 18:00:00,1,-6.765342,9.689136,275.32645,277.89280,101188.310,101189.336,11.817327
2017-02-01 18:00:00,2,-6.058051,10.363215,275.58044,278.44110,101127.540,101128.220,12.004008
2017-02-01 18:00:00,3,-5.979378,10.052594,275.70944,278.53094,101151.360,101152.164,11.696478
2017-02-01 18:00:00,4,-6.375141,10.204773,275.41544,278.00250,101104.530,101105.200,12.032448
2017-02-01 18:00:00,5,-6.189095,9.715862,275.88672,278.55417,101198.830,101199.650,11.519673
...,...,...,...,...,...,...,...,...
2018-08-31 18:00:00,46,2.912539,-6.299201,282.70288,289.21503,102365.440,102366.310,6.939944
2018-08-31 18:00:00,47,3.825933,-6.051285,282.54490,289.11670,102380.945,102381.680,7.159317
2018-08-31 18:00:00,48,2.482987,-5.617123,281.98657,289.21515,102352.414,102353.330,6.141441
2018-08-31 18:00:00,49,3.287867,-5.798559,281.72733,288.95807,102364.310,102365.350,6.665835


In [42]:
# List the distinct timestamps (index level 0, "time") present for the selected horizon.
ensembles.index.get_level_values(0).unique()

DatetimeIndex(['2017-02-01 18:00:00', '2017-02-02 18:00:00',
               '2017-02-03 18:00:00', '2017-02-04 18:00:00',
               '2017-02-05 18:00:00', '2017-02-06 18:00:00',
               '2017-02-07 18:00:00', '2017-02-08 18:00:00',
               '2017-02-09 18:00:00', '2017-02-10 18:00:00',
               ...
               '2018-08-22 18:00:00', '2018-08-23 18:00:00',
               '2018-08-24 18:00:00', '2018-08-25 18:00:00',
               '2018-08-26 18:00:00', '2018-08-27 18:00:00',
               '2018-08-28 18:00:00', '2018-08-29 18:00:00',
               '2018-08-30 18:00:00', '2018-08-31 18:00:00'],
              dtype='datetime64[ns]', name='time', length=577, freq=None)