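This notebook shows how to score an ensemble of predictions stored in a pandas DataFrame with a probabilistic metric (CRPS) from xskillscore, by converting the data to an `xarray.Dataset`.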

In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import xskillscore as xs

Use the same data as in 01_Deterministic.py

In [2]:
# Seed the global NumPy RNG so that "Restart Kernel -> Run All" reproduces the
# same synthetic sales data (and the same noise drawn by later cells).
np.random.seed(42)

stores = np.arange(4)
skus = np.arange(3)
dates = pd.date_range("1/1/2020", "1/5/2020", freq="D")

# One record per (date, store, sku) combination, iterated date-major so the
# row order matches the nested-loop construction.
rows = [
    {
        "DATE": date,
        "STORE": store,
        "SKU": sku,
        "QUANTITY_SOLD": np.random.randint(10),
    }
    for date in dates
    for store in stores
    for sku in skus
]

# Build the target frame in one chain (no in-place mutation): the quantity
# column is renamed to `y` and the three keys become the MultiIndex.
df = (
    pd.DataFrame(rows)
    .rename(columns={"QUANTITY_SOLD": "y"})
    .set_index(["DATE", "STORE", "SKU"])
)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,y
DATE,STORE,SKU,Unnamed: 3_level_1
2020-01-01,0,0,6
2020-01-01,0,1,2
2020-01-01,0,2,4
2020-01-01,1,0,4
2020-01-01,1,1,3


Make multiple predictions (ensemble members) by perturbing `y` with random multiplicative noise...

Stack the predictions into a single DataFrame, using a `member` column to label which ensemble member each row belongs to...

In [3]:
# Build six ensemble members. Each member is the target frame plus a `member`
# label and a noisy forecast `yhat = int(y + y * noise)`, noise ~ U(-1, 1).
# The frames are collected in a list and concatenated once: `DataFrame.append`
# was removed in pandas 2.0, and growing a frame inside a loop is quadratic.
member_frames = []
for member in range(1, 7):
    noise = np.random.uniform(-1, 1, size=len(df))
    member_frames.append(
        df.assign(
            member=member,
            yhat=(df["y"] + df["y"] * noise).astype(int),
        )
    )
df_yhat = pd.concat(member_frames)
df_yhat

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,y,member,yhat
DATE,STORE,SKU,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01,0,0,6,1,3
2020-01-01,0,1,2,1,2
2020-01-01,0,2,4,1,4
2020-01-01,1,0,4,1,1
2020-01-01,1,1,3,1,2
...,...,...,...,...,...
2020-01-05,2,1,7,6,10
2020-01-05,2,2,1,6,0
2020-01-05,3,0,8,6,2
2020-01-05,3,1,4,6,7


Drop the `y` column and add `member` to the index...

In [4]:
# Keep only the forecasts and append `member` to the existing index levels.
df_yhat = df_yhat.drop(columns="y").set_index("member", append=True)
df_yhat

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,yhat
DATE,STORE,SKU,member,Unnamed: 4_level_1
2020-01-01,0,0,1,3
2020-01-01,0,1,1,2
2020-01-01,0,2,1,4
2020-01-01,1,0,1,1
2020-01-01,1,1,1,2
...,...,...,...,...
2020-01-05,2,1,6,10
2020-01-05,2,2,6,0
2020-01-05,3,0,6,2
2020-01-05,3,1,6,7


Convert the target dataframe to an xarray.Dataset... 

In [5]:
# Turn the MultiIndexed target frame into an xarray.Dataset holding `y`.
ds = xr.Dataset.from_dataframe(df)
ds

Add the predicted dataframe as an `xarray.DataArray` to the `xarray.Dataset`

In [6]:
# Convert the forecasts to xarray and attach the `yhat` variable to the
# dataset alongside `y`.
ds = ds.assign(yhat=df_yhat.to_xarray()["yhat"])
ds

# Using xskillscore - CRPS

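What is CRPS: the Continuous Ranked Probability Score measures how well a forecast distribution (here, the spread of ensemble members) matches a single observed value. Lower is better, and for a single deterministic forecast it reduces to the absolute error...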

Its usage in Kaggle competitions...

Apply CRPS over the member dimension... This will return the CRPS at the date, store and SKU level

In [8]:
# Score the `yhat` ensemble against `y` through the xskillscore accessor.
# NOTE(review): newer xskillscore releases appear to name the ensemble axis
# `member_dim` and to use `dim` for averaging the result -- confirm against
# the installed version before upgrading.
crps_by_cell = ds.xs.crps_ensemble('y', 'yhat', dim='member')
print(crps_by_cell)

<xarray.DataArray (DATE: 5, STORE: 4, SKU: 3)>
array([[[1.25      , 0.25      , 0.58333333],
        [0.41666667, 0.58333333, 1.11111111],
        [0.25      , 0.83333333, 0.02777778],
        [1.88888889, 0.91666667, 1.13888889]],

       [[0.94444444, 1.05555556, 0.41666667],
        [0.47222222, 0.27777778, 2.16666667],
        [1.33333333, 1.66666667, 0.88888889],
        [1.22222222, 0.        , 1.63888889]],

       [[0.97222222, 2.        , 2.16666667],
        [0.72222222, 1.11111111, 1.88888889],
        [0.        , 1.97222222, 0.44444444],
        [0.80555556, 1.47222222, 1.77777778]],

       [[2.11111111, 0.        , 1.33333333],
        [0.91666667, 3.72222222, 1.36111111],
        [0.88888889, 1.55555556, 0.19444444],
        [0.91666667, 0.66666667, 0.25      ]],

       [[1.38888889, 0.75      , 0.        ],
        [0.52777778, 0.36111111, 0.44444444],
        [2.13888889, 1.05555556, 0.25      ],
        [1.80555556, 0.83333333, 1.02777778]]])
Coordinates:
  * DATE  

To return an overall CRPS, it is recommended to average over all dimensions before using `crps_ensemble`...

In [10]:
# Average the observations and each member's forecasts over DATE, STORE and
# SKU, then score the averaged ensemble against the averaged observation.
reduce_dims = ['DATE', 'STORE', 'SKU']
y = ds['y'].mean(dim=reduce_dims)
yhat = ds['yhat'].mean(dim=reduce_dims)
crps_overall = xs.crps_ensemble(y, yhat, dim='member')

# Same print order as before: observation mean, member means, overall CRPS.
for result in (y, yhat, crps_overall):
    print(result)

<xarray.DataArray 'y' ()>
array(4.65)
<xarray.DataArray 'yhat' (member: 6)>
array([4.1       , 4.36666667, 4.2       , 4.06666667, 4.33333333,
       3.83333333])
Coordinates:
  * member   (member) int64 1 2 3 4 5 6
<xarray.DataArray ()>
array(0.40092593)
