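# Speeding things up with caching

This notebook times a pass over the `ToulouseBikes` dataset, and then a progressive validation run, first without and then with `creme.stream.Cache`.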

In [79]:
%%time

# Baseline: iterate once over the dataset with no caching, doing nothing with
# the items, so that `%%time` measures only the cost of producing the stream.
from creme import datasets

X_y = datasets.ToulouseBikes()

for x, y in X_y:
    pass

CPU times: user 2.56 s, sys: 11.8 ms, total: 2.57 s
Wall time: 2.57 s


In [80]:
%%time

# First pass through a cache located in the current directory, under the key
# 'bikes'. In the recorded run this is slower than the uncached loop
# (4.07 s vs 2.57 s wall time) -- presumably because the stream is written
# out while it is consumed; confirm against the creme documentation.
from creme import stream

cache = stream.Cache(directory='.')

for x, y in cache(X_y, key='bikes'):
    pass

CPU times: user 3.71 s, sys: 129 ms, total: 3.83 s
Wall time: 4.07 s


In [81]:
%%time

# Same loop again with the same directory and key. The recorded run drops to
# 621 ms wall time, which is consistent with the items being read back from
# the 'bikes' cache entry instead of from the original dataset.
# NOTE(review): the import and the Cache object are repeated from the
# previous cell; the loop only needs `cache` and `X_y` to exist.
from creme import stream

cache = stream.Cache(directory='.')

for x, y in cache(X_y, key='bikes'):
    pass

CPU times: user 586 ms, sys: 36.1 ms, total: 622 ms
Wall time: 621 ms


In [82]:
# Display the cache object: the recorded output shows the directory ('.')
# followed by the 'bikes' entry and its size (24.5MiB).
cache

.
bikes - 24.5MiB

In [83]:
# Drop the 'bikes' entry, then display the cache again; in the recorded
# output only the directory line ('.') is left.
cache.clear('bikes')
cache

.

In [48]:
import datetime as dt
from creme import compose
from creme import datasets
from creme import feature_extraction
from creme import linear_model
from creme import metrics
from creme import preprocessing
from creme import stats

# Dataset handle used by the progressive validation cells further down.
X_y = datasets.ToulouseBikes()

def add_hour(x):
    """Return a copy of ``x`` with an extra ``'hour'`` feature.

    Parameters:
        x (dict): Features of one observation. Must contain a ``'moment'``
            entry exposing an ``.hour`` attribute (e.g. a ``datetime``).

    Returns:
        dict: A new dict holding every entry of ``x`` plus ``'hour'``, set to
            ``x['moment'].hour``. An existing ``'hour'`` entry is overwritten.

    Raises:
        KeyError: If ``x`` has no ``'moment'`` entry.

    """
    # Build a new dict instead of writing into `x`, so the caller's dict does
    # not pick up an extra key as a side effect of feature extraction.
    return {**x, 'hour': x['moment'].hour}

def make_model():
    """Build a new instance of the regression pipeline used in the benchmarks.

    The pipeline combines three feature sources -- the whitelisted weather
    fields, a target aggregate keyed by station and hour (the hour being added
    by ``add_hour``), and an exponentially weighted target aggregate keyed by
    station -- and then feeds them through a standard scaler into a linear
    regression.

    Returns:
        The composed model; a fresh object is built on every call.

    """
    weather_fields = ('clouds', 'humidity', 'pressure', 'temperature', 'wind')

    # Mean of the target per (station, hour); `add_hour` supplies the 'hour' key.
    station_hour_mean = (
        add_hour |
        feature_extraction.TargetAgg(by=['station', 'hour'], how=stats.Mean())
    )
    # Exponentially weighted mean of the target per station.
    station_ew_mean = feature_extraction.TargetAgg(by='station', how=stats.EWMean(0.5))

    # `+=` adds a parallel feature source, `|=` appends a sequential step.
    pipeline = compose.Whitelister(*weather_fields)
    pipeline += station_hour_mean
    pipeline += station_ew_mean
    pipeline |= preprocessing.StandardScaler()
    pipeline |= linear_model.LinearRegression()
    return pipeline

In [50]:
%%time

# Baseline model benchmark: progressive validation straight on the dataset,
# without any caching. A fresh model comes from make_model(), the score is
# the mean absolute error, and a progress line is printed every 30,000
# observations (see the recorded output).
# NOTE(review): `on='moment'` with a 30-minute `delay` presumably means each
# target only becomes available for learning 30 minutes after the
# observation's 'moment' timestamp -- confirm against the creme docs.
from creme import model_selection

model_selection.progressive_val_score(
    X_y=X_y,
    model=make_model(),
    metric=metrics.MAE(),
    on='moment',
    delay=dt.timedelta(minutes=30),
    print_every=30_000
)

[30,000] MAE: 2.230049
[60,000] MAE: 2.290409
[90,000] MAE: 2.334638
[120,000] MAE: 2.315149
[150,000] MAE: 2.319982
[180,000] MAE: 2.335385
CPU times: user 22.6 s, sys: 59.7 ms, total: 22.6 s
Wall time: 22.7 s


MAE: 2.338837

In [53]:
%%time

# Same benchmark, but the dataset is wrapped in a cache under the key
# 'bikes'. The recorded MAE values are identical to the uncached run; the
# wall time is higher (25.1 s vs 22.7 s), as with the first cached pass of
# the plain iteration loop.
# NOTE(review): this Cache is created without `directory`, unlike the
# `directory='.'` cells at the top of the notebook, so the storage location
# is whatever creme uses by default -- not visible from this notebook.
from creme import stream

cache = stream.Cache()

model_selection.progressive_val_score(
    X_y=cache(X_y, key='bikes'),
    model=make_model(),
    metric=metrics.MAE(),
    on='moment',
    delay=dt.timedelta(minutes=30),
    print_every=30_000
)

[30,000] MAE: 2.230049
[60,000] MAE: 2.290409
[90,000] MAE: 2.334638
[120,000] MAE: 2.315149
[150,000] MAE: 2.319982
[180,000] MAE: 2.335385
CPU times: user 24.8 s, sys: 273 ms, total: 25.1 s
Wall time: 25.1 s


MAE: 2.338837

In [54]:
%%time

# Second cached run, reusing the `cache` object from the previous cell with
# the same key. The recorded MAE values are unchanged and the wall time drops
# to 18.1 s (22.7 s uncached, 25.1 s on the first cached pass).
model_selection.progressive_val_score(
    X_y=cache(X_y, key='bikes'),
    model=make_model(),
    metric=metrics.MAE(),
    on='moment',
    delay=dt.timedelta(minutes=30),
    print_every=30_000
)

[30,000] MAE: 2.230049
[60,000] MAE: 2.290409
[90,000] MAE: 2.334638
[120,000] MAE: 2.315149
[150,000] MAE: 2.319982
[180,000] MAE: 2.335385
CPU times: user 18 s, sys: 86.4 ms, total: 18.1 s
Wall time: 18.1 s


MAE: 2.338837