# Bike-sharing forecasting

In the previous In this tutorial we're going to forecast the number of bikes in 5 bike stations from the city of Toulouse. We'll do so by building a simple model step by step. The dataset contains 182,470 observations. Let's first take a peak at the data.

In [1]:
from pprint import pprint
from river import datasets

dataset = datasets.Bikes()

for x, y in dataset:
    pprint(x)
    print(f"Number of available bikes: {y}")
    break

{'clouds': 75,
 'description': 'light rain',
 'humidity': 81,
 'moment': datetime.datetime(2016, 4, 1, 0, 0, 7),
 'pressure': 1017.0,
 'station': 'metro-canal-du-midi',
 'temperature': 6.54,
 'wind': 9.3}
Number of available bikes: 1


In [2]:
from deep_river.regression import RollingRegressor
from river import feature_extraction, preprocessing, stats, compose
import torch


def get_hour(x):
    x["hour"] = x["moment"].hour
    return x




class RnnModule(torch.nn.Module):

    def __init__(self, n_features, hidden_size):
        super().__init__()
        self.n_features = n_features
        self.rnn = torch.nn.RNN(
            input_size=n_features, hidden_size=hidden_size, num_layers=1
        )
        self.fc = torch.nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, X, **kwargs):
        output, hn = self.rnn(X)  # lstm with input, hidden, and internal state
        return self.fc(output[-1, :])


model = compose.Select("clouds", "humidity", "pressure", "temperature", "wind")
model += get_hour | feature_extraction.TargetAgg(
    by=["station", "hour"], how=stats.Mean()
)
model |= preprocessing.StandardScaler()
model |= RollingRegressor(
    module=RnnModule,
    loss_fn="mse",
    optimizer_fn="sgd",
    lr=1e-2,
    hidden_size=20,
    window_size=32,
)

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Forecasting and plotting with uncertainty
y_true = []
y_pred = []
y_pred_lower = []
y_pred_upper = []
for i, (x, y) in enumerate(dataset.take(50000)):
    model.learn_one(x, y)

for i, (x, y) in enumerate(dataset.take(50000)):
    if i % 5000 == 0:
        print(f"Processing {i}th observation")
        print(f"Sample input: {x}, {y}")
        
    # Forecasting
    y_hat = model.forecast(xs=None, horizon=10)  # Pass the current observation as a list of dicts
    
    # Collecting true and predicted values
    y_true.append(y)
    y_pred.append(y_hat[0])  # Use the first prediction for plotting
    
    # Uncertainty estimation
    uncertainty = 0.1 * np.array(y_hat)
    y_pred_lower.append(np.array(y_hat) - uncertainty)
    y_pred_upper.append(np.array(y_hat) + uncertainty)
    
    # Updating the model with the true value
    model.learn_one(x, y)

# Convert lists to arrays for plotting
y_true = np.array(y_true)
y_pred = np.array(y_pred)
y_pred_lower = np.array(y_pred_lower)
y_pred_upper = np.array(y_pred_upper)

# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(y_true, label='True values', alpha=0.7)
plt.plot(y_pred, label='Predicted values', alpha=0.7)
plt.fill_between(range(len(y_pred)), y_pred_lower[:, 0], y_pred_upper[:, 0], color='gray', alpha=0.2, label='Uncertainty')
plt.xlabel('Observation')
plt.ylabel('Number of Bikes')
plt.legend()
plt.show()


Processing 0th observation
Sample input: {'moment': datetime.datetime(2016, 4, 1, 0, 0, 7), 'station': 'metro-canal-du-midi', 'clouds': 75, 'description': 'light rain', 'humidity': 81, 'pressure': 1017.0, 'temperature': 6.54, 'wind': 9.3}, 1


AttributeError: 'NoneType' object has no attribute 'get'