# Widget to predict citybike availability

This is a proof of concept for a tool that predicts bike availability at citybike stations. The widget itself is at the end of the notebook (tested on a MacBook Air). Only the Kaisaniemi station is represented at the moment, and there are three sliders for choosing the weather conditions (rain, temperature and wind). The model is based on linear regression, so the effect of each variable is straightforward and also corresponds to intuition.

In [1]:
import numpy as np
import pandas as pd

import plotly.graph_objects as go
from ipywidgets import widgets

In [2]:
# Load the combined Kaisaniemi dataset from the working directory.
csv_path = 'kaisaniemi_summer_combined.csv'
data = pd.read_csv(csv_path)

In [3]:
# Sanity-check the load: print (rows, columns), then preview the first rows.
print(data.shape)
data.head()

(4689, 22)


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Päivämäärä,Kaisaniemi/Eläintarhanlahti,weekday,day,year,hour,month,Clouds,...,RainmmH,Snowcm,ToC,DewToC,Visibilitym,Winddirdeg,Gustms,Windms,bikesAvailable,spacesAvailable
0,0,0,ke 3 huhti 2019 00:00,5,ke,3,2019,0,4,0,...,0.0,0,3.4,-3.5,50000,172,5.02,3.05,12.916667,9.083333
1,1,1,ke 3 huhti 2019 01:00,2,ke,3,2019,1,4,0,...,0.0,0,2.6,-3.2,50000,174,4.01,2.08,11.5,10.666667
2,2,2,ke 3 huhti 2019 02:00,1,ke,3,2019,2,4,0,...,0.0,0,2.5,-2.8,50000,157,4.02,2.08,17.0,6.0
3,3,3,ke 3 huhti 2019 13:00,55,ke,3,2019,13,4,0,...,0.0,0,8.0,-3.8,46450,173,8.06,4.07,17.272727,4.727273
4,4,4,ke 3 huhti 2019 14:00,51,ke,3,2019,14,4,0,...,0.0,0,9.2,-3.5,35530,169,8.04,4.03,14.666667,7.333333


In [4]:
# Keep only the hour, the three weather inputs and the two availability
# columns; everything downstream works on this narrower frame.
model_columns = [
    'hour',
    'RainmmH',
    'ToC',
    'Windms',
    'bikesAvailable',
    'spacesAvailable',
]
hourlycity = data.loc[:, model_columns]

In [5]:
# Preview one randomly chosen row. The fixed seed keeps the preview stable
# across "Restart Kernel -> Run All" instead of showing a different row each run.
hourlycity.sample(random_state=44)

Unnamed: 0,hour,RainmmH,ToC,Windms,bikesAvailable,spacesAvailable
1526,11,0.0,14.9,3.05,7.833333,14.166667


In [6]:
# Mean bikes / free spaces per hour of day.
# Result: avgs[hour] == (mean bikesAvailable, mean spacesAvailable) for every
# hour 0-23, which is the shape the plotting cells rely on (avgs[i][0] is the
# bike mean).
# A single groupby replaces 24 boolean-mask passes. mean() also ignores
# missing readings consistently, whereas sum() / len() would count NaN rows
# in the denominator only.
hourly_means = (
    hourlycity
    .groupby('hour')[['bikesAvailable', 'spacesAvailable']]
    .mean()
    .reindex(range(24))  # keep a (NaN) entry for hours with no observations
)
avgs = {
    hour: (
        hourly_means.at[hour, 'bikesAvailable'],
        hourly_means.at[hour, 'spacesAvailable'],
    )
    for hour in range(24)
}

In [7]:
# Inspect the per-hour averages: {hour: (mean bikes, mean spaces)}.
avgs

{0: (8.651897765534128, 16.123010407101315),
 1: (8.145163758800122, 16.487794612794612),
 2: (7.940707070707071, 16.642587412587414),
 3: (7.545631182289212, 16.937490186842517),
 4: (7.055672105672105, 17.275097125097123),
 5: (7.010330578512397, 17.283462971965648),
 6: (7.181843818899656, 17.05631184945906),
 7: (7.2531179138322, 16.87750206143063),
 8: (5.557011440940013, 17.95860389610389),
 9: (5.184799893983568, 17.91604271578761),
 10: (6.050372960372961, 17.022198912198913),
 11: (6.9673138086403394, 16.141761639720823),
 12: (7.9328231292517, 15.225262832405688),
 13: (9.32918012613444, 13.940170742962621),
 14: (10.182978938160286, 13.199734203620215),
 15: (11.869554180548944, 11.66161351737268),
 16: (17.33733491639251, 7.70944439504649),
 17: (21.95986013986014, 6.306930846930848),
 18: (21.417965367965365, 7.595779220779219),
 19: (19.17146853146853, 9.247801087801088),
 20: (16.197893547512834, 11.442066971381692),
 21: (13.04179797979798, 13.383891414141415),
 22: (10

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [9]:
# Model inputs: hour of day plus the three weather variables.
feature_columns = ['hour', 'RainmmH', 'ToC', 'Windms']
X = hourlycity.loc[:, feature_columns]
# Regression target: number of bikes available at the station.
y = hourlycity.loc[:, 'bikesAvailable']

In [10]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=44,
)

In [11]:
# Shapes of the four split parts, in the order (X_train, X_test, y_train, y_test).
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((3751, 4), (938, 4), (3751,), (938,))

In [12]:
# Linear regression model created with scikit-learn's default settings.
regr = LinearRegression()

In [13]:
# Fit on the training split only; the test split stays unseen.
regr.fit(X=X_train, y=y_train)

LinearRegression()

In [14]:
# Predict bike counts for the held-out rows and wrap them in a one-column
# frame (fresh 0..n-1 index, not aligned with y_test's index) for display.
test_predictions = regr.predict(X_test)
y_pred = pd.DataFrame({'Predicted': test_predictions})
y_pred.head()

Unnamed: 0,Predicted
0,10.262528
1,14.613328
2,16.656173
3,12.08313
4,9.997908


In [15]:
# Starting slider positions; the same values define the baseline weather
# used for the model's reference predictions further down in this cell.
rain_init, temp_init, wind_init = 0, 15, 1

# Slider limits are taken from the observed data wherever a bound is not fixed at 0.
rain_max = hourlycity['RainmmH'].max()
temp_min = hourlycity['ToC'].min()
temp_max = hourlycity['ToC'].max()
wind_max = hourlycity['Windms'].max()

rain = widgets.FloatSlider(
    value=rain_init,
    min=0,
    max=rain_max,
    step=0.1,
    description='Rain:',
    continuous_update=True,
)

# NOTE(review): the ToC / Windms extremes are floats while IntSlider is
# integer-valued, so the bounds are presumably coerced to whole numbers by the
# widget -- confirm this is the intended range.
temp = widgets.IntSlider(
    value=temp_init,
    min=temp_min,
    max=temp_max,
    step=1,
    description='Temperature:',
    continuous_update=True,
)

wind = widgets.IntSlider(
    value=wind_init,
    min=0,
    max=wind_max,
    step=1,
    description='Wind:',
    continuous_update=True,
)

# Lay the three sliders out side by side.
container = widgets.HBox(children=[rain, temp, wind])

# Baseline model predictions for every hour of the day at the initial slider
# settings. The slider callback subtracts fresh predictions from these to get
# the weather-induced shift.
# Column names and order match the frame the model was fitted on, so one
# batched predict() call replaces 24 single-row calls on bare arrays (and
# avoids the feature-name mismatch warning newer scikit-learn versions emit).
baseline_features = pd.DataFrame({
    'hour': np.arange(24),
    'RainmmH': rain_init,
    'ToC': temp_init,
    'Windms': wind_init,
})
# Kept as a list of 24 length-1 arrays: downstream code indexes initl[i] and
# later takes element [0] of the derived values.
initl = list(regr.predict(baseline_features).reshape(-1, 1))
    
# Initial trace: the observed per-hour bike averages (first element of each
# avgs tuple), plotted against the implicit 0..23 position index.
initr = go.Scatter(
    y=[bikes_mean for bikes_mean, _ in avgs.values()],
    name='Bikes available',
)

# Title, line colour and axis labels of the interactive figure.
figure_layout = go.Layout(
    title=dict(text='Citybikes available (Kaisaniemi)'),
    colorway=['orange'],
    xaxis=go.layout.XAxis(title=dict(text='Hour of Day')),
    yaxis=go.layout.YAxis(title=dict(text='Bikes')),
)

g = go.FigureWidget(data=[initr], layout=figure_layout)

In [16]:
def update_vals():
    """Return the hourly bike curve adjusted for the current slider weather.

    For each hour 0-23 the model's prediction at the current slider values is
    compared with its baseline prediction in ``initl``; that difference is
    applied to the observed hourly average ``avgs[hour][0]``.

    Returns:
        list: 24 length-1 NumPy arrays (one per hour), so callers can keep
        reading each value with ``elem[0]``.
    """
    hours = range(24)
    # One batched predict() instead of 24 single-row calls: this runs on every
    # slider movement, and the named columns match the frame the model was
    # fitted on.
    features = pd.DataFrame({
        'hour': np.arange(24),
        'RainmmH': rain.value,
        'ToC': temp.value,
        'Windms': wind.value,
    })
    predicted = regr.predict(features)

    observed = np.array([avgs[hour][0] for hour in hours])
    baseline = np.ravel(initl)  # flattens the per-hour length-1 arrays
    shift = baseline - predicted
    adjusted = observed - shift
    return list(adjusted.reshape(-1, 1))
        

def response(change):
    """Slider callback: recompute the curve and redraw the figure.

    ``change`` is the event payload passed in by ``observe``; it is not read
    here because ``update_vals`` fetches the current slider values itself.
    """
    updated = update_vals()
    bike_counts = [point[0] for point in updated]

    # Same title, colour and axis labels as the figure was created with.
    refreshed_layout = go.Layout(
        title=dict(text='Citybikes available (Kaisaniemi)'),
        colorway=['orange'],
        xaxis=go.layout.XAxis(title=dict(text='Hour of Day')),
        yaxis=go.layout.YAxis(title=dict(text='Bikes')),
    )

    # Apply the data and layout changes together as one batched update.
    with g.batch_update():
        g.data[0].y = bike_counts
        g.layout = refreshed_layout
        # Re-pin the y-axis, since the layout was just replaced.
        g.update_yaxes(range=[0, 30], fixedrange=True)

# Redraw the figure whenever any of the three sliders changes its value.
for slider in (rain, temp, wind):
    slider.observe(response, names='value')

In [17]:
# Pin the y-axis to 0-30 bikes and disable zooming on it, so curves for
# different weather settings stay visually comparable.
g.update_yaxes(range=[0, 30], fixedrange=True)
# Sliders on top, interactive figure underneath.
widgets.VBox(children=[container, g])

VBox(children=(HBox(children=(FloatSlider(value=0.0, description='Rain:', max=19.06), IntSlider(value=15, desc…