# Regression Performance Dashboard for Bike Sharing Dataset

In [1]:
import pandas as pd
import requests
import zipfile
import io

from sklearn.ensemble import RandomForestRegressor

from evidently import ColumnMapping
from evidently.dashboard import Dashboard
from evidently.tabs import RegressionPerformanceTab

from evidently.model_profile import Profile
from evidently.profile_sections import RegressionPerformanceProfileSection

## Bike Sharing Demand Data

More information about the dataset can be found in UCI machine learning repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset

Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg

In [2]:
# Download the UCI Bike Sharing archive and parse the daily table ("day.csv")
# directly from memory, without writing anything to disk.
response = requests.get(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip",
    timeout=60,  # do not let a stalled connection hang the notebook forever
)
# Fail loudly on an HTTP error instead of passing an error page to zipfile,
# which would otherwise surface as a confusing BadZipFile exception.
response.raise_for_status()
content = response.content

# Both the archive and the member file handle are closed when the block exits.
with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open("day.csv") as day_csv:
    # 'dteday' is parsed as a date and used as the index of the resulting frame.
    raw_data = pd.read_csv(day_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')

In [3]:
# Split by row position: the first 120 rows form the reference set and the
# following 30 rows play the role of "production" (current) data.
# .copy() makes both frames independent of raw_data, so adding columns to them
# later does not raise pandas' SettingWithCopyWarning.
ref_data = raw_data.iloc[:120].copy()
prod_data = raw_data.iloc[120:150].copy()

In [4]:
# Preview the reference frame; as the cell's last expression it is rendered as a table.
ref_data

Unnamed: 0_level_0,instant,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
dteday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2011-01-01,1,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
2011-01-02,2,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2011-01-03,3,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
2011-01-04,4,1,0,1,0,2,1,1,0.200000,0.212122,0.590435,0.160296,108,1454,1562
2011-01-05,5,1,0,1,0,3,1,1,0.226957,0.229270,0.436957,0.186900,82,1518,1600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011-04-26,116,2,0,4,0,2,1,1,0.631667,0.594083,0.729167,0.326500,678,3722,4400
2011-04-27,117,2,0,4,0,3,1,2,0.620000,0.575142,0.835417,0.312200,547,3325,3872
2011-04-28,118,2,0,4,0,4,1,2,0.617500,0.578929,0.700833,0.320908,569,3489,4058
2011-04-29,119,2,0,4,0,5,1,1,0.510000,0.497463,0.457083,0.240063,878,3717,4595


## Regression Model

### Model training

In [5]:
# Column roles: the value to predict, the column that will hold model output,
# and the name of the date column.
target, prediction, datetime = 'cnt', 'prediction', 'dteday'

# Model inputs, grouped by how they are declared in the column mapping.
categorical_features = ['season', 'holiday', 'workingday', 'weathersit']
numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'weekday']

# Full input list: numerical columns first, then categorical ones.
features = [*numerical_features, *categorical_features]

In [6]:
# Random forest with default hyper-parameters; the seed is fixed so that
# re-running the notebook yields the same model.
model = RandomForestRegressor(random_state=0)

In [7]:
# Train on the reference period only; the production slice is never seen during fitting.
model.fit(X=ref_data[features], y=ref_data[target])

RandomForestRegressor(random_state=0)

In [8]:
# Attach model output to both frames under the column name stored in `prediction`
# (the same name is handed to the column mapping later on).
# DataFrame.assign returns a new frame instead of writing into a slice of raw_data,
# which avoids SettingWithCopyWarning and keeps the cell safe to re-run.
ref_data = ref_data.assign(**{prediction: model.predict(ref_data[features])})
prod_data = prod_data.assign(**{prediction: model.predict(prod_data[features])})

## Regression Performance Report

### verbose_level
Usage:
- verbose_level=0 for the short reports
- verbose_level=1 for the full reports

In [13]:
# Tell Evidently which columns hold the target, the prediction and each feature group.
column_mapping = ColumnMapping(
    target=target,
    prediction=prediction,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
)

In [16]:
# Dashboard with a single regression-performance tab in short-report mode (verbose_level=0).
dashboard = Dashboard(
    tabs=[
        RegressionPerformanceTab(verbose_level=0),
    ],
)

In [17]:
# Compute the report: reference frame first, production frame second.
dashboard.calculate(
    ref_data,
    prod_data,
    column_mapping=column_mapping,
)

In [18]:
# Display the dashboard computed in the previous cell.
dashboard.show()

### include_widgets
Notes: 
- include_widgets overwrites the verbose_level parameter
- to quickly see the list of available widgets, call the list_widgets() method of the tab

Usage:
- include_widgets=["Regression Model Performance Report.", "Reference: Predicted vs Actual"]

In [19]:
# List the widget titles that can be passed to include_widgets.
RegressionPerformanceTab.list_widgets()

['Regression Model Performance Report.',
 'Reference: Model Quality (+/- std)',
 'Current: Model Quality (+/- std)',
 'Reference: Predicted vs Actual',
 'Current: Predicted vs Actual',
 'Reference: Predicted vs Actual in Time',
 'Current: Predicted vs Actual in Time',
 'Reference: Error (Predicted - Actual)',
 'Current: Error (Predicted - Actual)',
 'Reference: Absolute Percentage Error',
 'Current: Absolute Percentage Error',
 'Reference: Error Distribution',
 'Current: Error Distribution',
 'Reference: Error Normality',
 'Current: Error Normality',
 'Reference: Mean Error per Group (+/- std)',
 'Current: Mean Error per Group (+/- std)',
 'Reference: Predicted vs Actual per Group',
 'Current: Predicted vs Actual per Group',
 'Error Bias: Mean/Most Common Feature Value per Group']

In [29]:
# Dashboard restricted to three hand-picked widgets, identified by their titles.
dashboard = Dashboard(
    tabs=[
        RegressionPerformanceTab(
            verbose_level=0,
            include_widgets=[
                "Regression Model Performance Report.",
                "Reference: Error Distribution",
                "Current: Error Distribution",
            ],
        ),
    ],
)

In [30]:
# Recompute with the widget-restricted dashboard: reference frame first, production frame second.
dashboard.calculate(
    ref_data,
    prod_data,
    column_mapping=column_mapping,
)

In [31]:
# Display the dashboard built from the selected widgets.
dashboard.show()

In [93]:
# Optional: uncomment the next line to export the dashboard to a standalone HTML file.
# dashboard.save('bike_sharing_demand_model_perfomance.html')