In [None]:
import os
import math

import numpy as np
import pandas as pd
import seaborn as sns
from fbprophet import Prophet

%matplotlib inline
from matplotlib import pyplot as plt

from sklearn.base import BaseEstimator, RegressorMixin, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import (
    train_test_split, cross_val_score, GridSearchCV, TimeSeriesSplit
)
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error

In [None]:
# Location of the input CSV and of the directory that receives saved figures.
DATASET_PATH = 'datasets/motorbike_ambulance_calls.csv'
FIGURES_PATH = 'figures'

# Register pandas' date converters with matplotlib.
pd.plotting.register_matplotlib_converters()

# Notebook-wide seaborn appearance.
sns.set_style('whitegrid')
sns.set_palette('muted')

# Fixed colour per year, passed as `palette=` to the plots below so that
# 2011 and 2012 are drawn with the same colours in every figure.
year_palette = {2011: 'C0', 2012: 'C3'}

In [None]:
# Load the call log: `date` is parsed into datetimes (month-first), the CSV's
# `index` column becomes the row index, and the `yr` flag is replaced by the
# calendar year it stands for (0 -> 2011, any other value -> 2012).
motorbike_data = (
    pd.read_csv(DATASET_PATH, parse_dates=['date'], dayfirst=False)
    .set_index('index')
    .assign(yr=lambda calls: np.where(calls['yr'].eq(0), 2011, 2012))
)

# Quick sanity check: column dtypes / non-null counts, then the first rows.
motorbike_data.info()
motorbike_data.head()

In [None]:
# Make sure the output directory for saved figures exists.
# `makedirs(..., exist_ok=True)` also creates missing parent directories (in
# case FIGURES_PATH is changed to a nested path) and does not fail when the
# directory already exists, so no separate existence check is needed.
os.makedirs(FIGURES_PATH, exist_ok=True)

In [None]:
# Total number of calls per month, one line per year.
# NOTE: `sns.relplot` returns a seaborn FacetGrid (not a matplotlib Axes);
# the `ax` name is kept unchanged for compatibility with the rest of the notebook.
ax = sns.relplot(
    x='mnth',
    y='cnt',
    hue='yr',
    legend='full',
    kind='line',
    marker='o',
    aspect=2,
    palette=year_palette,
    data=(
        motorbike_data
        # Sum only the call counts. A bare `.sum()` aggregates every column,
        # including the parsed `date` column, which newer pandas releases
        # refuse to sum instead of silently dropping.
        .groupby(['yr', 'mnth'])['cnt']
        .sum()
        .reset_index()
    )
)

# Label the hue legend "Year". Depending on the seaborn version, the variable
# name 'yr' is either the first text entry of the legend or a real legend
# title. Overwriting texts[0] unconditionally would relabel the first year
# entry in the latter case, so check which layout we got.
legend = ax._legend
if legend.texts and legend.texts[0].get_text() == 'yr':
    legend.texts[0].set_text('Year')
else:
    legend.set_title('Year')

ax.set(
    xlabel='Month',
    ylabel='Number of calls',
)

# Leave room above the axes for the figure-level title.
ax.fig.subplots_adjust(top=0.9)
ax.fig.suptitle('Number of calls by month and year')
ax.fig.savefig(os.path.join(FIGURES_PATH, 'calls-by-month.png'))

In [None]:
# Total number of calls per season, one line per year.

# Calendar order of the season labels (same order the season point plot uses).
# Without it the grouped data, and therefore the x axis, is alphabetical.
season_order = ['spring', 'summer', 'autumn', 'winter']

season_totals = (
    motorbike_data
    # Sum only the call counts. A bare `.sum()` aggregates every column,
    # including the parsed `date` column, which newer pandas releases refuse
    # to sum instead of silently dropping.
    .groupby(['yr', 'season'])['cnt']
    .sum()
    .reset_index()
    .assign(
        season=lambda totals: pd.Categorical(
            totals['season'],
            categories=season_order,
            ordered=True
        )
    )
    .sort_values(['yr', 'season'])
)
# Labels outside `season_order` would become NaN and vanish from the plot;
# fail loudly instead of drawing an incomplete figure.
assert season_totals['season'].notna().all(), 'unexpected season label in data'

# NOTE: `sns.relplot` returns a seaborn FacetGrid (not a matplotlib Axes);
# the `ax` name is kept unchanged for compatibility with the rest of the notebook.
ax = sns.relplot(
    x='season',
    y='cnt',
    hue='yr',
    legend='full',
    kind='line',
    marker='o',
    aspect=2,
    palette=year_palette,
    data=season_totals
)

# Label the hue legend "Year". Depending on the seaborn version, the variable
# name 'yr' is either the first text entry of the legend or a real legend
# title. Overwriting texts[0] unconditionally would relabel the first year
# entry in the latter case, so check which layout we got.
legend = ax._legend
if legend.texts and legend.texts[0].get_text() == 'yr':
    legend.texts[0].set_text('Year')
else:
    legend.set_title('Year')

ax.set(
    xlabel='Season',
    ylabel='Number of calls',
)

# Leave room above the axes for the figure-level title.
ax.fig.subplots_adjust(top=0.9)
ax.fig.suptitle('Number of calls by season and year')
# NOTE(review): the file name is misspelled ("sesason"). It is kept as-is
# because other documents may link to it; rename together with any references.
ax.fig.savefig(os.path.join(FIGURES_PATH, 'calls-by-sesason.png'))

In [None]:
# Total number of calls per hour of day, one line per year, with one row of
# panels per `workingday` value.
# NOTE: `sns.relplot` returns a seaborn FacetGrid (not a matplotlib Axes);
# the `ax` name is kept unchanged for compatibility with the rest of the notebook.
ax = sns.relplot(
    x='hr',
    y='cnt',
    hue='yr',
    row='workingday',
    legend='full',
    kind='line',
    marker='o',
    aspect=2,
    palette=year_palette,
    data=(
        motorbike_data
        # Sum only the call counts. A bare `.sum()` aggregates every column,
        # including the parsed `date` column, which newer pandas releases
        # refuse to sum instead of silently dropping.
        .groupby(['yr', 'hr', 'workingday'])['cnt']
        .sum()
        .reset_index()
    )
)

# Label the hue legend "Year". Depending on the seaborn version, the variable
# name 'yr' is either the first text entry of the legend or a real legend
# title. Overwriting texts[0] unconditionally would relabel the first year
# entry in the latter case, so check which layout we got.
legend = ax._legend
if legend.texts and legend.texts[0].get_text() == 'yr':
    legend.texts[0].set_text('Year')
else:
    legend.set_title('Year')

ax.set(
    xlabel='Hour',
    ylabel='Number of calls',
    # One tick per hour of the day (0..23).
    xticks=np.arange(0, 24)
)

# Leave room above the axes for the figure-level title.
ax.fig.subplots_adjust(top=0.9)
ax.fig.suptitle('Number of calls by hour and year')
ax.fig.savefig(os.path.join(FIGURES_PATH, 'calls-by-hour.png'))

In [None]:
# Seaborn point plot of `cnt` per season, drawn on the un-aggregated data with
# one panel (and one palette colour) per year.
g = sns.FacetGrid(data=motorbike_data, col='yr', hue='yr', palette=year_palette, height=8)
g.map(sns.pointplot, 'season', 'cnt', order=['spring', 'summer', 'autumn', 'winter'])

# Left panel: title plus both axis labels.
g.axes[0, 0].set_title('Year 2011')
g.axes[0, 0].set_xlabel('Season')
g.axes[0, 0].set_ylabel('Number of calls')

# Right panel: title and x label only (no y label is set here).
g.axes[0, 1].set_title('Year 2012')
g.axes[0, 1].set_xlabel('Season')

# Leave room above the panels for the figure-level title, then save.
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Number of calls by season and year')
g.fig.savefig(os.path.join(FIGURES_PATH, 'calls-by-season-significance.png'))

In [None]:
# Seaborn point plot of `cnt` per month, drawn on the un-aggregated data with
# one panel (and one palette colour) per year.
g = sns.FacetGrid(data=motorbike_data, col='yr', hue='yr', palette=year_palette, height=8)
# Months are plotted in calendar order, 1 through 12.
g.map(sns.pointplot, 'mnth', 'cnt', order=np.arange(1, 13))

# Left panel: title plus both axis labels.
g.axes[0, 0].set_title('Year 2011')
g.axes[0, 0].set_xlabel('Month')
g.axes[0, 0].set_ylabel('Number of calls')

# Right panel: title and x label only (no y label is set here).
g.axes[0, 1].set_title('Year 2012')
g.axes[0, 1].set_xlabel('Month')

# Leave room above the panels for the figure-level title, then save.
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Number of calls by month and year')
g.fig.savefig(os.path.join(FIGURES_PATH, 'calls-by-month-significance.png'))