# Introduction

<div class="alert alert-block alert-warning">
<font color=black><br>

**What?** Interactive plotting within IPython

<br></font>
</div>

# Import modules

In [1]:
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.ensemble import GradientBoostingRegressor
import glob
from ipywidgets import interact, widgets
import plotly.graph_objs as go
from plotly.offline import iplot, plot, init_notebook_mode
init_notebook_mode(connected=True)
import plotly_express as px
import cufflinks as cf
#cf.go_offline(connected=True)


# Check package versions

In [2]:
# Report the installed versions of the core packages used in this notebook
# (via the `watermark` IPython extension) so the environment can be reproduced.
import numpy,pandas,plotly
%load_ext watermark
%watermark -p numpy,pandas,plotly

numpy : 1.21.2
pandas: 1.2.4
plotly: 4.14.3



# Dataset

<div class="alert alert-block alert-info">
<font color=black><br>

- The energy data is measured every 15 minutes and includes 3 weather variables related to energy consumption: temperature, irradiance, and relative humidity. 
- This is the data from the DrivenData Energy Forecasting competition.
- I've cleaned up the datasets and extracted 8 features that allow us to predict the energy consumption fairly accurately.
- Dataset reference: https://www.drivendata.org/competitions/51/electricity-prediction-machine-learning/

<br></font>
</div>

In [3]:
# Collect the per-building energy CSV files from the dataset folder.
# NOTE(review): glob.glob does not guarantee a sorted result (the recorded
# output of this cell is not in alphabetical order), so positional indexing
# into `files` may select a different building on another machine.
files = glob.glob('../../DATASETS/*_energy_data.csv')
files

['../../DATASETS/building_1_energy_data.csv',
 '../../DATASETS/building_2_energy_data.csv',
 '../../DATASETS/building_4_energy_data.csv',
 '../../DATASETS/building_3_energy_data.csv']

In [4]:
# Load one building's readings with the timestamp column parsed as the index,
# order the rows chronologically, and rename the target column "energy" to
# "actual".
# NOTE(review): `files[2]` is a positional pick, so the building that gets
# loaded depends on the order in which `files` was built.
data = (
    pd.read_csv(files[2], parse_dates=["timestamp"], index_col="timestamp")
    .sort_index()
    .rename(columns={"energy": "actual"})
)

In [5]:
# Preview the first five rows (the default row count of `head`).
data.head()

Unnamed: 0_level_0,actual,business_day,temperature,irradiance,relative_humidity,day_of_week,time_of_day,day_of_year,year
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-01-01 05:15:00+00:00,4.24,1,5.102,0.0,63.855,3,5.25,1,2015
2015-01-01 05:30:00+00:00,2.48,1,5.097,-0.0,64.385,3,5.5,1,2015
2015-01-01 05:45:00+00:00,2.0,1,5.111,0.0,64.791,3,5.75,1,2015
2015-01-01 06:00:00+00:00,2.0,1,5.044,-0.0,65.336,3,6.0,1,2015
2015-01-01 06:15:00+00:00,1.92,1,4.999,0.0,65.323,3,6.25,1,2015


# Interactive plotting

In [6]:
# Restrict plotting to the rows of calendar year 2015; work on a copy so the
# plotting subset is independent of `data`.
data_to_plot = data.loc["2015"].copy()


def plot_timescale(timescale, selection, theme):
    """
    Plot the energy consumption for one period of the chosen timescale.

    Reads the module-level ``data_to_plot`` frame, keeps the ``"actual"``
    values whose timestamp falls in the requested period, and displays them as
    an interactive figure. If no rows match, a hint is printed instead.

    :param timescale: name of the datetime attribute of the index to filter on
        (for example "dayofyear", "week" or "month")
    :param selection: the numeric value of the timescale selection (for example
        the 15th day of the year or the 1st week of the year)
    :param theme: name of the plot theme passed through to ``iplot``
    :return: None; the figure is rendered as a side effect
    """
    index = data_to_plot.index

    if timescale in ("week", "weekofyear"):
        # DatetimeIndex.week / .weekofyear are deprecated since pandas 1.1 and
        # removed in pandas 2.0; isocalendar().week is the documented
        # replacement and yields the same ISO week numbers.
        in_period = (index.isocalendar()["week"] == selection).to_numpy(
            dtype=bool, na_value=False
        )
    else:
        in_period = getattr(index, timescale) == selection

    # Subset based on timescale and selection
    subset = data_to_plot.loc[in_period, "actual"].copy()

    if subset.empty:
        # Not every slider value exists for every timescale (e.g. month 40).
        print("Choose another selection")
        return

    # Make an interactive plot; asFigure=True returns the figure so its size
    # can be adjusted before it is displayed.
    fig = subset.iplot(
        title=f"Energy for {selection} {timescale.title()}", theme=theme, asFigure=True
    )
    fig['layout']['height'] = 500
    fig['layout']['width'] = 1400
    iplot(fig)
    


# Build the three controls first, then wire them to the plotting callback.

# Which datetime attribute of the index to filter on.
timescale_picker = widgets.RadioButtons(
    options=["dayofyear", "week", "month"], value="dayofyear"
)

# Numeric value of the chosen period; one shared range for all timescales.
selection_slider = widgets.IntSlider(value=16, min=0, max=365)

# Plot theme, chosen from the themes cufflinks ships with.
theme_picker = widgets.Select(options=cf.themes.THEMES.keys(), value='ggplot')

_ = interact(
    plot_timescale,
    timescale=timescale_picker,
    selection=selection_slider,
    theme=theme_picker,
)

interactive(children=(RadioButtons(description='timescale', options=('dayofyear', 'week', 'month'), value='day…

# References

<div class="alert alert-block alert-warning">
<font color=black><br>

- https://towardsdatascience.com/how-to-generate-prediction-intervals-with-scikit-learn-and-python-ab3899f992ed<br>
- https://nbviewer.jupyter.org/github/WillKoehrsen/Data-Analysis/blob/master/prediction-intervals/prediction_intervals.ipynb<br>

<br></font>
</div>