# Developing a visualization to monitor energy consumption

### Data for the project can be found in the UCI Machine Learning Repository located here:
https://archive.ics.uci.edu/ml/datasets/Individual+household+electric+power+consumption

#### Data description (after clean-up)

1.date-time: Datetime in format yyyy-mm-dd hh:mm:ss 

2.global_active_power: household global minute-averaged active power (in kilowatt) 

3.global_reactive_power: household global minute-averaged reactive power (in kilowatt) 

4.voltage: minute-averaged voltage (in volt) 

5.global_intensity: household global minute-averaged current intensity (in ampere) 

6.sub_metering_1: energy sub-metering No. 1 (in watt-hour of active energy). It corresponds to the kitchen, containing mainly a dishwasher, an oven and a microwave (hot plates are not electric but gas powered). 

7.sub_metering_2: energy sub-metering No. 2 (in watt-hour of active energy). It corresponds to the laundry room, containing a washing-machine, a tumble-drier, a refrigerator and a light. 

8.sub_metering_3: energy sub-metering No. 3 (in watt-hour of active energy). It corresponds to an electric water-heater and an air-conditioner.

In [4]:
import matplotlib.pylab as plt
%matplotlib inline

In [5]:
#Make sure that you use the api key, using plotly
import plotly.plotly as py
import plotly.graph_objs as go

In [6]:
import pandas as pd
import numpy as np
from datetime import datetime
import time
from datasci_prj.dayofweek import dayofweek #This function returns the day of week in text
from datasci_prj.monthofyear import monthofyear #This function returns the month of the year in text
from datasci_prj.functions import get_time_from_dt_list # This function returns just the time from a list of datetimes

In [7]:
# Read the three CSV parts of the dataset and stack them into one dataframe.
part_files = [
    'household_power_consumption_set_1.csv',
    'household_power_consumption_set_2.csv',
    'household_power_consumption_set_3.csv',
]
df1, df2, df3 = [
    pd.read_csv(part_file, sep=',', header=0, low_memory=False)
    for part_file in part_files
]

# pd.concat replaces the chained DataFrame.append calls (append is deprecated
# and absent from recent pandas releases). ignore_index=True renumbers the
# rows 0..n-1, which is what the former reset_index(drop=True) did.
df = pd.concat([df1, df2, df3], ignore_index=True)

# Parse the 'Datetime' column so it can be compared against datetime objects.
df['Datetime'] = pd.to_datetime(df['Datetime'])

In [8]:
# Report the weekday, date and time of the first record in the dataset.
entry1 = df.Datetime[0]

# print() called with one pre-formatted string produces the same output under
# Python 2 and Python 3, unlike the print statement.
print('The first entry of this dataset begins on {}, {} {}, {}'.format(
    dayofweek(entry1.weekday()),
    monthofyear(entry1.month),
    entry1.day,
    entry1.year))
print('at {}'.format(entry1.time()))

The first entry of this dataset begins on Saturday, December 16, 2006
at 17:24:00


In [9]:
# Select every reading taken on Boxing Day (Dec 26), 2006.
# The window is half-open: midnight on the 26th is included and midnight on
# the 27th is excluded, the same convention as the df_24/df_25/df_26 slices.
# (The previous bounds selected Dec 27 and dropped the midnight sample.)
df4 = df[(df['Datetime'] >= datetime(2006, 12, 26, 0, 0, 0))
         & (df['Datetime'] < datetime(2006, 12, 27, 0, 0, 0))]

In [10]:
# Raw global active power for the rows held in df4, drawn as a single line.
trace2 = go.Scatter(
    x=df4['Datetime'].tolist(),
    y=df4['Global_active_power'].tolist(),
    mode='lines',
    name='Global_active_power',
)
data = [trace2]

# Show the legend and place it at x=0.1, y=1 in plotly's layout coordinates.
layout = go.Layout(showlegend=True, legend={'x': 0.1, 'y': 1})

# Plot and embed in the IPython notebook.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Global-active-power')


In [11]:
from statsmodels.nonparametric.smoothers_lowess import lowess

# Smooth the df4 power readings with LOWESS. The x values handed to lowess are
# evenly spaced points on [0, 1] (one per row), not the actual timestamps.
power_values = np.array(df4['Global_active_power'].tolist())
unit_axis = np.linspace(0, 1, len(df4))
filtered = lowess(power_values, unit_axis, frac=0.01, it=0, is_sorted=True)

# Column 1 of the lowess result is plotted against the original timestamps.
trace2 = go.Scatter(
    x=df4['Datetime'].tolist(),
    y=filtered[:, 1],
    mode='lines',
    name='Global_active_power_filtered',
)
data = [trace2]

# Show the legend and place it at x=0.1, y=1 in plotly's layout coordinates.
layout = go.Layout(showlegend=True, legend={'x': 0.1, 'y': 1})

# Plot and embed in the IPython notebook.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Global-active-power-filtered')



In [12]:
# Get the time from a list of timestamps
# time_list = get_time_from_dt_list(df4.Datetime.tolist())

### Plot the global active power over the consecutive days spanning Christmas Eve through Boxing Day, 2006

In [13]:
# Slice the full frame into one dataframe per calendar day, Dec 24-26, 2006.
# Each window is half-open: from midnight of the day (inclusive) up to
# midnight of the following day (exclusive).
stamps = df['Datetime']
dec_24 = datetime(2006, 12, 24)
dec_25 = datetime(2006, 12, 25)
dec_26 = datetime(2006, 12, 26)
dec_27 = datetime(2006, 12, 27)

df_24 = df[(stamps >= dec_24) & (stamps < dec_25)]
df_25 = df[(stamps >= dec_25) & (stamps < dec_26)]
df_26 = df[(stamps >= dec_26) & (stamps < dec_27)]

In [14]:
# Build, for each day, the list of times produced by get_time_from_dt_list
# from that day's timestamps.
# NOTE(review): the original author remarks that only one of these lists is
# really needed -- presumably each day has the same set of times; verify.
time_list_24, time_list_25, time_list_26 = [
    get_time_from_dt_list(day_df['Datetime'].tolist())
    for day_df in (df_24, df_25, df_26)
]

In [15]:
# Raw global active power for Dec 24, 25 and 26, 2006: one trace per day,
# each plotted against its own full timestamps.
trace_24, trace_25, trace_26 = [
    go.Scatter(
        x=day_df['Datetime'].tolist(),
        y=day_df['Global_active_power'].tolist(),
        mode='lines',
        name=label,
    )
    for label, day_df in (
        ('Global_active_power on Dec 24, 2006', df_24),
        ('Global_active_power on Dec 25, 2006', df_25),
        ('Global_active_power on Dec 26, 2006', df_26),
    )
]
data = [trace_24, trace_25, trace_26]

# Show the legend and place it at x=0.1, y=1 in plotly's layout coordinates.
layout = go.Layout(showlegend=True, legend={'x': 0.1, 'y': 1})

# Plot and embed in the IPython notebook.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Global-active-power Daterange')



In [16]:


# Overlay Dec 24, 25 and 26, 2006 on a shared axis: x holds the per-day time
# lists built with get_time_from_dt_list instead of the full timestamps, so
# the three days are drawn on top of each other.
trace_24, trace_25, trace_26 = [
    go.Scatter(
        x=day_times,
        y=day_df['Global_active_power'].tolist(),
        mode='lines',
        name=label,
    )
    for label, day_times, day_df in (
        ('Global_active_power on Dec 24, 2006', time_list_24, df_24),
        ('Global_active_power on Dec 25, 2006', time_list_25, df_25),
        ('Global_active_power on Dec 26, 2006', time_list_26, df_26),
    )
]
data = [trace_24, trace_25, trace_26]

# Show the legend and place it at x=0.1, y=1 in plotly's layout coordinates.
layout = go.Layout(showlegend=True, legend={'x': 0.1, 'y': 1})

# Plot and embed in the IPython notebook. The filename is specific to this
# figure: several cells in this notebook save under
# 'Global-active-power Daterange', and plotly overwrites the stored plot when
# a filename is reused, so sharing that name would replace the other figures.
fig2 = go.Figure(data=data, layout=layout)
py.iplot(fig2, filename='Global-active-power Daterange time-of-day')

In [17]:
# Filtered data: LOWESS-smooth each day's power readings. The x values handed
# to lowess are evenly spaced points on [0, 1] (one per row), not timestamps.
filtered_24, filtered_25, filtered_26 = [
    lowess(np.array(day_df.Global_active_power.tolist()),
           np.linspace(0, 1, len(day_df)),
           is_sorted=True, frac=0.01, it=0)
    for day_df in (df_24, df_25, df_26)
]

# Overlay the three smoothed days on the shared per-day time lists; column 1
# of each lowess result is used as the y series.
trace_24, trace_25, trace_26 = [
    go.Scatter(
        x=day_times,
        y=smoothed[:, 1],
        mode='lines',
        name=label,
    )
    for label, day_times, smoothed in (
        ('Global_active_power on Dec 24, 2006', time_list_24, filtered_24),
        ('Global_active_power on Dec 25, 2006', time_list_25, filtered_25),
        ('Global_active_power on Dec 26, 2006', time_list_26, filtered_26),
    )
]
data = [trace_24, trace_25, trace_26]

# Show the legend and place it at x=0.1, y=1 in plotly's layout coordinates.
layout = go.Layout(showlegend=True, legend={'x': 0.1, 'y': 1})

# Plot and embed in the IPython notebook. The filename is specific to this
# figure: several cells in this notebook save under
# 'Global-active-power Daterange', and plotly overwrites the stored plot when
# a filename is reused, so sharing that name would replace the other figures.
fig3 = go.Figure(data=data, layout=layout)
py.iplot(fig3, filename='Global-active-power Daterange filtered')


In [18]:
from statsmodels.nonparametric.smoothers_lowess import lowess

# Rows from Dec 24 00:00 (inclusive) to Dec 27 00:00 (exclusive) for each of
# the years 2006-2009.
stamps = df['Datetime']
df_24_26_2006 = df[(stamps >= datetime(2006, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2006, 12, 27, 0, 0, 0))]
df_24_26_2007 = df[(stamps >= datetime(2007, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2007, 12, 27, 0, 0, 0))]
df_24_26_2008 = df[(stamps >= datetime(2008, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2008, 12, 27, 0, 0, 0))]
df_24_26_2009 = df[(stamps >= datetime(2009, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2009, 12, 27, 0, 0, 0))]

yearly_frames = (df_24_26_2006, df_24_26_2007, df_24_26_2008, df_24_26_2009)

# LOWESS-smooth each year's power readings. The x values handed to lowess are
# evenly spaced points on [0, 1] (one per row), not the actual timestamps.
filtered_2006, filtered_2007, filtered_2008, filtered_2009 = [
    lowess(np.array(year_df.Global_active_power.tolist()),
           np.linspace(0, 1, len(year_df)),
           is_sorted=True, frac=0.01, it=0)
    for year_df in yearly_frames
]

# Time lists for each year's window. time_list_09 is now derived from the 2009
# frame; it was previously built from the 2006 frame by mistake.
time_list_06, time_list_07, time_list_08, time_list_09 = [
    get_time_from_dt_list(year_df.Datetime.tolist())
    for year_df in yearly_frames
]

In [19]:
from statsmodels.nonparametric.smoothers_lowess import lowess

# Rows from Dec 24 00:00 (inclusive) to Dec 27 00:00 (exclusive) for each of
# the years 2006-2009. These are recomputed here so the cell can be run on its
# own, even after other cells have rebound the filtered_* names.
stamps = df['Datetime']
df_24_26_2006 = df[(stamps >= datetime(2006, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2006, 12, 27, 0, 0, 0))]
df_24_26_2007 = df[(stamps >= datetime(2007, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2007, 12, 27, 0, 0, 0))]
df_24_26_2008 = df[(stamps >= datetime(2008, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2008, 12, 27, 0, 0, 0))]
df_24_26_2009 = df[(stamps >= datetime(2009, 12, 24, 0, 0, 0))
                   & (stamps < datetime(2009, 12, 27, 0, 0, 0))]

yearly_frames = (df_24_26_2006, df_24_26_2007, df_24_26_2008, df_24_26_2009)

# LOWESS-smooth each year's power readings. The x values handed to lowess are
# evenly spaced points on [0, 1] (one per row), not the actual timestamps.
filtered_2006, filtered_2007, filtered_2008, filtered_2009 = [
    lowess(np.array(year_df.Global_active_power.tolist()),
           np.linspace(0, 1, len(year_df)),
           is_sorted=True, frac=0.01, it=0)
    for year_df in yearly_frames
]

# Time lists for each year's window. time_list_09 is now derived from the 2009
# frame; it was previously built from the 2006 frame by mistake.
time_list_06, time_list_07, time_list_08, time_list_09 = [
    get_time_from_dt_list(year_df.Datetime.tolist())
    for year_df in yearly_frames
]

# Every trace is drawn against the 2006 timestamps so the four years overlay
# on a single axis.
# NOTE(review): this assumes each year's window has the same number of rows as
# the 2006 window -- confirm, otherwise x and y differ in length.
shared_x = df_24_26_2006.Datetime.tolist()
trace_06, trace_07, trace_08, trace_09 = [
    go.Scatter(
        x=shared_x,
        y=smoothed[:, 1],
        mode='lines',
        name=label,
    )
    for label, smoothed in (
        ('Global_active_power on Dec 24-26, 2006', filtered_2006),
        ('Global_active_power on Dec 24-26, 2007', filtered_2007),
        ('Global_active_power on Dec 24-26, 2008', filtered_2008),
        ('Global_active_power on Dec 24-26, 2009', filtered_2009),
    )
]
data = [trace_06, trace_07, trace_08, trace_09]

# The legend is switched off for this figure; its position settings are kept.
layout = go.Layout(showlegend=False, legend={'x': 0.1, 'y': 1})

# Plot and embed in the IPython notebook. The filename is specific to this
# figure: several cells in this notebook save under
# 'Global-active-power Daterange', and plotly overwrites the stored plot when
# a filename is reused, so sharing that name would replace the other figures.
fig3 = go.Figure(data=data, layout=layout)
py.iplot(fig3, filename='Global-active-power Dec 24-26 by year')

In [25]:

from statsmodels.nonparametric.smoothers_lowess import lowess

# Rows from Nov 12 00:00 (inclusive) to Nov 14 00:00 (exclusive) for each of
# the years 2007-2009, ordered by timestamp.
# NOTE(review): the half-open window stops at the start of Nov 14, so it holds
# Nov 12 and 13 only, while the names and labels say "12-14" -- confirm intent.
stamps = df['Datetime']
df_12_14_2007 = df[(stamps >= datetime(2007, 11, 12, 0, 0, 0))
                   & (stamps < datetime(2007, 11, 14, 0, 0, 0))]
df_12_14_2007 = df_12_14_2007.sort_values('Datetime')

df_12_14_2008 = df[(stamps >= datetime(2008, 11, 12, 0, 0, 0))
                   & (stamps < datetime(2008, 11, 14, 0, 0, 0))]
df_12_14_2008 = df_12_14_2008.sort_values('Datetime')

df_12_14_2009 = df[(stamps >= datetime(2009, 11, 12, 0, 0, 0))
                   & (stamps < datetime(2009, 11, 14, 0, 0, 0))]
df_12_14_2009 = df_12_14_2009.sort_values('Datetime')

# LOWESS-smooth each year's power readings. The x values handed to lowess are
# evenly spaced points on [0, 1] (one per row), not the actual timestamps.
filtered_2007, filtered_2008, filtered_2009 = [
    lowess(np.array(year_df.Global_active_power.tolist()),
           np.linspace(0, 1, len(year_df)),
           is_sorted=True, frac=0.01, it=0)
    for year_df in (df_12_14_2007, df_12_14_2008, df_12_14_2009)
]

# Every trace is drawn against the 2007 timestamps so the three years overlay
# on a single axis.
# NOTE(review): this assumes each year's window has the same number of rows as
# the 2007 window -- confirm, otherwise x and y differ in length.
shared_x = df_12_14_2007.Datetime.tolist()

# Each legend label names the year its data comes from; previously all three
# traces were labelled 2006, which made them indistinguishable and wrong.
trace_07, trace_08, trace_09 = [
    go.Scatter(
        x=shared_x,
        y=smoothed[:, 1],
        mode='lines',
        name=label,
    )
    for label, smoothed in (
        ('Global_active_power on Nov 12-14, 2007', filtered_2007),
        ('Global_active_power on Nov 12-14, 2008', filtered_2008),
        ('Global_active_power on Nov 12-14, 2009', filtered_2009),
    )
]
data = [trace_07, trace_08, trace_09]

# Show the legend and place it at x=0.1, y=1 in plotly's layout coordinates.
layout = go.Layout(showlegend=True, legend={'x': 0.1, 'y': 1})

# Plot and embed in the IPython notebook. The filename is specific to this
# figure: several cells in this notebook save under
# 'Global-active-power Daterange', and plotly overwrites the stored plot when
# a filename is reused, so sharing that name would replace the other figures.
fig4 = go.Figure(data=data, layout=layout)
py.iplot(fig4, filename='Global-active-power Nov 12-14 by year')

In [82]:
#dayofweek(datetime(2007,11,12,0,0,0).weekday())