# Timeseries Analysis of Appliance and Light Usage

#### Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the energy data set straight from the UCI repository into a DataFrame.
DATA_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00374/energydata_complete.csv'
df = pd.read_csv(DATA_URL)

In [None]:
# df for exploratory data
# NOTE(review): this binds a second name to the same DataFrame object; it is
# not a copy, so any in-place change made through exp_df is also visible via df.
exp_df = df
# Preview the first five rows (displayed as the cell output).
df.head(5)

### Exploratory Data Analysis

#### Attributes and Characteristics

In [None]:
# List the column labels of the exploratory DataFrame (shown as the cell output).
exp_df.columns

In [None]:
# Check for missing values: count the NaN entries in each column.
exp_df.isna().sum()

In [None]:
# Check the data type pandas inferred for each column.
exp_df.dtypes

#### Visualize Data

In [None]:
# Small-multiples overview of the start of the recording.
# The 4x5 grid is read as two bands of two rows each: in every room column the
# upper panel shows temperature and the panel directly below shows relative
# humidity. The first column of the lower band holds the two energy series.
fig, ax = plt.subplots(4, 5, figsize=(16, 9))

# .loc slicing is label-based and end-inclusive, so this keeps labels 0..100.
sample = exp_df.loc[:100]

# (row of the temperature panel, grid column, title, temperature col, humidity col)
# Sensor-to-room mapping follows the UCI attribute description; in particular
# the parents' room is T9 / RH_9 (T3 / RH_3 is the laundry room).
room_panels = [
    (0, 0, 'Kitchen Area', 'T1', 'RH_1'),
    (0, 1, 'Living Area', 'T2', 'RH_2'),
    (0, 2, 'Laundry Area', 'T3', 'RH_3'),
    (0, 3, 'Office Area', 'T4', 'RH_4'),
    (0, 4, 'Bathroom', 'T5', 'RH_5'),
    (2, 1, 'Teenager Room', 'T8', 'RH_8'),
    (2, 2, 'Parents Room', 'T9', 'RH_9'),
    (2, 3, 'Outside Building', 'T6', 'RH_6'),
    (2, 4, 'Ironing Room', 'T7', 'RH_7'),
]
for top_row, col, title, temp_col, hum_col in room_panels:
    ax[top_row, col].plot(sample['date'], sample[temp_col])
    ax[top_row, col].set_title(title)
    ax[top_row + 1, col].plot(sample['date'], sample[hum_col])

# Energy series occupy the first column of the lower band, one panel each.
energy_panels = [
    (2, 'Appliances', 'Appliances'),
    (3, 'lights', 'Lights'),
]
for row, energy_col, title in energy_panels:
    ax[row, 0].plot(sample['date'], sample[energy_col])
    ax[row, 0].set_title(title)
    ax[row, 0].set_ylabel('Energy (Wh)')

# Only the left-most room column of each band carries y-axis labels.
# In the lower band that is grid column 1, because column 0 is the energy plots.
for top_row, col in [(0, 0), (2, 1)]:
    ax[top_row, col].set_ylabel('Temperature')
    ax[top_row + 1, col].set_ylabel('Humidity')

# Only the bottom row is labelled along x.
for panel in ax[3, :]:
    panel.set_xlabel('Time (mins)')

# Hide x ticks and tick labels everywhere; the raw timestamp strings would
# otherwise overlap and be unreadable at this panel size.
for panel in ax.flat:
    panel.tick_params(labelbottom=False, bottom=False)

plt.tight_layout()

In [None]:
# Correlation between the temperature columns.
# regex='^T' keeps every column whose name starts with "T".
# NOTE(review): per the UCI attribute list this also matches T_out and
# Tdewpoint, not only the room sensors - confirm that is intended for a plot
# titled "Room Temperature".
filt = exp_df.filter(regex='^T', axis='columns').corr()
ax = plt.axes()
# filt already is the correlation matrix. Calling .corr() on it again would
# plot how similar the rows of that matrix are, not how the temperature
# columns correlate with each other.
sns.heatmap(filt, ax=ax)
ax.set_title('Room Temperature Correlation')
plt.show()

In [None]:
# Correlation between the relative-humidity columns.
# regex='^R' keeps every column whose name starts with an upper-case "R".
# NOTE(review): per the UCI attribute list this also matches RH_out, not only
# the room sensors - confirm that is intended.
filt = exp_df.filter(regex='^R', axis='columns').corr()
ax = plt.axes()
# filt already is the correlation matrix. Calling .corr() on it again would
# plot how similar the rows of that matrix are, not how the humidity columns
# correlate with each other.
sns.heatmap(filt, ax=ax)
ax.set_title('Room Relative Humidity Correlation')
plt.show()

#### Initial EDA Observations

There are many basic relationships we can see just from our exploratory data analysis. For example, in the line graphs we can see the trends of energy usage by appliances and lights in the bottom of the left-hand column. Directly above those graphs, we can see the temperature and relative humidity of the kitchen (chosen since many appliances are in this one room). Based on the graphs of all the rooms, we can see there is an overall spike and decline. Looking more closely, though, we see that these spikes and declines are not all the same. Interestingly, the graphs of the appliances and lights have a greater number of spikes and variations than most rooms.<br/><br/>
At first glance, these correlation plots reveal a few interesting things. It appears that temperature is less strongly correlated with appliance and light usage than relative humidity is. <br/><br/>
Just from these two plots, we hypothesize that relative humidity has a stronger impact on the energy usage of lights than on that of appliances. On the flip side, we hypothesize that temperature has a stronger impact on the energy usage of appliances than on that of lights.   

### Prepare the Data