# Getting Started 

## Neighborhood Energy Data

Part 1

In [None]:
import warnings

import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

# Configure plotting in a single call: seaborn's "whitegrid" style plus a wider
# default figure size. Going through seaborn avoids the matplotlib style name
# 'seaborn-whitegrid', which was deprecated in matplotlib 3.6 and later removed.
sns.set(style='whitegrid', rc={'figure.figsize': (12, 6)})

# Silence all warnings to keep the rendered output uncluttered.
# NOTE(review): this also hides deprecation warnings -- consider narrowing it.
warnings.simplefilter('ignore')

What does a typical file look like?

In [None]:
# Peek at the first 10 lines of the raw file (a portable stand-in for `!head`,
# which is not available on Windows). Iterating over the handle reads only the
# lines that are needed, and the `with` block closes the file afterwards.
with open('data/measured_real_power.csv') as fh:
    for _, line in zip(range(10), fh):
        print(line.rstrip('\n'))

Read with pandas into a **DataFrame**

In [None]:
# pandas is already imported as `pd` in the setup cell at the top.
filename = 'data/measured_real_power.csv'

# Skip the first 8 lines of the file so that the following line is used as
# the column header.
df = pd.read_csv(filename, skiprows=8)

What does this look like?

In [None]:
# (number of rows, number of columns) of the frame just loaded.
df.shape

In [None]:
# Show the first five rows.
df.head()

Let's clean this up a bit...

**rename columns**

In [None]:
# The first column is read in as '# timestamp' (the header line starts with
# '# '). Rename it, producing a new frame rather than mutating in place.
df = df.rename(columns={'# timestamp': 'timestamp'})
df.columns

Convert to `datetime`.

In [None]:
# Parse the 'timestamp' column with pd.to_datetime and store the result back
# in the same column.
df['timestamp'] = pd.to_datetime(df['timestamp'])

Set the index

In [None]:
# Use the 'timestamp' column as the row index. Reassigning the result (instead
# of `inplace=True`) keeps the step explicit and chainable.
df = df.set_index('timestamp')
df.head()

Create a read function

In [None]:
%%writefile utils.py

import pandas as pd

def read_csv(filename, skiprows=8):
    """Read a GridLAB-D CSV file into a DataFrame indexed by timestamp.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.
    skiprows : int, optional
        Number of leading lines to skip before the header row. Defaults to 8,
        the value this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The file contents with the '# timestamp' column renamed to
        'timestamp', converted with ``pd.to_datetime`` and set as the index.
    """
    df = pd.read_csv(filename, skiprows=skiprows)
    df = df.rename(columns={'# timestamp': 'timestamp'})
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df.set_index('timestamp')


In [None]:
# Import the module written by the %%writefile cell above and use it to load
# the same file in a single call.
import utils

df = utils.read_csv('data/measured_real_power.csv')
df.head()

## Questions 

What does the raw data look like?

In [None]:
# List the column labels (e.g. 'triplex_meter_0').
df.columns

Sometimes it's useful to look at a subset, like *triplex_meter_0* on *July 5th*.

In [None]:
# Plot the full time series of a single column.
df['triplex_meter_0'].plot()

In [None]:
# Select every row dated 2013-07-05 (partial-string indexing on the datetime
# index) and plot all columns.
df.loc['2013-07-05'].plot(legend=False)

In [None]:
# Same day, restricted to the single column 'triplex_meter_0'.
df.loc['2013-07-05', 'triplex_meter_0'].plot(legend=False)

In [None]:
# Same day, two columns at once by passing a list of labels.
df.loc['2013-07-05', ['triplex_meter_0', 'triplex_meter_7']].plot(legend=False)

What's the average house power over time?

In [None]:
# axis=1 averages across the columns, giving one value per timestamp.
df.mean(axis=1).head()

In [None]:
# Plot the per-timestamp average; `ax` holds the Axes returned by .plot().
ax = df.mean(axis=1).plot()

Which house has the highest average value?

In [None]:
# axis=0 averages down the rows, giving one mean per column.
# Sorted ascending, so the largest means are last and appear in tail().
md = df.mean(axis=0).sort_values(ascending=True)  #axis=0!
md.tail()

In [None]:
# Horizontal bar chart of the sorted per-column means.
md.plot(kind='barh')

In [None]:
# Split the column labels into two groups by their mean value, cutting at 600.
# NOTE(review): presumably the low-mean columns are the houses with solar
# generation and 600 was read off the bar chart above -- confirm the cut-off
# and its units.
solar_houses = md[md<600].index
nonsolar_houses = md[md>=600].index

Box plot of `triplex_meter_0`, `triplex_meter_1`, `triplex_meter_2`, and `triplex_meter_3`.

In [None]:
# Keep only four meter columns (triplex_meter_0 .. triplex_meter_3).
tmp = df[['triplex_meter_0', 'triplex_meter_1', 'triplex_meter_2', 'triplex_meter_3']]
tmp.head()

**Boxplots**

In [None]:
# pandas' built-in box plot: one box per column. return_type='axes' makes the
# call return the matplotlib Axes.
tmp.boxplot(return_type='axes')

In [None]:
# Pass the wide-form frame straight to seaborn as the first positional argument.
# NOTE(review): how a positional DataFrame is interpreted depends on the
# seaborn version -- confirm the output against the installed release.
sns.boxplot(tmp)

We need to **melt** the DataFrame into long form (one row per reading) first.

In [None]:
# melt() reshapes wide to long: one row per reading, with 'variable' holding
# the original column name and 'value' holding the reading.
tmp.melt().head()

In [None]:
# With long-form data, put the original column name on x and the reading on y;
# every box is drawn in the same color.
sns.boxplot(data=tmp.melt(), y='value', x='variable', color='steelblue')

In [None]:
# Same long-form data drawn as violin plots instead of boxes.
sns.violinplot(data=tmp.melt(), y='value', x='variable', color='steelblue')

Here's a little string formatting trick.

In [None]:
# str.format replaces '{}' with its argument, giving 'word hello'.
"word {}".format("hello")

In [None]:
# Build the names 'triplex_meter_0' .. 'triplex_meter_4' with a list comprehension.
[ "triplex_meter_{}".format(x) for x in range(5)]

What is the average power across the first four houses over time?

In [None]:
# Select triplex_meter_0 .. triplex_meter_3 via generated column names, then
# plot the mean across those four columns at each timestamp.
# Assigning to `_` discards the returned Axes so its repr is not displayed.
tmp = df[[ "triplex_meter_{}".format(x) for x in range(4)]]
_ = tmp.mean(axis=1).plot(legend=False, alpha=0.75)

In [None]:
# Average each column within 30-minute bins first, then average across the
# four columns, and plot the result.
tmp.resample('30min').mean().mean(axis=1).plot(legend=False)

In [None]:
# Compare the two groups on one set of axes: for each group, average every
# column within 30-minute bins, then average across the group's columns.
ax = df[solar_houses].resample('30min').mean().mean(axis=1).plot(label="solar", legend=True)
# Pass `ax` explicitly so the second line is drawn on the same Axes rather than
# relying on matplotlib's implicit "current axes".
df[nonsolar_houses].resample('30min').mean().mean(axis=1).plot(ax=ax, label='non-solar', legend=True)