<p>
    <img src="https://s3.amazonaws.com/iotanalytics-templates/Logo.png" style="float:left;width:65px">
    <h1 style="float:left;color:#1A5276;padding-left:15px;font-size:20px;">AWS IoT Analytics | Notebook</h1>
</p>

Demonstration of [AWS IoT Analytics](https://aws.amazon.com/iot-analytics/) Notebooks, using sample sensor data. The notebook uses [pandas](https://pandas.pydata.org/) for data analysis and manipulation, and [matplotlib](https://matplotlib.org/) and [plotly](https://github.com/plotly/plotly.py) for visualization.


In [None]:
import io
import sys

import boto3
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from matplotlib.dates import DateFormatter
from pandas.plotting import register_matplotlib_converters

In [None]:
# Notebook-wide settings; adjust both values to match your locale.
MY_TIMEZONE = 'US/Eastern'  # update to your local timezone or use 'UTC'
DATETIME_FORMAT = '%y-%m-%d %H:%M'  # update to your local date/time format

In [None]:
def parse(x):
    """Convert Unix epoch seconds into timestamps expressed in MY_TIMEZONE.

    Parameters
    ----------
    x : scalar or array-like
        Epoch values in seconds, given as numbers or as numeric strings
        (the form they take when read from CSV text).

    Returns
    -------
    The UTC-aware result of ``pd.to_datetime`` converted to ``MY_TIMEZONE``.

    Raises
    ------
    ValueError
        If ``x`` contains values that cannot be interpreted as numbers.
    """
    # Cast to numbers first: handing numeric *strings* to to_datetime together
    # with ``unit`` is deprecated in recent pandas releases.
    seconds = pd.to_numeric(x)
    # ``infer_datetime_format`` was dropped: it is deprecated and has no effect
    # when ``unit`` is supplied.
    timestamps = pd.to_datetime(seconds, unit='s', utc=True)
    return timestamps.tz_convert(MY_TIMEZONE)

In [None]:
%%time

client = boto3.client('iotanalytics')
nrows_read = None # specify 'None' if want to read whole data set (10k records)
dataset = 'iot_analytics_data_set'
data_location = client.get_dataset_content(datasetName=dataset)['entries'][0]['dataURI']

df = pd.read_csv(data_location,
                 nrows=nrows_read,
                 header=0,
                 low_memory=False,
                 infer_datetime_format=True,
                 date_parser=parse,
                 index_col=['ts'])

In [None]:
# clean up the DataFrame
# errors='ignore' keeps this cell re-runnable: a second run (or a dataset that
# has no '__dt' column) no longer raises KeyError.
df = df.drop(columns='__dt', errors='ignore')
# 'ts' is the name of the index, so this orders the rows by their index values
df = df.sort_values(by='ts', ascending=True)

In [None]:
# transform metadata attribute, from device registry, to valid json object
# The substitutions are order-dependent and are applied one after another.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on df['metadata']: that chained in-place form acts
# on a temporary object under pandas Copy-on-Write and leaves df unchanged.
# NOTE: run once per load; re-running on already converted text adds extra quotes.
metadata_substitutions = (
    ('{', '{"'),
    ('=', '":"'),
    (', ', '","'),
    (':"{"', ':{"'),
    ('}",', '},'),
    ('}', '"}'),
)
metadata_column = df['metadata']
for pattern, replacement in metadata_substitutions:
    metadata_column = metadata_column.replace(pattern, replacement, regex=True)
df['metadata'] = metadata_column

In [None]:
# preview the last five rows of the DataFrame
display(df.tail(5))

In [None]:
# filter temp/humidity outliers (>1% & <99%)
# Each bound is a per-device quantile computed on the frame as it stands at that
# step, i.e. after the previous filter has already been applied.
for column in ('temp', 'humidity'):
    lower_bound = df.groupby('device')[column].transform(lambda values: values.quantile(.01))
    df = df.loc[df[column] > lower_bound]

    upper_bound = df.groupby('device')[column].transform(lambda values: values.quantile(.99))
    df = df.loc[df[column] < upper_bound]

In [None]:
# group by device
# `groups` is reused by the statistics cell and the matplotlib plotting cells
groups = df.groupby('device')

In [None]:
# Summary statistics for the filtered DataFrame, followed by per-device counts.
print('DataFrame Stats')
print('-------------')
print('Record count: {:,}'.format(df['temp'].count()))
print('DataFrame size (MB): {:,.2f}'.format(sys.getsizeof(df)/1024/1024))
print('-------------')
# Rows were sorted by 'ts' ascending, so the first and last index entries are
# the earliest and latest timestamps (index[0], not index[1], is the minimum).
print('Time range (min): {:%Y-%m-%d %H:%M:%S %Z}'.format(df.index[0]))
print('Time range (max): {:%Y-%m-%d %H:%M:%S %Z}'.format(df.index[-1]))
print('Temperature (min): {:.2f}'.format(df['temp'].min()))
print('Temperature (max): {:.2f}'.format(df['temp'].max()))
print('Humidity (min): {:.2f}{}'.format(df['humidity'].min(), '%'))
print('Humidity (max): {:.2f}{}'.format(df['humidity'].max(), '%'))
print('-------------')
print('Record count:\n{}'.format(groups.size()))
# Optional per-device extremes; uncomment to print them.
# print('Temperature (min):\n{}'.format(groups['temp'].min()))
# print('Temperature (max):\n{}'.format(groups['temp'].max()))
# print('Humidity (min)\n{}'.format(groups['humidity'].min()))
# print('Humidity (max):\n{}'.format(groups['humidity'].max()))

In [None]:
# example of extracting value from metadata attribute
# .iloc[0] picks the first row by position: the index holds timestamps, so a
# bare [0] depends on the deprecated positional fallback of Series.__getitem__.
# The JSON text is wrapped in StringIO because passing a literal JSON string
# to read_json is deprecated.
first_metadata = io.StringIO(df['metadata'].iloc[0])
metadata = pd.read_json(first_metadata, orient='records', typ='series')
print(metadata['thingname'])

In [None]:
# matplotlib datetime config
register_matplotlib_converters()
# point matplotlib's timezone setting at the notebook timezone, then build the
# shared tick formatter used by the time-series plots
plt.rcParams.update({'timezone': MY_TIMEZONE})
myFmt = DateFormatter(DATETIME_FORMAT)

### Scatter Plot using Matplotlib
Using [Matplotlib: Visualization with Python](https://matplotlib.org/)

In [None]:
# One marker series per device: temperature on the x-axis, humidity on the y-axis.
_, ax = plt.subplots(nrows=1, ncols=1, figsize=(18, 9))
for device, group in groups:
    # linestyle='' leaves only the markers, so the line plot reads as a scatter
    ax.plot(group['temp'], group['humidity'],
            linestyle='', marker='o', ms=10, alpha=.5,
            label=device)
ax.grid()
ax.margins(0.05)
ax.legend()
ax.set_title('Temperature vs. Humidity')
ax.set_xlabel('Temperature (˚F)')
ax.set_ylabel('Humidity (%)')
plt.show()

### Temperature Graph using Moving Average
Smoothing data using the mean average of a 1 minute rolling window.  
1 minute == 20 data points @ 3-second intervals

In [None]:
# Plot each device's temperature smoothed with a 20-sample rolling mean.
fig, ax = plt.subplots(1, 1, figsize=(18, 9))
for device, group in groups:
    # Keep the rolling mean in a local variable: assigning it to `group.mean`
    # would overwrite the DataFrame's own mean() method on that object.
    temp_smoothed = group.temp.rolling(window=20).mean()
    ax.plot(temp_smoothed,
            label=device)
fig.autofmt_xdate()
ax.xaxis.set_major_formatter(myFmt)
ax.grid()
ax.margins(0.05)
ax.legend()
plt.title('Temperature Comparison over Time')
plt.ylabel('Temperature (˚F)')
plt.xlabel('Time')
plt.show()

### Humidity Graph using Moving Average
Smoothing data using the mean average of a 1 minute rolling window (moving average).  
1 minute == 20 data points @ 3-second intervals

In [None]:
# Plot each device's humidity smoothed with a 20-sample rolling mean.
fig, ax = plt.subplots(1, 1, figsize=(18, 9))
for device, group in groups:
    # Keep the rolling mean in a local variable: assigning it to `group.mean`
    # would overwrite the DataFrame's own mean() method on that object.
    humidity_smoothed = group.humidity.rolling(window=20).mean()
    ax.plot(humidity_smoothed,
            label=device)
fig.autofmt_xdate()
ax.xaxis.set_major_formatter(myFmt)
ax.grid()
ax.margins(0.05)
ax.legend()
plt.title('Humidity Comparison over Time')
plt.ylabel('Humidity (%)')
plt.xlabel('Time')
plt.show()

#### Plotly vs. Matplotlib for Graphing
Graphs using [Plotly Python Open Source Graphing Library](https://plotly.com/python/)

In [None]:
# strip timezone info so Plotly won't convert to UTC
# (replaces the index of `df` with a timezone-naive copy of itself)
df.index = df.index.tz_localize(None)

In [None]:
# Interactive temperature-vs-humidity scatter, colored by device.
scatter_hover = {'device': False, 'temp': ':.2f', 'humidity': ':.2f'}
fig = px.scatter(
    df,
    x='temp',
    y='humidity',
    color='device',
    hover_name='device',
    hover_data=scatter_hover,
    trendline='ols',
    render_mode='svg',
)

fig.update_layout(
    title='Temperature vs. Humidity',
    xaxis_title='Temperature (˚F)',
    yaxis_title='Humidity (%)',
    template='ggplot2',
)

fig.show()

In [None]:
# Interactive temperature-over-time line chart, one line per device.
temp_hover = {'device': False, 'temp': ':.2f'}
fig = px.line(
    df,
    x=df.index.values,  # timestamps taken from the DataFrame index
    y='temp',
    color='device',
    hover_name='device',
    hover_data=temp_hover,
    render_mode='svg',
)

fig.update_layout(
    title='Temperature Comparison over Time',
    xaxis_title='Date/Time',
    yaxis_title='Temperature (˚F)',
    template='ggplot2',
)

fig.show()

In [None]:
# Interactive humidity-over-time line chart, one line per device.
humidity_hover = {'device': False, 'humidity': ':.2f'}
fig = px.line(
    df,
    x=df.index.values,  # timestamps taken from the DataFrame index
    y='humidity',
    color='device',
    hover_name='device',
    hover_data=humidity_hover,
    render_mode='svg',
)

fig.update_layout(
    title='Humidity Comparison over Time',
    xaxis_title='Time',
    yaxis_title='Humidity (%)',
    template='ggplot2',
)

fig.show()

In [None]:
# Interactive humidity-over-time line chart, one line per device.
# NOTE(review): this draws the same humidity chart as the preceding cell;
# it looks like an accidental duplicate - confirm and remove one of the two.
humidity_line_options = dict(
    x=df.index.values,  # timestamps taken from the DataFrame index
    y='humidity',
    color='device',
    render_mode='svg',
    hover_name='device',
    hover_data={'device': False, 'humidity': ':.2f'},
)
fig = px.line(df, **humidity_line_options)

humidity_layout = dict(
    title='Humidity Comparison over Time',
    xaxis_title='Time',
    yaxis_title='Humidity (%)',
    template='ggplot2',
)
fig.update_layout(**humidity_layout)

fig.show()

<div style="height:60px;"><div style="height:7px;background-color:#20B3CD;width:100%;margin-top:20px;position:relative;"><img src="https://s3.amazonaws.com/iotanalytics-templates/Logo.png" style="height:50px;width:50px;margin-top:-20px;position:absolute;margin-left:42%;"></div></div>