# Datalab Flow Retrieval and Visualization

This simple notebook provides a template for retrieving flow data using eQL, visualizing the results and saving as a .csv

### First we import our notebook tools

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from energyworx_client.client import EWX

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

### Next, pick your namespace

In [2]:
namespace = 'na.engie.com'
api = EWX(namespace)

### Define your flow query

Note that you need to add your desired flow ID into the query. The flow ID is easily found using the console (click the dropdown tab for the flow details).

In [3]:
#example of extracting usage
#flow_query = "SELECT STRUCT(timestamp, STRUCT(flow_id, ARRAY[STRUCT(channel_classifier_id, value, \
#              ARRAY(SELECT AS STRUCT annotation, sequence_id, ARRAY_AGG(STRUCT(key, value))))] AS channel) AS flow) AS row \
#              FROM flows WHERE flow_id IN ('{flow_id}') \
#              AND timestamp > '{start_timestamp}' AND timestamp <= '{end_timestamp}' \
#              GROUP BY timestamp, flow_id, channel_classifier_id, value ORDER BY timestamp, flow_id "
#        
#timeseries_df = api.execute_query(flow_query.format( flow_id='7cd20c8e088a48c6a65e3dcee069d3b0', start_timestamp='2018-01-01T00:00:00', end_timestamp='2030-01-01T00:00:00')
#, limit=50000)
#timeseries_df.head()`

#documentation for api.execute_query()
#print(api.execute_query.__doc__) 



In [3]:
import datetime

timeseries_df = []

#example of extracting latest datasource ids 
flow_query = "Select datasource_id,flow_timestamp,flow_type,channel_classifier_id,flow_id from flow_metadata where flow_timestamp > '2018-01-01T00:00:00' ORDER BY flow_timestamp"
timeseries_df = api.execute_query(flow_query,limit = 10, raw_result = True)

#process resulting dictionary
kdatasource  = pd.DataFrame()
LENK = len(list(timeseries_df['rows']))
for k in range(0,LENK):
    kdatasource.loc[k,"datasource_id"] = list(timeseries_df['rows'])[k]['f'][0]['v']
    ktimestamp = list(timeseries_df['rows'])[k]['f'][1]['v']
    kdate = datetime.datetime.fromtimestamp(float(ktimestamp))
    kdatasource.loc[k,"flow_timestamp"] =  kdate
    kdatasource.loc[k,"flow_type"] =  list(timeseries_df['rows'])[k]['f'][2]['v']
    kdatasource.loc[k,"channel_classifier_id"] =  list(timeseries_df['rows'])[k]['f'][3]['v']
    kdatasource.loc[k,"flow_id"] =  list(timeseries_df['rows'])[k]['f'][4]['v']
kdatasource.head()


Unnamed: 0,datasource_id,flow_timestamp,flow_type,channel_classifier_id,flow_id
0,NEPOOL_CLP_51936044007_533192008,2018-01-02 13:31:29.384260,scenario,DELIVERY_IDR,548d4d2c36eb422087efbf010515d70a
1,NEPOOL_WMECO_54472602028_018741009,2018-01-02 13:36:25.125888,scenario,DELIVERY_SCALAR,74412323c4f24267a1e851cd176a66a8
2,ERCOT_ONCOR_10443720000437140,2018-01-02 13:55:02.645628,scenario,DELIVERY_SCALAR,dff0af511dc94df4b6bcd73b7a87a723
3,ERCOT_ONCOR_10443720001104315,2018-01-02 13:55:15.721216,scenario,DELIVERY_SCALAR,a4a1966e008e40afb0758f328743bfc8
4,ERCOT_ONCOR_10443720001003229,2018-01-02 14:08:00.754024,scenario,DELIVERY_IDR,26186a8ef9db4f619acb972010f1599c


In [7]:
flow_query = "SELECT timestamp, ARRAY[STRUCT(channel AS channel_classifier, value AS value)] AS raw \
              FROM INGEST WHERE timestamp > '2016-10-08T10:00:00' AND timestamp <= '2018-09-14T00:00:01' AND datasource_id IN ('KADW') AND channel_classifier_id IN ('AIRTEMPERATURE') \
              GROUP BY timestamp, channel, value ORDER BY timestamp asc "

timeseries_df = api.execute_query(flow_query)

HttpError: <HttpError 404 when requesting https://ewx-live.appspot.com/_ah/api/ewx/v1/query/execute?query=SELECT+timestamp%2C+ARRAY%5BSTRUCT%28channel+AS+channel_classifier%2C+value+AS+value%29%5D+AS+raw+++++++++++++++FROM+INGEST+WHERE+timestamp+%3E+%272016-10-08T10%3A00%3A00%27+AND+timestamp+%3C%3D+%272018-09-14T00%3A00%3A01%27+AND+datasource_id+IN+%28%27KADW%27%29+AND+channel_classifier_id+IN+%28%27AIRTEMPERATURE%27%29+++++++++++++++GROUP+BY+timestamp%2C+channel%2C+value+ORDER+BY+timestamp+asc+&alt=json&limit=10 returned "Datasource with id KADW was not found">

In [51]:

timeseries_df

Unnamed: 0,AIRTEMPERATURE
2016-10-08 10:00:00,17.22
2016-10-08 11:00:00,17.22
2016-10-08 12:00:00,17.22
2016-10-08 13:00:00,17.78
2016-10-08 14:00:00,17.78
2016-10-08 15:00:00,18.33
2016-10-08 16:00:00,18.33
2016-10-08 17:00:00,18.89
2016-10-08 18:00:00,17.78
2016-10-08 19:00:00,18.33


### Select your desired column(s) and visualize

In [8]:
timeseries_df = timeseries_df.loc[:, ['FORECAST']].dropna()
fig, ax = plt.subplots(1, figsize=(20, 8))
timeseries_df.plot(ax=ax)
plt.xticks(rotation=25);

AttributeError: 'dict' object has no attribute 'loc'

### Zoom in to a particular time period

In [None]:
fig, ax = plt.subplots(1, figsize=(20, 8))
timeseries_df.loc['2026-01-01':'2029-01-01', :].plot(ax=ax)
plt.xticks(rotation=25);

### Name your file and save as .csv
The file will be available in your directory and can be downloaded to your device

In [None]:
to_csv(timeseries_df, "account_forecast.csv")

In [1]:
import datetime

namespace = 'na.engie.com'
api = EWX(namespace=namespace)

timeseries_df = []

#example of extracting latest datasource ids 
flow_query = "Select datasource_id,flow_timestamp,flow_type,channel_classifier_id from flow_metadata where flow_timestamp > '2018-01-01T00:00:00' ORDER BY flow_timestamp"
timeseries_df = api.execute_query(flow_query,limit = 10, raw_result = True)
