In [1]:
import pandas as pd
import requests
from datetime import datetime, timezone, timedelta

# Extraction

In [2]:
# Kedro Catalog
tangaras = catalog.load('tangaras')

2022-06-01 15:44:24,500 - kedro.io.data_catalog - INFO - Loading data from `tangaras` (CSVDataSet)...


In [3]:
# Tangaras
tangaras

Unnamed: 0,MAC,Label_ID,Geolocation,Status
0,D29ESP32DED36FA,Tangara_36FA,,Offline
1,D29ESP32DED1CE2,Tangara_1CE2,,Offline
2,D29ESP32DED1FCA,Tangara_1FCA,,Offline
3,D29ESP32DED14D6,Tangara_14D6,,Offline
4,D29ESP32DED2FF6,Tangara_2FF6,,Offline
5,D29ESP32DED2492,Tangara_2492,,Offline
6,D29TTGOT7D4D7A,Tangara_4D7A,,Offline
7,D29TTGOT7D48C6,CanAirIO_48C6,3.446018 -76.541824,Online
8,D29TTGOT7D532E,CanAirIO_532E,3.446018 -76.541824,Online


In [4]:
# Period time when sensors were validated on DAGMA stations
validation_period = catalog.load('parameters')['validation_period']
validation_period

2022-06-01 15:44:24,690 - kedro.io.data_catalog - INFO - Loading data from `parameters` (MemoryDataSet)...


{'start_datetime': '2022-04-01T00:00:00',
 'end_datetime': '2022-04-30T23:59:59'}

In [5]:
# Get Period Time
def get_period_time(start_datetime, end_datetime):
    start_datetime = int(datetime.fromisoformat(start_datetime).timestamp() * 1000)
    end_datetime = int(datetime.fromisoformat(end_datetime).timestamp() * 1000)
    return f"time >= {start_datetime}ms and time <= {end_datetime}ms"

In [6]:
# Get SQL Query Sensors
def get_sql_query_sensors(tangaras):
    sql_query = ""
    period_time = get_period_time(validation_period['start_datetime'], validation_period['end_datetime'])
    #period_time = "time >= now() - 1h and time <= now()"
    for mac in tangaras['MAC'].to_list():
        sql_query += "SELECT \"name\", last(\"pm25\") "\
                    "FROM \"fixed_stations_01\" WHERE "\
                    f"(\"name\" = '{mac}') AND "\
                    f"{period_time} " \
                    "GROUP BY time(30s) fill(none); "
    return sql_query[:-2]

In [7]:
# SQL Query by Sensor
sql_query = get_sql_query_sensors(tangaras)
#sql_query

In [8]:
# Request to InfluxDB API REST
def request_to_influxdb(sql_query):
    endpoint = "http://influxdb.canair.io:8086/query"
    database = "canairio"
    parameters = {
        'db': database,
        'q': sql_query,
        'epoch': 'ms'
    }
    return requests.get(endpoint, params=parameters)

In [9]:
# InfluxDB API REST Request
influxdb_api_request = request_to_influxdb(sql_query)
influxdb_api_request

<Response [200]>

In [10]:
# Get Data Frame Sensors
def get_df_sensors(tangaras, influxdb_api_request):
    df_sensors = []

    result_list = influxdb_api_request.json()['results']
    result_list = [value for value in result_list if 'series' in value]
    df_influxdb_sensors = pd.json_normalize(result_list, record_path=['series', 'values']).sort_values(by=[0])
    
    for index, row in tangaras.iterrows():
        df_sensor = df_influxdb_sensors.loc[df_influxdb_sensors[1] == row['MAC']].reset_index(drop=True)[[0, 2]] # Warning
        if not df_sensor.empty:
            df_sensor.rename(columns={0: 'Datetime', 2: row['Label_ID']}, inplace=True)
            df_sensor.set_index('Datetime', inplace=True)
            df_sensors.append(df_sensor)
    
    df_sensors = df_sensors[0].join(df_sensors[1:]).reset_index()

    tz = timezone(timedelta(hours=-5))
    df_sensors['Datetime'] = df_sensors['Datetime'].apply(lambda x: datetime.fromtimestamp(x / 1000, tz=tz).isoformat())

    df_sensors[df_sensors.columns.to_list()[1:]] = df_sensors[df_sensors.columns.to_list()[1:]].astype('Int64')
    
    return df_sensors

In [11]:
# Data Frame Sensors
df_sensors = get_df_sensors(tangaras, influxdb_api_request)
df_sensors.head()

Unnamed: 0,Datetime,Tangara_36FA,Tangara_1CE2,Tangara_1FCA,Tangara_14D6,Tangara_2FF6,Tangara_2492,Tangara_4D7A,CanAirIO_48C6,CanAirIO_532E
0,2022-04-01T00:00:00-05:00,6,6.0,6,5.0,6,,1,3.0,
1,2022-04-01T00:01:00-05:00,6,6.0,5,5.0,6,,1,3.0,
2,2022-04-01T00:01:30-05:00,6,,6,,6,,1,2.0,
3,2022-04-01T00:02:00-05:00,5,6.0,6,6.0,6,,1,,
4,2022-04-01T00:02:30-05:00,6,6.0,6,5.0,6,,1,3.0,


In [12]:
# Save df_sensors into Catalog
catalog.save('raw_data_sensors', df_sensors)

2022-06-01 15:44:40,302 - kedro.io.data_catalog - INFO - Saving data to `raw_data_sensors` (CSVDataSet)...
