In [30]:
from kedro.config import ConfigLoader
from kedro.framework.project import settings
from datetime import datetime
from influxdb_client import InfluxDBClient

### Getting Started with InfluxDB and Pandas
[Getting Started with InfluxDB and Pandas](https://www.influxdata.com/blog/getting-started-with-influxdb-and-pandas/)
### influxdb-client-python
[influxdb-client-python](https://github.com/influxdata/influxdb-client-python)
### Getting Started with Python and InfluxDB v2.0
[Getting Started with Python and InfluxDB v2.0](https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/)
### How to ingest DataFrame with default tags
[How to ingest DataFrame with default tags](https://github.com/influxdata/influxdb-client-python/blob/master/examples/ingest_dataframe_default_tags.py)
### How to ingest large DataFrame by splitting into chunks
[How to ingest large DataFrame by splitting into chunks](https://github.com/influxdata/influxdb-client-python/blob/master/examples/ingest_large_dataframe.py)

influx delete --bucket Tangara --start 1970-01-01T00:00:00Z --stop $(date +"%Y-%m-%dT%H:%M:%SZ") --predicate '_measurement="PM25"'
https://docs.influxdata.com/influxdb/cloud/reference/cli/influx/v1/dbrp/create/


In [33]:
# Load Parameters
parameters = catalog.load('parameters')

# Load Credentials
conf_path = str(context.project_path / settings.CONF_SOURCE)
conf_loader = ConfigLoader(conf_source=conf_path, env='local')
credentials = conf_loader.get('credentials*', 'credentials*/**')

print('Parameters:', parameters)
print('Credentials:', credentials)

2022-09-20 00:13:26,366 - kedro.io.data_catalog - INFO - Loading data from `parameters` (MemoryDataSet)...
Parameters: {'nowcast_datetime': '2022-09-06T13:35:00', 'influxdb_version': '2.x'}
Credentials: {'influxdb': {'url': 'http://localhost:8086', 'token': 'C5nfbXbV7sM0eVbpmpeO6sojZ8YNuETeZFzl9xFkq5FQCBtV1sbLpGCHGFuuu6JQQJwx-fHvHCUNccmSbXrUyA==', 'org': 'Tangara', 'bucket': 'Tangara', 'username': 'tangara', 'password': 'sebaxtian', 'database': 'Tangara'}}


In [3]:
# Function to Ingesting each Tangara Sensor DataFrame to InfluxDB
def ingesting_influxdb(data_sensors, measurement_name):
    # Check InfluxDB Version
    if parameters['influxdb_version'] == '2.x':
        # Secrets
        # You can generate an API token from the "API Tokens Tab" in the UI
        url = credentials['influxdb']['url']
        token = credentials['influxdb']['token']
        org = credentials['influxdb']['org']
        bucket = credentials['influxdb']['bucket']
    elif parameters['influxdb_version'] == '1.8':
        # Secrets
        url = credentials['influxdb']['url']
        username = credentials['influxdb']['username']
        password = credentials['influxdb']['password']
        token = f'{username}:{password}'
        database = credentials['influxdb']['database']
        retention_policy = 'autogen'
        bucket = f'{database}/{retention_policy}'
        org = credentials['influxdb']['org']
    # Update Datatype
    data_sensors[data_sensors.columns.to_list()[1:]] = data_sensors[data_sensors.columns.to_list()[1:]].astype('float64')
    print('data_sensors.dtypes: ', data_sensors.dtypes)
    # Ingesting each Tangara Sensor DataFrame to InfluxDB
    for column in data_sensors.columns[1:]:
        # For each Data Sensor
        tangara_X = data_sensors[['DATETIME', column]].copy()
        tangara_X['FIELD'] = column
        """
        Ingest DataFrame
        """
        #print()
        #print(f"=== Ingesting DataFrame {column} via batching API ===")
        #print()
        startTime = datetime.now()

        with InfluxDBClient(url=url, token=token, org=org) as client:
            """
            Use batching API
            """
            with client.write_api() as write_api:
                write_api.write(bucket=bucket, record=tangara_X,
                                data_frame_timestamp_column='DATETIME',
                                data_frame_tag_columns=['FIELD'],
                                data_frame_measurement_name=measurement_name,
                                data_frame_timestamp_timezone='America/Bogota')
                #print()
                #print(f"Wait to finishing ingesting DataFrame {column}...")
                #print()

        #print()
        #print(f'Import finished in: {datetime.now() - startTime}')
        #print()
        """
        Close client
        """
        client.close()


---

# Extraction - PM25 Raw

In [4]:
# Kedro Catalog
pm25_raw = catalog.load('pm25_raw')
# Data Frame Sensors
pm25_raw.head()

2022-09-19 23:52:19,566 - kedro.io.data_catalog - INFO - Loading data from `pm25_raw` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-18T23:52:00,8.0,19.0,,10.0,8.0,11.0,7.0,1.0,3.0,5.0,8.0,4.0,11.0
1,2022-09-18T23:52:30,8.0,16.0,,10.0,8.0,11.0,8.0,1.0,4.0,3.0,10.0,4.0,9.0
2,2022-09-18T23:53:00,8.0,12.0,,10.0,8.0,11.0,7.0,1.0,1.0,2.0,10.0,4.0,11.0
3,2022-09-18T23:53:30,7.0,15.0,,9.0,8.0,11.0,11.0,1.0,4.0,5.0,10.0,4.0,11.0
4,2022-09-18T23:54:00,8.0,21.0,,9.0,7.0,11.0,9.0,1.0,4.0,3.0,11.0,4.0,11.0


In [5]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(pm25_raw, 'PM25_RAW')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - PM25 Clean

In [6]:
# Kedro Catalog
pm25_clean = catalog.load('pm25_clean')
# Data Frame Sensors
pm25_clean.head()

2022-09-19 23:52:22,550 - kedro.io.data_catalog - INFO - Loading data from `pm25_clean` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-18T23:52:00,8.0,19.0,,10.0,8.0,11.0,7.0,1.0,3.0,5.0,8.0,4.0,11.0
1,2022-09-18T23:52:30,8.0,16.0,,10.0,8.0,11.0,8.0,1.0,4.0,3.0,10.0,4.0,9.0
2,2022-09-18T23:53:00,8.0,12.0,,10.0,8.0,11.0,7.0,1.0,1.0,2.0,10.0,4.0,11.0
3,2022-09-18T23:53:30,7.0,15.0,,9.0,8.0,11.0,11.0,1.0,4.0,5.0,10.0,4.0,11.0
4,2022-09-18T23:54:00,8.0,21.0,,9.0,7.0,11.0,9.0,1.0,4.0,3.0,11.0,4.0,11.0


In [7]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(pm25_clean, 'PM25_CLEAN')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - PM25 Last Hour

In [8]:
# Kedro Catalog
pm25_last_hour = catalog.load('pm25_last_hour')
# Data Frame Sensors
pm25_last_hour.head()

2022-09-19 23:52:25,565 - kedro.io.data_catalog - INFO - Loading data from `pm25_last_hour` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 00:51:30,6.076271,9.819444,,10.908257,7.789474,8.95614,4.517544,1.016949,3.40708,7.610169,6.136752,5.915254,8.285714
1,2022-09-19 01:51:30,3.958333,12.556818,,7.521008,7.333333,4.85,2.854701,1.141667,5.288288,8.591667,4.2,6.7,4.757143
2,2022-09-19 02:51:30,4.193277,13.776119,,9.837607,7.775862,7.092437,3.5,1.683761,6.196429,11.008403,5.245763,8.008403,5.681818
3,2022-09-19 03:51:30,4.948718,7.666667,,7.589744,8.534483,6.710526,4.094828,1.043103,3.791304,7.435897,6.12931,5.846154,5.245455
4,2022-09-19 04:51:30,5.2,8.798319,,8.372881,8.09322,8.589744,5.184874,1.016667,3.785714,6.840336,9.137255,5.516667,7.610619


In [9]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(pm25_last_hour, 'PM25_LAST_HOUR')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - PM25 Last 8 Hours

In [10]:
# Kedro Catalog
pm25_last_8h = catalog.load('pm25_last_8h')
# Data Frame Sensors
pm25_last_8h.head()

2022-09-19 23:52:27,300 - kedro.io.data_catalog - INFO - Loading data from `pm25_last_8h` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 07:51:30,6.152106,10.86936,,9.224244,8.305021,8.188831,5.408124,1.200403,4.600115,8.610642,10.391619,6.442252,7.994413
1,2022-09-19 15:51:30,6.798655,6.827996,5.719333,7.945797,7.253204,7.493729,6.249252,1.60975,6.044574,10.14989,9.115361,5.593139,8.782873
2,2022-09-19 23:51:30,4.257309,6.629342,4.778865,5.162093,4.221576,4.74734,3.847083,0.94409,3.809786,7.130846,6.217149,5.065084,5.244806


In [11]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(pm25_last_8h, 'PM25_LAST_8H')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - PM25 Last 12 Hours

In [12]:
# Kedro Catalog
pm25_last_12h = catalog.load('pm25_last_12h')
# Data Frame Sensors
pm25_last_12h.head()

2022-09-19 23:52:29,019 - kedro.io.data_catalog - INFO - Loading data from `pm25_last_12h` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 11:51:30,7.90201,10.902404,9.988214,10.010243,9.810661,9.946839,6.980244,1.542801,6.434268,11.989756,11.344565,7.558759,10.289156
1,2022-09-19 23:51:30,3.570037,5.315395,4.025134,4.877847,3.375873,3.673094,3.356062,0.960027,3.202049,5.271163,5.804854,4.005359,4.392238


In [13]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(pm25_last_12h, 'PM25_LAST_12H')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - PM25 Last 24 Hours

In [14]:
# Kedro Catalog
pm25_last_24h = catalog.load('pm25_last_24h')
# Data Frame Sensors
pm25_last_24h.head()

2022-09-19 23:52:30,721 - kedro.io.data_catalog - INFO - Loading data from `pm25_last_24h` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 23:51:30,5.736023,8.108899,5.21775,7.444045,6.593267,6.809966,5.168153,1.251414,4.818158,8.63046,8.57471,5.704811,7.340697


In [15]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(pm25_last_24h, 'PM25_LAST_24H')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


---

# Extraction - AQI Instant

In [16]:
# Kedro Catalog
aqi_instant = catalog.load('aqi_instant')
# Data Frame Sensors
aqi_instant.head()

2022-09-19 23:52:32,517 - kedro.io.data_catalog - INFO - Loading data from `aqi_instant` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-18T23:52:00,34.0,66.0,,42.0,34.0,46.0,30.0,5.0,13.0,21.0,34.0,17.0,46.0
1,2022-09-18T23:52:30,34.0,60.0,,42.0,34.0,46.0,34.0,5.0,17.0,13.0,42.0,17.0,38.0
2,2022-09-18T23:53:00,34.0,50.0,,42.0,34.0,46.0,30.0,5.0,5.0,9.0,42.0,17.0,46.0
3,2022-09-18T23:53:30,30.0,58.0,,38.0,34.0,46.0,46.0,5.0,17.0,21.0,42.0,17.0,46.0
4,2022-09-18T23:54:00,34.0,70.0,,38.0,30.0,46.0,38.0,5.0,17.0,13.0,46.0,17.0,46.0


In [17]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(aqi_instant, 'AQI_INSTANT')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - AQI Last Hour

In [18]:
# Kedro Catalog
aqi_last_hour = catalog.load('aqi_last_hour')
# Data Frame Sensors
aqi_last_hour.head()

2022-09-19 23:52:35,725 - kedro.io.data_catalog - INFO - Loading data from `aqi_last_hour` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 00:51:30,26,41,,46,33,38,19,5,15,32,26,25.0,35
1,2022-09-19 01:51:30,17,53,,32,31,20,13,5,23,36,18,28.0,20
2,2022-09-19 02:51:30,18,55,,41,33,30,15,8,26,46,22,34.0,24
3,2022-09-19 03:51:30,21,33,,32,36,28,18,5,16,31,26,25.0,22
4,2022-09-19 04:51:30,22,37,,36,34,36,22,5,16,29,38,23.0,32


In [19]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(aqi_last_hour, 'AQI_LAST_HOUR')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - AQI Last 8 Hours

In [20]:
# Kedro Catalog
aqi_last_8h = catalog.load('aqi_last_8h')
# Data Frame Sensors
aqi_last_8h.head()

2022-09-19 23:52:37,391 - kedro.io.data_catalog - INFO - Loading data from `aqi_last_8h` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 07:51:30,26,46,,39,35,35,23,5,20,36,44,27,34
1,2022-09-19 15:51:30,29,29,24.0,33,31,32,26,7,25,43,38,24,37
2,2022-09-19 23:51:30,18,28,20.0,22,18,20,16,4,16,30,26,22,22


In [21]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(aqi_last_8h, 'AQI_LAST_8H')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - AQI Last 12 Hours

In [22]:
# Kedro Catalog
aqi_last_12h = catalog.load('aqi_last_12h')
# Data Frame Sensors
aqi_last_12h.head()

2022-09-19 23:52:39,124 - kedro.io.data_catalog - INFO - Loading data from `aqi_last_12h` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 11:51:30,33,46,42,42,41,42,30,7,27,50,48,32,43
1,2022-09-19 23:51:30,16,23,17,21,15,16,15,5,14,23,25,17,19


In [23]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(aqi_last_12h, 'AQI_LAST_12H')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object


# Extraction - AQI Last 24 Hours

In [24]:
# Kedro Catalog
aqi_last_24h = catalog.load('aqi_last_24h')
# Data Frame Sensors
aqi_last_24h.head()

2022-09-19 23:52:40,883 - kedro.io.data_catalog - INFO - Loading data from `aqi_last_24h` (CSVDataSet)...


Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-19 23:51:30,24,34,22,31,28,29,22,6,20,36,36,24,31


In [25]:
# Ingesting each Tangara Sensor DataFrame to InfluxDB
ingesting_influxdb(aqi_last_24h, 'AQI_LAST_24H')

data_sensors.dtypes:  DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object
