In [1]:
import pandas as pd
from datetime import datetime, timezone, timedelta
import requests
from io import StringIO
import geohash2

## Extraction - Tangara Stations

In [2]:
# Get start and nowcast Timestamp
def get_start_nowcast_timestamp(nowcast_datetime, start_datetime = None):
    """Convert the NowCast window boundaries to epoch timestamps in milliseconds.

    Parameters
    ----------
    nowcast_datetime : str
        ISO 8601 date-time marking the end of the window. run.sh is expected
        to produce it with
        NOWCAST_DATETIME=$(TZ='America/Bogota' date '+%Y-%m-%dT%H:%M:%S').
    start_datetime : str, optional
        ISO 8601 date-time marking the start of the window. When omitted (or
        empty), the window starts 24 hours before ``nowcast_datetime``.

    Returns
    -------
    list of int
        ``[start_timestamp, nowcast_timestamp]`` in milliseconds since epoch.

    Raises
    ------
    ValueError
        If a value is not accepted by ``datetime.fromisoformat``.
    """
    # Values without a UTC offset are pinned to the fixed UTC-5 offset used by
    # the rest of this notebook. Otherwise datetime.timestamp() would resolve
    # them in the host's local timezone and the result would change from
    # machine to machine.
    default_tz = timezone(timedelta(hours=-5))

    def _parse(value):
        parsed = datetime.fromisoformat(value)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=default_tz)
        return parsed

    # NowCast DateTime
    nowcast_datetime = _parse(nowcast_datetime)
    print('-------------nowcast_datetime-------------->>>> ', nowcast_datetime)

    # Start DateTime
    if not start_datetime:
        # Start DateTime last 24 hours
        start_datetime = nowcast_datetime - timedelta(hours=24)
    else:
        # Start DateTime
        start_datetime = _parse(start_datetime)
    print('-------------start_datetime-------------->>>> ', start_datetime)

    # Current DateTime
    nowcast_timestamp = int(nowcast_datetime.timestamp() * 1000)
    # Start DateTime
    start_timestamp = int(start_datetime.timestamp() * 1000)

    print('-------------nowcast_timestamp-------------->>>> ', nowcast_timestamp)
    print('-------------start_timestamp-------------->>>> ', start_timestamp)

    return [start_timestamp, nowcast_timestamp]

In [3]:
# Time Period to Calculate NowCast AQI
#
# AQICN: https://aqicn.org/faq/2015-03-15/air-quality-nowcast-a-beginners-guide/
# AirNow: http://airnow.gov/
# NowCast: https://en.wikipedia.org/wiki/NowCast_(air_quality_index)
#
#
# Pipeline Parameters
parameters = catalog.load('parameters')
# Current DateTime (required)
nowcast_datetime = parameters['nowcast_datetime']
# Start DateTime, optional: a missing key is treated like an explicit None,
# which makes get_start_nowcast_timestamp fall back to the last 24 hours.
start_datetime = parameters.get('start_datetime')

# Get start and nowcast Timestamp
start_timestamp, nowcast_timestamp = get_start_nowcast_timestamp(nowcast_datetime, start_datetime)

# Note: nowcast_datetime / start_datetime printed here are the raw parameter
# values (start_datetime may be None); the timestamps are the resolved ones.
print(
    ' nowcast_datetime:', nowcast_datetime, '\n',
    'nowcast_timestamp:', nowcast_timestamp, '\n',
    'start_datetime:', start_datetime, '\n',
    'start_timestamp:', start_timestamp
)

2022-09-29 18:59:25,156 - kedro.io.data_catalog - INFO - Loading data from `parameters` (MemoryDataSet)...
-------------nowcast_datetime-------------->>>>  2022-09-15 00:00:00-05:00
-------------start_datetime-------------->>>>  2022-09-14 00:00:00-05:00
-------------nowcast_timestamp-------------->>>>  1663218000000
-------------start_timestamp-------------->>>>  1663131600000
 nowcast_datetime: 2022-09-15T00:00:00-05:00 
 nowcast_timestamp: 1663218000000 
 start_datetime: None 
 start_timestamp: 1663131600000


In [4]:
# Request to InfluxDB API REST
def request_to_influxdb(sql_query, timeout=60):
    """Run a query against the CanAirIO InfluxDB HTTP API and return the response.

    Parameters
    ----------
    sql_query : str
        Query text sent as the ``q`` parameter (several statements may be
        separated by ``;``).
    timeout : float or tuple, optional
        Seconds passed to ``requests.get`` as its timeout. Without it a stalled
        server would block the notebook indefinitely.

    Returns
    -------
    requests.Response
        Response whose ``text`` is CSV (requested through the ``Accept``
        header) with times expressed as epoch milliseconds.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    """
    endpoint = "http://influxdb.canair.io:8086/query"
    database = "canairio"
    query_params = {
        'db': database,
        'q': sql_query,
        'epoch': 'ms'
    }
    # To get response as CSV text
    headers = {'Accept': 'application/csv'}
    # GET Request
    response = requests.get(endpoint, params=query_params, headers=headers, timeout=timeout)
    # Fail here with the HTTP status instead of letting an error body reach
    # the CSV parser downstream.
    response.raise_for_status()
    return response

In [5]:
# Get SQL Query Tangara Stations
def get_sql_query_tangara_stations(start_datetime, end_datetime, georegion='d29'):
    """Build the query that lists the distinct geohashes reported per station.

    Parameters
    ----------
    start_datetime : int
        Start of the period, epoch milliseconds.
    end_datetime : int
        End of the period (inclusive), epoch milliseconds.
    georegion : str, optional
        Value matched against the ``geo3`` tag. Defaults to ``'d29'``, the
        region that was previously hard-coded.

    Returns
    -------
    str
        A single statement over ``fixed_stations_01`` grouped by ``name``.
    """
    # Period DateTime
    period_time = f"time >= {start_datetime}ms AND time <= {end_datetime}ms"
    # SQL
    return (
        'SELECT DISTINCT(geo) AS "geohash" '
        'FROM "fixed_stations_01" WHERE '
        f"(\"geo3\" = '{georegion}') AND "
        f"{period_time} "
        'GROUP BY "name";'
    )

In [6]:
# Get SQL Query Data Measurement
def get_sql_query_measurement(tangaras, measurement, start_datetime, end_datetime):
    """Build one ``last(<measurement>)`` statement per station, joined by ``"; "``.

    Parameters
    ----------
    tangaras : pandas.DataFrame
        Stations table; only its ``MAC`` column is read.
    measurement : str
        Field name to query (for example ``'pm25'``).
    start_datetime : int
        Start of the period, epoch milliseconds.
    end_datetime : int
        End of the period (inclusive), epoch milliseconds.

    Returns
    -------
    str
        The statements separated by ``"; "`` with no trailing separator; an
        empty string when ``tangaras`` has no rows.
    """
    # Period DateTime
    time_window = f"time >= {start_datetime}ms AND time <= {end_datetime}ms"
    # One statement per station, sampled in 30 second buckets
    statements = [
        f'SELECT "name", last("{measurement}") '
        'FROM "fixed_stations_01" WHERE '
        f"(\"name\" = '{station_mac}') AND {time_window} "
        "GROUP BY time(30s) fill(none)"
        for station_mac in tangaras['MAC'].to_list()
    ]
    return "; ".join(statements)

In [7]:
# Get Data Frame Measurement
def get_df_measurement(tangaras, measurement, start_datetime, end_datetime):
    """Fetch one measurement for every station and lay it out one column per station.

    Parameters
    ----------
    tangaras : pandas.DataFrame
        Stations table; the ``MAC`` and ``ID`` columns are read.
    measurement : str
        Field name to query (for example ``'pm25'``).
    start_datetime : int
        Start of the period, epoch milliseconds.
    end_datetime : int
        End of the period, epoch milliseconds.

    Returns
    -------
    pandas.DataFrame
        A ``DATETIME`` column (ISO 8601 strings at UTC-5) followed by one
        float64 column per station that returned data, named after the
        station ``ID``. Rows cover every timestamp reported by at least one
        station; stations without a sample at a timestamp hold NaN. When no
        station returned data, an empty frame with only ``DATETIME``.
    """
    value_column = measurement.upper()

    # SQL Query Data Sensors
    sql_query = get_sql_query_measurement(tangaras, measurement, start_datetime, end_datetime)
    # InfluxDB API REST Request
    influxdb_api_request = request_to_influxdb(sql_query)
    df_influxdb_api_sensors = pd.read_csv(StringIO(influxdb_api_request.text), sep=",")

    # Remove/Add Columns
    # NOTE(review): 'name.1' is presumably the queried "name" value, renamed by
    # pandas because the CSV already has a 'name' header — confirm against the API.
    # rename() returns a new frame, so nothing is mutated through a column slice.
    df_influxdb_api_sensors = df_influxdb_api_sensors[['time', 'name.1', 'last']].rename(
        columns={'time': 'DATETIME', 'name.1': 'MAC', 'last': value_column}
    )

    # Truncate Response: one frame per station, indexed by timestamp
    df_sensors = []
    for _, row in tangaras.iterrows():
        is_station = df_influxdb_api_sensors['MAC'] == row['MAC']
        df_sensor = df_influxdb_api_sensors.loc[is_station, ['DATETIME', value_column]]
        if df_sensor.empty:
            continue
        df_sensors.append(
            df_sensor.rename(columns={value_column: row['ID']}).set_index('DATETIME')
        )

    # No station reported anything in the period: return an empty table
    # instead of failing with IndexError on df_sensors[0].
    if not df_sensors:
        return pd.DataFrame(columns=['DATETIME'])

    # Join Data Frames
    # Outer join on the timestamp. The previous DataFrame.join(list) call is a
    # left join onto the first station, which silently dropped every timestamp
    # that station had not reported.
    df_sensors = pd.concat(df_sensors, axis=1, join='outer').sort_index().reset_index()

    # Update datetime
    tz = timezone(timedelta(hours=-5))
    df_sensors['DATETIME'] = df_sensors['DATETIME'].apply(lambda x: datetime.fromtimestamp(int(x) / 1000, tz=tz).isoformat())

    # Update dtype
    station_columns = df_sensors.columns.to_list()[1:]
    df_sensors[station_columns] = df_sensors[station_columns].astype('float64')

    return df_sensors

In [8]:
# Get Data Frame Tangara Stations
def get_tangara_stations(start_datetime, end_datetime):
    """Query the stations seen in the period and build the stations table.

    Parameters
    ----------
    start_datetime : int
        Start of the period, epoch milliseconds.
    end_datetime : int
        End of the period, epoch milliseconds.

    Returns
    -------
    pandas.DataFrame
        Columns ``DATETIME`` (time this function ran, ISO 8601 at UTC-5),
        ``ID`` (``TANGARA_`` plus the last four characters of the tag),
        ``MAC``, ``GEOHASH``, ``GEOREGION`` (first three geohash characters),
        ``GEOLOCATION`` (``"<lat> <lon>"`` with 8 decimals), and float64
        ``LATITUDE`` / ``LONGITUDE``.
    """
    # SQL Query Tangaras
    sql_query = get_sql_query_tangara_stations(start_datetime, end_datetime)
    # InfluxDB API REST Request
    influxdb_api_request = request_to_influxdb(sql_query)
    tangara_stations = pd.read_csv(StringIO(influxdb_api_request.text), sep=",")

    # Remove/Add Columns
    # Work on an explicit copy: assigning new columns to a column slice of the
    # parsed frame triggers pandas' SettingWithCopyWarning.
    # NOTE(review): DISTINCT(geo) grouped by name can presumably return several
    # rows for a station that reported more than one geohash — verify whether
    # duplicated MAC/ID rows need to be collapsed here.
    tangara_stations = tangara_stations[['tags', 'geohash']].copy()
    # 'tags' is expected to look like "<key>=<MAC>"; keep what follows the first '='
    tangara_stations['MAC'] = tangara_stations['tags'].apply(lambda x: x.split('=')[1])
    tangara_stations['GEOLOCATION'] = tangara_stations['geohash'].apply(lambda x: " ".join(f'{value:.8f}' for value in list(geohash2.decode_exactly(x)[0:2])))
    tangara_stations['LATITUDE'] = tangara_stations['GEOLOCATION'].apply(lambda x: x.split(' ')[0])
    tangara_stations['LONGITUDE'] = tangara_stations['GEOLOCATION'].apply(lambda x: x.split(' ')[1])
    tangara_stations['tags'] = tangara_stations['tags'].apply(lambda x: f"TANGARA_{x[-4:]}")
    tangara_stations = tangara_stations.rename(columns={'tags': 'ID', 'geohash': 'GEOHASH'})
    tangara_stations['GEOREGION'] = tangara_stations['GEOHASH'].apply(lambda x: x[:3])

    # Date time when query is executed
    tz = timezone(timedelta(hours=-5))
    tangara_stations['DATETIME'] = datetime.now(tz=tz).isoformat()

    # Reorder Columns and update dtype; astype returns a new frame, so the
    # caller receives an independent table rather than a flagged slice.
    ordered_columns = ['DATETIME', 'ID', 'MAC', 'GEOHASH', 'GEOREGION', 'GEOLOCATION', 'LATITUDE', 'LONGITUDE']
    return tangara_stations[ordered_columns].astype({'LATITUDE': 'float64', 'LONGITUDE': 'float64'})

In [9]:
# Data Frame Tangara Stations
tangara_stations = get_tangara_stations(start_timestamp, nowcast_timestamp)
tangara_stations.head()

Unnamed: 0,DATETIME,ID,MAC,GEOHASH,GEOREGION,GEOLOCATION,LATITUDE,LONGITUDE
0,2022-09-29T18:59:31.060440-05:00,TANGARA_2BBA,D29ESP32DE02BBA,d29e6b4,d29,3.38447571 -76.51634216,3.384476,-76.516342
1,2022-09-29T18:59:31.060440-05:00,TANGARA_14D6,D29ESP32DED14D6,d29dfx4,d29,3.33503723 -76.52732849,3.335037,-76.527328
2,2022-09-29T18:59:31.060440-05:00,TANGARA_1CE2,D29ESP32DED1CE2,d29e4cv,d29,3.35014343 -76.51222229,3.350143,-76.512222
3,2022-09-29T18:59:31.060440-05:00,TANGARA_1FCA,D29ESP32DED1FCA,d29e48s,d29,3.34327698 -76.52458191,3.343277,-76.524582
4,2022-09-29T18:59:31.060440-05:00,TANGARA_2492,D29ESP32DED2492,d29e64g,d29,3.39958191 -76.54792786,3.399582,-76.547928


In [9]:
# Check Data Types
tangara_stations.dtypes

DATETIME        object
ID              object
MAC             object
GEOHASH         object
GEOLOCATION     object
LATITUDE       float64
LONGITUDE      float64
dtype: object

In [10]:
# Describe Data
tangara_stations.describe()

Unnamed: 0,LATITUDE,LONGITUDE
count,13.0,13.0
mean,3.408878,-76.527328
std,0.046642,0.016191
min,3.335037,-76.547928
25%,3.384476,-76.542435
50%,3.399582,-76.527328
75%,3.446274,-76.516342
max,3.484726,-76.49437


In [11]:
# Save Data Frame into Catalog
catalog.save('tangara_stations', tangara_stations)

2022-09-21 22:26:07,956 - kedro.io.data_catalog - INFO - Saving data to `tangara_stations` (CSVDataSet)...


## Extraction - PM25 Raw

In [12]:
# SQL Query Data Measurement
# Inspection only: get_df_measurement builds the same query internally, so
# this variable is not consumed by the extraction cell that follows.
sql_query = get_sql_query_measurement(tangara_stations, 'pm25', start_timestamp, nowcast_timestamp)
# Uncomment to review the generated statements:
# print(sql_query)

In [13]:
# Data Frame PM25 Raw
pm25_raw = get_df_measurement(tangara_stations, 'pm25', start_timestamp, nowcast_timestamp)
pm25_raw.head()

Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-05T13:35:00-05:00,21.0,20.0,24.0,26.0,25.0,26.0,16.0,7.0,21.0,34.0,26.0,25.0,25.0
1,2022-09-05T13:35:30-05:00,20.0,20.0,23.0,25.0,24.0,25.0,15.0,7.0,19.0,42.0,26.0,25.0,26.0
2,2022-09-05T13:36:00-05:00,20.0,20.0,24.0,166.0,23.0,26.0,16.0,8.0,18.0,39.0,27.0,25.0,24.0
3,2022-09-05T13:36:30-05:00,21.0,20.0,24.0,64.0,25.0,24.0,16.0,8.0,19.0,36.0,29.0,25.0,24.0
4,2022-09-05T13:37:00-05:00,22.0,22.0,24.0,49.0,26.0,26.0,14.0,7.0,19.0,39.0,29.0,26.0,23.0


In [14]:
# Check Data Types
pm25_raw.dtypes

DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object

In [15]:
# Describe Data
pm25_raw.describe()

Unnamed: 0,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
count,2838.0,2747.0,2796.0,842.0,2779.0,2801.0,2807.0,2763.0,2692.0,2755.0,2770.0,2739.0,2793.0
mean,21.417195,24.116491,25.281474,38.68171,27.880533,26.601928,16.470609,5.638075,16.235884,32.560436,28.472924,21.766338,25.704261
std,5.746803,5.982577,6.98128,59.070996,11.151621,5.989012,6.511554,2.852142,8.509186,20.8694,12.251082,15.567362,5.954451
min,10.0,12.0,12.0,23.0,13.0,14.0,0.0,2.0,3.0,9.0,11.0,9.0,15.0
25%,17.0,20.0,21.0,27.0,23.0,22.0,11.0,3.0,8.0,15.0,17.0,11.0,21.0
50%,21.0,23.0,25.0,29.0,26.0,26.0,17.0,5.0,15.0,29.0,29.0,20.0,25.0
75%,25.0,28.0,30.0,31.0,31.0,30.0,21.0,8.0,24.0,48.0,39.0,31.0,30.0
max,44.0,72.0,56.0,1110.0,182.0,42.0,60.0,12.0,86.0,583.0,170.0,449.0,51.0


In [16]:
# Save Data Frame into Catalog
catalog.save('pm25_raw', pm25_raw)

2022-09-21 22:26:11,089 - kedro.io.data_catalog - INFO - Saving data to `pm25_raw` (CSVDataSet)...


---

## Extraction - Temp Raw

In [17]:
# SQL Query Data Measurement
# Inspection only: get_df_measurement builds the same query internally, so
# this variable is not consumed by the extraction cell that follows.
sql_query = get_sql_query_measurement(tangara_stations, 'tmp', start_timestamp, nowcast_timestamp)
# Uncomment to review the generated statements:
# print(sql_query)

In [18]:
# Data Frame Temp Raw
temp_raw = get_df_measurement(tangara_stations, 'tmp', start_timestamp, nowcast_timestamp)
temp_raw.head()

Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-05T13:35:00-05:00,33.74,30.6,32.23,30.87,33.07,32.86,33.2,33.05,0.0,33.3,35.3,35.35,33.22
1,2022-09-05T13:35:30-05:00,33.8,30.55,32.26,30.86,33.05,32.86,33.2,33.05,0.0,33.29,35.28,35.37,33.23
2,2022-09-05T13:36:00-05:00,33.89,30.59,32.31,30.9,33.07,32.89,33.3,33.04,0.0,33.34,35.29,35.39,33.24
3,2022-09-05T13:36:30-05:00,33.99,30.61,32.45,30.91,33.08,32.98,33.3,33.04,0.0,33.39,35.28,35.36,33.23
4,2022-09-05T13:37:00-05:00,34.1,30.61,32.45,30.92,33.05,33.01,33.4,33.07,0.0,33.37,35.25,35.43,33.29


In [19]:
# Check Data Types
temp_raw.dtypes

DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object

In [20]:
# Describe Data
temp_raw.describe()

Unnamed: 0,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
count,2838.0,2747.0,2796.0,842.0,2779.0,2801.0,2807.0,2763.0,2692.0,2755.0,2770.0,2739.0,2793.0
mean,30.240236,28.9302,28.751513,30.536401,31.692918,30.167072,30.200819,31.922617,0.0,29.610613,31.464451,31.459273,30.686889
std,4.05327,1.802855,3.049627,1.327256,2.339783,2.727612,3.287388,1.388718,0.0,3.818666,2.73984,4.073062,2.328387
min,24.6,25.94,24.54,28.09,28.25,26.57,25.1,29.89,0.0,25.12,27.73,26.63,26.93
25%,26.66,27.22,26.06,29.43,30.01,27.43,27.5,30.71,0.0,26.49,28.75,28.12,28.56
50%,29.17,29.03,28.095,30.96,31.82,29.51,29.1,31.77,0.0,27.69,31.465,29.4,30.6
75%,34.34,30.34,31.39,31.72,32.87,32.75,33.1,33.07,0.0,33.475,34.25,35.49,32.78
max,38.29,32.13,34.89,32.7,37.78,35.27,37.9,34.47,0.0,37.28,36.25,39.65,34.79


In [21]:
# Save Data Frame into Catalog
catalog.save('temp_raw', temp_raw)

2022-09-21 22:26:14,289 - kedro.io.data_catalog - INFO - Saving data to `temp_raw` (CSVDataSet)...


---

## Extraction - Hum Raw

In [22]:
# SQL Query Data Measurement
# Inspection only: get_df_measurement builds the same query internally, so
# this variable is not consumed by the extraction cell that follows.
sql_query = get_sql_query_measurement(tangara_stations, 'hum', start_timestamp, nowcast_timestamp)
# Uncomment to review the generated statements:
# print(sql_query)

In [23]:
# Data Frame Hum Raw
hum_raw = get_df_measurement(tangara_stations, 'hum', start_timestamp, nowcast_timestamp)
hum_raw.head()

Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-05T13:35:00-05:00,0.0,50.66,46.74,46.71,43.14,45.35,51.1,45.6,0.0,38.22,38.62,37.81,42.98
1,2022-09-05T13:35:30-05:00,0.0,50.72,46.89,46.96,43.14,45.33,51.0,45.89,0.0,38.19,38.63,37.75,42.86
2,2022-09-05T13:36:00-05:00,0.0,50.74,46.68,47.85,43.36,45.23,50.9,45.75,0.0,38.23,38.74,37.74,43.54
3,2022-09-05T13:36:30-05:00,0.0,50.67,47.09,47.66,43.58,45.56,50.8,45.75,0.0,38.11,38.7,37.77,40.57
4,2022-09-05T13:37:00-05:00,0.0,50.68,47.09,47.61,43.28,44.93,50.8,45.38,0.0,38.11,38.89,37.76,41.65


In [24]:
# Check Data Types
hum_raw.dtypes

DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object

In [25]:
# Describe Data
hum_raw.describe()

Unnamed: 0,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
count,2838.0,2747.0,2796.0,842.0,2779.0,2801.0,2807.0,2763.0,2692.0,2755.0,2770.0,2739.0,2793.0
mean,0.0,57.19846,57.591388,49.099394,47.671925,53.878322,63.328963,49.962486,0.0,50.10506,48.429809,48.375294,49.312854
std,0.0,5.905023,9.523785,4.939122,4.576806,7.479805,12.036714,3.129195,0.0,9.764171,7.653289,8.894346,6.435545
min,0.0,46.75,39.92,43.46,35.67,41.89,43.1,43.44,0.0,32.76,36.31,32.66,38.87
25%,0.0,53.34,49.1575,45.37,44.99,45.81,51.7,47.045,0.0,38.48,40.7625,37.855,43.03
50%,0.0,56.53,59.82,46.705,48.08,55.65,63.9,50.31,0.0,54.76,46.825,52.74,49.73
75%,0.0,62.85,66.9425,52.335,51.11,61.17,73.5,52.65,0.0,58.51,55.7375,56.01,55.56
max,0.0,66.36,70.64,60.0,56.24,65.81,85.4,54.26,0.0,62.37,60.75,59.32,61.32


In [26]:
# Save Data Frame into Catalog
catalog.save('hum_raw', hum_raw)

2022-09-21 22:26:18,008 - kedro.io.data_catalog - INFO - Saving data to `hum_raw` (CSVDataSet)...


---

## Extraction - CO2 Raw

In [27]:
# SQL Query Data Measurement
# Inspection only: get_df_measurement builds the same query internally, so
# this variable is not consumed by the extraction cell that follows.
sql_query = get_sql_query_measurement(tangara_stations, 'co2', start_timestamp, nowcast_timestamp)
# Uncomment to review the generated statements:
# print(sql_query)

In [28]:
# Data Frame CO2 Raw
co2_raw = get_df_measurement(tangara_stations, 'co2', start_timestamp, nowcast_timestamp)
co2_raw.head()

Unnamed: 0,DATETIME,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
0,2022-09-05T13:35:00-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,449.0,338.0,171.0
1,2022-09-05T13:35:30-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,445.0,345.0,172.0
2,2022-09-05T13:36:00-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,447.0,346.0,172.0
3,2022-09-05T13:36:30-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,451.0,345.0,170.0
4,2022-09-05T13:37:00-05:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,458.0,344.0,169.0


In [29]:
# Check Data Types
co2_raw.dtypes

DATETIME         object
TANGARA_2BBA    float64
TANGARA_14D6    float64
TANGARA_1CE2    float64
TANGARA_1FCA    float64
TANGARA_2492    float64
TANGARA_2FF6    float64
TANGARA_48C6    float64
TANGARA_4D7A    float64
TANGARA_532E    float64
TANGARA_EA06    float64
TANGARA_F1AE    float64
TANGARA_FAC6    float64
TANGARA_06BE    float64
dtype: object

In [30]:
# Describe Data
co2_raw.describe()

Unnamed: 0,TANGARA_2BBA,TANGARA_14D6,TANGARA_1CE2,TANGARA_1FCA,TANGARA_2492,TANGARA_2FF6,TANGARA_48C6,TANGARA_4D7A,TANGARA_532E,TANGARA_EA06,TANGARA_F1AE,TANGARA_FAC6,TANGARA_06BE
count,2838.0,2747.0,2796.0,842.0,2779.0,2801.0,2807.0,2763.0,2692.0,2755.0,2770.0,2739.0,2793.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,398.905054,334.679445,198.188686
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,38.390384,22.251702,21.817437
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,357.0,300.0,55.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,375.0,315.0,185.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,387.0,332.0,199.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,410.0,349.0,210.0
max,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,755.0,443.0,277.0


In [31]:
# Save Data Frame into Catalog
catalog.save('co2_raw', co2_raw)

2022-09-21 22:26:21,316 - kedro.io.data_catalog - INFO - Saving data to `co2_raw` (CSVDataSet)...


---

## Extraction - X Raw