In [1]:
import os
import math
from dotenv import load_dotenv

# Explicitly providing path to '.env'
from pathlib import Path  # Python 3.6+ only
# The file is expected at <two levels above the working directory>/standalone/.env.
# Build the location with pathlib's '/' operator rather than string formatting so
# the separators are handled by pathlib itself.
env_file = Path().resolve().parents[1] / "standalone" / ".env"
# Load .env variables (the boolean result is deliberately discarded)
_ = load_dotenv(dotenv_path=env_file)

import import_ipynb
from utils import to_timestamp, df_data_sensors, to_be_checked, is_corr_ok, df_to_csv, df_from_csv

importing Jupyter notebook from utils.ipynb
importing Jupyter notebook from aqi_epa_pm25.ipynb
PM2.5: 35.9, AQI: 102
PM2.5: 35.9, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 35.5, Max: 55.4
AQI: 102, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 101, Max: 150


## Humidity Raw Data

In [2]:
# Load Tangaras
# Read the Tangara sensor catalogue from 'tangaras.csv' through the project helper.
# NOTE(review): dtindex=False presumably disables datetime-index parsing — confirm in utils.
df_tangaras = df_from_csv("tangaras.csv", dtindex=False)

# Report how many sensors (rows) were loaded
print(f"Total Tangara Sensors: {len(df_tangaras)}")

# Preview the first rows of the catalogue
df_tangaras.head()

Total Tangara Sensors: 15


Unnamed: 0,ID,GEOHASH,MAC,GEOLOCATION,LATITUDE,LONGITUDE
0,TANGARA_2BBA,d29e6b4,D29ESP32DE02BBA,3.3844757080078125 -76.51634216308594,3.384476,-76.516342
1,TANGARA_25CE,d29e4r0,D29ESP32DE125CE,3.3789825439453125 -76.54106140136719,3.378983,-76.541061
2,TANGARA_260A,d29edyj,D29ESP32DE1260A,3.4613800048828125 -76.51222229003906,3.46138,-76.512222
3,TANGARA_4B1A,d29esj8,D29ESP32DE94B1A,3.4586334228515625 -76.46415710449219,3.458633,-76.464157
4,TANGARA_14D6,d29eg4k,D29ESP32DED14D6,3.4847259521484375 -76.50260925292969,3.484726,-76.502609


In [3]:
# Analysis window and grouping interval, all read from environment variables.

# Start Date Time ISO 8601 Format, TZ='America/Bogota' -05:00
START_ISO8601_DATETIME = os.getenv("START_ISO8601_DATETIME", None)
start_timestamp = to_timestamp(START_ISO8601_DATETIME)
# End Date Time ISO 8601 Format, TZ='America/Bogota' -05:00
END_ISO8601_DATETIME = os.getenv("END_ISO8601_DATETIME", None)
# Convert the constant read above instead of querying the environment a second
# time, so the value printed below is always the one that was converted.
end_timestamp = to_timestamp(END_ISO8601_DATETIME)

# GROUP BY TIME
GROUP_BY_TIME = os.getenv("GROUP_BY_TIME", None)

# Echo the effective configuration so the recorded output documents the run
print(f'Since: {START_ISO8601_DATETIME} -> {start_timestamp}, Until: {END_ISO8601_DATETIME} -> {end_timestamp}')
print(f"Group by Time: {GROUP_BY_TIME}")

Since: 2023-10-01T22:58:24-05:00 -> 1696219104000, Until: 2023-10-06T22:58:24-05:00 -> 1696651104000
Group by Time: 30s


In [4]:
# Humidity Data Frame Sensors
# Build the raw humidity frame via the project helper, passing the sensor catalogue,
# the start/end timestamps, the 'hum' measurement key and the grouping interval.
# NOTE(review): presumably this queries every sensor in df_tangaras for the window —
# confirm against utils.df_data_sensors.
df_hum_raw = df_data_sensors(df_tangaras, start_timestamp, end_timestamp, 'hum', GROUP_BY_TIME)
# Preview the first rows
df_hum_raw.head()
# Tip: evaluate df_hum_raw.shape here instead to check the row/column count.

Unnamed: 0_level_0,TANGARA_2BBA,TANGARA_25CE,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_1EE6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_1712,TANGARA_48C6,TANGARA_532E,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2023-10-01 22:58:00-05:00,0.0,,,66.34,,,,,,,,,,,61.61
2023-10-01 22:58:30-05:00,0.0,,58.78,66.43,56.11,51.86,62.91,59.23,69.81,62.05,68.38,99.9,92.9,,61.58
2023-10-01 22:59:00-05:00,0.0,,58.68,66.27,56.13,51.85,62.97,58.93,69.83,62.03,68.19,99.9,92.9,,61.51
2023-10-01 22:59:30-05:00,0.0,,,66.3,56.27,51.87,63.02,59.18,69.88,61.99,68.45,99.9,93.0,,61.51
2023-10-01 23:00:00-05:00,0.0,,58.64,66.46,56.55,51.85,63.0,59.09,69.84,62.03,68.39,99.9,93.0,,61.57


## Descriptive Statistics

In [5]:
# Describe Data
# Per-column summary statistics of the raw humidity frame
# (count, mean, std, min, quartiles and max, as shown in the recorded output).
df_hum_raw.describe()

Unnamed: 0,TANGARA_2BBA,TANGARA_25CE,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_1EE6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_1712,TANGARA_48C6,TANGARA_532E,TANGARA_F1AE,TANGARA_06BE
count,5960.0,1379.0,5827.0,5781.0,2798.0,5546.0,5534.0,5796.0,5535.0,5802.0,5625.0,5713.0,5911.0,4408.0,5919.0
mean,0.0,0.0,54.198999,59.619564,53.445693,54.08463,61.082293,56.628554,63.964258,53.006915,61.222114,99.099037,90.224125,50.955588,58.957092
std,0.0,0.0,8.037717,9.854417,9.065946,4.741756,7.723507,5.999349,11.37194,11.442778,8.930315,1.652354,8.793044,7.879376,10.918251
min,0.0,0.0,41.16,42.69,38.62,45.64,41.11,44.11,45.98,32.05,44.2,92.0,69.5,40.34,42.32
25%,0.0,0.0,45.97,50.9,45.315,50.2825,55.1825,51.6,52.905,41.295,52.38,99.3,82.0,43.9075,47.925
50%,0.0,0.0,54.17,60.5,52.58,53.83,62.31,57.655,63.37,56.11,63.5,99.9,92.9,48.545,58.05
75%,0.0,0.0,60.48,68.34,62.73,56.99,66.4975,62.31,74.31,62.8075,69.34,99.9,98.0,56.3125,68.15
max,0.0,0.0,69.83,76.61,66.4,61.86,78.27,65.81,82.86,69.03,74.29,99.9,99.9,66.82,79.26


## Missing Data

In [6]:
# Missing-data threshold, printed as a percentage
threshold = 90
print(f'Threshold: {threshold}%')

# Walk every sensor column and report only those that fail the check
for id_tangara_sensor in df_hum_raw.columns:
    # Single-column DataFrame (not a Series) for the sensor under test
    df_hum_sensor = df_hum_raw.loc[:, [id_tangara_sensor]]
    is_ok, data_percent, missing_data_percent = to_be_checked(
        df_hum_sensor,
        threshold,
    )
    # Sensors that pass need no report
    if is_ok:
        continue
    print(f"Tangara Sensor: {id_tangara_sensor}, Data: {data_percent}%, Missing: {missing_data_percent}%, To be checked")

Threshold: 90%
Tangara Sensor: TANGARA_2BBA, Data: 41%, Missing: 59%, To be checked
Tangara Sensor: TANGARA_25CE, Data: 10%, Missing: 90%, To be checked
Tangara Sensor: TANGARA_260A, Data: 40%, Missing: 60%, To be checked
Tangara Sensor: TANGARA_4B1A, Data: 40%, Missing: 60%, To be checked
Tangara Sensor: TANGARA_14D6, Data: 19%, Missing: 81%, To be checked
Tangara Sensor: TANGARA_1EE6, Data: 39%, Missing: 61%, To be checked
Tangara Sensor: TANGARA_2B42, Data: 38%, Missing: 62%, To be checked
Tangara Sensor: TANGARA_2E9A, Data: 40%, Missing: 60%, To be checked
Tangara Sensor: TANGARA_2FF6, Data: 38%, Missing: 62%, To be checked
Tangara Sensor: TANGARA_307A, Data: 40%, Missing: 60%, To be checked
Tangara Sensor: TANGARA_1712, Data: 39%, Missing: 61%, To be checked
Tangara Sensor: TANGARA_48C6, Data: 40%, Missing: 60%, To be checked
Tangara Sensor: TANGARA_532E, Data: 41%, Missing: 59%, To be checked
Tangara Sensor: TANGARA_F1AE, Data: 31%, Missing: 69%, To be checked
Tangara Sensor: TAN

## Data Correlation

In [7]:
# Data Correlation Threshold
# This value is handed unchanged to is_corr_ok as a correlation coefficient, so it
# is printed without a '%' suffix (0.9 is not "0.9 percent").
threshold = 0.9
print(f'Threshold: {threshold}')

# Sensor pair under comparison
ID_REFE_TANGARA_SENSOR='TANGARA_06BE'
ID_TARG_TANGARA_SENSOR='TANGARA_2FF6'
# Reference Tangara Sensor (single-label selection of one column of the raw frame)
df_reference_sensor = df_hum_raw[ID_REFE_TANGARA_SENSOR]
# Target Tangara Sensor
df_target_sensor = df_hum_raw[ID_TARG_TANGARA_SENSOR]

# To be checked
# Skip the comparison when the reference column's standard deviation is NaN.
if not math.isnan(df_reference_sensor.std()):
    is_ok, corr = is_corr_ok(df_reference_sensor, df_target_sensor, threshold)
    # Only pairs that fail the check are reported
    if not is_ok:
        print(f"Reference Tangara Sensor: {ID_REFE_TANGARA_SENSOR}, Target Tangara Sensor: {ID_TARG_TANGARA_SENSOR}, Correlation: {corr}, To be checked")

# For each Tangara sensor
# for id_tangara_sensor in df_hum_raw.columns:
#     # Target Tangara Sensor
#     df_target_sensor = df_hum_raw[id_tangara_sensor]
#     # To be checked
#     is_ok, corr = is_corr_ok(df_reference_sensor, df_target_sensor, threshold)
#     if not is_ok:
#         print(f"Reference Tangara Sensor: {ID_REFE_TANGARA_SENSOR}, Target Tangara Sensor: {id_tangara_sensor}, Correlation: {corr}, To be checked")


Threshold: 0.9%
Reference Tangara Sensor: TANGARA_06BE, Target Tangara Sensor: TANGARA_2FF6, Correlation: 0.0, To be checked


In [8]:
# Save Humidity Data Frame Sensors into CSV file
# Persist the raw (unfiltered) humidity frame as 'hum_raw.csv' via the project helper.
df_to_csv(df_hum_raw, "hum_raw.csv")