In [1]:
import os
import math
from dotenv import load_dotenv

# Point load_dotenv at an explicit '.env' file rather than a default lookup
from pathlib import Path  # Python 3.6+ only
# The file lives in 'standalone/' under the directory two levels above the
# current working directory (parents[1] of the resolved cwd)
project_root = Path().resolve().parents[1]
_ = load_dotenv(dotenv_path=f"{project_root}/standalone/.env")

# import import_ipynb
# from utils import to_timestamp, df_data_sensors, to_be_checked, is_corr_ok, df_to_csv, df_from_csv

# with the new api
from importnb import imports
with imports("ipynb"):
    from utils import to_timestamp, df_data_sensors, to_be_checked, is_corr_ok, df_to_csv, df_from_csv

PM2.5: 35.9, AQI: 102
PM2.5: 35.9, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 35.5, Max: 55.4
AQI: 102, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 101, Max: 150


## Temperature Raw Data

In [2]:
# Load the Tangara sensor catalogue from "tangaras.csv" through the df_from_csv
# helper. Per the preview below, each row is one sensor with its ID, GEOHASH,
# MAC, GEOLOCATION, LATITUDE and LONGITUDE.
# NOTE(review): dtindex=False presumably disables datetime-index parsing —
# confirm against utils.df_from_csv.
df_tangaras = df_from_csv("tangaras.csv", dtindex=False)

# Report how many sensors were loaded
print(f"Total Tangara Sensors: {len(df_tangaras)}")

# Preview the first rows (last expression, so it is rendered as the cell output)
df_tangaras.head()

Total Tangara Sensors: 11


Unnamed: 0,ID,GEOHASH,MAC,GEOLOCATION,LATITUDE,LONGITUDE
0,TANGARA_4B1A,d29esj8,D29ESP32DE94B1A,3.4586334228515625 -76.46415710449219,3.458633,-76.464157
1,TANGARA_14D6,d29eg4k,D29ESP32DED14D6,3.4847259521484375 -76.50260925292969,3.484726,-76.502609
2,TANGARA_2B42,d29e6pg,D29ESP32DED2B42,3.4270477294921875 -76.54792785644531,3.427048,-76.547928
3,TANGARA_2E9A,d29ed62,D29ESP32DED2E9A,3.4407806396484375 -76.54106140136719,3.440781,-76.541061
4,TANGARA_2FF6,d29e66v,D29ESP32DED2FF6,3.3995819091796875 -76.53419494628906,3.399582,-76.534195


In [3]:
# Start of the analysis window: ISO 8601 string read from the environment
# (TZ='America/Bogota', -05:00), converted with to_timestamp
START_ISO8601_DATETIME = os.getenv("START_ISO8601_DATETIME", None)
start_timestamp = to_timestamp(START_ISO8601_DATETIME)
# End of the analysis window: same format and time zone as the start
END_ISO8601_DATETIME = os.getenv("END_ISO8601_DATETIME", None)
# Convert the value read above instead of querying the environment a second
# time, so the timestamp always corresponds to the string printed below
end_timestamp = to_timestamp(END_ISO8601_DATETIME)

# Grouping interval read from the environment (the recorded run used "30s")
GROUP_BY_TIME = os.getenv("GROUP_BY_TIME", None)

# Echo the effective configuration so the outputs below can be interpreted
print(f'Since: {START_ISO8601_DATETIME} -> {start_timestamp}, Until: {END_ISO8601_DATETIME} -> {end_timestamp}')
print(f"Group by Time: {GROUP_BY_TIME}")

Since: 2023-11-01T23:00:00-05:00 -> 1698897600000, Until: 2023-11-02T00:00:00-05:00 -> 1698901200000
Group by Time: 30s


In [4]:
# Build the raw temperature frame for all sensors listed in df_tangaras over
# the [start_timestamp, end_timestamp] window, grouped by GROUP_BY_TIME.
# Per the preview below, the result is indexed by DATETIME with one column per
# Tangara sensor, and intervals without a reading appear as empty (missing) cells.
# NOTE(review): 'tmp' presumably selects the temperature field — confirm
# against utils.df_data_sensors.
df_temp_raw = df_data_sensors(df_tangaras, start_timestamp, end_timestamp, 'tmp', GROUP_BY_TIME)
# Preview the first rows (rendered as the cell output)
df_temp_raw.head()
#df_temp_raw.shape

Unnamed: 0_level_0,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_1712,TANGARA_48C6,TANGARA_532E,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-11-01 23:00:00-05:00,24.32,,25.19,28.82,24.99,24.82,24.22,24.5,23.4,26.56,24.48
2023-11-01 23:00:30-05:00,24.3,24.55,25.17,28.85,24.95,24.83,24.21,,23.4,26.53,24.48
2023-11-01 23:01:00-05:00,24.32,24.57,25.16,28.85,24.95,24.85,24.22,24.6,23.4,26.53,24.48
2023-11-01 23:01:30-05:00,24.32,24.58,25.16,28.82,24.96,24.83,24.26,24.6,23.3,26.56,24.48
2023-11-01 23:02:00-05:00,24.3,,25.13,28.85,24.93,24.83,24.21,24.5,23.3,26.53,24.51


## Descriptive Statistics

In [5]:
# Per-sensor summary statistics of the raw temperature readings: count of
# non-missing samples, mean, std, min, quartiles and max for every column.
# The 'count' row also gives a quick view of how many samples each sensor has.
df_temp_raw.describe()

Unnamed: 0,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_1712,TANGARA_48C6,TANGARA_532E,TANGARA_F1AE,TANGARA_06BE
count,117.0,115.0,117.0,117.0,117.0,116.0,115.0,111.0,117.0,117.0,117.0
mean,24.12906,24.558435,24.987863,28.906581,24.909744,24.839052,23.992522,24.718018,23.425641,26.403248,24.481624
std,0.113768,0.09869,0.078112,0.061366,0.067049,0.070809,0.235222,0.09165,0.07329,0.093346,0.045731
min,23.95,24.41,24.79,28.79,24.79,24.72,23.53,24.5,23.2,26.28,24.39
25%,24.01,24.51,24.94,28.85,24.85,24.8,23.835,24.65,23.4,26.32,24.44
50%,24.13,24.54,24.96,28.92,24.93,24.83,24.0,24.7,23.4,26.36,24.48
75%,24.23,24.58,25.04,28.95,24.96,24.9,24.22,24.8,23.5,26.52,24.53
max,24.32,24.83,25.19,29.02,25.02,24.97,24.26,24.8,23.6,26.56,24.58


## Missing Data

In [6]:
# Threshold for the missing-data check, expressed as a percentage.
# NOTE(review): presumably the minimum share of available samples a sensor
# must reach — confirm against utils.to_be_checked.
threshold = 90
print(f'Threshold: {threshold}%')

# Evaluate every sensor column independently and report only the failures
for id_tangara_sensor in df_temp_raw.columns:
    # Double brackets select a one-column DataFrame rather than a Series
    df_temp_sensor = df_temp_raw[[id_tangara_sensor]]
    is_ok, data_percent, missing_data_percent = to_be_checked(df_temp_sensor, threshold)
    # Sensors that pass the check produce no output
    if is_ok:
        continue
    print(f"Tangara Sensor: {id_tangara_sensor}, Data: {data_percent}%, Missing: {missing_data_percent}%, To be checked")

Threshold: 90%


  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_percent = round(df_sensor.count()[0] * 100 / total)
  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_percent = round(df_sensor.count()[0] * 100 / total)
  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_percent = round(df_sensor.count()[0] * 100 / total)
  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_percent = round(df_sensor.count()[0] * 100 / total)
  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_percent = round(df_sensor.count()[0] * 100 / total)
  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_percent = round(df_sensor.count()[0] * 100 / total)
  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_percent = round(df_sensor.count()[0] * 100 / total)
  missing_data_percent = round(df_sensor.isna().sum()[0] * 100 / total)
  data_perc

## Data Correlation

In [7]:
# Data correlation threshold. This is a correlation coefficient (a ratio),
# not a percentage, so it is printed without a '%' suffix.
# NOTE(review): presumably the minimum acceptable correlation — confirm
# against utils.is_corr_ok.
threshold = 0.9
print(f'Threshold: {threshold}')

# Sensor pair to compare: the reference sensor and the target under test
ID_REFE_TANGARA_SENSOR='TANGARA_06BE'
ID_TARG_TANGARA_SENSOR='TANGARA_2FF6'
# Reference Tangara Sensor (single column selected from the raw frame)
df_reference_sensor = df_temp_raw[ID_REFE_TANGARA_SENSOR]
# Target Tangara Sensor (single column selected from the raw frame)
df_target_sensor = df_temp_raw[ID_TARG_TANGARA_SENSOR]

# Only run the check when the reference standard deviation is a number;
# a NaN standard deviation means the reference column cannot be compared
if not math.isnan(df_reference_sensor.std()):
    is_ok, corr = is_corr_ok(df_reference_sensor, df_target_sensor, threshold)
    # Report the pair only when the check fails
    if not is_ok:
        print(f"Reference Tangara Sensor: {ID_REFE_TANGARA_SENSOR}, Target Tangara Sensor: {ID_TARG_TANGARA_SENSOR}, Correlation: {corr}, To be checked")

# For each Tangara sensor
# for id_tangara_sensor in df_temp_raw.columns:
#     # Target Tangara Sensor
#     df_target_sensor = df_temp_raw[id_tangara_sensor]
#     # To be checked
#     is_ok, corr = is_corr_ok(df_reference_sensor, df_target_sensor, threshold)
#     if not is_ok:
#         print(f"Reference Tangara Sensor: {ID_REFE_TANGARA_SENSOR}, Target Tangara Sensor: {id_tangara_sensor}, Correlation: {corr}, To be checked")


Threshold: 0.9%
Reference Tangara Sensor: TANGARA_06BE, Target Tangara Sensor: TANGARA_2FF6, Correlation: 0.0, To be checked


In [8]:
# Persist the raw temperature frame (one column per sensor) to "temp_raw.csv"
# through the df_to_csv helper, so it can be reloaded without querying the
# sensors again.
# NOTE(review): output directory and CSV options are decided inside
# utils.df_to_csv — confirm there.
df_to_csv(df_temp_raw, "temp_raw.csv")