In [1]:
import os

from dotenv import load_dotenv

# Build the '.env' location explicitly: two directory levels above the
# resolved working directory, then 'standalone/.env'
from pathlib import Path  # Python 3.6+ only
# Load the .env variables; the value returned by load_dotenv is discarded
_ = load_dotenv(dotenv_path=f"{Path().resolve().parents[1]}/standalone/.env")

# import_ipynb is imported before `utils` so that the helper notebook
# (utils.ipynb) can be imported like a regular module
import import_ipynb
from utils import to_timestamp, df_data_sensors, to_be_checked, is_corr_ok, df_to_csv, df_from_csv

importing Jupyter notebook from utils.ipynb
importing Jupyter notebook from aqi_epa_pm25.ipynb
PM2.5: 35.9, AQI: 102
PM2.5: 35.9, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 35.5, Max: 55.4
AQI: 102, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 101, Max: 150


## Temperature Raw Data

In [2]:
# Read the Tangara sensor catalogue from its CSV file
# (dtindex=False is forwarded to the helper unchanged)
df_tangaras = df_from_csv("tangaras.csv", dtindex=False)

# Report how many sensors were loaded
print(f"Total Tangara Sensors: {len(df_tangaras)}")

# Preview the first five rows
df_tangaras.head(n=5)

Total Tangara Sensors: 13


Unnamed: 0,ID,GEOHASH,MAC,GEOLOCATION,LATITUDE,LONGITUDE
0,TANGARA_2BBA,d29e6b4,D29ESP32DE02BBA,3.3844757080078125 -76.51634216308594,3.384476,-76.516342
1,TANGARA_260A,d29edyj,D29ESP32DE1260A,3.4613800048828125 -76.51222229003906,3.46138,-76.512222
2,TANGARA_4B1A,d29esj8,D29ESP32DE94B1A,3.4586334228515625 -76.46415710449219,3.458633,-76.464157
3,TANGARA_1EE6,d29edh3,D29ESP32DED1EE6,3.4517669677734375 -76.55067443847656,3.451767,-76.550674
4,TANGARA_2B42,d29e6pg,D29ESP32DED2B42,3.4270477294921875 -76.54792785644531,3.427048,-76.547928


In [3]:
# Analysis time window, read from the environment.
# Both values are Date Time ISO 8601 Format, TZ='America/Bogota' -05:00.
# NOTE(review): an unset variable yields None, which is passed straight to
# to_timestamp - confirm the helper handles that case.
START_ISO8601_DATETIME = os.getenv("START_ISO8601_DATETIME")
END_ISO8601_DATETIME = os.getenv("END_ISO8601_DATETIME")

# Convert each bound from the same variable that is printed below, so the
# displayed ISO string and its timestamp always come from a single lookup
start_timestamp = to_timestamp(START_ISO8601_DATETIME)
end_timestamp = to_timestamp(END_ISO8601_DATETIME)

print(f'Since: {START_ISO8601_DATETIME} -> {start_timestamp}, Until: {END_ISO8601_DATETIME} -> {end_timestamp}')

Since: 2023-10-05T19:17:08-05:00 -> 1696551428000, Until: 2023-10-05T20:17:08-05:00 -> 1696555028000


In [4]:
# Build the temperature data frame ('tmp' measurement) for the listed
# Tangara sensors over the selected time window
df_temp_raw = df_data_sensors(
    df_tangaras,
    start_timestamp,
    end_timestamp,
    'tmp',
)
# Preview the first five rows of the raw frame
df_temp_raw.head(n=5)

Unnamed: 0_level_0,TANGARA_2BBA,TANGARA_260A,TANGARA_4B1A,TANGARA_1EE6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_1712,TANGARA_48C6,TANGARA_532E,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-10-05 19:17:00-05:00,-147.2,30.81,30.23,,,30.84,31.43,31.19,29.01,,29.2,,
2023-10-05 19:17:30-05:00,-147.2,30.82,30.2,30.45,28.6,30.84,31.43,31.19,29.0,30.6,,31.93,31.39
2023-10-05 19:18:00-05:00,-147.2,30.81,30.2,30.45,28.64,30.82,31.45,31.22,29.0,30.6,29.1,31.94,31.4
2023-10-05 19:18:30-05:00,-147.2,30.81,30.21,30.45,28.64,30.82,31.45,31.25,28.98,30.6,29.1,31.91,31.4
2023-10-05 19:19:00-05:00,-147.2,30.81,30.23,30.45,28.61,30.82,31.45,31.27,28.97,30.6,29.1,31.91,31.42


## Descriptive Statistics

In [5]:
# Per-sensor summary statistics of the raw temperature readings
temp_raw_stats = df_temp_raw.describe()
temp_raw_stats

Unnamed: 0,TANGARA_2BBA,TANGARA_260A,TANGARA_4B1A,TANGARA_1EE6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_1712,TANGARA_48C6,TANGARA_532E,TANGARA_F1AE,TANGARA_06BE
count,111.0,108.0,107.0,107.0,109.0,107.0,108.0,106.0,108.0,103.0,107.0,110.0,107.0
mean,-147.2,30.548981,29.840187,30.413738,28.662018,30.738505,31.101852,31.021415,28.391852,30.656311,29.085047,31.838727,31.275701
std,2.855061e-14,0.300451,0.312561,0.094615,0.111975,0.09316,0.238837,0.337993,0.359122,0.1506,0.06414,0.077686,0.158971
min,-147.2,29.67,29.25,30.08,28.49,30.54,30.66,30.45,27.92,30.1,29.0,31.7,30.92
25%,-147.2,30.505,29.61,30.39,28.6,30.685,30.92,30.605,28.12,30.5,29.0,31.77,31.1
50%,-147.2,30.6,29.83,30.44,28.64,30.74,31.11,31.045,28.235,30.7,29.1,31.84,31.35
75%,-147.2,30.81,30.17,30.475,28.68,30.81,31.29,31.33,28.75,30.8,29.1,31.9,31.4
max,-147.2,30.84,30.27,30.57,29.03,30.91,31.49,31.46,29.01,30.9,29.2,32.0,31.45


## Missing Data

In [6]:
# Threshold (in percent) handed to to_be_checked for every sensor
threshold = 90
print(f'Threshold: {threshold}%')

# Evaluate each sensor column on its own and report only the failing ones
for id_tangara_sensor in df_temp_raw.columns:
    # Double brackets keep the selection a one-column DataFrame
    check_result = to_be_checked(df_temp_raw[[id_tangara_sensor]], threshold)
    is_ok, data_percent, missing_data_percent = check_result
    if is_ok:
        continue
    print(f"Tangara Sensor: {id_tangara_sensor}, Data: {data_percent}%, Missing: {missing_data_percent}%, To be checked")

Threshold: 90%
Tangara Sensor: TANGARA_260A, Data: 89%, Missing: 11%, To be checked
Tangara Sensor: TANGARA_4B1A, Data: 88%, Missing: 12%, To be checked
Tangara Sensor: TANGARA_1EE6, Data: 88%, Missing: 12%, To be checked
Tangara Sensor: TANGARA_2E9A, Data: 88%, Missing: 12%, To be checked
Tangara Sensor: TANGARA_2FF6, Data: 89%, Missing: 11%, To be checked
Tangara Sensor: TANGARA_307A, Data: 88%, Missing: 12%, To be checked
Tangara Sensor: TANGARA_1712, Data: 89%, Missing: 11%, To be checked
Tangara Sensor: TANGARA_48C6, Data: 85%, Missing: 15%, To be checked
Tangara Sensor: TANGARA_532E, Data: 88%, Missing: 12%, To be checked
Tangara Sensor: TANGARA_06BE, Data: 88%, Missing: 12%, To be checked


## Data Correlation

In [7]:
# Data Correlation Threshold: a correlation coefficient, not a percentage,
# so it is printed without a '%' sign
threshold = 0.9
print(f'Threshold: {threshold}')

ID_REFE_TANGARA_SENSOR='TANGARA_06BE'
ID_TARG_TANGARA_SENSOR='TANGARA_2FF6'
# Reference Tangara Sensor (single-bracket selection of one column)
df_reference_sensor = df_temp_raw[ID_REFE_TANGARA_SENSOR]
# Target Tangara Sensor (single-bracket selection of one column)
df_target_sensor = df_temp_raw[ID_TARG_TANGARA_SENSOR]

# Report the pair only when is_corr_ok flags it against the threshold
# NOTE(review): the recorded run reported a correlation of exactly 0.0 for
# this pair - confirm how is_corr_ok treats missing values before trusting it
is_ok, corr = is_corr_ok(df_reference_sensor, df_target_sensor, threshold)
if not is_ok:
    print(f"Reference Tangara Sensor: {ID_REFE_TANGARA_SENSOR}, Target Tangara Sensor: {ID_TARG_TANGARA_SENSOR}, Correlation: {corr}, To be checked")

# Disabled alternative: compare the reference sensor against every sensor
# for id_tangara_sensor in df_temp_raw.columns:
#     # Target Tangara Sensor
#     df_target_sensor = df_temp_raw[id_tangara_sensor]
#     # To be checked
#     is_ok, corr = is_corr_ok(df_reference_sensor, df_target_sensor, threshold)
#     if not is_ok:
#         print(f"Reference Tangara Sensor: {ID_REFE_TANGARA_SENSOR}, Target Tangara Sensor: {id_tangara_sensor}, Correlation: {corr}, To be checked")


Threshold: 0.9%
Reference Tangara Sensor: TANGARA_06BE, Target Tangara Sensor: TANGARA_2FF6, Correlation: 0.0, To be checked


In [8]:
# Save the raw Temperature Data Frame Sensors into the CSV file "temp_raw.csv"
# NOTE(review): the directory the helper writes to is not visible here - confirm in utils
df_to_csv(df_temp_raw, "temp_raw.csv")