In [1]:
import os
import pandas as pd
from dotenv import load_dotenv

# Explicitly providing path to '.env'
from pathlib import Path  # Python 3.6+ only
# Load .env variables
_ = load_dotenv(dotenv_path=f"{Path().resolve().parents[1]}/standalone/.env")

# with the new api
from importnb import imports
with imports("ipynb"):
    from utils import df_from_csv, df_pm25_to_df_aqi, plot_hist, plot_boxes, plot_lines, df_to_csv

PM2.5: 35.9, AQI: 102
PM2.5: 35.9, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 35.5, Max: 55.4
AQI: 102, Measure Level: MeasureLevels.UNHEALTHY_FOR_SENSITIVE_GROUPS, Range Values: Min: 101, Max: 150


## PM2.5 to AQI

In [2]:
# Load Raw Data
df_pm25_clean = df_from_csv("pm25_clean.csv", datafolder="1_clean")

# Check Data Types
print(df_pm25_clean.dtypes)

# Sorting by Descending
df_pm25_clean = df_pm25_clean.sort_values(by='DATETIME', ascending=False)

df_pm25_clean.head()

TANGARA_260A    float64
TANGARA_4B1A    float64
TANGARA_14D6    float64
TANGARA_2B42    float64
TANGARA_2E9A    float64
TANGARA_2FF6    float64
TANGARA_307A    float64
TANGARA_48C6    float64
TANGARA_F1AE    float64
TANGARA_06BE    float64
dtype: object


Unnamed: 0_level_0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-11-11 23:59:30-05:00,,,,10.0,,,,,8.0,
2023-11-11 23:59:00-05:00,,,,9.0,,,,,7.0,
2023-11-11 23:58:30-05:00,8.0,15.0,,9.0,7.0,7.0,8.0,6.0,8.0,7.0
2023-11-11 23:58:00-05:00,8.0,17.0,,9.0,7.0,6.0,7.0,8.0,8.0,7.0
2023-11-11 23:57:30-05:00,8.0,,,8.0,8.0,6.0,7.0,6.0,8.0,8.0


## Descriptive Statistics

In [3]:
# Describe Data
df_pm25_clean.describe()

Unnamed: 0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
count,5541.0,5427.0,1038.0,4079.0,5467.0,5340.0,5211.0,5323.0,5355.0,5127.0
mean,7.969681,8.674774,9.102119,6.257906,5.95043,6.739326,7.794089,5.600413,7.01718,8.779598
std,4.182716,2.988458,3.405874,4.583022,4.692591,3.42848,2.66822,3.309623,2.961981,4.429641
min,1.0,1.0,3.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
25%,4.0,7.0,6.0,1.0,2.0,5.0,6.0,3.0,5.0,6.0
50%,8.0,9.0,9.0,7.0,5.0,7.0,8.0,5.0,7.0,9.0
75%,10.0,10.0,12.0,9.0,9.0,9.0,9.0,8.0,9.0,11.0
max,21.0,17.0,20.0,22.0,22.0,15.0,14.0,15.0,15.0,21.0


## Group by hour on DATETIME and resample by mean

In [4]:
# Resample mean grouping by hour on DATETIME
df_pm25_clean = df_pm25_clean.resample('H').mean()

# Sorting by Descending
df_pm25_clean = df_pm25_clean.sort_values(by='DATETIME', ascending=False)

df_pm25_clean.head()

Unnamed: 0_level_0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-11-11 23:00:00-05:00,8.677966,9.72973,,7.739496,7.273504,6.512821,8.376068,7.201754,7.930435,8.0
2023-11-11 22:00:00-05:00,9.6,10.859649,,7.741667,5.558333,8.825,8.4,6.247788,8.283333,10.672727
2023-11-11 21:00:00-05:00,6.764706,10.858696,,10.932773,1.775,11.9,7.125,6.843478,7.033333,13.247706
2023-11-11 20:00:00-05:00,3.445378,8.21978,,4.341667,1.941176,8.614035,8.788136,2.191304,3.825,15.833333
2023-11-11 19:00:00-05:00,2.641026,4.956522,,0.890756,1.66087,2.322034,9.327586,2.289474,4.075,3.09009


## Descriptive Statistics

In [5]:
# Describe Data
df_pm25_clean.describe()

Unnamed: 0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
count,48.0,48.0,9.0,36.0,48.0,47.0,48.0,48.0,48.0,48.0
mean,8.081627,8.749661,9.140645,6.833339,6.441095,7.036252,8.091946,5.840353,7.206209,9.363282
std,3.945866,2.183096,3.013046,5.076072,5.095937,3.446962,2.529335,3.093497,2.541497,4.661229
min,2.283333,4.285714,4.598291,0.890756,1.254237,1.233333,2.221239,1.155172,3.05,2.025
25%,4.477083,7.228045,6.82906,1.970168,1.972363,5.548853,6.603279,3.849981,4.874737,6.480276
50%,7.977981,8.769602,8.321739,6.930423,5.185764,6.760684,8.381017,5.536766,7.037853,9.861345
75%,10.471959,10.040701,12.230769,9.55625,9.186203,9.089655,9.460644,7.288325,9.033333,12.170328
max,15.686441,13.304348,12.854701,21.8,21.666667,14.571429,13.666667,14.48,13.356436,21.0


## Movil 24h, Rolling Function in Pandas

In [6]:
# Movil 24h, Rolling Function in Pandas
df_pm25_clean = df_pm25_clean.rolling(window=24, min_periods=1).mean()

df_pm25_clean.head()

Unnamed: 0_level_0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-11-11 23:00:00-05:00,8.677966,9.72973,,7.739496,7.273504,6.512821,8.376068,7.201754,7.930435,8.0
2023-11-11 22:00:00-05:00,9.138983,10.294689,,7.740581,6.415919,7.66891,8.388034,6.724771,8.106884,9.336364
2023-11-11 21:00:00-05:00,8.347557,10.482692,,8.804645,4.868946,9.079274,7.967023,6.76434,7.749034,10.640145
2023-11-11 20:00:00-05:00,7.122013,9.916964,,7.688901,4.137004,8.962964,8.172301,5.621081,6.768025,11.938442
2023-11-11 19:00:00-05:00,6.225815,8.924875,,6.329272,3.641777,7.634778,8.403358,4.95476,6.22942,10.168771


## Descriptive Statistics

In [7]:
# Describe Data
df_pm25_clean.describe()

Unnamed: 0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
count,48.0,48.0,9.0,48.0,48.0,48.0,48.0,48.0,48.0,48.0
mean,7.701292,8.70452,11.055809,6.691385,5.978045,7.099537,8.187322,5.767664,6.840544,9.56726
std,1.168944,0.720353,1.354225,1.119133,1.431495,0.760919,0.522651,0.963816,0.906886,1.150167
min,4.779872,7.354717,9.140645,3.899618,2.711877,5.327207,7.014227,3.479822,4.93033,7.291854
25%,7.565976,8.378434,9.968512,6.042454,5.782836,6.616887,7.745686,5.275655,6.297942,8.623092
50%,8.236856,8.666111,11.303664,7.201408,6.579377,7.081811,8.20399,6.236859,7.036279,9.7105
75%,8.395682,9.194076,12.230769,7.474071,6.901054,7.643311,8.654128,6.465368,7.559326,10.523408
max,9.138983,10.482692,12.552707,8.804645,7.273504,9.079274,8.884253,7.201754,8.106884,11.938442


## Filter just the last 24 hours

In [8]:
# Sorting by Ascending
df_pm25_clean = df_pm25_clean.sort_values(by='DATETIME', ascending=True)

# Filter just the las 24 hours
df_pm25_clean = df_pm25_clean.loc[df_pm25_clean.index >= df_pm25_clean.index[0] + pd.Timedelta(hours=24)]

df_pm25_clean.head()

Unnamed: 0_level_0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-11-11 00:00:00-05:00,8.505181,8.634837,,7.477313,7.084946,7.716681,8.748794,6.482162,7.003648,10.566137
2023-11-11 01:00:00-05:00,8.632525,8.628034,,7.47991,7.05828,7.805789,8.802523,6.573301,7.096198,10.738267
2023-11-11 02:00:00-05:00,8.705585,8.669186,,7.577993,7.034214,7.864001,8.83234,6.650849,7.076095,10.854361
2023-11-11 03:00:00-05:00,8.404236,8.663037,,7.634859,6.85522,7.958451,8.862533,6.603312,6.898887,11.045442
2023-11-11 04:00:00-05:00,8.291821,8.580222,,7.534062,6.869167,7.954938,8.876529,6.543477,6.759798,11.09563


In [9]:
df_pm25_clean.tail()

Unnamed: 0_level_0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-11-11 19:00:00-05:00,6.225815,8.924875,,6.329272,3.641777,7.634778,8.403358,4.95476,6.22942,10.168771
2023-11-11 20:00:00-05:00,7.122013,9.916964,,7.688901,4.137004,8.962964,8.172301,5.621081,6.768025,11.938442
2023-11-11 21:00:00-05:00,8.347557,10.482692,,8.804645,4.868946,9.079274,7.967023,6.76434,7.749034,10.640145
2023-11-11 22:00:00-05:00,9.138983,10.294689,,7.740581,6.415919,7.66891,8.388034,6.724771,8.106884,9.336364
2023-11-11 23:00:00-05:00,8.677966,9.72973,,7.739496,7.273504,6.512821,8.376068,7.201754,7.930435,8.0


## Descriptive Statistics

In [10]:
# Describe Data
df_pm25_clean.describe()

Unnamed: 0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
count,24.0,24.0,0.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,7.159645,8.457819,,6.496631,5.32503,7.234147,8.396565,5.555103,6.270899,9.919835
std,1.449117,0.903951,,1.419047,1.782891,0.997473,0.558938,1.243275,0.949912,1.351983
min,4.779872,7.354717,,3.899618,2.711877,5.327207,7.014227,3.479822,4.93033,7.291854
25%,5.735451,7.72972,,5.176282,3.538065,6.449678,8.159819,4.434336,5.504578,8.963858
50%,7.513038,8.529407,,7.175072,6.077767,7.632901,8.612468,6.020122,6.275102,10.518854
75%,8.361727,8.664574,,7.545045,7.0157,7.818302,8.809977,6.583766,6.925077,10.894485
max,9.138983,10.482692,,8.804645,7.273504,9.079274,8.884253,7.201754,8.106884,11.938442


## PM2.5 to AQI

In [11]:
# AQI DataFrame Sensors
df_aqi = df_pm25_to_df_aqi(df_pm25_clean)
df_aqi = df_aqi.astype('float')
df_aqi.head()

Unnamed: 0_level_0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-11-11 00:00:00-05:00,36.0,36.0,,32.0,30.0,33.0,37.0,28.0,30.0,45.0
2023-11-11 01:00:00-05:00,36.0,36.0,,32.0,30.0,33.0,37.0,28.0,30.0,45.0
2023-11-11 02:00:00-05:00,37.0,37.0,,32.0,30.0,33.0,37.0,28.0,30.0,46.0
2023-11-11 03:00:00-05:00,36.0,37.0,,32.0,29.0,34.0,38.0,28.0,29.0,46.0
2023-11-11 04:00:00-05:00,35.0,36.0,,32.0,29.0,34.0,38.0,28.0,29.0,47.0


## Describe Statistics

In [12]:
# Describe Data
df_aqi.describe()

Unnamed: 0,TANGARA_260A,TANGARA_4B1A,TANGARA_14D6,TANGARA_2B42,TANGARA_2E9A,TANGARA_2FF6,TANGARA_307A,TANGARA_48C6,TANGARA_F1AE,TANGARA_06BE
count,24.0,24.0,0.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,30.291667,35.708333,,27.625,22.708333,30.75,35.666667,23.666667,26.625,41.875
std,6.053954,3.723943,,5.93305,7.392588,4.067367,2.371326,5.2309,3.932169,5.558484
min,20.0,31.0,,17.0,12.0,23.0,30.0,15.0,21.0,31.0
25%,24.75,32.75,,22.25,15.75,27.75,34.75,18.75,23.0,37.75
50%,32.0,36.0,,30.5,25.5,32.0,36.0,25.5,26.5,44.5
75%,35.25,37.0,,32.0,30.0,33.0,37.0,28.0,29.25,46.0
max,38.0,44.0,,37.0,31.0,38.0,38.0,31.0,34.0,50.0


## Histograms

In [13]:
# Plot Histograms
plot_hist(df_aqi) if bool(os.getenv("PLOT_CHARTS", None)) else print("PLOT_CHARTS:", False, "Plot charts were ignored")

PLOT_CHARTS: False Plot charts were ignored


## Boxplots

In [14]:
# Plot Boxplots
plot_boxes(df_aqi) if bool(os.getenv("PLOT_CHARTS", None)) else print("PLOT_CHARTS:", False, "Plot charts were ignored")

PLOT_CHARTS: False Plot charts were ignored


## Lineplots

In [15]:
# Plot Lineplots
plot_lines(df_aqi) if bool(os.getenv("PLOT_CHARTS", None)) else print("PLOT_CHARTS:", False, "Plot charts were ignored")

PLOT_CHARTS: False Plot charts were ignored


In [16]:
# Save AQI Data Frame Sensors into CSV file
df_to_csv(df_aqi, "aqi.csv", datafolder='2_features')