Data source: [Open-Meteo](https://open-meteo.com/) \
Location: `Ho Chi Minh City` (lat: 10.823099, long: 106.629664) \
Date: `01/10/2022` to `30/09/2024`

# 1. Data Collection
## Install and import libraries

In [1]:
!pip install openmeteo-requests
!pip install requests-cache retry-requests

Collecting openmeteo-requests
  Downloading openmeteo_requests-1.3.0-py3-none-any.whl.metadata (9.7 kB)
Collecting openmeteo-sdk>=1.4.0 (from openmeteo-requests)
  Downloading openmeteo_sdk-1.17.0-py3-none-any.whl.metadata (934 bytes)
Collecting flatbuffers>=24.0.0 (from openmeteo-sdk>=1.4.0->openmeteo-requests)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Downloading openmeteo_requests-1.3.0-py3-none-any.whl (6.0 kB)
Downloading openmeteo_sdk-1.17.0-py3-none-any.whl (7.6 kB)
Downloading flatbuffers-24.3.25-py2.py3-none-any.whl (26 kB)
Installing collected packages: flatbuffers, openmeteo-sdk, openmeteo-requests
Successfully installed flatbuffers-24.3.25 openmeteo-requests-1.3.0 openmeteo-sdk-1.17.0
Collecting retry-requests
  Downloading retry_requests-2.0.0-py3-none-any.whl.metadata (2.6 kB)
Collecting attrs<22.0,>=21.2 (from requests-cache)
  Downloading attrs-21.4.0-py2.py3-none-any.whl.metadata (9.8 kB)
Collecting cattrs<2.0,>=1.8 (from requests-cach

In [2]:
from pathlib import Path

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

## Usage

### Weather

In [10]:
# Build the Open-Meteo client used for the weather request:
# a cached HTTP session (expire_after = -1) wrapped with up to 5 retries
# and a 0.2 backoff factor.
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [11]:
# Request the hourly weather archive for the configured coordinates and dates.
# Keep the order of the "hourly" names in sync with the positional indices
# used when the response is unpacked.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 10.823099,
    "longitude": 106.629664,
    "start_date": "2022-10-01",
    "end_date": "2024-09-30",
    "hourly": [
        "temperature_2m",
        "relative_humidity_2m",
        "dew_point_2m",
        "apparent_temperature",
        "precipitation",
        "cloud_cover",
        "vapour_pressure_deficit",
        "wind_speed_10m",
        "wind_direction_10m",
        "weather_code",
    ],
}
responses = openmeteo.weather_api(url, params=params)

In [12]:
# Process hourly data
#
# The hourly variables are read by position, and each index must match the
# position of that name in the "hourly" list of the request:
#   0 temperature_2m, 1 relative_humidity_2m, 2 dew_point_2m,
#   3 apparent_temperature, 4 precipitation, 5 cloud_cover,
#   6 vapour_pressure_deficit, 7 wind_speed_10m, 8 wind_direction_10m,
#   9 weather_code
# The indices previously used for positions 5-9 were shifted, which stored
# values under the wrong column names.
response = responses[0]  # only one location was requested
hourly = response.Hourly()

hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(4).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(5).ValuesAsNumpy()
hourly_vapour_pressure_deficit = hourly.Variables(6).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(7).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(8).ValuesAsNumpy()
hourly_weather_code = hourly.Variables(9).ValuesAsNumpy()

hourly_data = {
    # UTC timestamps from Time() up to, but not including, TimeEnd(),
    # stepped by the interval (in seconds) reported in the response.
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "temperature_2m": hourly_temperature_2m,
    "relative_humidity_2m": hourly_relative_humidity_2m,
    "dew_point_2m": hourly_dew_point_2m,
    "apparent_temperature": hourly_apparent_temperature,
    "precipitation": hourly_precipitation,
    "cloud_cover": hourly_cloud_cover,
    "vapour_pressure_deficit": hourly_vapour_pressure_deficit,
    "wind_speed_10m": hourly_wind_speed_10m,
    "wind_direction_10m": hourly_wind_direction_10m,
    "weather_code": hourly_weather_code,
}

hourly_dataframe = pd.DataFrame(data=hourly_data)

In [13]:
# Save data to a CSV file
# Create the output directory first: to_csv raises OSError when the parent
# directory does not exist (e.g. on a fresh checkout).
weather_csv_path = Path('data/hcmc_weather_data.csv')
weather_csv_path.parent.mkdir(parents=True, exist_ok=True)
hourly_dataframe.to_csv(weather_csv_path, index=False)

### Air Quality

In [14]:
# Build the Open-Meteo client used for the air-quality request:
# a cached HTTP session (expire_after = 3600) wrapped with up to 5 retries
# and a 0.2 backoff factor.
cache_session = requests_cache.CachedSession(".cache", expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [15]:
# Request the hourly air-quality series for the same coordinates and dates.
# Keep the order of the "hourly" names in sync with the positional indices
# used when the response is unpacked.
url = "https://air-quality-api.open-meteo.com/v1/air-quality"
params = {
    "latitude": 10.823099,
    "longitude": 106.629664,
    "hourly": [
        "pm10",
        "pm2_5",
        "carbon_monoxide",
        "nitrogen_dioxide",
        "sulphur_dioxide",
        "ozone",
        "us_aqi",
    ],
    "start_date": "2022-10-01",
    "end_date": "2024-09-30",
}
responses = openmeteo.weather_api(url, params=params)

In [16]:
# Unpack the hourly air-quality response into a DataFrame.
# Variables are read by position; indices 0-6 follow the order of the
# "hourly" names in the request (pm10 ... us_aqi).
response = responses[0]
hourly = response.Hourly()

hourly_pm10 = hourly.Variables(0).ValuesAsNumpy()
hourly_pm2_5 = hourly.Variables(1).ValuesAsNumpy()
hourly_carbon_monoxide = hourly.Variables(2).ValuesAsNumpy()
hourly_nitrogen_dioxide = hourly.Variables(3).ValuesAsNumpy()
hourly_sulphur_dioxide = hourly.Variables(4).ValuesAsNumpy()
hourly_ozone = hourly.Variables(5).ValuesAsNumpy()
hourly_us_aqi = hourly.Variables(6).ValuesAsNumpy()

hourly_data = {
    # UTC timestamps from Time() up to, but not including, TimeEnd(),
    # stepped by the interval (in seconds) reported in the response.
    "date": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    ),
    "pm10": hourly_pm10,
    "pm2_5": hourly_pm2_5,
    "carbon_monoxide": hourly_carbon_monoxide,
    "nitrogen_dioxide": hourly_nitrogen_dioxide,
    "sulphur_dioxide": hourly_sulphur_dioxide,
    "ozone": hourly_ozone,
    "us_aqi": hourly_us_aqi,
}

hourly_dataframe = pd.DataFrame(data=hourly_data)

In [19]:
# Save data to a CSV file
# Create the output directory first: to_csv raises OSError when the parent
# directory does not exist (e.g. on a fresh checkout).
air_quality_csv_path = Path('data/hcmc_air_quality_data.csv')
air_quality_csv_path.parent.mkdir(parents=True, exist_ok=True)
hourly_dataframe.to_csv(air_quality_csv_path, index=False)