# Extract AOD for each OpenAQ measurement

## Import libraries

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
import requests
from pprint import pp
from tqdm.notebook import tqdm
import requests
import time
from zkyhaxpy import io_tools, pd_tools, gis_tools
from datetime import datetime, timedelta
import os
import json
import ast

## Prepare GeoDataFrame of OpenAQ data

In [None]:
# Build the OpenAQ GeoDataFrame once and cache it as a GeoPackage; later runs
# load the cached file instead of re-parsing the raw CSV exports.
# Delete the cached file to force a rebuild.
path_gdf_openaq = '../data/gdf_openaq_chiangmai_by_location.gpkg'
if not os.path.exists(path_gdf_openaq):
    dir_data = r'../data'
    dir_openaq_chiangmai = os.path.join(dir_data, 'openaq', 'chiangmai_by_location')
    list_files = io_tools.get_list_files_re(dir_openaq_chiangmai)
    # ignore_index gives one clean 0..n-1 index across all CSV files.
    df_openaq = pd.concat(
        [pd.read_csv(path_file) for path_file in list_files],
        ignore_index=True,
    )

    # The raw 'date' value is sliced positionally.
    # NOTE(review): assumes it looks like "{'utc': 'YYYY-MM-DDTHH:MM:SS+00:00', ...}"
    # so that characters 9-24 are 'YYYY-MM-DDTHH:MM' -- confirm against the CSV export.
    df_openaq['datetime'] = df_openaq['date'].str.slice(9, 25)
    df_openaq['date'] = df_openaq['datetime'].str.slice(0, 10)
    df_openaq['year'] = df_openaq['date'].str.slice(0, 4)
    # In a 10-character 'YYYY-MM-DD' string the month is characters 5-6
    # (slice(6, 8) returned the second month digit plus the dash).
    df_openaq['month'] = df_openaq['date'].str.slice(5, 7)
    # 'YYYY-MM' is the first 7 characters (slice(0, 8) kept a trailing dash).
    df_openaq['year_month'] = df_openaq['date'].str.slice(0, 7)
    df_openaq['time'] = df_openaq['datetime'].str.slice(11, 17)

    # 'coordinates' holds a dict literal as text; parse each row only once.
    s_coordinates = df_openaq['coordinates'].apply(ast.literal_eval)
    df_openaq['lat'] = s_coordinates.apply(lambda coord: coord['latitude'])
    df_openaq['lon'] = s_coordinates.apply(lambda coord: coord['longitude'])
    df_openaq = df_openaq.drop(columns=['coordinates'])

    # Use the row position as a stable id for each measurement.
    df_openaq['measurement_id'] = df_openaq.index
    df_openaq.index.name = 'measurement_id'

    # WKT point text (x = longitude, y = latitude) for the GeoDataFrame conversion.
    df_openaq['geometry'] = 'POINT (' + df_openaq['lon'].astype(str) + ' ' + df_openaq['lat'].astype(str) + ')'

    gdf_openaq = gis_tools.df_to_gdf(df_openaq, 'geometry')

    gdf_openaq = gdf_openaq.set_index('measurement_id')
    gdf_openaq.to_file(path_gdf_openaq)
    print('gdf_openaq has been saved.')
else:
    gdf_openaq = gpd.read_file(path_gdf_openaq)
    print('gdf_openaq has been loaded.')
# A frame read back from disk has an unnamed index; restore measurement_id as the index.
if gdf_openaq.index.name is None:
    gdf_openaq = gdf_openaq.set_index('measurement_id')

In [None]:
# Cast the 'datetime' column to datetime64[ns]; the AOD matching step below
# compares it against numpy datetime bounds.
gdf_openaq['datetime'] = gdf_openaq['datetime'].astype('datetime64[ns]')

In [None]:
# Index every translated AOD GeoTIFF under the root folder and derive the tile
# id and acquisition time of each file from its path and file name.
dir_extracted_aod_root = r'../../../data/aod/translated/aod_055'
df_files_aod = io_tools.get_list_files(dir_extracted_aod_root, '.tif$', return_df=True)

# Normalise Windows separators before splitting so the path components are
# found on both Windows ('\') and POSIX ('/') systems.
df_tmp = (
    df_files_aod['file_path']
    .str.replace('\\', '/', regex=False)
    .str.split('/', expand=True)
)
# The tile id is taken from the third path component counted from the end.
# NOTE(review): presumably the layout is .../<tile_id>/<subfolder>/<file> -- confirm.
df_files_aod['tile_id'] = df_tmp[df_tmp.columns[-3]]

# NOTE(review): assumes file_nm carries the date at characters 8-17 and the
# time as HHMM at characters 19-22 -- confirm against the actual file names.
df_files_aod['date'] = df_files_aod['file_nm'].str.slice(8, 18)
df_files_aod['time'] = df_files_aod['file_nm'].str.slice(19, 21) + ':' + df_files_aod['file_nm'].str.slice(21, 23)

# Build an ISO-8601 string with an explicit +00:00 offset, then parse it into
# a timezone-aware UTC timestamp.
df_files_aod['datetime'] = df_files_aod['date'] + 'T' + df_files_aod['time'] + ':00+00:00'
df_files_aod['datetime'] = df_files_aod['datetime'].apply(lambda ts_text: pd.Timestamp(ts_text).tz_convert("UTC"))
df_files_aod['year'] = df_files_aod['datetime'].dt.year
df_files_aod

In [None]:
# Keep only images whose tile covers Thailand and whose acquisition year
# overlaps the OpenAQ data (2021 onwards).
list_tile_id = ['h27v07', 'h27v08', 'h28v07', 'h28v08']
df_files_aod = (
    df_files_aod
    .loc[lambda df: df['tile_id'].isin(list_tile_id)]
    .loc[lambda df: df['year'] >= 2021]
)
df_files_aod

In [None]:
# Scratch check of the lower bound of the matching time window.
# NOTE: s_row and MAX_DIFF_HOURS are only defined by the extraction loop cell
# further down, so that cell has to be run before this one.
s_row['datetime'] - timedelta(hours=MAX_DIFF_HOURS)

In [None]:
# Scratch check of the lower time bound converted to numpy datetime64.
# NOTE: s_row and MAX_DIFF_HOURS are only defined by the extraction loop cell
# further down, so that cell has to be run before this one.
# (A stray character before the closing parenthesis made this cell a SyntaxError.)
min_datetime = np.datetime64(s_row.datetime - timedelta(hours=MAX_DIFF_HOURS))
min_datetime

In [None]:
# For every AOD image, sample the pixel value under each OpenAQ measurement
# taken within +/- MAX_DIFF_HOURS of the image's datetime.
# Successful extractions are collected in list_df_extracted_aod055; inputs that
# raised ValueError are collected in list_error for later inspection.
MAX_DIFF_HOURS = 1
list_df_extracted_aod055 = []
list_error = []
# Process the files in random order.
df_files_aod = df_files_aod.sample(frac=1.0)
# The half-width of the time window is the same for every file.
max_diff = timedelta(hours=MAX_DIFF_HOURS)
pbar = tqdm(df_files_aod.iterrows(), total=len(df_files_aod))
for s_idx, s_row in pbar:

    min_datetime = np.datetime64(s_row.datetime - max_diff)
    max_datetime = np.datetime64(s_row.datetime + max_diff)
    # Boolean indexing already returns a new frame, so gdf_openaq itself is
    # never modified and no explicit copy is needed.
    gdf_openaq_tmp = gdf_openaq[gdf_openaq.datetime.between(min_datetime, max_datetime)]

    if len(gdf_openaq_tmp) > 0:
        # Open the raster only when there is something to extract; the CRS is
        # needed to reproject the measurement points onto the image grid.
        with rasterio.open(s_row.file_path) as ds:
            crs_aod = ds.crs

        try:
            df_extracted_aod055_curr = gis_tools.extract_pixval_single_file(
                in_s_polygon=gdf_openaq_tmp['geometry'].to_crs(crs_aod),
                in_raster_path=s_row.file_path,
                in_list_out_col_nm=['aod_055'],
                in_list_target_raster_band_id=[1],
                nodata_val=-28672
                )
            df_extracted_aod055_curr['tile_id'] = s_row.tile_id
            df_extracted_aod055_curr['aod_datetime'] = s_row.datetime
            list_df_extracted_aod055.append(df_extracted_aod055_curr)
        except ValueError as err:
            # Keep the failing inputs together with the message so the failure
            # can be diagnosed afterwards instead of being silently dropped.
            list_error.append({
                'gdf_openaq_tmp':gdf_openaq_tmp,
                'raster_path':s_row.file_path,
                'error':str(err)
            })

    pbar.set_description(f'success={len(list_df_extracted_aod055)} / error={len(list_error)} ')
    

In [None]:
# Stack the per-image extraction results into one table and save it as CSV.
# The DataFrame index is not written to the file.
df_extracted_aod055 = pd.concat(objs=list_df_extracted_aod055)
df_extracted_aod055.to_csv(
    path_or_buf=r'../data/df_extracted_openaq_aod055_v2.csv',
    index=False,
)

In [None]:
# Share of extracted values equal to -28672, the nodata value passed to the
# extraction call.
df_extracted_aod055['aod_055'].eq(-28672).mean()