# Extract DEM elevation for each OpenAQ measurement

## Import libraries

In [None]:
import ast
import os
import time
from datetime import datetime, timedelta
from pprint import pp

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
import requests
from tqdm.notebook import tqdm

from zkyhaxpy import io_tools, pd_tools, gis_tools

## Prepare GeoDataFrame of OpenAQ data

In [None]:
# Build the OpenAQ GeoDataFrame from the raw per-location CSV files, or load
# the cached GeoPackage if it has already been built.
path_gdf_openaq = '../data/gdf_openaq_chiangmai_by_location.gpkg'

if not os.path.exists(path_gdf_openaq):
    dir_data = r'../data'
    dir_openaq_chiangmai = os.path.join(dir_data, 'openaq', 'chiangmai_by_location')
    list_files = io_tools.get_list_files_re(dir_openaq_chiangmai)

    # ignore_index gives one clean 0..n-1 index across all files, which is
    # later reused as the measurement id.
    df_openaq = pd.concat(
        [pd.read_csv(path_file) for path_file in list_files],
        ignore_index=True,
    )

    # NOTE(review): assumes characters 9-24 of the raw 'date' field hold an
    # ISO timestamp 'YYYY-MM-DDTHH:MM' -- confirm against the CSV files.
    df_openaq['datetime'] = df_openaq['date'].str.slice(9, 25)
    df_openaq['date'] = df_openaq['datetime'].str.slice(0, 10)
    df_openaq['year'] = df_openaq['date'].str.slice(0, 4)
    # In 'YYYY-MM-DD' the month occupies characters 5-6. The previous slices
    # (6, 8) and (0, 8) produced values such as '3-' and '2021-03-'.
    # A GeoPackage cached before this fix still contains the old values;
    # delete it to regenerate.
    df_openaq['month'] = df_openaq['date'].str.slice(5, 7)
    df_openaq['year_month'] = df_openaq['date'].str.slice(0, 7)
    df_openaq['time'] = df_openaq['datetime'].str.slice(11, 16)

    # Parse each 'coordinates' literal once instead of once per output column.
    s_coordinates = df_openaq['coordinates'].map(ast.literal_eval)
    df_openaq['lat'] = s_coordinates.map(lambda coord: coord['latitude'])
    df_openaq['lon'] = s_coordinates.map(lambda coord: coord['longitude'])
    df_openaq = df_openaq.drop(columns=['coordinates'])

    df_openaq['measurement_id'] = df_openaq.index
    df_openaq.index.name = 'measurement_id'

    # Point geometry as a 'POINT (lon lat)' string, handed to the project
    # helper that builds the GeoDataFrame.
    df_openaq['geometry'] = 'POINT (' + df_openaq['lon'].astype(str) + ' ' + df_openaq['lat'].astype(str) + ')'

    gdf_openaq = gis_tools.df_to_gdf(df_openaq, 'geometry')

    gdf_openaq = gdf_openaq.set_index('measurement_id')
    gdf_openaq.to_file(path_gdf_openaq)
    print('gdf_openaq has been saved.')
else:
    gdf_openaq = gpd.read_file(path_gdf_openaq)
    print('gdf_openaq has been loaded.')

# A freshly loaded file has an unnamed index; restore measurement_id as the
# index so both branches end in the same state.
if gdf_openaq.index.name is None:
    gdf_openaq = gdf_openaq.set_index('measurement_id')

In [None]:
# Preview the first rows to sanity-check the prepared GeoDataFrame.
gdf_openaq.head()

In [None]:
# List every GeoTIFF under the DEM directory as a DataFrame (one row per file).
dir_extracted_dem_root = r'../data/dem'
# The dot is escaped so only a literal '.tif' extension matches; the previous
# '.tif$' accepted any character before 'tif'.
# NOTE(review): assumes the helper treats this argument as a regular
# expression, as the '$' anchor suggests -- confirm.
df_files_dem = io_tools.get_list_files(dir_extracted_dem_root, r'\.tif$', return_df=True)
df_files_dem

In [None]:

# Extract the DEM value under every OpenAQ measurement, one raster at a time.
# Successful extractions are collected in list_df_extracted_dem; the inputs
# and exception of each failed extraction are kept in list_error for review.
list_df_extracted_dem = []
list_error = []

pbar = tqdm(df_files_dem['file_path'], total=len(df_files_dem))
for path_dem in pbar:
    # Read the extent and CRS in a single open of the raster.
    with rasterio.open(path_dem) as ds:
        bounds = ds.bounds
        crs_dem = ds.crs

    # Keep only the measurements that fall inside this raster's extent.
    # NOTE(review): the bounds are in the raster's CRS but are compared with
    # the lat/lon columns directly; this assumes the DEM uses geographic
    # (degree) coordinates -- confirm.
    is_inside = (
        gdf_openaq['lat'].between(bounds.bottom, bounds.top)
        & gdf_openaq['lon'].between(bounds.left, bounds.right)
    )
    gdf_openaq_tmp = gdf_openaq[is_inside].copy()

    try:
        df_extracted_dem_curr = gis_tools.extract_pixval_single_file(
            in_s_polygon=gdf_openaq_tmp['geometry'].to_crs(crs_dem),
            in_raster_path=path_dem,
            in_list_out_col_nm=['dem'],
            in_list_target_raster_band_id=[1],
            # NOTE(review): presumably the raster's nodata marker -- confirm.
            nodata_val=-28672,
        )
    except ValueError as err:
        # Best effort: record what failed and why, then move on.
        list_error.append({
            'gdf_openaq_tmp': gdf_openaq_tmp,
            'raster_path': path_dem,
            'error': err,
        })
    else:
        list_df_extracted_dem.append(df_extracted_dem_curr)

    pbar.set_description(f'success={len(list_df_extracted_dem)} / error={len(list_error)} ')
    


In [None]:
# Stack the per-raster extraction results into a single table and display it.
# pd.concat raises ValueError when the list is empty (no extraction succeeded).
df_extracted_dem = pd.concat(list_df_extracted_dem)
df_extracted_dem

In [None]:

# Save the extracted values as CSV; index=False leaves the index out of the file.
# NOTE(review): if the extraction helper returns the measurement id as the
# index, it is dropped here -- confirm the id is also present as a column.
df_extracted_dem.to_csv(r'../data/df_extracted_openaq_dem_v2.csv', index=False)