# Extract dem for each OpenAQ measurement

## Import library

In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
import requests
from pprint import pp
from tqdm.notebook import tqdm
import requests
import time
from zkyhaxpy import io_tools, pd_tools, gis_tools
from datetime import datetime, timedelta
import os

## Prepare GeoDataframe of OpenAQ data

In [3]:
if not os.path.exists('../data/gdf_openaq.gpkg'):
    print('Preparing gdf_openaq')
    df_openaq = pd.read_csv('../data/openaqi_thailand_pm25_data.csv', sep=',' )
    df_openaq['measurement_id'] = df_openaq.index

    df_openaq['geometry'] = 'POINT (' + df_openaq['long'].astype(str) + ' ' + df_openaq['lat'].astype(str) + ')'
    gdf_openaq = gis_tools.df_to_gdf(df_openaq, 'geometry')

    gdf_openaq['datetime'] = gdf_openaq['date_utc'].apply(lambda date_utc: pd.Timestamp(date_utc).tz_convert("UTC"))
    gdf_openaq['year'] = gdf_openaq['date_utc'].str.slice(0, 4).astype(int)
    gdf_openaq['month'] = gdf_openaq['date_utc'].str.slice(5, 7).astype(int)
    del(gdf_openaq['date_utc'])
    gdf_openaq = gdf_openaq.set_index('measurement_id')
    gdf_openaq.to_file('../data/gdf_openaq.gpkg')
    print('gdf_openaq has been saved.')
else:
    print('Loading gdf_openaq')
    gdf_openaq = gpd.read_file('../data/gdf_openaq.gpkg')
    print('gdf_openaq has been loaded.')
if gdf_openaq.index.name is None:
    gdf_openaq = gdf_openaq.set_index('measurement_id')

Loading gdf_openaq
gdf_openaq has been loaded.


In [19]:
gdf_openaq.head()

Unnamed: 0_level_0,locationId,location,parameter,value,unit,country,city,isMobile,isAnalysis,entity,sensorType,lat,long,datetime,year,month,geometry
measurement_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,8893,Vientiane,pm25,10.0,µg/m³,TH,,False,,Governmental Organization,reference grade,17.896122,102.64,2019-06-30 00:00:00+00:00,2019,6,POINT (102.64000 17.89612)
1,8893,Vientiane,pm25,18.0,µg/m³,TH,,False,,Governmental Organization,reference grade,17.896122,102.64,2019-06-29 23:00:00+00:00,2019,6,POINT (102.64000 17.89612)
2,8893,Vientiane,pm25,17.0,µg/m³,TH,,False,,Governmental Organization,reference grade,17.896122,102.64,2019-06-29 22:00:00+00:00,2019,6,POINT (102.64000 17.89612)
3,8893,Vientiane,pm25,12.0,µg/m³,TH,,False,,Governmental Organization,reference grade,17.896122,102.64,2019-06-29 21:00:00+00:00,2019,6,POINT (102.64000 17.89612)
4,8893,Vientiane,pm25,15.0,µg/m³,TH,,False,,Governmental Organization,reference grade,17.896122,102.64,2019-06-29 20:00:00+00:00,2019,6,POINT (102.64000 17.89612)


In [6]:
dir_extracted_dem_root = r'../data/dem'
df_files_dem = io_tools.get_list_files(dir_extracted_dem_root, '.tif$', return_df=True)
df_files_dem

Total of 2 files have been listed.


Unnamed: 0,file_path,file_nm,folder_nm,file_ext
0,../data/dem\GMTED2010N10E090_300\10n090e_20101...,10n090e_20101117_gmted_med300.tif,GMTED2010N10E090_300,tif
1,../data/dem\GMTED2010S10E090_300\10s090e_20101...,10s090e_20101117_gmted_med300.tif,GMTED2010S10E090_300,tif


# DEV

In [20]:

list_df_extracted_dem = []
list_error = []

pbar = tqdm(df_files_dem.iterrows(), total=len(df_files_dem))
for s_idx, s_row in pbar:
    with rasterio.open(s_row.file_path) as ds:
        left = ds.bounds.left
        right = ds.bounds.right
        top = ds.bounds.top
        bottom = ds.bounds.bottom
        gdf_openaq_tmp = gdf_openaq.copy()
        gdf_openaq_tmp = gdf_openaq_tmp[gdf_openaq_tmp['lat'].between(bottom, top)].copy()
        gdf_openaq_tmp = gdf_openaq_tmp[gdf_openaq_tmp['long'].between(left, right)].copy()
    
    try:
        df_extracted_dem_curr = gis_tools.extract_pixval_single_file(
            in_s_polygon=gdf_openaq_tmp['geometry'].to_crs(crs_dem),
            in_raster_path=s_row.file_path,
            in_list_out_col_nm=['dem'],
            in_list_target_raster_band_id=[1],
            nodata_val=-28672
            )        
        
        list_df_extracted_dem.append(df_extracted_dem_curr)            
    except ValueError:
        list_error.append({
            'gdf_openaq_tmp':gdf_openaq_tmp,
            'raster_path':s_row.file_path
        })

    pbar.set_description(f'success={len(list_df_extracted_dem)} / error={len(list_error)} ')
    


  0%|          | 0/2 [00:00<?, ?it/s]

/tmp\b4a7a5596ed7455e97fd7d0cb3b5690b\rowcol_map_10n090e_20101117_gmted_med300.tif has been created


Getting row&col of pixels...:   0%|          | 0/208539 [00:00<?, ?it/s]

Getting pixel values...:   0%|          | 0/1 [00:00<?, ?it/s]

/tmp\f3273e0878c44425a60e437fa1e4077c\rowcol_map_10s090e_20101117_gmted_med300.tif has been created


Getting row&col of pixels...:   0%|          | 0/9670 [00:00<?, ?it/s]

Getting pixel values...:   0%|          | 0/1 [00:00<?, ?it/s]

In [21]:
df_extracted_dem = pd.concat(list_df_extracted_dem)
df_extracted_dem

Unnamed: 0,measurement_id,row,col,dem
0,0,1452,1516,174.0
1,1,1452,1516,174.0
2,2,1452,1516,174.0
3,3,1452,1516,174.0
4,4,1452,1516,174.0
...,...,...,...,...
9665,205085,4,1036,23.0
9666,205086,4,1036,23.0
9667,205087,4,1036,23.0
9668,205088,4,1036,23.0


In [22]:

df_extracted_dem.to_csv(r'../data/df_extracted_dem.csv', index=False)

In [None]:
gdf_openaq_remain = gdf_openaq[~(gdf_openaq.index.isin(df_extracted_dem['measurement_id'].values))].copy()

In [None]:
df_files_dem['datetime'] = df_files_dem['datetime'].apply(lambda datetime: pd.Timestamp(datetime).tz_convert("UTC"))

In [None]:
df_files_dem['datetime'] 

In [None]:
df_extracted_dem['dem_datetime'].dt.hour.hist(bins=24)

In [None]:
gdf_openaq_remain['datetime'].dt.hour.hist(bins=24)

In [None]:
df_extracted_dem_curr

In [None]:
profile

In [None]:
gdf_openaq_tmp.crs