## Import libraries

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from zkyhaxpy import io_tools, gis_tools
import rasterio
import os
import shutil
import numpy as np
from tqdm.notebook import tqdm

## Prepare a lat/lon grid covering Chiang Mai

In [4]:
def generate_grid(lat_min, lat_max, lon_min, lon_max, step_size_km=1):
    """
    Generates a regular grid of latitude and longitude coordinates within the specified bounding box.

    The spacing is converted from kilometres to degrees with one constant
    (1 degree ~= 111.32 km) that is applied to BOTH axes, so the east-west
    distance between neighbouring points is only approximately ``step_size_km``.
    Upper bounds are exclusive (``np.arange`` semantics) and every coordinate is
    rounded to 4 decimal places.

    :param lat_min: Minimum latitude (inclusive)
    :param lat_max: Maximum latitude (exclusive)
    :param lon_min: Minimum longitude (inclusive)
    :param lon_max: Maximum longitude (exclusive)
    :param step_size_km: Spacing between grid points (default: 1 km), must be > 0
    :return: List of (latitude, longitude) pairs, ordered latitude-major
             (all longitudes of the first latitude, then the next latitude, ...)
    :raises ValueError: If step_size_km is not strictly positive
    """
    if step_size_km <= 0:
        raise ValueError(f'step_size_km must be positive, got {step_size_km}')

    step_size_deg = step_size_km / 111.32  # Approximate conversion from km to degrees

    # Round the two axes once instead of rounding every (lat, lon) pair.
    lats = np.round(np.arange(lat_min, lat_max, step_size_deg), 4)
    lons = np.round(np.arange(lon_min, lon_max, step_size_deg), 4)

    # indexing='ij' keeps latitude as the slow axis, i.e. the same point order
    # as a nested "for lat: for lon:" loop.
    lat_grid, lon_grid = np.meshgrid(lats, lons, indexing='ij')
    return list(zip(lat_grid.ravel().tolist(), lon_grid.ravel().tolist()))


# Bounding box (decimal degrees) used to seed the grid for Chiang Mai.
# It is widened by pad_size degrees on every side before the grid is generated;
# points outside the province polygon are dropped further down.
chiangmai_lat_min = 17
chiangmai_lat_max = 20.5
chiangmai_lon_min = 97.8
chiangmai_lon_max = 99.8
pad_size = 1.0

# Load the Thailand boundaries file and keep the feature(s) named 'Chiang Mai'
gdf_thailand = gpd.read_file(r'../data/thailandWithName.json')
gdf_chiangmai = gdf_thailand[gdf_thailand['name'] == 'Chiang Mai']
if gdf_chiangmai.empty:
    # Fail with a readable message instead of an IndexError from .iloc[0] below.
    raise ValueError("No feature named 'Chiang Mai' found in ../data/thailandWithName.json")

# Create grid lat/lon for the padded bounding box (default spacing of generate_grid: 1 km)
chiangmai_grid = generate_grid(
    chiangmai_lat_min - pad_size,
    chiangmai_lat_max + pad_size,
    chiangmai_lon_min - pad_size,
    chiangmai_lon_max + pad_size,
)
df_chiangmai_grid = pd.DataFrame(chiangmai_grid, columns=['lat', 'lon'])

# Build WKT point strings (x = lon, y = lat) and let gis_tools turn them into geometries
df_chiangmai_grid['geometry'] = 'POINT (' + df_chiangmai_grid['lon'].astype(str) + ' ' + df_chiangmai_grid['lat'].astype(str) + ')'
gdf_chiangmai_grid = gis_tools.df_to_gdf(df_chiangmai_grid, geometry='geometry')

# The positional index of the full (unfiltered) grid becomes the grid_id,
# so ids are not contiguous after the spatial filter.
gdf_chiangmai_grid.index.name = 'grid_id'

# Keep only the points that intersect the first 'Chiang Mai' geometry
gdf_chiangmai_grid = gdf_chiangmai_grid[gdf_chiangmai_grid['geometry'].intersects(gdf_chiangmai['geometry'].iloc[0])].copy()
gdf_chiangmai_grid

Unnamed: 0_level_0,lat,lon,geometry
grid_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
62620,17.2576,98.4170,POINT (98.41700 17.25760)
62621,17.2576,98.4259,POINT (98.42590 17.25760)
62622,17.2576,98.4349,POINT (98.43490 17.25760)
62623,17.2576,98.4439,POINT (98.44390 17.25760)
63065,17.2666,98.4080,POINT (98.40800 17.26660)
...,...,...,...
205907,20.1412,99.5039,POINT (99.50390 20.14120)
205908,20.1412,99.5129,POINT (99.51290 20.14120)
205909,20.1412,99.5219,POINT (99.52190 20.14120)
205910,20.1412,99.5309,POINT (99.53090 20.14120)


## DEM

In [16]:
# Folder that receives the per-tile DEM extractions
dir_chiangmai_dem = r'../data/chiangmai_dem'
io_tools.create_folders(dir_chiangmai_dem)

# List the DEM rasters; the first 7 characters of each file name serve as the tile id
df_list_path_dem = io_tools.get_list_files('../data/dem', '.tif$', return_df=True)
df_list_path_dem['tile_id'] = df_list_path_dem['file_nm'].str.slice(0, 7)
df_list_path_dem

Total of 2 files have been listed.


Unnamed: 0,file_path,file_nm,folder_nm,file_ext,tile_id
0,../data/dem\10n090e_20101117_gmted_med300.tif,10n090e_20101117_gmted_med300.tif,dem,tif,10n090e
1,../data/dem\10s090e_20101117_gmted_med300.tif,10s090e_20101117_gmted_med300.tif,dem,tif,10s090e


In [26]:
# Combined DEM table for all tiles. When this file already exists it is loaded
# and the extraction below is skipped entirely.
path_chiangmai_dem = r'../data/df_chiangmai_dem.parquet'

# Value handed to the extractor as nodata and masked to NaN afterwards.
# NOTE(review): this is the same value the AOD cells use; confirm it is really
# the nodata value of the DEM rasters.
NODATA_VAL_DEM = -28672

if not os.path.exists(path_chiangmai_dem):

    pbar_dem = tqdm(df_list_path_dem.iterrows(), total=len(df_list_path_dem))

    list_df_pixval_dem = []
    list_error = []
    for _, s_row in pbar_dem:
        pbar_dem.set_description(f'success={len(list_df_pixval_dem)} / error={len(list_error)}')

        tile_id = s_row.tile_id
        path_out = os.path.join(dir_chiangmai_dem, f'df_chiangmai_dem-{tile_id}.parquet')

        # Per-tile cache: reuse an earlier extraction instead of redoing it
        if os.path.exists(path_out):
            print(f'{path_out} already exists')
            list_df_pixval_dem.append(pd.read_parquet(path_out))
            continue

        # Get CRS of raster image so the grid points can be reprojected to it
        path_raster = s_row.file_path
        with rasterio.open(path_raster) as ds:
            crs = ds.crs

        # Extract DEM value of every grid point from this tile
        try:
            df_pixval_dem = gis_tools.extract_pixval_single_file(
                in_s_polygon=gdf_chiangmai_grid.geometry.to_crs(crs),
                in_raster_path=path_raster,
                in_list_out_col_nm=['dem'],
                in_list_target_raster_band_id=[1],
                nodata_val=NODATA_VAL_DEM,
                )
            df_pixval_dem = df_pixval_dem.set_index('grid_id').drop(columns=['row', 'col'])
            # Turn any remaining nodata cells into NaN
            df_pixval_dem = df_pixval_dem.mask(df_pixval_dem == NODATA_VAL_DEM)
            df_pixval_dem.to_parquet(path_out)
            list_df_pixval_dem.append(df_pixval_dem)
        except ValueError as err:
            # Keep going with the other tiles, but do not hide what went wrong
            list_error.append(tile_id)
            print(f'Tile {tile_id} failed and is skipped: {err}')

    if list_error:
        print(f'WARNING: {len(list_error)} tile(s) are missing from the result: {list_error}')
    if not list_df_pixval_dem:
        raise ValueError('No DEM tile could be extracted; nothing to save.')

    df_pixval_dem = pd.concat(list_df_pixval_dem)
    df_pixval_dem.to_parquet(path_chiangmai_dem)
    print(f'{path_chiangmai_dem} has been saved.')

else:
    df_pixval_dem = pd.read_parquet(path_chiangmai_dem)
    print(f'{path_chiangmai_dem} has been loaded.')


../data/df_chiangmai_dem.parquet has been loaded.


## AOD

In [7]:
# Folder that receives the per-month daily AOD extractions
dir_chiangmai_aod_daily = r'../data/chiangmai_aod_daily'
io_tools.create_folders(dir_chiangmai_aod_daily)

# List the AOD rasters and derive metadata from folder and file names
df_list_path_aod = io_tools.get_list_files('../../../data/aod/translated', '.tif$', return_df=True)

# The name of the containing folder is parsed as the year
df_list_path_aod['year'] = df_list_path_aod['folder_nm'].astype(int)

# Fixed character positions of the file name hold the timestamp parts,
# e.g. 'aod_055-2016-08-06_0720.tif' -> '2016-08-06', '2016-08', '2016-08-06_0720'
df_list_path_aod['date'] = df_list_path_aod['file_nm'].str.slice(8, 18)
df_list_path_aod['year_month'] = df_list_path_aod['file_nm'].str.slice(8, 15)
df_list_path_aod['datetime'] = df_list_path_aod['file_nm'].str.slice(8, 23)

# Tile id is the third path component from the end (<tile>/<year>/<file>).
# Separators are normalised first so this works for both '\\' and '/' paths.
df_list_path_aod['tile_id'] = (
    df_list_path_aod['file_path']
    .str.replace('\\', '/', regex=False)
    .str.split('/')
    .str[-3]
)
df_list_path_aod

In [28]:
# Selection of AOD files from YEAR_START_PREDICT onwards for the chosen tile(s).
# NOTE(review): the following cell reassigns every name defined here with
# YEAR_START_PREDICT = 2015, so this cell only matters if run on its own.
YEAR_START_PREDICT = 2019
# list_tile_id = ['h27v07', 'h27v08', 'h28v07', 'h28v08']
list_tile_id = ['h27v07']

is_year_selected = df_list_path_aod['year'] >= YEAR_START_PREDICT
is_tile_selected = df_list_path_aod['tile_id'].isin(list_tile_id)

# Copy the selection, then shuffle the rows (no seed is set)
df_list_path_aod_curr = df_list_path_aod[is_year_selected & is_tile_selected].copy().sample(frac=1.0)

# One progress-bar step per (tile, month) group, in order of first appearance
pbar_aod_year_month = tqdm(df_list_path_aod_curr.groupby(['tile_id', 'year_month'], sort=False))

  0%|          | 0/63 [00:00<?, ?it/s]

In [29]:
YEAR_START_PREDICT = 2015
# list_tile_id = ['h27v07', 'h27v08', 'h28v07', 'h28v08']
list_tile_id = ['h27v07']

# Value handed to the extractor as nodata and masked to NaN afterwards
NODATA_VAL_AOD = -28672

df_list_path_aod_curr = df_list_path_aod[(df_list_path_aod['year']>=YEAR_START_PREDICT) & (df_list_path_aod['tile_id'].isin(list_tile_id))].copy()

# Shuffle the rows; together with sort=False below the (tile, month) groups are
# then visited in random order.
# NOTE(review): no seed is set, so the order differs between runs - presumably intended; confirm.
df_list_path_aod_curr = df_list_path_aod_curr.sample(frac=1.0)

pbar_aod_year_month = tqdm(df_list_path_aod_curr.groupby(['tile_id', 'year_month'], sort=False))

list_df_pixval_aod_daily = []
list_error = []
for (tile_id, year_month), df_list_path_aod_tile_year_month in pbar_aod_year_month:
    pbar_aod_year_month.set_description(f'success={len(list_df_pixval_aod_daily)} / error={len(list_error)}')

    # Per-month cache: reuse an earlier extraction instead of redoing it
    path_out = os.path.join(dir_chiangmai_aod_daily, f'df_chiangmai_aod_daily-{year_month}-{tile_id}.parquet')
    if os.path.exists(path_out):
        print(f'{path_out} already exists')
        list_df_pixval_aod_daily.append(pd.read_parquet(path_out))
        continue

    # Prepare list of columns and raster paths (one output column per raster)
    list_out_col = df_list_path_aod_tile_year_month['datetime'].to_list()
    list_path_raster = df_list_path_aod_tile_year_month['file_path'].to_list()

    # Get CRS of raster image (taken from the first raster of the group)
    with rasterio.open(list_path_raster[0]) as ds:
        crs = ds.crs

    # Extract all AOD rasters of this (tile, month) group
    try:
        df_pixval_aod = gis_tools.extract_pixval_multi_files(
            in_s_polygon=gdf_chiangmai_grid.geometry.to_crs(crs),
            in_list_raster_path=list_path_raster,
            in_list_out_col_nm=list_out_col,
            in_target_raster_band_id=1,
            nodata_val=NODATA_VAL_AOD,
            check_raster_consistent=True,
            )

        df_pixval_aod = df_pixval_aod.set_index('grid_id').drop(columns=['row', 'col'])
        # Turn any remaining nodata cells into NaN
        df_pixval_aod = df_pixval_aod.mask(df_pixval_aod == NODATA_VAL_AOD)

        # Aggregate into daily data: per grid point, the NaN-ignoring median of
        # all columns whose name starts with the same 10-character date.
        list_date = sorted({col_nm[:10] for col_nm in df_pixval_aod.columns})
        dict_daily = {'year_month': year_month, 'tile_id': tile_id}
        for date in list_date:
            list_col_curr = [col for col in df_pixval_aod.columns if col.startswith(date)]
            dict_daily[date] = np.nanmedian(df_pixval_aod[list_col_curr].to_numpy(), axis=1)

        # Build the frame in one go instead of inserting one column at a time
        df_pixval_aod_daily = pd.DataFrame(dict_daily, index=df_pixval_aod.index)
        df_pixval_aod_daily.to_parquet(path_out)
        list_df_pixval_aod_daily.append(df_pixval_aod_daily)
    except ValueError as err:
        # Keep going with the other months, but do not hide what went wrong
        list_error.append((tile_id, year_month))
        print(f'{tile_id} / {year_month} failed and is skipped: {err}')

if list_error:
    print(f'WARNING: {len(list_error)} (tile, month) group(s) are missing from the result: {list_error}')
if not list_df_pixval_aod_daily:
    raise ValueError('No AOD data could be extracted; nothing to save.')

df_pixval_aod_daily = pd.concat(list_df_pixval_aod_daily)
df_pixval_aod_daily.to_parquet(r'../data/df_chiangmai_aod_daily.parquet')
# df_pixval_aod_daily.to_csv(r'../data/df_chiangmai_aod_daily.csv')



  0%|          | 0/111 [00:00<?, ?it/s]

../data/chiangmai_aod_daily\df_chiangmai_aod_daily-2024-04-h27v07.parquet already exists
../data/chiangmai_aod_daily\df_chiangmai_aod_daily-2021-09-h27v07.parquet already exists
../data/chiangmai_aod_daily\df_chiangmai_aod_daily-2022-05-h27v07.parquet already exists
../data/chiangmai_aod_daily\df_chiangmai_aod_daily-2022-07-h27v07.parquet already exists
/tmp\84a61c365b1640c8aa7d80768037d31a\rowcol_map_aod_055-2016-08-06_0720.tif has been created


Getting row&col of pixels...:   0%|          | 0/24197 [00:00<?, ?it/s]

In [25]:
# NOTE(review): this cell repeats the AOD extraction of the preceding cell
# (same inputs, same output files) without the row shuffling; consider keeping
# only one of the two.
YEAR_START_PREDICT = 2015
# list_tile_id = ['h27v07', 'h27v08', 'h28v07', 'h28v08']
list_tile_id = ['h27v07']

# Value handed to the extractor as nodata and masked to NaN afterwards
NODATA_VAL_AOD = -28672

df_list_path_aod_curr = df_list_path_aod[(df_list_path_aod['year']>=YEAR_START_PREDICT) & (df_list_path_aod['tile_id'].isin(list_tile_id))].copy()

# One progress-bar step per (tile, month) group, in sorted key order
pbar_aod_year_month = tqdm(df_list_path_aod_curr.groupby(['tile_id', 'year_month']))

list_df_pixval_aod_daily = []
list_error = []
for (tile_id, year_month), df_list_path_aod_tile_year_month in pbar_aod_year_month:
    pbar_aod_year_month.set_description(f'success={len(list_df_pixval_aod_daily)} / error={len(list_error)}')

    # Per-month cache: reuse an earlier extraction instead of redoing it
    path_out = os.path.join(dir_chiangmai_aod_daily, f'df_chiangmai_aod_daily-{year_month}-{tile_id}.parquet')
    if os.path.exists(path_out):
        print(f'{path_out} already exists')
        list_df_pixval_aod_daily.append(pd.read_parquet(path_out))
        continue

    # Prepare list of columns and raster paths (one output column per raster)
    list_out_col = df_list_path_aod_tile_year_month['datetime'].to_list()
    list_path_raster = df_list_path_aod_tile_year_month['file_path'].to_list()

    # Get CRS of raster image (taken from the first raster of the group)
    with rasterio.open(list_path_raster[0]) as ds:
        crs = ds.crs

    # Extract all AOD rasters of this (tile, month) group
    try:
        df_pixval_aod = gis_tools.extract_pixval_multi_files(
            in_s_polygon=gdf_chiangmai_grid.geometry.to_crs(crs),
            in_list_raster_path=list_path_raster,
            in_list_out_col_nm=list_out_col,
            in_target_raster_band_id=1,
            nodata_val=NODATA_VAL_AOD,
            check_raster_consistent=True,
            )

        df_pixval_aod = df_pixval_aod.set_index('grid_id').drop(columns=['row', 'col'])
        # Turn any remaining nodata cells into NaN
        df_pixval_aod = df_pixval_aod.mask(df_pixval_aod == NODATA_VAL_AOD)

        # Aggregate into daily data: per grid point, the NaN-ignoring median of
        # all columns whose name starts with the same 10-character date.
        list_date = sorted({col_nm[:10] for col_nm in df_pixval_aod.columns})
        dict_daily = {'year_month': year_month, 'tile_id': tile_id}
        for date in list_date:
            list_col_curr = [col for col in df_pixval_aod.columns if col.startswith(date)]
            dict_daily[date] = np.nanmedian(df_pixval_aod[list_col_curr].to_numpy(), axis=1)

        # Build the frame in one go instead of inserting one column at a time
        df_pixval_aod_daily = pd.DataFrame(dict_daily, index=df_pixval_aod.index)
        df_pixval_aod_daily.to_parquet(path_out)
        list_df_pixval_aod_daily.append(df_pixval_aod_daily)
    except ValueError as err:
        # Keep going with the other months, but do not hide what went wrong
        list_error.append((tile_id, year_month))
        print(f'{tile_id} / {year_month} failed and is skipped: {err}')

if list_error:
    print(f'WARNING: {len(list_error)} (tile, month) group(s) are missing from the result: {list_error}')
if not list_df_pixval_aod_daily:
    raise ValueError('No AOD data could be extracted; nothing to save.')

df_pixval_aod_daily = pd.concat(list_df_pixval_aod_daily)
df_pixval_aod_daily.to_parquet(r'../data/df_chiangmai_aod_daily.parquet')
# df_pixval_aod_daily.to_csv(r'../data/df_chiangmai_aod_daily.csv')



  0%|          | 0/111 [00:00<?, ?it/s]

/tmp\0fc92a8dfe634568855907eb13db3938\rowcol_map_aod_055-2015-01-30_0645.tif has been created


Getting row&col of pixels...:   0%|          | 0/24197 [00:00<?, ?it/s]

Getting pixel values...:   0%|          | 0/103 [00:00<?, ?it/s]

  df_polygon_row_col_pixval[col_nm] = np.where(arr_pixval_1d==nodata_val, np.nan, arr_pixval_1d)
  df_polygon_row_col_pixval[col_nm] = np.where(arr_pixval_1d==nodata_val, np.nan, arr_pixval_1d)
  df_polygon_row_col_pixval[col_nm] = np.where(arr_pixval_1d==nodata_val, np.nan, arr_pixval_1d)
  df_polygon_row_col_pixval[col_nm] = np.where(arr_pixval_1d==nodata_val, np.nan, arr_pixval_1d)
  df_polygon_row_col_pixval[col_nm] = np.where(arr_pixval_1d==nodata_val, np.nan, arr_pixval_1d)
  df_polygon_row_col_pixval[col_nm] = np.where(arr_pixval_1d==nodata_val, np.nan, arr_pixval_1d)
  df_pixval_aod_daily[date] = np.nanmedian(df_pixval_aod.loc[:, list_col_curr].values, axis=1)
  df_pixval_aod_daily[date] = np.nanmedian(df_pixval_aod.loc[:, list_col_curr].values, axis=1)
  df_pixval_aod_daily[date] = np.nanmedian(df_pixval_aod.loc[:, list_col_curr].values, axis=1)
  df_pixval_aod_daily[date] = np.nanmedian(df_pixval_aod.loc[:, list_col_curr].values, axis=1)
  df_pixval_aod_daily[date] = np.nanme

/tmp\011a59ba5c8447a5a82f1f3ebfaa541a\rowcol_map_aod_055-2015-02-27_0710.tif has been created


Getting row&col of pixels...:   0%|          | 0/24197 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
gis_tools.extract_pixval_single_file(