# MODIS - Get pixel values of NDVI

## Install packages

In [None]:
# Install the ipython-autotime package and load its IPython extension for this session.
!pip install ipython-autotime
%load_ext autotime

In [None]:
# Install the third-party packages used by this notebook
# (zkyhaxpy, rasterio and geopandas are imported in the next cell).
!pip install zkyhaxpy rasterio utm geopandas


## Import libraries

In [None]:
## for all ##
from zkyhaxpy import io_tools, pd_tools, np_tools, console_tools, timer_tools, json_tools, dict_tools, colab_tools, gis_tools
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import re
import rasterio
import geopandas as gpd


In [None]:
# Colab session setup through zkyhaxpy's colab_tools helpers.
# NOTE(review): presumably mounts Google Drive and authenticates to GCP
# (the gsutil commands further down need GCS access) - confirm against zkyhaxpy.
colab_tools.mount_drive()
colab_tools.authen_gcp()

## Define paths

In [None]:
# Local working folders for the MODIS NDVI pipeline.
folder_modis_ndvi = '/temp/modis/ndvi'
folder_modis_ndvi_reproj = '/temp/modis/ndvi_reproj'
folder_modis_reference = '/temp/modis/reference'
folder_modis_ndvi_pixval = '/temp/modis/ndvi_pixval'

# Create all raster folders, including the reprojected-raster folder that the
# pixel-value step reads from (it must be listed here, not folder_modis_ndvi twice).
io_tools.create_folders(folder_modis_ndvi, folder_modis_ndvi_reproj, folder_modis_reference)

# Output folder for the yearly pixel-value parquet files.
io_tools.create_folders(folder_modis_ndvi_pixval)

## Get data from GCS

In [None]:
# Copy the MODIS NDVI rasters and the reference data from the GCS bucket into /temp/modis.
# NOTE(review): only `ndvi` and `reference` are copied here, while the pixel-value
# cell reads rasters from /temp/modis/ndvi_reproj - confirm that folder is populated elsewhere.
!gsutil -m cp -r gs://unbdh2022-multiverseofdata-dev/modis/ndvi /temp/modis
!gsutil cp -r gs://unbdh2022-multiverseofdata-dev/modis/reference /temp/modis


## Get pixel values (monthly mean NDVI, one file per year)

In [None]:
# Get a list of all reprojected MODIS NDVI rasters.
# NOTE(review): the '.' in '.tif$' is an unescaped regex wildcard - confirm whether r'\.tif$' was intended.
df_list_files_modis_reproj = io_tools.get_list_files_re(folder_modis_ndvi_reproj, '.tif$', return_df=True)

# The image date is the first run of 8 digits (YYYYMMDD) in the file name.
# It is extracted once, with a raw-string pattern so that '\d' is not treated
# as an (invalid) string escape sequence.
s_img_date_str = df_list_files_modis_reproj['file_nm'].str.extract(r'(\d{8})', expand=False)

# Derived date columns used to group rasters by year and month.
df_list_files_modis_reproj['img_date_int'] = s_img_date_str.astype(int)
df_list_files_modis_reproj['img_date'] = (
    s_img_date_str.str.slice(0, 4) + '-' + s_img_date_str.str.slice(4, 6) + '-' + s_img_date_str.str.slice(6)
)
df_list_files_modis_reproj['img_year'] = s_img_date_str.str.slice(0, 4).astype(int)
df_list_files_modis_reproj['img_month'] = 'M' + s_img_date_str.str.slice(4, 6)

#Get yearly pixel values
# Expected month columns (M01..M12) of every yearly output table.
list_month_cols = [f'M{m:02d}' for m in range(1, 13)]


def _read_ndvi_raster(path_raster):
    """Read band 1 of a raster and return it as a float32 NDVI array.

    Pixels whose stored value is 0 (treated as out of bound) or -3000
    (treated as invalid) become NaN; the remaining values are multiplied
    by 0.0001 to convert them to the NDVI range.
    """
    with rasterio.open(path_raster) as ds:
        arr_ndvi = ds.read(1).astype(np.float32)
    arr_ndvi[(arr_ndvi == 0) | (arr_ndvi == -3000)] = np.nan
    return arr_ndvi * 0.0001


for year in tqdm(range(2001, 2023), 'Iterate year...'):
    path_df_pixval = os.path.join(folder_modis_ndvi_pixval, f'df_pixval_{year}.parquet')
    # NOTE(review): presumably creates the parent folder of this file path - confirm against zkyhaxpy.
    io_tools.create_folders(path_df_pixval)
    if os.path.exists(path_df_pixval):
        print(f'{path_df_pixval} already exists. Skip...')
        continue

    df_list_files_modis_curr = df_list_files_modis_reproj[df_list_files_modis_reproj['img_year'] == year]
    if df_list_files_modis_curr.empty:
        # Without any raster the stacked array has no spatial shape and the
        # reshaping below would fail, so skip the year explicitly.
        print(f'No rasters found for {year}. Skip...')
        continue

    print(f'Preparing {path_df_pixval}...')
    df_list_files_modis_curr = df_list_files_modis_curr.sort_values('img_date')

    list_arr_pixval = []
    list_img_month = []
    for img_month, df_list_files_modis_curr_month in tqdm(df_list_files_modis_curr.groupby('img_month'), 'Iterate month...'):
        list_arr_pixval_tmp = [
            _read_ndvi_raster(path_raster)
            for path_raster in df_list_files_modis_curr_month['file_path']
        ]

        # Monthly mean over all rasters of the month. np.mean propagates NaN,
        # so a pixel masked in any raster of the month is NaN for that month.
        arr_pixval = np.mean(list_arr_pixval_tmp, axis=0)
        list_img_month.append(img_month)
        list_arr_pixval.append(arr_pixval)

    # Stack the monthly layers: (n_months, n_rows, n_cols)
    arr_pixval_yearly = np.array(list_arr_pixval)

    #Reformat pixval array into one record per pixel: [row, col, month values...]
    arr_shape = arr_pixval_yearly.shape[1:]
    arr_row_col = gis_tools.create_row_col_arr(arr_shape)
    arr_pixval_yearly_row_col = np.concatenate([arr_row_col, arr_pixval_yearly])
    n_layers = arr_pixval_yearly_row_col.shape[0]
    arr_pixval_yearly_row_col = arr_pixval_yearly_row_col.T.reshape(-1, n_layers)

    #Drop pixels that are NaN in any of the available months
    arr_na_pixel = np.isnan(arr_pixval_yearly_row_col[:, 2:]).any(axis=1)
    arr_pixval_yearly_row_col = arr_pixval_yearly_row_col[~arr_na_pixel]

    #Convert to DF
    df_pixval_curr = pd.DataFrame(arr_pixval_yearly_row_col, columns=['row', 'col'] + list_img_month)
    # Pixel id = row * 10000 + col (unique only while col < 10000).
    df_pixval_curr.index = (df_pixval_curr.row * 10000 + df_pixval_curr.col).astype(np.int64)
    df_pixval_curr = df_pixval_curr.drop(columns=['row', 'col']).copy()

    #Recheck columns: years with missing months still get all 12 month columns (filled with NaN)
    if list_img_month != list_month_cols:
        df_pixval_curr = df_pixval_curr.reindex(columns=list_month_cols)

    #Save to parquet
    df_pixval_curr.to_parquet(path_df_pixval)

# Upload the yearly pixel-value parquet folder to the GCS bucket.
# NOTE(review): -n is gsutil's no-clobber option, so objects already in the
# bucket are presumably left untouched - confirm.
!gsutil -m cp -r -n /temp/modis/ndvi_pixval gs://unbdh2022-multiverseofdata-dev/modis