# MODIS - Get pixel values of NDVI

## Install packages

In [None]:
!pip install zkyhaxpy rasterio utm geopandas
!pip install ipython-autotime
!pip install gcsfs

## Import libraries

In [None]:
## for all ##
from zkyhaxpy import io_tools, pd_tools, np_tools, console_tools, timer_tools, json_tools, dict_tools, colab_tools, gis_tools, gcp_tools
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import re
import rasterio
import geopandas as gpd


# Activate the ipython-autotime extension (installed in the "Install packages" cell).
%load_ext autotime

In [None]:
# Colab session setup through the zkyhaxpy helpers.
# NOTE(review): judging by their names these mount Google Drive and authenticate
# the session against GCP (required for the gs:// reads/writes further down) —
# confirm against the zkyhaxpy implementation.
colab_tools.mount_drive()
colab_tools.authen_gcp()

## Define paths

In [None]:
# Local folder holding the reprojected MODIS NDVI rasters. It has to stay in sync
# with the `gsutil cp` destination in the "Get data from GCS" section, which
# copies the remote `ndvi_reproj` folder into the parent folder `/temp/modis`.
folder_modis_ndvi_reproj = '/temp/modis/ndvi_reproj'
# presumably creates the folder (including parents) when it is missing — confirm in zkyhaxpy
io_tools.create_folders(folder_modis_ndvi_reproj)

## Get data from GCS

In [None]:
# Alternative (disabled): download selected years only instead of the whole archive.
# !gsutil -m cp -r -n gs://unbdh2022-multiverseofdata-dev/modis/ndvi_reproj/2010 /temp/modis/ndvi_reproj
# !gsutil -m cp -r -n gs://unbdh2022-multiverseofdata-dev/modis/ndvi_reproj/2011 /temp/modis/ndvi_reproj
# !gsutil -m cp -r -n gs://unbdh2022-multiverseofdata-dev/modis/ndvi_reproj/2012 /temp/modis/ndvi_reproj


# Copy the whole reprojected NDVI folder from the bucket into /temp/modis.
# gsutil flags: -m = parallel transfer, -r = recursive, -n = no-clobber (files
# already present at the destination are skipped, so re-running is cheap).
!gsutil -m cp -r -n gs://unbdh2022-multiverseofdata-dev/modis/ndvi_reproj /temp/modis

## Get pixel values (pixval)

In [None]:
# Get a list of all reprojected MODIS NDVI rasters
df_list_files_modis_reproj = io_tools.get_list_files_re(folder_modis_ndvi_reproj, '.tif$', return_df=True)

# Pull the first 8-digit token (the image date, YYYYMMDD) out of each file name
# once and derive every date column from it. A raw string is used so that `\d`
# is not treated as an (invalid) string escape sequence.
s_img_date_token = df_list_files_modis_reproj['file_nm'].str.extract(r'(\d{8})', expand=False)
assert s_img_date_token.notna().all(), 'Every raster file name must contain an 8-digit (YYYYMMDD) image date.'

# Image date as integer (YYYYMMDD)
df_list_files_modis_reproj['img_date_int'] = s_img_date_token.astype(int)
# Image date as ISO string (YYYY-MM-DD); string form is relied on for the
# lexicographic `.between()` filter when selecting the yearly window.
df_list_files_modis_reproj['img_date'] = (
    s_img_date_token.str.slice(0, 4)
    + '-' + s_img_date_token.str.slice(4, 6)
    + '-' + s_img_date_token.str.slice(6)
)
# Image year as integer and calendar month as label 'M01'..'M12'
df_list_files_modis_reproj['img_year'] = s_img_date_token.str.slice(0, 4).astype(int)
df_list_files_modis_reproj['img_month'] = 'M' + s_img_date_token.str.slice(4, 6)


In [None]:
# Build one parquet file of monthly-mean NDVI pixel values per as-of month.
#
# For every (as_of_year, as_of_month) the images of the 12 months preceding the
# as-of date are averaged per month, flattened to one row per pixel and written
# to GCS. Outputs that already exist are skipped, so this cell can simply be
# re-run to resume an interrupted job.

GCS_FOLDER_PIXVAL = 'gs://unbdh2022-multiverseofdata-dev/modis/ndvi_pixval_nrt_v2'
N_WINDOW_MONTHS = 12          # number of monthly layers expected in every window
PIXVAL_OUT_OF_BOUND = 0       # raw value marking pixels out of bound
PIXVAL_INVALID = -3000        # raw value marking invalid pixels
NDVI_SCALE_FACTOR = 0.0001    # raw value -> NDVI range
ROW_INDEX_MULTIPLIER = 10000  # pixel id = row * 10000 + col

list_month = list(range(1, 13))


def _read_ndvi_raster(path_raster):
    """Read band 1 of a raster and return it as float32 NDVI.

    Out-of-bound (0) and invalid (-3000) raw values are replaced by NaN before
    the values are scaled into NDVI range.
    """
    with rasterio.open(path_raster) as ds:
        arr_raw = ds.read(1)
    arr_is_masked = (arr_raw == PIXVAL_OUT_OF_BOUND) | (arr_raw == PIXVAL_INVALID)
    return np.where(arr_is_masked, np.nan, arr_raw).astype(np.float32) * NDVI_SCALE_FACTOR


for as_of_year in tqdm(range(2001, 2023), 'Iterate as of year...'):
    for as_of_month in tqdm(list_month, 'Iterate as of month'):
        path_df_pixval = f'{GCS_FOLDER_PIXVAL}/df_pixval_nrt_{as_of_year}-{as_of_month:02d}.parquet'

        if gcp_tools.check_file_exists_gcs(path_df_pixval):
            print(f'{path_df_pixval} already exists. Skip...')
            continue
        if (as_of_year == 2022 and as_of_month >= 11) or (as_of_year == 2001 and as_of_month <= 2):
            # As-of months at either end of the range for which no output is produced.
            print('No data. Skip')
            continue
        print(f'Preparing {path_df_pixval}...')

        # Get relevant images (1 year window prior to as-of-date):
        # from the 1st of the same month one year earlier up to the day before the as-of date.
        as_of_date = np.datetime64(f'{as_of_year}-{as_of_month:02d}-01')
        img_date_start = np.datetime64(f'{as_of_year-1}-{as_of_month:02d}-01')
        img_date_end = as_of_date - np.timedelta64(1, 'D')
        # 'img_date' holds ISO strings, so a lexicographic between() is a date range filter.
        df_list_files_modis_reproj_curr = (
            df_list_files_modis_reproj[
                df_list_files_modis_reproj['img_date'].between(str(img_date_start), str(img_date_end))
            ]
            .sort_values('img_date')
        )

        # Fail fast, before any raster is read, when the window is incomplete.
        n_months_found = df_list_files_modis_reproj_curr['img_month'].nunique()
        assert n_months_found == N_WINDOW_MONTHS, (
            f'Expected {N_WINDOW_MONTHS} image months for as-of {as_of_year}-{as_of_month:02d}, '
            f'found {n_months_found}.'
        )

        # Average the rasters of each month. `sort=False` keeps the groups in
        # order of first appearance, i.e. chronological order (the frame is sorted
        # by img_date), so window_m01 is the oldest month of the window and
        # window_m12 the month right before the as-of date. With the default
        # `sort=True` the groups come out as M01..M12 and window_m01 would always
        # be January, whatever the as-of month.
        # NOTE: parquet files written before this change carry calendar-ordered
        # labels; since existing outputs are skipped, delete them to regenerate.
        list_arr_pixval = []
        list_img_month = []
        dict_img_month = {}
        grouped_by_month = df_list_files_modis_reproj_curr.groupby('img_month', sort=False)
        for m, (img_month, df_list_files_modis_reproj_curr_month) in enumerate(
            tqdm(grouped_by_month, 'Iterate image month...'), start=1
        ):
            dict_img_month[img_month] = f'window_m{m:02}'
            list_arr_pixval_tmp = [
                _read_ndvi_raster(path_raster)
                for path_raster in df_list_files_modis_reproj_curr_month['file_path']
            ]
            # Plain mean: a pixel that is NaN in any image of the month stays NaN.
            list_img_month.append(img_month)
            list_arr_pixval.append(np.mean(list_arr_pixval_tmp, axis=0))

        # Reformat pixval array: stack the row/col layers on top of the monthly
        # layers, then flatten to a 2-D table with one row per pixel and one column per layer.
        arr_pixval_yearly = np.array(list_arr_pixval)
        arr_shape = arr_pixval_yearly.shape[1:]
        arr_row_col = gis_tools.create_row_col_arr(arr_shape)
        arr_pixval_yearly_row_col = np.concatenate([arr_row_col, arr_pixval_yearly])
        n_layers = arr_pixval_yearly_row_col.shape[0]
        arr_pixval_yearly_row_col = arr_pixval_yearly_row_col.T.reshape(-1, n_layers)

        # Drop NA pixels (NaN in any monthly layer; the first two columns are row/col)
        arr_na_pixel = np.isnan(arr_pixval_yearly_row_col[:, 2:]).any(axis=1)
        arr_pixval_yearly_row_col = arr_pixval_yearly_row_col[~arr_na_pixel]

        # Convert to DF indexed by pixel id, with the monthly columns renamed to
        # their window labels and the row/col helper columns removed.
        df_pixval_curr = pd.DataFrame(arr_pixval_yearly_row_col, columns=['row', 'col'] + list_img_month)
        df_pixval_curr.index = (
            df_pixval_curr.row.astype(np.int64) * ROW_INDEX_MULTIPLIER
            + df_pixval_curr.col.astype(np.int64)
        )
        df_pixval_curr = (
            df_pixval_curr
            .rename(columns=dict_img_month)
            .drop(columns=['row', 'col'])
        )
        df_pixval_curr['as_of_month'] = as_of_month

        # Save to parquet
        df_pixval_curr.to_parquet(path_df_pixval)
