# MODIS - Rice Classification

## Install packages

In [None]:
# gcsfs: presumably required so pandas can read/write the gs:// paths used later in this notebook — TODO confirm
!pip install gcsfs 
# ipython-autotime provides the `autotime` extension that is loaded on the next line
!pip install ipython-autotime
%load_ext autotime

In [None]:
# Install the packages imported below (zkyhaxpy helper tools, rasterio, geopandas).
# utm is not imported directly in this notebook — presumably a dependency of the helpers; TODO confirm
!pip install zkyhaxpy rasterio utm geopandas


## Import libraries

In [None]:
## for all ##
from zkyhaxpy import io_tools, pd_tools, np_tools, console_tools, timer_tools, json_tools, dict_tools, colab_tools, gis_tools
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging

import re
import rasterio
import geopandas as gpd
from sklearn.model_selection import train_test_split


In [None]:
# zkyhaxpy helper; presumably mounts Google Drive into the Colab runtime — TODO confirm mount point
colab_tools.mount_drive()

In [None]:
# zkyhaxpy helper; presumably authenticates this session against Google Cloud so that
# the gsutil commands and gs:// paths used below are accessible — TODO confirm
colab_tools.authen_gcp()

## Define paths

In [None]:
# ---- Remote (Google Cloud Storage) locations ----
path_modis_raster_src = 'gs://unbdh2022-multiverseofdata-dev/modis/reference/mod250m16d-ndvi-reproj.tif'
path_df_gistda_rice_f = 'gs://unbdh2022-multiverseofdata-dev/gistda/df_gistda_rice_f.parquet'
# NOTE(review): this object name (df_pixval_rice_f.parquet) differs from the
# df_pixval_nrt_rice_f.parquet file that the last cell uploads — confirm which is intended.
path_df_pixval_nrt_rice_f = 'gs://unbdh2022-multiverseofdata-dev/training_data/df_pixval_rice_f.parquet'

# ---- Local working locations ----
# MODIS reference raster and near-real-time NDVI pixel values
path_modis_raster_ref = '/temp/modis/reference/modis_raster_ref.tif'
folder_modis_ndvi_pixval_nrt = '/temp/modis/ndvi_pixval_nrt_v2'

# GISTDA rice area data: downloaded zips, extracted shapefiles, rasterized tifs
folder_gistda_root = '/temp/gistda_rice'
folder_gistda_rice_zip = os.path.join(folder_gistda_root, 'zip')
folder_gistda_rice_shp = os.path.join(folder_gistda_root, 'shp')
folder_gistda_rice_tif = '/temp/gistda_rice/tif'

# Training data output
folder_training_data = '/temp/training_data'

# ---- Make sure the local folders exist ----
# A file path is passed in the first call; presumably create_folders creates
# its parent folder in that case — TODO confirm against zkyhaxpy.
io_tools.create_folders(path_modis_raster_ref)
io_tools.create_folders(folder_modis_ndvi_pixval_nrt)
io_tools.create_folders(folder_gistda_rice_zip, folder_gistda_rice_shp,)
io_tools.create_folders(folder_gistda_rice_tif)
io_tools.create_folders(folder_training_data)

# Execute

## Prepare dataframe of rice pixel as ground truth

In [None]:

# Download and unzip the GISTDA in-season rice area shapefiles for year 2022.
# The data is published at https://rice.gistda.or.th/
# To make this notebook easy to run, the URLs of all shapefile archives are
# listed in an Excel file stored in the project bucket.
df_gistda_rice_shape_url = pd.read_excel('gs://unbdh2022-multiverseofdata-dev/gistda/gistda_rice_shape_url.xlsx')
for url in df_gistda_rice_shape_url.url:
    file_nm = os.path.basename(url)
    path_dest_zip = os.path.join(folder_gistda_rice_zip, file_nm)
    # `wget -O` creates the output file before any data arrives, so a failed
    # download leaves an empty file behind. Treat an empty file as missing;
    # otherwise a re-run would skip the download and unzip would keep failing.
    if not os.path.exists(path_dest_zip) or os.path.getsize(path_dest_zip) == 0:
        console_tools.execute_cmd(f'wget {url} -O {path_dest_zip}')
    # `-o` overwrites previously extracted files, so re-running this cell is safe.
    console_tools.execute_cmd(f'unzip -o {path_dest_zip} -d {folder_gistda_rice_shp} ')
# The dot is escaped so that only names ending in a literal ".shp" are listed.
list_gistda_rice_shp = io_tools.get_list_files_re(folder_gistda_rice_shp, r'\.shp$')

# Download the raster reference file that serves as the template for the rice area pixels.
# `gsutil cp -n` (no-clobber) skips the copy when the local file already exists.
console_tools.execute_cmd(f'gsutil cp -n {path_modis_raster_src} {path_modis_raster_ref}')

# Rasterize the rice area of every shapefile, writing one .tif per shapefile
# into folder_gistda_rice_tif with the reference raster passed as the template.
for path_gistda_rice_shp in list_gistda_rice_shp:
    file_nm_shp = os.path.basename(path_gistda_rice_shp)
    # Swap only the extension; str.replace would also rewrite any other
    # ".shp" occurring inside the file name.
    file_nm_tif = os.path.splitext(file_nm_shp)[0] + '.tif'
    gis_tools.shape_to_raster(
        path_gistda_rice_shp,
        os.path.join(folder_gistda_rice_tif, file_nm_tif),
        path_modis_raster_ref,
        all_touched=False,
    )

# Build the cumulative rice raster: the pixel-wise maximum over all per-shapefile
# rasters, written to a single GeoTIFF.
# The dot is escaped so that only names ending in a literal ".tif" are listed.
list_files_gistda_rice_tif = io_tools.get_list_files_re(folder_gistda_rice_tif, r'\.tif$')
path_gistda_rice_cum = '/temp/gistda_rice/gistda_rice_cum.tif'
if not list_files_gistda_rice_tif:
    # Fail early with a clear message instead of an opaque zero-size-array error.
    raise FileNotFoundError(f'No rice raster (.tif) found in {folder_gistda_rice_tif}')

# Keep a running maximum instead of stacking every raster into one 3-D array,
# so that only two rasters are held in memory at any time.
arr_gistda_rice_pixval_cum = None
for path_file in list_files_gistda_rice_tif:
    with rasterio.open(path_file) as ds:
        # The profile of the last raster read is reused for the output file below.
        profile = ds.profile
        arr_gistda_rice_pixval = ds.read(1)
    if arr_gistda_rice_pixval_cum is None:
        arr_gistda_rice_pixval_cum = arr_gistda_rice_pixval
    else:
        arr_gistda_rice_pixval_cum = np.maximum(arr_gistda_rice_pixval_cum, arr_gistda_rice_pixval)

with rasterio.open(path_gistda_rice_cum, 'w', **profile) as ds:
    ds.write(arr_gistda_rice_pixval_cum, 1)

# Convert the cumulative rice raster into a dataframe indexed by pixel id and
# keep only the rice pixels (rice_f == 1).
# The pixel id packs (row, col) as row * 10000 + col, which is only unique while
# every column index is below 10000. Fail loudly rather than produce colliding ids.
if profile['width'] > 10000:
    raise ValueError(
        f"Raster width {profile['width']} exceeds 10000: row_col_id (row * 10000 + col) would not be unique"
    )

# assumes create_row_col_arr returns a (2, height, width) array holding the row
# and column index of every pixel — TODO confirm against zkyhaxpy
arr_row_col = gis_tools.create_row_col_arr((profile['height'], profile['width']))
# Append the rice raster as a third layer; under the assumption above, transposing
# and reshaping to (-1, 3) yields one (row, col, rice_f) record per pixel.
arr_gistda_rice_pixval_cum_row_col = np.concatenate([arr_row_col, [arr_gistda_rice_pixval_cum]])
df_gistda_rice_f = pd.DataFrame(arr_gistda_rice_pixval_cum_row_col.T.reshape(-1, 3), columns=['row', 'col', 'rice_f'])
df_gistda_rice_f.index = ((df_gistda_rice_f.row.astype(np.int64) * 10000) + df_gistda_rice_f.col.astype(np.int64))
df_gistda_rice_f = df_gistda_rice_f.drop(columns=['row', 'col'])
df_gistda_rice_f.index.name = 'row_col_id'
# Only rice pixels are stored; any pixel id absent from this dataframe is non-rice.
df_gistda_rice_f = df_gistda_rice_f[df_gistda_rice_f.rice_f==1].copy()
df_gistda_rice_f.to_parquet(path_df_gistda_rice_f)

## Prepare training data

In [None]:
#Copy dataframe near-realtim pixel values of 2022 from storage
!gsutil -m cp -n gs://unbdh2022-multiverseofdata-dev/modis/ndvi_pixval_nrt_v2/df_pixval_nrt_2022-*.parquet /temp/modis/ndvi_pixval_nrt_v2

#Join MODIS NDVI pixel values & GISTDA rice pixels
list_files_pixval_nrt = io_tools.get_list_files_re(folder_modis_ndvi_pixval_nrt)
list_files_pixval_nrt.sort()
list_files_pixval_nrt_rice_f = []

for path_df_pixval_nrt in tqdm(list_files_pixval_nrt):
    df_pixval_nrt = pd.read_parquet(path_df_pixval_nrt)
    df_pixval_nrt.index.name = 'row_col_id'
    df_pixval_nrt_rice_f = df_pixval_nrt.merge(df_gistda_rice_f, how='left', left_index=True, right_index=True)
    df_pixval_nrt_rice_f['rice_f'] = df_pixval_nrt_rice_f['rice_f'].fillna(0)
    list_files_pixval_nrt_rice_f.append(df_pixval_nrt_rice_f)

df_pixval_nrt_rice_f = pd.concat(list_files_pixval_nrt_rice_f)
df_pixval_nrt_rice_f.to_parquet('/temp/training_data/df_pixval_nrt_rice_f.parquet')

#Copy back to storage
!gsutil cp /temp/training_data/df_pixval_nrt_rice_f.parquet gs://unbdh2022-multiverseofdata-dev/training_data