# MODIS - Rice Pixel Classification

In [None]:
# When True, the training data is reduced to a random 1% sample before use.
DOWNSIZED = True
# When True, only a 1000-row sample of the training data is uploaded to H2O.
DEV = False

## Install packages

In [None]:
# Install third-party packages into the notebook environment.
!pip install zkyhaxpy rasterio utm geopandas ipython-autotime gcsfs h2o

## Import libraries

In [None]:
## for all ##
from zkyhaxpy import io_tools, pd_tools, np_tools, console_tools, timer_tools, json_tools, dict_tools, colab_tools, gcp_tools
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import re

from sklearn.model_selection import train_test_split
import h2o
from h2o.automl import H2OAutoML

%load_ext autotime

In [None]:
# NOTE(review): zkyhaxpy helpers; presumably they mount Google Drive and
# authenticate this Colab session against GCP so gs:// paths can be read
# and written -- confirm against the zkyhaxpy documentation.
colab_tools.mount_drive()
colab_tools.authen_gcp()

## Define paths

In [None]:
# Labelled training data (parquet on GCS); it is read with pd.read_parquet and must contain a `rice_f` column.
path_df_pixval_nrt_rice_f = 'gs://unbdh2022-multiverseofdata-dev/training_data/df_pixval_nrt_rice_f.parquet'
# Local folder reserved for training data.
folder_training_data = '/temp/training_data'

# NOTE(review): presumably creates the folder when it does not exist yet -- confirm against zkyhaxpy.
io_tools.create_folders(folder_training_data)

# Execute

In [None]:
# Connect to a local H2O server, launching a new one if none is running.
#   nthreads=-1     -> number of threads when launching a new H2O server
#   max_mem_size=40 -> in gigabytes
h2o.init(nthreads=-1, max_mem_size=40)

In [None]:
# Load the labelled pixel values and cast the target column to integer.
df_pixval_nrt_rice_f = pd.read_parquet(path_df_pixval_nrt_rice_f)
df_pixval_nrt_rice_f["rice_f"] = df_pixval_nrt_rice_f["rice_f"].astype(int)

if DOWNSIZED:
    # Seed the subsample so a rerun trains on the same rows; the frame split
    # and AutoML run further down are seeded with 0 as well.
    df_pixval_nrt_rice_f = df_pixval_nrt_rice_f.sample(frac=0.01, random_state=0).copy()

print(df_pixval_nrt_rice_f.shape)

if DEV:
    # Cap the sample size at the number of available rows: pandas raises
    # ValueError when asked for more rows than exist (sampling without replacement).
    n_dev_rows = min(1000, len(df_pixval_nrt_rice_f))
    hdf = h2o.H2OFrame(df_pixval_nrt_rice_f.sample(n_dev_rows, random_state=0))
else:
    hdf = h2o.H2OFrame(df_pixval_nrt_rice_f)

# Target column; every remaining column of the frame is used as a predictor.
y = "rice_f"
x = hdf.columns
x.remove(y)

# Convert the target to a factor (categorical) column.
hdf[y] = hdf[y].asfactor()

# 80% of the rows go to training, the rest is held out for testing.
hdf_train, hdf_test = hdf.split_frame(ratios=[0.8], seed=0)

# AutoML run: at most 10 models, 5-fold cross-validation, class balancing on.
aml = H2OAutoML(
    max_models=10,
    balance_classes=True,
    nfolds=5,
    seed=0,
)
aml.train(x=x, y=y, training_frame=hdf_train)



# Show every row of the AutoML leaderboard. The result is printed explicitly
# because a bare expression in the middle of a notebook cell is evaluated but
# never displayed (only the last expression of a cell is echoed).
lb = aml.leaderboard
print(lb.head(rows=lb.nrows))

# Report the best model and how it performs on the held-out test frame.
best_model = aml.get_best_model()
print(best_model)
print('### Best model performance (test data) ###')
print(best_model.model_performance(hdf_test))


folder_models_mojo = '/temp/models/clf/mojo'
folder_models_basic = '/temp/models/clf/basic'
io_tools.create_folders(folder_models_mojo, folder_models_basic)
path_best_model_basic = h2o.save_model(model=best_model,path=folder_models_basic, force=True)
print(path_best_model_basic)

aml.leader.download_mojo(path = folder_models_mojo)
!gsutil cp -r /temp/models/clf gs://unbdh2022-multiverseofdata-dev/models

In [None]:
# Shut down the running H2O cluster.
h2o.cluster().shutdown()

In [None]:
io_tools.create_folders('/temp')
!gsutil cp -r -n gs://unbdh2022-multiverseofdata-dev/models /temp
path_best_model_basic = '/temp/models/clf/basic/StackedEnsemble_AllModels_1_AutoML_2_20221109_161322'


h2o.init(
    nthreads=-1,     # number of threads when launching a new H2O server    
    max_mem_size=40  # in gigabytes
)
loaded_model = h2o.load_model(path=path_best_model_basic)

# Predict

In [None]:

# Score every monthly pixel-value file from 2001-01 to 2022-12 with the loaded
# model and write one prediction parquet per month back to GCS.
for year in range(2001, 2023):
    for month in range(1, 13):

        path_df_pixval_nrt_rice_f_curr = f'gs://unbdh2022-multiverseofdata-dev/modis/ndvi_pixval_nrt_v2/df_pixval_nrt_{year}-{month:02d}.parquet'
        path_df_prediction = f'gs://unbdh2022-multiverseofdata-dev/prediction/rice_pixel_v2/df_prediction_{year}-{month:02d}.parquet'

        # Skip months without input data and months that were already predicted,
        # so the loop can be rerun after an interruption.
        if not gcp_tools.check_file_exists_gcs(path_df_pixval_nrt_rice_f_curr):
            print(f'{path_df_pixval_nrt_rice_f_curr} not found. Continue...')
            continue
        if gcp_tools.check_file_exists_gcs(path_df_prediction):
            print(f'{path_df_prediction} already exists skip')
            continue

        print(f'Predicting {year}-{month}...')

        df_pixval_nrt_rice_f = pd.read_parquet(path_df_pixval_nrt_rice_f_curr)

        hdf = h2o.H2OFrame(df_pixval_nrt_rice_f)
        hdf_preds = None
        try:
            hdf_preds = loaded_model.predict(hdf)
            df_prediction = hdf_preds.as_data_frame()
        finally:
            # Uploaded frames and prediction frames stay in the H2O cluster
            # until removed explicitly; without this, cluster memory grows
            # with every month processed.
            h2o.remove([hdf] if hdf_preds is None else [hdf, hdf_preds])

        # Re-attach the input index so each prediction row can be matched
        # back to its source row.
        df_prediction.index = df_pixval_nrt_rice_f.index
        df_prediction.to_parquet(path_df_prediction)
        
