<a href="https://colab.research.google.com/github/nghianguyen7171/FacialAnalysis-GUI/blob/main/CropHarvest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

---
```
Author: Van-Thong Huynh
Affiliation: Dept. of AI Convergence, Chonnam Nat'l Univ.
```

---

In [None]:
!pip install -qq cropharvest tensorflow-addons

In [None]:
import numpy as np
from cropharvest.datasets import CropHarvest
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.losses import BinaryCrossentropy, BinaryFocalCrossentropy
from tensorflow_addons.metrics import F1Score
from keras.models import clone_model
from keras.callbacks import ModelCheckpoint
from keras import utils
import pathlib
from sklearn.model_selection import StratifiedKFold
import random, os
from sklearn.feature_selection import SelectKBest

def set_seed(seed=1):
    """Seed the Python, NumPy and TensorFlow RNGs and enable TF op determinism.

    Args:
        seed: Integer seed handed to each random number generator.
    """
    # Same three generators, seeded in the same order, with one shared value.
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)
    # Ask TensorFlow to run its ops deterministically as well.
    tf.config.experimental.enable_op_determinism()

# Fix all random number generators once, right after the imports.
set_seed(seed=101)
# Directory handed to load_data() as the CropHarvest data root.
DATA_DIR = './'

In [None]:
def load_data(data_dir, kenya_only=True, flatten_x=False, return_test=False):
    """Load CropHarvest benchmark data as arrays.

    Args:
        data_dir: Directory passed to ``CropHarvest.create_benchmark_datasets``.
        kenya_only: If True, use only the first benchmark dataset (the one this
            notebook treats as Kenya). If False, combine the first three
            benchmark datasets into a single training set.
        flatten_x: Forwarded unchanged to ``as_array`` / ``test_data``.
        return_test: If True, skip the training data and return the result of
            ``test_data`` for the first benchmark dataset.

    Returns:
        ``(x_train, y_train)`` arrays, or the ``test_data`` result when
        ``return_test`` is True.
    """
    evaluation_datasets = CropHarvest.create_benchmark_datasets(data_dir)
    if return_test:
        return evaluation_datasets[0].test_data(flatten_x=flatten_x)

    if kenya_only:
        x_train, y_train = evaluation_datasets[0].as_array(flatten_x=flatten_x)
        return x_train, y_train

    # Collect the per-dataset arrays, then stack them along the sample axis so
    # this branch returns arrays (like the Kenya-only branch) instead of lists.
    x_parts, y_parts = [], []
    for dataset in evaluation_datasets[:3]:
        cur_x_train, cur_y_train = dataset.as_array(flatten_x=flatten_x)
        x_parts.append(cur_x_train)
        y_parts.append(cur_y_train)

    x_train = np.concatenate(x_parts, axis=0)
    y_train = np.concatenate(y_parts, axis=0)
    return x_train, y_train

def generate_test_prediction(test_model, save_path='./'):
    """Predict every test instance and write one NetCDF file per instance.

    Files are written to ``<save_path>/prediction/<test_id>.nc``.

    Args:
        test_model: Object exposing ``predict(x)`` that returns an array.
        save_path: Directory under which the ``prediction`` folder is created.
    """
    test_data = load_data(DATA_DIR, kenya_only=True, flatten_x=False, return_test=True)

    # The output folder does not depend on the test instance; create it once.
    prediction_dir = os.path.join(save_path, 'prediction')
    os.makedirs(prediction_dir, exist_ok=True)

    for test_id, test_instance in test_data:
        results_nc = os.path.join(prediction_dir, '{}.nc'.format(test_id))

        # NOTE(review): models built by create_model() end in a linear layer, so
        # these values are raw logits - confirm whether to_xarray() expects
        # probabilities and apply a sigmoid here if so.
        preds = test_model.predict(test_instance.x)

        ds = test_instance.to_xarray(preds.flatten())
        ds.to_netcdf(results_nc)

## Define models and model utils

In [None]:
def set_gpu_growth():
    """Restrict TensorFlow to the first GPU and enable memory growth on it.

    Does nothing when no physical GPU is reported. A ``RuntimeError`` raised by
    TensorFlow while configuring the device is printed rather than propagated.
    """
    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        return

    first_gpu = gpus[0]
    try:
        # Only the first GPU stays visible to TensorFlow.
        tf.config.set_visible_devices(first_gpu, 'GPU')
        tf.config.experimental.set_memory_growth(first_gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as err:
        # Visible devices must be set before GPUs have been initialized.
        print(err)

def create_model(seq_len=12, input_dim=18, num_hid=128, num_layers=2, dropout=0.2, kernel_initializer='he_normal', num_classes=1, output_bias=None):
    """Build an (uncompiled) stacked-LSTM classifier that outputs logits.

    Architecture: ``num_layers`` LSTM layers (only the last one collapses the
    sequence), a ReLU dense layer of ``2 * num_hid`` units, dropout, and a
    linear output layer with ``num_classes`` units.

    Args:
        seq_len: Number of time steps per input sample.
        input_dim: Number of features per time step.
        num_hid: Units in each LSTM layer.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout rate used inside the LSTMs and before the output layer.
        kernel_initializer: Kernel initializer for both dense layers.
        num_classes: Number of output units.
        output_bias: Bias initializer for the output layer. May be None, any
            Keras initializer identifier, or a plain number / NumPy array,
            which is wrapped in a ``Constant`` initializer.

    Returns:
        A built ``keras.Sequential`` model.
    """
    import numbers

    # Keras only accepts initializer objects, names or configs here, so a raw
    # numeric bias (e.g. log(pos / neg)) has to be wrapped explicitly.
    if isinstance(output_bias, (numbers.Real, np.ndarray)):
        output_bias = keras.initializers.Constant(output_bias)

    model = keras.Sequential()
    model.add(layers.InputLayer(input_shape=(seq_len, input_dim)))

    for idx in range(num_layers):
        # Every LSTM except the last must emit the full sequence for the next one.
        return_seq = idx < num_layers - 1
        model.add(layers.LSTM(units=num_hid, return_sequences=return_seq, dropout=dropout))

    model.add(layers.Dense(num_hid * 2, activation='relu', kernel_initializer=kernel_initializer))
    model.add(layers.Dropout(dropout))
    # Linear activation: the model emits logits, not probabilities.
    model.add(layers.Dense(num_classes, activation='linear', kernel_initializer=kernel_initializer, bias_initializer=output_bias))
    model.build()
    return model

def _logit_f1_metric(num_classes):
    """Create a macro F1 metric that thresholds sigmoid(logits) at 0.5."""

    class _LogitF1Score(F1Score):
        """F1Score for models whose outputs are logits rather than probabilities."""

        def update_state(self, y_true, y_pred, sample_weight=None):
            # Map logits to probabilities so the 0.5 threshold means p > 0.5.
            return F1Score.update_state(self, y_true, tf.sigmoid(y_pred), sample_weight)

    return _LogitF1Score(num_classes=num_classes, name='f1_score', threshold=0.5, average='macro')


def build_model(input_dim=18, optim='rmsprop', lr=0.001, num_classes=1, model=None, build=True, output_bias=None):
    """Create (if needed) and compile the LSTM classifier.

    Args:
        input_dim: Number of features per time step for a newly created model.
        optim: ``'rmsprop'`` selects RMSprop; any other value selects Adam.
        lr: Learning rate for the optimizer.
        num_classes: Number of output units and of classes tracked by the F1 metric.
        model: Existing model to compile; a new one is created when None.
        build: When False, return the model without compiling it.
        output_bias: Forwarded to ``create_model`` as the output-layer bias initializer.

    Returns:
        The model, compiled with binary cross-entropy on logits and a macro F1
        metric named ``f1_score`` unless ``build`` is False.
    """
    if model is None:
        # num_classes is forwarded so the output layer and the metric agree.
        model = create_model(seq_len=12, input_dim=input_dim, num_hid=128, num_layers=2, dropout=0.,
                             num_classes=num_classes, output_bias=output_bias)

    if not build:
        return model

    if optim == 'rmsprop':
        print('Use RMSprop optimizer')
        opt = keras.optimizers.RMSprop(learning_rate=lr)
    else:
        print('Use Adam optimizer')
        opt = keras.optimizers.Adam(learning_rate=lr)

    # The loss consumes logits, so the metric must convert them before
    # thresholding; thresholding raw logits at 0.5 would mean p > ~0.62.
    loss_fn = BinaryCrossentropy(from_logits=True)
    metric_fn = _logit_f1_metric(num_classes)
    model.compile(loss=loss_fn, optimizer=opt, metrics=[metric_fn])
    return model

def run_kfold(X, y, input_dim=18, num_fold=5, bsz=32, n_epochs=30, optim='rmsprop', lr=0.001, random_state=None):
    """Train and evaluate the model with stratified k-fold cross-validation.

    Every fold starts from the same initial weights, is trained with
    balanced class weights, and is scored with the weights of the epoch that
    reached the best ``val_f1_score``. Checkpoints are written to
    ``./ckpts/fold_<k>/checkpoint``.

    Args:
        X: Input array indexed by sample along the first axis.
        y: Label array with one entry per sample; used for stratification.
        input_dim: Number of features per time step.
        num_fold: Number of folds.
        bsz: Batch size.
        n_epochs: Epochs per fold.
        optim: Optimizer name forwarded to ``build_model``.
        lr: Learning rate forwarded to ``build_model``.
        random_state: Seed for the fold shuffle; None keeps scikit-learn's
            default of drawing from the global NumPy RNG.

    Returns:
        ``(kfold_history, outputs)``: the per-fold ``fit`` histories and a
        per-fold list ``[y_val, predictions]``.
    """
    skf = StratifiedKFold(n_splits=num_fold, shuffle=True, random_state=random_state)

    kfold_history = []
    kfold_scores = []
    outputs = []

    # Create one uncompiled model only to capture a shared set of initial weights.
    base_model = build_model(input_dim=input_dim, optim=optim, lr=lr, num_classes=1, build=False)
    init_weights = base_model.get_weights()

    for fold_idx, (train_index, val_index) in enumerate(skf.split(X, y), start=1):
        print('Running on fold {}'.format(fold_idx))
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index].reshape(-1, 1), y[val_index].reshape(-1, 1)

        # Balanced class weights: each class contributes half of the total weight.
        num_pos = np.count_nonzero(y_train.flatten())
        num_neg = len(y_train) - num_pos
        class_weight = {0: (1 / num_neg) * (len(y_train) / 2.0), 1: (1 / num_pos) * (len(y_train) / 2.0)}

        # Fresh compiled model, reset to the shared initial weights.
        cur_model = build_model(input_dim=input_dim, optim=optim, lr=lr, num_classes=1)
        cur_model.set_weights(init_weights)

        best_ckpt_path = f'./ckpts/fold_{fold_idx}/checkpoint'
        os.makedirs(f'./ckpts/fold_{fold_idx}', exist_ok=True)

        # Keep only the weights of the epoch with the highest validation F1.
        ckpt_callbacks = ModelCheckpoint(best_ckpt_path, save_best_only=True, save_weights_only=True,
                                         monitor='val_f1_score', mode='max')
        his = cur_model.fit(X_train, y_train, batch_size=bsz, epochs=n_epochs, validation_data=(X_val, y_val),
                            verbose=0, callbacks=[ckpt_callbacks], class_weight=class_weight)
        kfold_history.append(his)

        # Score the fold with its best checkpoint rather than the last epoch.
        cur_model.load_weights(best_ckpt_path)
        fold_eval = cur_model.evaluate(X_val, y_val)
        kfold_scores.append(fold_eval[-1])  # last entry is the F1 metric

        outputs.append([y_val, cur_model.predict(X_val)])

    print('Mean F1: {}. Std F1: {}. Best: {}'.format(np.mean(kfold_scores), np.std(kfold_scores), np.max(kfold_scores)))
    return kfold_history, outputs

## Get data

In [None]:
# Training arrays of the first benchmark dataset; flatten_x=False leaves x_train unflattened.
x_train, y_train = load_data(data_dir=DATA_DIR, kenya_only=True, flatten_x=False)

In [None]:
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

# Average every sample over axis 1 so the selector gets one feature row per sample.
x_train_mean = x_train.mean(axis=1)

# Feature elimination driven by a linear SVM, keeping 8 features.
estimator = SVC(kernel="linear")
selector = RFE(estimator, n_features_to_select=8, step=16).fit(x_train_mean, y_train)

# Column indices of the features the selector kept.
sel_idx = selector.get_support(indices=True).tolist()
print(sel_idx)

[2, 3, 4, 5, 6, 8, 10, 17]


## Running

In [None]:
# 5-fold cross-validation on all features (the full slice keeps every column).
run_history, fold_outputs = run_kfold(
    x_train[:, :, :],
    y_train,
    input_dim=18,
    num_fold=5,
    bsz=32,
    n_epochs=30,
)

Running on fold 1
Use RMSprop optimizer
Running on fold 2
Use RMSprop optimizer
Running on fold 3
Use RMSprop optimizer
Running on fold 4
Use RMSprop optimizer
Running on fold 5
Use RMSprop optimizer
Mean F1: 0.7614126205444336. Std F1: 0.03170246968032412. Best: 0.7966101765632629


In [None]:
# Smallest value among the axis-1-averaged features.
x_train_mean.min()

-3.9081658585624357

In [None]:
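# Feature indices picked in an earlier experiment that scored features with f_classif
# (presumably via SelectKBest, k=8 - confirm): [2, 3, 4, 5, 12, 14, 16, 17]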


In [None]:
# Log kept for reference from an earlier run that used the Adam optimizer:
# Running on fold 1
# Use Adam optimizer
# Class weight:  {0: 0.6235224586288416, 1: 2.5239234449760763}
# 9/9 [==============================] - 0s 4ms/step - loss: 0.3345 - f1_score: 0.7231
# 42/42 [==============================] - 1s 4ms/step - loss: 0.2613 - f1_score: 0.7844
# Running on fold 2
# Use Adam optimizer
# Class weight:  {0: 0.6235224586288416, 1: 2.5239234449760763}
# 9/9 [==============================] - 0s 4ms/step - loss: 0.3535 - f1_score: 0.7480
# 42/42 [==============================] - 1s 4ms/step - loss: 0.2400 - f1_score: 0.8051
# Running on fold 3
# Use Adam optimizer
# Class weight:  {0: 0.6242603550295858, 1: 2.5119047619047623}
# 9/9 [==============================] - 0s 5ms/step - loss: 0.3292 - f1_score: 0.7556
# 42/42 [==============================] - 1s 4ms/step - loss: 0.2780 - f1_score: 0.7736
# Running on fold 4
# Use Adam optimizer
# Class weight:  {0: 0.6242603550295858, 1: 2.5119047619047623}
# 9/9 [==============================] - 0s 4ms/step - loss: 0.2649 - f1_score: 0.7840
# 42/42 [==============================] - 1s 5ms/step - loss: 0.2471 - f1_score: 0.7791
# Running on fold 5
# Use Adam optimizer
# Class weight:  {0: 0.624113475177305, 1: 2.5142857142857147}
# 9/9 [==============================] - 0s 4ms/step - loss: 0.3946 - f1_score: 0.7344
# 42/42 [==============================] - 1s 4ms/step - loss: 0.2482 - f1_score: 0.8006