# CropHarvest Demo

In [1]:
from cropharvest.datasets import CropHarvestLabels, CropHarvest

In [2]:
labels = CropHarvestLabels("data")

In [3]:
labels[0]

index                                                                   0
is_crop                                                                 1
lat                                                               10.7126
lon                                                               37.0831
dataset                                                          ethiopia
collection_date                                       2020-10-22T00:00:00
export_end_date                                       2021-02-01T00:00:00
harvest_date                                                         None
planting_date                                                        None
label                                                                None
classification_label                                                 None
is_test                                                             False
geometry                POLYGON ((37.0825205254332 10.712735028397, 37...
Name: 0, dtype: object

In [4]:
evaluation_datasets = CropHarvest.create_benchmark_datasets(labels)

In [5]:
evaluation_datasets

{'Kenya_maize': <cropharvest.datasets.CropHarvest at 0x7fca6f2f1e80>,
 'Brazil_coffee': <cropharvest.datasets.CropHarvest at 0x7fca6f2f1e10>,
 'United States of America_almond': <cropharvest.datasets.CropHarvest at 0x7fca6f2f7080>,
 'togo-eval': <cropharvest.datasets.CropHarvest at 0x7fca6f2f7860>}

In [6]:
# train a Random Forest model on the Togo dataset

In [7]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

In [8]:
togo_dataset = evaluation_datasets["togo-eval"]

In [9]:
X, Y = [], []
for i in range(len(togo_dataset)):
    x, y = togo_dataset[i]
    X.append(x)
    Y.append(y)
X_np, Y_np = np.stack(X), np.stack(Y)

In [10]:
X_np.shape, Y_np.shape

((1290, 12, 18), (1290,))

In [11]:
def flatten(array): return array.reshape(array.shape[0], -1)

In [12]:
model = RandomForestClassifier()
model.fit(flatten(X_np), Y_np)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [14]:
test_preds, test_y = [], []
for _, test_instance in togo_dataset.test_data():
    
    test_preds.append(model.predict_proba(flatten(test_instance.x))[:, 1])
    test_y.append(test_instance.y)

In [15]:
print(
    f"For the Random Forest classifier, "
    f"AUC ROC score: {roc_auc_score(test_y[0], test_preds[0])}, "
)

For the Random Forest classifier, AUC ROC score: 0.8884433962264151, 
