## **Environment setup.**

In [None]:
!pip3 install tqdm torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip3 install GPUtil netCDF4 geopandas geocube dask_ml zarr
!pip3 install cuml-cu11 --extra-index-url=https://pypi.nvidia.com

Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting netCDF4
  Downloading netCDF4-1.6.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
Collecting geocube
  Downloading geocube-0.4.2-py3-none-any.whl (21 kB)
Collecting dask_ml
  Downloading dask_ml-2023.3.24-py3-none-any.whl (148 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m148.7/148.7 kB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting zarr
  Downloading zarr-2.16.0-py3-none-any.whl (206 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m206.5/206.5 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cftime (from netCDF4)
  Downloading cftime-1.6.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.

## **Pull code from GitHub.**

In [None]:
!rm -Rf /content/learnergy/
!rm -Rf /content/SIT_FUSE
!git clone https://github.com/nlahaye/learnergy.git
!git clone https://github.com/nlahaye/SIT_FUSE.git

Cloning into 'learnergy'...
remote: Enumerating objects: 3549, done.[K
remote: Counting objects: 100% (966/966), done.[K
remote: Compressing objects: 100% (374/374), done.[K
remote: Total 3549 (delta 626), reused 888 (delta 585), pack-reused 2583[K
Receiving objects: 100% (3549/3549), 811.03 KiB | 5.83 MiB/s, done.
Resolving deltas: 100% (2275/2275), done.
Cloning into 'SIT_FUSE'...
remote: Enumerating objects: 1169, done.[K
remote: Counting objects: 100% (101/101), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 1169 (delta 55), reused 73 (delta 37), pack-reused 1068[K
Receiving objects: 100% (1169/1169), 399.40 KiB | 10.79 MiB/s, done.
Resolving deltas: 100% (793/793), done.


## **Mount GDrive - TODO change out for VRT access**

In [None]:
# Make Google Drive visible inside the Colab VM; later cells read and write
# files below /content/drive/MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


## **Run Initial Feature Extraction & Top Level Clustering**

In [None]:
%env PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
!cd SIT_FUSE; torchrun --nnodes=1 --nproc_per_node=1 dbn_learnergy.py --yaml /content/SIT_FUSE/config/dbn/palm_oil_dbn_colab.yaml

env: PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
HERE CUDA_VISIBLE_DEVICES 0
/content/drive/MyDrive/oil_palm_data_science/ucayali_condensed_stacks_2020_h1v1.tiff
(4, 5000, 5000)
STATS -8.9276705 30.682579 -0.00019300147 1.0001402
2023-07-27 07:44:57,256 - learnergy.models.deep.dbn — INFO — Overriding class: Model -> DBN.
2023-07-27 07:45:05,986 - learnergy.core.model — DEBUG — Device: cuda.
2023-07-27 07:45:05,986 - learnergy.models.gaussian.gaussian_rbm — INFO — Overriding class: GaussianRBM -> GaussianSeluRBM.
2023-07-27 07:45:05,986 - learnergy.models.gaussian.gaussian_rbm — INFO — Overriding class: RBM -> GaussianRBM.
2023-07-27 07:45:05,986 - learnergy.models.bernoulli.rbm — INFO — Overriding class: Model -> RBM.
2023-07-27 07:45:05,987 - learnergy.core.model — DEBUG — Device: cuda.
2023-07-27 07:45:07,152 - learnergy.models.bernoulli.rbm — INFO — Class overrided.
2023-07-27 07:45:07,152 - learnergy.models.bernoulli.rbm — DEBUG — Size: (36, 2000) | Learning: CD-1

## **Discretize Model Output - Assign Top Level Labels/Clusters**

In [None]:
%env PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
!cd SIT_FUSE; python3 discretize_clusters.py --yaml /content/SIT_FUSE/config/cluster/discretize_palm_oil_colab.yaml

env: PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
/content/drive/MyDrive/oil_palm_data_science/ML_Test/ucayali_condensed_stacks_2020_h1v1.tiff.clustoutput_test.data
(150,) UNIQUE LABELS
ASSIGNING LABELS 0 150
(5000, 5000) (24985000,) (24985000, 3)
FINISHED WITH LABEL ASSIGNMENT
FINAL DATA TO DASK
HERE CLUSTERS MIN MAX MEAN STD -1.0 149.0 100.34557424 35.56576003243361
102.4042574619998


## **Visualize Initial Results**

In [None]:
# Display helper for rendering an image as the cell output.
from IPython.display import Image

# Show the 150-cluster PNG (presumably written by the discretization step --
# confirm against discretize_clusters.py). The file lives on the notebook VM's
# filesystem (mounted Drive), so it must be loaded with `filename=`, which reads
# and embeds the image data; `url=` only emits an <img> tag whose source the
# browser cannot resolve to a path on the VM.
Image(filename="/content/drive/MyDrive/oil_palm_data_science/ML_Test/ucayali_condensed_stacks_2020_h1v1.tiff.clustoutput_test.data_150clusters.png", width=300, height=300)

## **Create Hierarchical Tree/Dendrogram of Labels - Assign More Precise Labels**

In [None]:
%env PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
!cd SIT_FUSE; torchrun --nnodes=1 --nproc_per_node=1 dbn_learnergy_heirarchical.py --yaml /content/SIT_FUSE/config/dbn/palm_oil_dbn_colab.yaml

env: PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
/content/drive/MyDrive/oil_palm_data_science/ucayali_condensed_stacks_2020_h1v1.tiff
(4, 5000, 5000)
STATS -8.9276705 30.682579 -0.00019300207 1.0001425
2023-07-27 08:23:53,562 - learnergy.models.deep.dbn — INFO — Overriding class: Model -> DBN.
2023-07-27 08:24:01,344 - learnergy.core.model — DEBUG — Device: cuda.
2023-07-27 08:24:01,344 - learnergy.models.gaussian.gaussian_rbm — INFO — Overriding class: GaussianRBM -> GaussianSeluRBM.
2023-07-27 08:24:01,344 - learnergy.models.gaussian.gaussian_rbm — INFO — Overriding class: RBM -> GaussianRBM.
2023-07-27 08:24:01,344 - learnergy.models.bernoulli.rbm — INFO — Overriding class: Model -> RBM.
2023-07-27 08:24:01,345 - learnergy.core.model — DEBUG — Device: cuda.
2023-07-27 08:24:04,870 - learnergy.models.bernoulli.rbm — INFO — Class overrided.
2023-07-27 08:24:04,871 - learnergy.models.bernoulli.rbm — DEBUG — Size: (36, 2000) | Learning: CD-10 | Hyperparameters: lr = 0.

## **GeoTiff/Intermediate Product Generation**

In [None]:
%env PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
!cd SIT_FUSE/postprocessing/; python3 generate_cluster_geotiffs.py --yaml ../config/postprocess/palm_oil_geotiff_gen_colab.yaml

env: PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
Traceback (most recent call last):
  File "/content/SIT_FUSE/postprocessing/generate_cluster_geotiffs.py", line 484, in <module>
    main(args.yaml)
  File "/content/SIT_FUSE/postprocessing/generate_cluster_geotiffs.py", line 470, in main
    generate_cluster_gtiffs(data_reader = reader, data_reader_kwargs = data_reader_kwargs, subset_inds = subset_inds,
  File "/content/SIT_FUSE/postprocessing/generate_cluster_geotiffs.py", line 136, in generate_cluster_gtiffs
    dbnDat1 = read_func(cluster_data[p], **data_reader_kwargs).astype(np.int32)
  File "/content/SIT_FUSE/utils.py", line 66, in numpy_from_zarr
    return np.array(zarr_load(filename).compute())
  File "/content/SIT_FUSE/utils.py", line 63, in zarr_load
    return da.from_zarr(filename)
  File "/usr/local/lib/python3.10/dist-packages/dask/array/core.py", line 3593, in from_zarr
    z = zarr.Array(store, read_only=True, path=component, **kwargs)
  File "/usr/loca

## **Generate Final Product**

In [None]:
%env PYTHONPATH=$PYTHONPATH:/content/learnergy/:/content/SIT_FUSE/
!cd SIT_FUSE/postprocessing/; python3 contour_and_fill.py --yaml ../config/postprocess/palm_oil_contour_colab.yaml