# OpenMapFlow Tutorial

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/3maps.gif" width="80%"/>

## 1. Clone GitHub repo and install OpenMapFlow

If you don't already have one, obtain a GitHub Personal Access Token by following the steps [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). Save this token somewhere private.

In [None]:
from getpass import getpass
from pathlib import Path

github_url = input("Github HTTPS URL: ")
email = input("Github email: ")
username = input("Github username: ")

!git config --global user.email $username
!git config --global user.name $email

token = getpass('Github Personal Access Token:')

!git clone {github_url.replace("https://", f"https://{username}:{token}@")}

# Temporarily install from Github
!pip install git+https://ivanzvonkov:$token@github.com/nasaharvest/openmapflow.git -q
!pip install pyyaml==5.4.1 -q

In [None]:
# CLI
!openmapflow

## 2. Create or navigate to existing OpenMapFlow project

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/title.png" width="70%"/>

In [None]:
# Enter the cloned repository; the folder name is taken from the last URL
# component with its suffix removed
%cd {Path(github_url).stem}
project_name = input("Project name: ")
cwd = Path.cwd()

# The project is either the repository folder itself or a subdirectory of it
if cwd.stem != project_name:
    if not (cwd / project_name).exists():
        print("Project root does not exist, creating...")
        Path(project_name).mkdir()
    else:
        print("Project root exists as subdirectory")
    %cd {project_name}
else:
    print("Github repo is project root")

# Run `openmapflow generate` only when no openmapflow.yaml is present yet
if not (Path.cwd() / "openmapflow.yaml").exists():
    !openmapflow generate

## 3. Obtain labeled earth observation data

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/step1.png" width="70%"/>

### 3a. Pull in already processed data

In [None]:
# Pull in data already available
!dvc pull -q
!tar -xzf $(openmapflow datapath COMPRESSED_FEATURES) -C data

In [None]:
# See report of data already available
!openmapflow datasets

### 3b. [OPTIONAL] Add new labeled earth observation data

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/new_data.png" width="70%"/>

In [None]:
# Currently not implemented here, see: https://github.com/nasaharvest/openmapflow#adding-data-

### 3c. Explore labels

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from datasets import datasets
from openmapflow.constants import LAT, LON, DATASET, SUBSET

In [None]:
# Load the labels of every dataset into a single DataFrame
df = pd.concat([d.load_labels() for d in datasets])
df.head()

In [None]:
# Plot map where labels should go
# NOTE(review): gpd.datasets was deprecated in geopandas 0.14 and removed in
# 1.0 — confirm the installed geopandas version still provides it
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world.plot(facecolor="lightgray", figsize=(15, 15));

In [None]:
# Convert pandas dataframe to geopandas dataframe
# points_from_xy builds every point geometry in one vectorized call (x = longitude,
# y = latitude) instead of creating shapely Points one by one in a Python loop
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df[LON], df[LAT]))


In [None]:
# Draw the world basemap and overlay the label points, colored by dataset
ax = world.plot(facecolor="lightgray", figsize=(20, 20))
ax.set_title("Label Locations")
ax.axis("off")

label_style = dict(
    marker="o",
    markersize=1,
    column=DATASET,
    categorical=True,
    legend=True,
    legend_kwds={"loc": "lower left"},
)
gdf.plot(ax=ax, **label_style);

In [None]:
# TASK: Plot points by train, val, test subset

### 3d. Explore earth observation data

In [None]:
import matplotlib.pyplot as plt

from openmapflow.constants import FEATURE_PATH, CLASS_PROB, MONTHS
from openmapflow.features import load_feature

In [None]:
# Get the first validation-set label with positive class
is_positive = df[CLASS_PROB] == 1.0
in_validation = df[SUBSET] == "validation"
label = df[is_positive & in_validation].iloc[0]
label

In [None]:
label[FEATURE_PATH]

In [None]:
# Load earth observation data for label
feature_instance = load_feature(label[FEATURE_PATH])
earth_observation_data = feature_instance.labelled_array
earth_observation_data.shape

**Available earth observation bands**

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/cropharvest_bands.png" width="80%"/>

In [None]:
# Plot the last column of the first 12 rows against the month labels
# NOTE(review): assumes axis 0 is time and the last column holds NDVI —
# confirm against the band table shown above
fig, ax = plt.subplots(1,1, figsize=(10,5))
ax.plot(MONTHS, earth_observation_data[:12, -1]);
ax.set_title("NDVI")
plt.xticks(rotation=45);

In [None]:
# TASK: Plot NDVI for non-crop example from the validation set

## 4. Train model

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/step2.png" width="80%"/>

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/train_model.png" width="80%" />

In [None]:
!pip install tsai -q

In [None]:
import os

In [None]:
!python train.py

In [None]:
os.environ["MODEL_NAME"] = input("MODEL_NAME=")

In [None]:
!python evaluate.py --model_name $MODEL_NAME

## 5. Visualize results

In [None]:
!pip install cmocean -q

In [None]:
from openmapflow.train_utils import model_path_from_name
from openmapflow.config import PROJECT
from cropharvest.inference import Inference
from cropharvest.bands import DYNAMIC_BANDS
from tqdm.notebook import tqdm
from pathlib import Path
from datetime import date
import cmocean
import numpy as np
import rasterio as rio
import torch
import tempfile

In [None]:
# tempfile.tempdir stays None until gettempdir() has been called, which would
# yield the relative path "None/tifs"; ask gettempdir() for the directory instead
tmp_root = Path(tempfile.gettempdir())
tifs_dir = tmp_root / "tifs"
preds_dir = tmp_root / "preds"
tifs_dir.mkdir(exist_ok=True)
preds_dir.mkdir(exist_ok=True)

def merge_tifs(full_prefix):
  """Merge every file matching ``{full_prefix}*`` into a single GeoTIFF.

  A virtual raster is first built with ``gdalbuildvrt`` and then converted
  with ``gdal_translate`` to a GeoTIFF tagged as EPSG:4326.

  Args:
      full_prefix: Path prefix shared by the input files. The outputs are
          named ``{full_prefix}.vrt`` and ``{full_prefix}.tif``, so a prefix
          ending in "/" produces files named ".vrt" and ".tif" inside that
          directory.

  Returns:
      The path of the merged GeoTIFF as a string.
  """
  vrt_in_file = f"{full_prefix}*"
  vrt_out_file = f"{full_prefix}.vrt"
  merged_file = f"{full_prefix}.tif"
  # The wildcard is expanded by the shell that runs the command
  !gdalbuildvrt {vrt_out_file} {vrt_in_file}
  !gdal_translate -a_srs EPSG:4326 -of GTiff {vrt_out_file} {merged_file}
  return merged_file

### 5a. Download example inference data

In [None]:
paths = [
  "gs://harvest-public-assets/openmapflow/Togo_2019_demo_2019-02-01_2020-02-01/00000000000-0000000000.tif",
  "gs://harvest-public-assets/openmapflow/Togo_2019_demo_2019-02-01_2020-02-01/00000000000-0000000256.tif",
  "gs://harvest-public-assets/openmapflow/Togo_2019_demo_2019-02-01_2020-02-01/00000000256-0000000000.tif",
  "gs://harvest-public-assets/openmapflow/Togo_2019_demo_2019-02-01_2020-02-01/00000000256-0000000256.tif"         
]

for p in tqdm(paths):
  !gsutil -m cp {p} {tifs_dir}/{Path(p).name}

In [None]:
merged_eo_file = merge_tifs(full_prefix=f"{tifs_dir}/")

In [None]:
def normalize(array, max_fraction=0.6):
    """Rescale ``array`` for display so that its minimum maps to 0.

    The upper reference is ``max_fraction`` times the array maximum rather
    than the maximum itself, so the result can contain values above 1.

    Args:
        array: Numeric NumPy array, e.g. one raster band.
        max_fraction: Fraction of the maximum used as the upper reference.

    Returns:
        The rescaled array, or an all-zero float array of the same shape
        when the reference range is empty.
    """
    array_min, array_max = array.min(), array.max() * max_fraction
    span = array_max - array_min
    if span == 0:
        # Constant band (e.g. all zeros): avoid 0/0, which would fill the
        # output with NaNs
        return np.zeros_like(array, dtype=float)
    return (array - array_min) / span

month = 2  # zero-based time step to preview
# Positions of the three display bands within a single time step
# (presumably Sentinel-2 red, green, blue — confirm against the band table)
rgb_indexes = [DYNAMIC_BANDS.index(b) for b in ["B4", "B3", "B2"]]
bands_per_month = len(DYNAMIC_BANDS)

# rasterio numbers bands from 1, while the positions above are zero-based, so
# add 1; without it every channel is read from the preceding band.
# Assumes the tif stores len(DYNAMIC_BANDS) bands per time step, back to back.
# The context manager closes the dataset once the bands are in memory.
with rio.open(merged_eo_file) as eo_data:
    colors = [eo_data.read(i + month * bands_per_month + 1) for i in rgb_indexes]

normalized_colors = [normalize(c) for c in colors]
rgb = np.dstack(normalized_colors)
plt.figure(figsize=(10,10))
plt.title("Earth Observation data for one month")
plt.axis('off')
plt.imshow(rgb);

### 5b. Make predictions with model

In [None]:
# Load the saved model selected through the MODEL_NAME environment variable
model = torch.jit.load(model_path_from_name(os.environ["MODEL_NAME"]))
inference = Inference(model=model, normalizing_dict=None)
local_pred_paths = []

# Path.glob, unlike the shell, also matches dot-files. merge_tifs was called
# with a prefix ending in "/", so it wrote a hidden ".tif" mosaic into
# tifs_dir; skip hidden files so only the original tiles are predicted.
tifs = [t for t in Path(tifs_dir).glob("*.tif") if not t.name.startswith(".")]
for local_tif_path in tqdm(tifs, desc="Making predictions"):
  # One prediction file per input tile, named after the tile
  local_pred_path = Path(f"{preds_dir}/pred_{local_tif_path.stem}.nc")
  inference.run(
      local_path=local_tif_path, 
      start_date=date(2019, 2, 1), 
      dest_path=local_pred_path
  )
  local_pred_paths.append(local_pred_path)

### 5c. Merge predictions into map

<img src="https://storage.googleapis.com/harvest-public-assets/openmapflow/merging_predictions.png" width="50%"/>

In [None]:
merged_pred_file = merge_tifs(full_prefix=f"{preds_dir}/")

### 5d. Visualize predictions

In [None]:
# Preview the merged prediction raster with a colormap chosen by project name
predictions_map = rio.open(merged_pred_file)

# "maize" takes precedence over "crop"; anything else falls back to thermal
cmap = (
  cmocean.cm.solar if "maize" in PROJECT
  else cmocean.cm.speed if "crop" in PROJECT
  else cmocean.cm.thermal
)

# Band 1, limited to the [0, 1] range
preview = predictions_map.read(1).clip(0, 1)

plt.figure(figsize=(10, 10))
plt.imshow(preview, cmap=cmap)
plt.title(f"Map Preview: {PROJECT}")
plt.colorbar(fraction=0.03, pad=0.04)
plt.axis("off");

## 6. Push to dvc and git

In [None]:
!dvc commit -q 
!dvc push -q

In [None]:
# Commit everything on a new branch named after the model and push it
# NOTE(review): $MODEL_NAME is presumably expanded by the shell from the
# environment variable set earlier through os.environ — verify no Python
# variable of the same name shadows it
!git checkout -b"$MODEL_NAME"
!git add .
!git commit -m "$MODEL_NAME"
!git push --set-upstream origin "$MODEL_NAME"