# Initialise the FloodMapper GCP bucket

Create the FloodMapper bucket structure and upload the default model.

In [1]:
# Necessary imports
import os
# NOTE(review): this flag is deliberately set before the third-party imports
# below - presumably so that geopandas (if pulled in via ml4floods) selects
# Shapely rather than PyGEOS at import time. Confirm before reordering.
os.environ['USE_PYGEOS'] = '0'
import fsspec
from dotenv import load_dotenv

from ml4floods.data import utils

## Load environment and project details

As with the other notebooks, we load credentials and project details from a hidden `.env`-style file (named `.credentials` in the cell below).

In [2]:
# Load environment variables (including the path to the Google credentials)
# from the hidden environment file. Alternative location: "../../.env"
env_file_path = "../../.credentials"

# load_dotenv() signals success through a truthy return value, so the result
# can be asserted on directly.
assert load_dotenv(dotenv_path=env_file_path), "[ERR] Failed to load environment!"

# The Google service-account key must be declared and present on disk.
assert "GOOGLE_APPLICATION_CREDENTIALS" in os.environ, "[ERR] Missing $GOOGLE_APPLICATION_CREDENTIALS!"
assert "GS_USER_PROJECT" in os.environ, "[ERR] Missing $GS_USER_PROJECT!"
key_file_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
assert os.path.exists(key_file_path), f"[ERR] Google credential key file does not exist: \n{key_file_path} "

# The local base directory is used further down to locate the model files.
assert "ML4FLOODS_BASE_DIR" in os.environ, "[ERR] Missing $ML4FLOODS_BASE_DIR!"
base_path = os.environ["ML4FLOODS_BASE_DIR"]
assert os.path.exists(base_path), f"[ERR] Base path does not exist: \n{base_path} "
print("[INFO] Successfully loaded FloodMapper environment.")

[INFO] Successfully loaded FloodMapper environment.


## Create 'folder' structures on the bucket

Technically, the GCP bucket does not contain folders - it is a flat structure that contains only files. However, each filename includes the path as a way to emulate folders. In practice this means that folders spring into existence when files that include their path names are created.

Here we will just write some temporary files as a check that we can access the bucket correctly.

In [3]:
# Name of the target GCP bucket, written as a full 'gs://' URL. Every bucket
# path built in the cells below is prefixed with this value.
# Alternative bucket (disabled):
#bucket_name = "gs://ml4floods_nema"
bucket_name = "gs://floodmapper-test"

In [4]:
# 'Folders' (object-name prefixes, relative to the bucket root) that must
# exist in the bucket. One placeholder file is written under each of them.
dir_lst = [
    "0_DEV/1_Staging/GRID",
    "0_DEV/1_Staging/operational",
    "0_DEV/2_Mart/2_MLModelMart",
]

In [5]:
# Write a small placeholder file under each required 'folder'. This makes the
# folder visible in the bucket and checks that we can write to the bucket.
fs = utils.get_filesystem(bucket_name)
for directory in dir_lst:
    # Bucket object names always use '/' as the separator, so join the parts
    # explicitly: os.path.join would insert the OS separator ('\' on Windows).
    tmp_path = "/".join([bucket_name.rstrip("/"), directory.strip("/"), "tmp.txt"])
    print(f"Creating ''{tmp_path}''")
    with fs.open(tmp_path, "w") as fh:
        fh.write("This is a placeholder.\n")
    # Read back from the bucket to confirm the object was really created.
    print("\tDoes file exist? ->", fs.exists(tmp_path))

Creating ''gs://floodmapper-test/0_DEV/1_Staging/GRID/tmp.txt''
	Does file exist? -> True
Creating ''gs://floodmapper-test/0_DEV/1_Staging/operational/tmp.txt''
	Does file exist? -> True
Creating ''gs://floodmapper-test/0_DEV/2_Mart/2_MLModelMart/tmp.txt''
	Does file exist? -> True


## Upload the model 

The trained model is distributed across several files under the 'WF2_unet_rbgiswirs' directory. 

In [6]:
# Assemble the source and destination paths.
# Bucket paths are joined with an explicit '/' (object names never use the OS
# separator); os.path.join is kept for the local filesystem path only.
model_name = "WF2_unet_rbgiswirs"
model_path_gcp = "/".join([bucket_name.rstrip("/"), "0_DEV/2_Mart/2_MLModelMart", model_name])
model_path_local = os.path.join(base_path, "resources/models", model_name)

# Each entry is [path relative to the model directory, open-mode suffix]:
# "" copies the file in text mode, "b" copies it in binary mode.
file_lst = [["config.json", ""],
            ["model.pt", "b"],
            ["test.json", ""],
            ["val.json", ""],
            ["checkpoint/epoch=6-step=14077.ckpt", "b"],
            ["checkpoint/epoch=9-step=23800.ckpt", "b"]]

# Amount of data (bytes in binary mode, characters in text mode) held in
# memory at once while copying, so large checkpoints are streamed rather than
# loaded whole.
CHUNK_SIZE = 16 * 1024 * 1024

# Copy each file in turn
for file_path, mode in file_lst:
    full_gs_path = "/".join([model_path_gcp, file_path])
    full_local_path = os.path.join(model_path_local, file_path)
    print(f"Copying {file_path} ... ", end="")
    # The local file is opened first, so a missing source file fails before
    # anything is created in the bucket.
    with open(full_local_path, "r" + mode) as f1, fs.open(full_gs_path, "w" + mode) as f2:
        while True:
            chunk = f1.read(CHUNK_SIZE)
            if not chunk:  # empty str/bytes marks end of file
                break
            f2.write(chunk)
    print("done.")

Copying config.json ... done.
Copying model.pt ... done.
Copying test.json ... done.
Copying val.json ... done.
Copying checkpoint/epoch=6-step=14077.ckpt ... done.
Copying checkpoint/epoch=9-step=23800.ckpt ... done.


You can check if the bucket contains the correct files at the [GCP Console Storage](https://console.cloud.google.com/storage) page.