# Initialise the FloodMapper GCP bucket

Create the FloodMapper bucket structure and import the default model.

In [None]:
# Necessary imports
import os
# NOTE(review): presumably tells geopandas/shapely not to use PyGEOS. It is
# set here, before the ml4floods import below, so it is already in place when
# that package is loaded - confirm.
os.environ['USE_PYGEOS'] = '0'
import fsspec
from dotenv import load_dotenv

from ml4floods.data import utils

# Turn off the requester-pays default in the ml4floods utilities
utils.REQUESTER_PAYS_DEFAULT = False

## Load environment and project details

As with the other notebooks, we load credentials and project details from a hidden ```.env``` file.

In [None]:
# Load environment variables (including path to credentials) from '.env' file
env_file_path = "../.env"

# Uncomment for alternative version for Windows (r"" indicates raw string)
#env_file_path = r"C:/Users/User/floodmapper/.env"

# Each check below stops the notebook with a readable message as soon as a
# required setting is missing, instead of failing later with an obscure error.
assert load_dotenv(dotenv_path=env_file_path), "[ERR] Failed to load environment!"

# Google credentials: the variable must be set and point at an existing key file.
assert "GOOGLE_APPLICATION_CREDENTIALS" in os.environ, "[ERR] Missing $GOOGLE_APPLICATION_CREDENTIALS!"
assert "GS_USER_PROJECT" in os.environ, "[ERR] Missing $GS_USER_PROJECT!"
key_file_path = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
assert os.path.exists(key_file_path), f"[ERR] Google credential key file does not exist: \n{key_file_path} "

# Local base directory of the project (the model files are read from here).
assert "ML4FLOODS_BASE_DIR" in os.environ, "[ERR] Missing $ML4FLOODS_BASE_DIR!"
base_path = os.environ["ML4FLOODS_BASE_DIR"]
assert os.path.exists(base_path), f"[ERR] Base path does not exist: \n{base_path} "

# Use .get() so that an unset BUCKET_URI reaches the assertion below rather
# than raising a bare KeyError before the check can report the problem.
bucket_name = os.environ.get("BUCKET_URI")
assert bucket_name, f"Bucket name not defined {bucket_name}"
print("[INFO] Successfully loaded FloodMapper environment.")

## Create 'folder' structures on the bucket

Technically, the GCP bucket does not contain folders - it is a flat structure that contains only files. However, each filename includes the path as a way to emulate folders. In practice, this means that folders spring into existence when files that include their path names are created.

Here we will just write some temporary files as a check that we can access the bucket correctly.

In [None]:
# Folder-like prefixes that must exist in the bucket
dir_lst = [
    "0_DEV/1_Staging/GRID",
    "0_DEV/1_Staging/operational",
    "0_DEV/2_Mart/2_MLModelMart",
]

In [None]:
# Write a small placeholder file under each required prefix, then ask the
# filesystem whether the file is visible, as a check on bucket access.
fs = utils.get_filesystem(bucket_name)
for directory in dir_lst:
    # os.path.join may insert backslashes on Windows; normalise to '/'.
    joined = os.path.join(bucket_name, directory, "tmp.txt")
    tmp_path = joined.replace("\\", "/")
    print(f"Creating ''{tmp_path}''")
    with fs.open(tmp_path, "w") as handle:
        handle.write("This is a placeholder.\n")
    file_exists = fs.exists(tmp_path)
    print("\tDoes file exist? ->", file_exists)

The block above should report 'True' for each of the three files, confirming that they now exist on the bucket. If an error is reported, it is likely due to a problem with credentials. Check in the ```.env``` file that:
 * The path to the JSON key file is correct.
 * The correct key file is being used (inspect the key file for the correct project name).
 * The project name is correct.

## Upload the model 

The trained model is distributed across several files under the 'WF2_unet_rbgiswirs' directory. The next block of code uploads these files to the GCP bucket.

In [None]:
# Upload the model files from the local resources folder to the bucket.
import shutil

# Assemble the source and destination paths
model_name = "WF2_unet_rbgiswirs"
model_path_gcp = os.path.join(bucket_name, "0_DEV/2_Mart/2_MLModelMart/", model_name).replace("\\","/")
model_path_local = os.path.join(base_path, "resources/models", model_name).replace("\\","/")

# Each entry is [relative file name, mode flag]. The flag ("" = text,
# "b" = binary) is kept so the list keeps its shape, but it is no longer used:
# every file is copied as raw bytes (see below).
file_lst = [["config.json", ""],
            ["model.pt", "b"],
            ["test.json", ""],
            ["val.json", ""],
            ["checkpoint/epoch=6-step=14077.ckpt", "b"],
            ["checkpoint/epoch=9-step=23800.ckpt", "b"]]

# Copy each file in turn
for file_path, _mode in file_lst:
    full_gs_path = os.path.join(model_path_gcp, file_path).replace("\\","/")
    full_local_path = os.path.join(model_path_local, file_path).replace("\\","/")
    print(f"Copying {file_path} ... ", end="")
    # Always copy in binary mode: the bytes arrive unchanged regardless of the
    # platform's default text encoding or newline handling. copyfileobj streams
    # in chunks, so large checkpoints are never held fully in memory.
    with open(full_local_path, "rb") as f1, fs.open(full_gs_path, "wb") as f2:
        shutil.copyfileobj(f1, f2)
    print("done.")

You can check if the bucket contains the correct files at the [GCP Console Storage](https://console.cloud.google.com/storage) page.