# Summary

Using the combined habitat degradation rasters from Google Cloud, we create polygons of the most intact spaces.


# Import AOI and setup

In [None]:
import os
from google.cloud import storage
import rasterio
from rasterio.plot import show
import numpy as np
from scipy.ndimage import generic_filter
import matplotlib.pyplot as plt
import xarray as xr
import rioxarray
import dask_gateway

In [None]:
# Start a Dask Gateway cluster and attach a client to it
cluster = dask_gateway.GatewayCluster()
client = cluster.get_client()
# Ask the gateway for four workers
cluster.scale(4)
# Print the dashboard URL so task progress can be followed in a browser
print(cluster.dashboard_link)

In [None]:
# Show the cluster object as the cell output
cluster

In [None]:
# Point the Google Cloud client libraries at the local credentials key file
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "../keys/gc_key.json"

In [None]:
def download_blob(bucket_name: str, blob_name: str, local_dir: str) -> str:
    """Download a blob from a Cloud Storage bucket into a local directory.

    The blob name (including any "/"-separated prefix) is appended to
    ``local_dir`` to form the destination path, and missing parent
    directories are created before the download starts.

    Args:
        bucket_name: Name of the bucket that holds the blob.
        blob_name: Name of the blob inside the bucket.
        local_dir: Local directory under which the file is written.

    Returns:
        The local path the blob was downloaded to.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    # Mirror the blob name underneath the requested local directory
    local_file_path = os.path.join(local_dir, blob_name)

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when the destination path actually has one
    parent_dir = os.path.dirname(local_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    blob.download_to_filename(local_file_path)
    return local_file_path


In [None]:
# Bucket to read from and the local folder the tiles are mirrored into
bucket_name = 'nature-watch-bucket'
local_dir = '../data/nature-watch/'

# Fetch both 2023 tiles; each call returns the local path of the download
blob_names = ('COGS/all/2023/all2023_16.tif', 'COGS/all/2023/all2023_17.tif')
image_16, image_17 = [
    download_blob(bucket_name, blob_name, local_dir)
    for blob_name in blob_names
]

In [None]:
# Open both downloaded tiles as Dask-backed arrays, one band per chunk
open_options = {'lock': False, 'chunks': {'band': 1, 'x': 'auto', 'y': 'auto'}}
raster1, raster2 = (
    rioxarray.open_rasterio(tile_path, **open_options)
    for tile_path in (image_16, image_17)
)

# Give both tiles the same variable name before merging
for tile in (raster1, raster2):
    tile.name = "value"

# Combine the two tiles into a single dataset
combined_raster = xr.merge([raster1, raster2])

In [None]:
# Quick visual check of the merged "value" variable
combined_raster['value'].plot()

In [None]:
import dask.array as da

def quart3_xarray(da, kernel_size=3):
    """Apply a moving-window third-quartile (75th percentile) filter over y/x.

    Each output cell is the 75th percentile of the ``kernel_size`` x
    ``kernel_size`` neighbourhood around it. Any other dimensions (for
    example ``band``) are filtered independently. Dask-backed input stays
    lazy; nothing is computed until the caller asks for it.

    Args:
        da: xarray DataArray with ``y`` and ``x`` dimensions.
        kernel_size: Side length of the square window, in cells.

    Returns:
        A DataArray of filtered values with ``y`` and ``x`` as its last two
        dimensions.
    """
    # The parameter ``da`` shadows the usual ``dask.array as da`` alias, so
    # the module is imported here under a different name.
    import dask.array as dask_array
    from scipy.ndimage import percentile_filter

    def _filter_block(block):
        # apply_ufunc moves the core dims (y, x) to the end; a window of
        # size 1 on every leading axis keeps those axes independent.
        window = (1,) * (block.ndim - 2) + (kernel_size, kernel_size)
        return percentile_filter(block, 75, size=window)

    def _filter(values):
        if not isinstance(values, dask_array.Array):
            return _filter_block(values)

        # Neighbouring chunks must share half a window of cells so results
        # at chunk seams match an unchunked filter. boundary='none' adds no
        # padding at the true array edges, leaving those to scipy's own
        # edge handling inside percentile_filter.
        halo = kernel_size // 2
        depth = {axis: 0 for axis in range(values.ndim)}
        depth[values.ndim - 2] = halo
        depth[values.ndim - 1] = halo
        return dask_array.map_overlap(
            _filter_block,
            values,
            depth=depth,
            boundary='none',
            dtype=values.dtype,
        )

    return xr.apply_ufunc(
        _filter,
        da,
        input_core_dims=[['y', 'x']],
        output_core_dims=[['y', 'x']],
        dask='allowed',
    )


In [None]:
%%time
filtered_raster = quart3_xarray(combined_raster['value'], kernel_size=5)
filtered_raster.compute()

In [None]:
# Show the first slice along the leading dimension of the filtered raster in greyscale
plt.imshow(filtered_raster[0], cmap='gray')
plt.colorbar()
plt.show()

In [None]:
def test_file_access():
    """Report the current working directory, or the error text if it cannot be read."""
    try:
        working_dir = os.getcwd()
    except Exception as err:
        # Return the message rather than raising so the caller always gets a string back
        return str(err)
    else:
        return working_dir

# Run the probe through the Dask client and block until its result is available
future = client.submit(test_file_access)
result = future.result()
print(result)


In [None]:
# Shut the gateway cluster down once the work is done
cluster.close()

In [None]:
import dask.array as da
import rioxarray
import gcsfs

# Initialize a GCS filesystem object
fs = gcsfs.GCSFileSystem(project='nature-watch', token="../keys/gc_key.json")

# GCS URLs of the two raster tiles
gcs_path_16 = 'gs://nature-watch-bucket/COGS/all/2023/all2023_16.tif'
gcs_path_17 = 'gs://nature-watch-bucket/COGS/all/2023/all2023_17.tif'

# NOTE(review): with chunks=... the rasters are opened lazily, so reads that
# happen after each `with` block has closed the file may fail — confirm, and
# keep the files open for as long as the data is needed if so.

# Open each dataset directly from GCS
with fs.open(gcs_path_16) as f:
    raster1 = rioxarray.open_rasterio(f, chunks={'x': 'auto', 'y': 'auto'})

with fs.open(gcs_path_17) as f:
    raster2 = rioxarray.open_rasterio(f, chunks={'x': 'auto', 'y': 'auto'})

raster1.name = "value"
raster2.name = "value"

# Merge the two tiles into one dataset
combined_raster = xr.merge([raster1, raster2])
# Now you can work with the raster as a Dask array
