In [None]:
%load_ext autoreload
%autoreload 2

import psutil
import dask.distributed
import rioxarray
import numpy as np
import xarray as xr
from odc.stac import stac_load
from pystac_client import Client
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore", category=UserWarning, message="angle from rectified to skew grid parameter lost in conversion to CF")

# Report how much memory is currently available, in gigabytes, so that the
# chunk size can be adapted to it later on.
available_memory = psutil.virtual_memory().available
available_memory_gb = available_memory / 2 ** 30  # 2**30 == 1024**3

print(f"Available memory: {available_memory_gb:.2f} GB")

In [None]:
# Initiate the Dask environment.
# Client() is called without arguments, so Dask's defaults apply
# (presumably a local cluster on this machine - confirm in the Dask docs).
# The client is shut down again by the client.close() call further down the notebook.
client = dask.distributed.Client()
# Uncomment to show the client summary in the notebook:
# display(client)

In [None]:
# Connect to the STAC catalogue.
catalog = Client.open("https://explorer.swissdatacube.org/stac")

# Product (used as the STAC collection in the search) and the bands to read.
# There is no time range to set: time is not a valid dimension for the
# arealstatistik datasets - time is denoted only through the measurement name.
product = "arealstatistik"
measurements = [
    "AS85_27",
    "AS18_27",
    "AS18_4",
]

# Area of interest as (min, max) pairs expressed in `crs`.
# These three are only referenced by commented-out lines in the search/load cell.
crs = "epsg:4326"
longitude = (7.05, 7.2)
latitude = (46.7, 46.85)

# Output grid: let's look at the data at their native resolution of 100x100 m.
output_crs = "epsg:2056"
resolution = (-100.0, 100.0)

# chunks = {"x": 2048, "y": 2048, "time": 1}  # 2048 values are OK with ~1Gb memory available

In [None]:
# # Mask function
# # See https://custom-scripts.sentinel-hub.com/custom-scripts/sentinel-2/scene-classification/
# # for valid_cats
# def create_scl_clean_mask(scl, valid_cats = [4, 5, 6, 7, 11]):
#     return xr.apply_ufunc(np.isin, scl, valid_cats, dask='allowed')

In [None]:
%%time

# Search the catalogue for the items of the product.
# No datetime or bbox filter is applied (see the commented-out lines), so the
# whole collection is searched. `limit` is the page size requested from the
# server, not a cap on the total number of items returned.
query = catalog.search(
    collections=[product],
    # datetime=f"{time[0]}/{time[1]}",
    # bbox=(longitude[0], latitude[0],
    #       longitude[1], latitude[1]),
    limit=100,
)
items = list(query.items())

# Fail early with a clear message rather than with an obscure error from stac_load.
if not items:
    raise RuntimeError(f"No STAC items found for collection '{product}'")

# Load the identified items lazily.
# Passing `chunks` makes stac_load build Dask-backed arrays instead of reading
# every pixel straight away, so the Dask client created earlier is actually
# used and the real read happens when .load() is called in the next cell.
lazy_ds = stac_load(
    items,
    # lon=longitude,
    # lat=latitude,
    bands=measurements,
    crs=output_crs,
    resolution=resolution[1],  # second entry of `resolution`: the positive pixel size
    chunks={"x": 2048, "y": 2048, "time": 1},  # 2048 values are OK with ~1Gb memory available
)


In [None]:
%%time

# Perform calculation (you can open the link generated by the
# "Initiate Dask Env" cell to monitor how your resources are used).
# .load() brings the dataset's values into memory; the result is kept as ds_in,
# which all the plotting and clipping cells below work on.
ds_in = lazy_ds.load()

In [None]:
# Quick look at land cover in the 1985 period over 27 classes
# (the colour scheme is horrible, but it tells the classes apart).
ds_in["AS85_27"].plot(size=10, cmap="nipy_spectral", add_colorbar=True)
ax = plt.gca()
ax.set_title("")
ax.set_aspect("equal")

In [None]:
# A single category can be looked at by masking out every other value first
# (here class 27 of the AS18_27 measurement).
as18_27 = ds_in["AS18_27"]
only_class_27 = as18_27.where(as18_27 == 27)
only_class_27.plot(size=10, cmap="nipy_spectral", vmin=0, vmax=27, add_colorbar=True)
ax = plt.gca()
ax.set_title("")
ax.set_aspect("equal")

In [None]:
# Discrete colour map with one colour per interval between the bounds.
# Approach taken from
# https://stackoverflow.com/questions/9707676/defining-a-discrete-colormap-for-imshow-in-matplotlib
from matplotlib import colors

class_colours = ['black', '#eff7e4', '#c3e3ae', '#d3f0fd']
cmap = colors.ListedColormap(class_colours)
bounds = list(range(1, 6))  # edges 1..5 -> four intervals, one per colour
norm = colors.BoundaryNorm(bounds, cmap.N)

ds_in["AS18_4"].plot(size=10, cmap=cmap, norm=norm, add_colorbar=True)
ax = plt.gca()
ax.set_title("")
ax.set_aspect("equal")

<a name="shapefiles" > </a>

## Looking at canton Fribourg in the arealstatistik dataset

To do this we need to supply some extra information. In your sgg00425 directory there should be a folder `swissBOUNDARIES3D`, which contains shapefiles of the different administrative levels of Switzerland. We downloaded these for you from the Federal Office of Topography (https://www.swisstopo.admin.ch/en/geodata/landscape/boundaries3d.html).

In [None]:
# To work with the cantons data we need two additional modules
# We also need rioxarray to be imported, because the `.rio` accessor is used for the clipping below...
# ...check that "import rioxarray" appears in the first cell of the notebook, together with the other imports
import geopandas as gpd
import shapely

In [None]:
# Read the cantons shapefile into `cantons`.
cantons_path = 'data/swissbounds/swissBOUNDARIES3D_1_4_TLM_KANTONSGEBIET.shp'
cantons = gpd.read_file(cantons_path)

In [None]:
# Preview the first five rows to see which attributes come with the file
cantons.head(n=5)

In [None]:
# Select canton Fribourg: keep only the rows whose NAME is 'Fribourg'
is_fribourg = cantons["NAME"] == "Fribourg"
fribourg = cantons.loc[is_fribourg]

In [None]:
# How many rows do you expect to see here?
# (This displays every row of `cantons` whose NAME is 'Fribourg'.)
fribourg

In [None]:
# 2024-10-22 Obsolete. Geopandas and xarray now seem to handle the z-coordinate

# # The cantons come from the 'SwissBoundaries3D' dataset. 
# # As this name suggests, they contain not only X,Y data but also Z (elevation) information.
# # The DataCube cannot understand the Z information, so we need to use this function here to remove it.
# # Don't worry about the warning which appears!
# fribourg.geometry = shapely.force_2d(fribourg.geometry)

In [None]:
# Show only the geometry of the Fribourg selection; this is what is passed to rio.clip below.
fribourg.geometry

In [None]:
# Plot AS18_4 for the canton of Fribourg only, by clipping it to the canton geometry.
# Further information on this operation is here: https://corteva.github.io/rioxarray/stable/examples/clip_geom.html
canton_outline = fribourg.geometry
as18_4_fribourg = ds_in["AS18_4"].rio.clip(canton_outline)
as18_4_fribourg.plot()

In [None]:
# Keep the canton's results for further analysis: clip the whole dataset, so that
# `stats_fribourg` contains only the data in the area we just looked at, but for
# each of the different surveys (measurements) that we loaded.
canton_outline = fribourg.geometry
stats_fribourg = ds_in.rio.clip(canton_outline)

In [None]:
# How about a histogram to briefly summarise land cover in the period ending 2018?
# Bin edges at 0.5, 1.5, ..., 4.5 give exactly one bar per class (1 to 4), centred
# on the class value. The `0` category lies outside these edges and is therefore
# left out, as this is the masked areas outside canton Fribourg.
stats_fribourg.AS18_4.plot.hist(bins=np.arange(0.5, 5.0, 1.0))

In [None]:
# For a more detailed look using Pandas:
# group the pixels by their own class value, count the pixels in each
# category, and save the result to a Pandas Series.
as18_4_fribourg = stats_fribourg.AS18_4
pixels_per_class = as18_4_fribourg.groupby(as18_4_fribourg).count()
stats_pd = pixels_per_class.to_pandas()
stats_pd

In [None]:
# Convert the pixel counts to percentages.
# Only classes 1 to 4 are kept: the 0-class is just masked areas outside
# canton Fribourg and has to be removed first.
stats_pd = stats_pd.loc[1:4]
total_pixels = stats_pd.sum()
percentages = (100 / total_pixels) * stats_pd

# What we should find is that Fribourg is 55% agricultural land.
percentages

## Using Landsat and arealstatistik together

Here, you first need to use `ts1_data_preparation.ipynb` to get a datacube file containing both Landsat **and arealstatistik** data.

We have provided a small example data file, which should be in the main folder and is used by the cells below.

Then, we use xarray's `.where()` functionality to apply masks to the Landsat data based on the arealstatistik values.

In [None]:
# Name of the data file that is opened with xr.open_dataset in the next cell.
nc_filename = "mydata.nc"

In [None]:
# Open the file as an xarray dataset; the cells below read its AS18_27 and ndvi variables.
data = xr.open_dataset(nc_filename)

In [None]:
# Start with a quick look at the land use for our area of interest
data["AS18_27"].plot()

In [None]:
# Show only the regions of this area which are 'closed forest' and 'open forest'
# (classes 19 and 20); everything else is masked out.
landcover = data.AS18_27
is_forest = (landcover == 19) | (landcover == 20)
landcover.where(is_forest).plot()

# Note: because the two landcover classes are consecutive (19 and 20) we could also do the selection like this:
# data.AS18_27.where((data.AS18_27 >= 19) & (data.AS18_27 <= 20)).plot()
# The logical operators are combined with
# " & " - AND
# " | " - OR

In [None]:
# NDVI in the forest areas only.
# xarray's where() command 'masks' the data: NDVI is kept where the condition
# holds. Here the condition is a range of class values (19 to 20)...
landcover = data.AS18_27
forest_mask = (landcover >= 19) & (landcover <= 20)
ndvi_in_forests = data.ndvi.where(forest_mask)

#... as another example, one could just look at a single category, for example rivers.
#rivers = data.ndvi.where(data.AS18_27 == 24)

In [None]:
# Average over both spatial dimensions to get a mean through time,
# and compare the forests with the average of the whole area.
spatial_dims = ('x', 'y')
ndvi_in_forests.mean(dim=spatial_dims).plot(label='Forests')
data.ndvi.mean(dim=spatial_dims).plot(label='Whole area')
plt.legend()

In [None]:
# The same comparison for a single category - here the example with the rivers (class 24):
is_river = data.AS18_27 == 24
rivers = data.ndvi.where(is_river)

spatial_dims = ('x', 'y')
rivers.mean(dim=spatial_dims).plot(label='Rivers')
data.ndvi.mean(dim=spatial_dims).plot(label='Whole area')
plt.legend()

In [None]:
# Shut down the Dask client that was started near the top of the notebook.
client.close()