In [11]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt 

# Extracting necessary SINMOD data

Extracting more than a few time steps will take a while and potentially cause crashing. Then we will need to run on IDUN. Can set up via VS code ssh.

We will now:
- Look at a few data sets to check the numbers make sense
- Standardise and normalise the data

In [105]:
import netCDF4 as nc
from netCDF4 import Dataset

# Importing SINMOD data
filename_PhysStates = 'PhysStates.nc'

PhysStates_data = Dataset(filename_PhysStates, 'r')

# Looking at our list of available variables
variables_list = list(PhysStates_data.variables.keys())
for variable in variables_list:
    units = PhysStates_data.variables[variable].units if 'units' in PhysStates_data.variables[variable].ncattrs() else 'No units'
    print(f"{variable:<20} {units}")

time                 No units
grid_mapping         No units
LayerDepths          m
xc                   meter
yc                   meter
zc                   m
depth                m
DXxDYy               m2
u_velocity           m/s
v_velocity           m/s
elevation            m
temperature          degC
salinity             psu
ice_thickness        m
ice_compactness      -
salinity_ice         psu


In [103]:
# Extracting all the map dimensions, we can extract both ways, I don't think it matters
xc = PhysStates_data.variables['xc'][:]  # x-coordinates (meters)
yc = PhysStates_data.variables['yc'][:]  # y-coordinates (meters)
zc = PhysStates_data['LayerDepths'][:]  # z-coordinates (meters)

# Now checking temperature data matches what we expect from the dimensions
# Extracting temperature at t = 0, and check the dimensions
temperature_var = PhysStates_data.variables['temperature']
temperature = temperature_var[0,:,:,:]
print("SINMOD grid dimensions:")
print(f"xc shape: {xc.shape}")
print(f"yc shape: {yc.shape}")
print(f"zc shape: {zc.shape}")

print(f"\nTemperature data shape: {temperature.shape}")
# Now checking mean, max and min temperature to see if it makes sense
print(f"Mean temperature:\t{temperature.mean():.2f}")
print(f"Max temperature:\t{temperature.max():.2f}")
print(f"Min temperature:\t{temperature.min():.2f}")

# Repeating for salinity
salinity_var = PhysStates_data.variables['salinity']
salinity = salinity_var[0,:,:,:]

print(f"\nSalinity data shape:\t{salinity.shape}")
print(f"Mean salinity:\t\t{salinity.mean():.2f}")
print(f"Max salinity:\t\t{salinity.max():.2f}")
print(f"Min salinity:\t\t{salinity.min():.2f}")


time                 No units
grid_mapping         No units
LayerDepths          m
xc                   meter
yc                   meter
zc                   m
depth                m
DXxDYy               m2
u_velocity           m/s
v_velocity           m/s
elevation            m
temperature          degC
salinity             psu
ice_thickness        m
ice_compactness      -
salinity_ice         psu


In [98]:
from sklearn.preprocessing import StandardScaler

# Many options for what sort of standardisation we want here, but a simple one:
# Initialize the StandardScaler
scaler = StandardScaler()

# Standardize the temperature and salinity data
temperature_standardized = scaler.fit_transform(temperature.reshape(-1, temperature.shape[-1])).reshape(temperature.shape)
salinity_standardized = scaler.fit_transform(salinity.reshape(-1, salinity.shape[-1])).reshape(salinity.shape)

# Print the mean and standard deviation of the standardized data to verify
print(f"Standardized Temperature - Mean: {temperature_standardized.mean():.2f}, Std Dev: {temperature_standardized.std():.2f}")
print(f"Standardized Salinity - Mean: {salinity_standardized.mean():.2f}, Std Dev: {salinity_standardized.std():.2f}")


Standardized Temperature - Mean: 0.00, Std Dev: 1.00
Standardized Salinity - Mean: -0.00, Std Dev: 1.00


Now repeating for Biostates

In [None]:
# Importing BioStates data
filename_BioStates = 'BioStates.nc'

BioStates_data = Dataset(filename_BioStates, 'r')

# Extracting all the map dimensions, we can extract both ways, I don't think it matters
xc_bio = BioStates_data.variables['xc'][:]  # x-coordinates (meters)
yc_bio = BioStates_data.variables['yc'][:]  # y-coordinates (meters)
zc_bio = BioStates_data['LayerDepths'][:]  # z-coordinates (meters)

# Now checking chlorophyll data matches what we expect from the dimensions
# Extracting chlorophyll at t = 0, and check the dimensions
chlorophyll_var = BioStates_data.variables['chlorophyll']
chlorophyll = chlorophyll_var[0,:,:,:]
print("BioStates grid dimensions:")
print(f"xc shape: {xc_bio.shape}")
print(f"yc shape: {yc_bio.shape}")
print(f"zc shape: {zc_bio.shape}")

print(f"\nChlorophyll data shape: {chlorophyll.shape}")
# Now checking mean, max and min chlorophyll to see if it makes sense
print(f"Mean chlorophyll:\t{chlorophyll.mean():.2f}")
print(f"Max chlorophyll:\t{chlorophyll.max():.2f}")
print(f"Min chlorophyll:\t{chlorophyll.min():.2f}")

# Repeating for nitrate
nitrate_var = BioStates_data.variables['nitrate']
nitrate = nitrate_var[0,:,:,:]

print(f"\nNitrate data shape:\t{nitrate.shape}")
print(f"Mean nitrate:\t\t{nitrate.mean():.2f}")
print(f"Max nitrate:\t\t{nitrate.max():.2f}")
print(f"Min nitrate:\t\t{nitrate.min():.2f}")

In [78]:
# Importing coral reef data

# gml_file_coral_reefs = "./KystOgFiskeri_50_Trondelag_25832_Korallrev_GML.gml"
gml_file_coral_reefs = "KystOgFiskeri_50_Trondelag_25833_Korallrev_GML.gml"

gdf_coral_reefs = gpd.read_file(gml_file_coral_reefs)

# Not sure this is necessary since all the naturtypeNavn are the same
# unique_naturtypeNavn = gdf_coral_reefs['naturtypeNavn'].unique()
# print(unique_naturtypeNavn)
coral_data = gdf_coral_reefs[gdf_coral_reefs['naturtypeNavn'] == 'Korallforekomster']

# I think it's already a dataframe, so also unnecessary
coral_df = pd.DataFrame(coral_data)


coral_location_df = coral_df[['lengdegrad', 'breddegrad']]

In [79]:
# Now to transform the coordinates to the same projection as the SINMOD data

from pyproj import CRS, Transformer

# Print grid mapping to see what the horizontal resolution is
# In the case of gin it is 20km, for nor4km it is 4km
grid_mapping = PhysStates_data.variables['grid_mapping']
print(grid_mapping)


<class 'netCDF4._netCDF4.Variable'>
int32 grid_mapping()
    grid_mapping_name: polar_stereographic
    straight_vertical_longitude_from_pole: 58.0
    horizontal_resolution: 20000.0
    latitude_of_projection_origin: 90.0
    longitude_of_projection_origin: 58.0
    standard_parallel: 60.0
    origoRef: [0. 0.]
    semi_minor_axis: 6370000.0
    semi_major_axis: 6370000.0
    false_easting: 3900000.0
    false_northing: 2570000.0
    scale_factor_at_projection_origin: 1.0
unlimited dimensions: 
current shape = ()
filling on, default _FillValue of -2147483647 used


In [87]:
# Specifying projection to SINMOD format

# Define the Coordinate Reference Systems (CRS) for the coral data
# It could be any of these, need to double check
crs_wgs84 = CRS.from_epsg(4326)  # WGS84 (lat/lon coordinates)
# crs_euref89_utm32 = CRS.from_epsg(25832)  # EUREF89 / UTM zone 32N
# crs_euref89 = CRS.from_epsg(4258)  # EUREF89 (geographic lat/lon)

# SINMOD projection parameters as a custom projection
crs_sinmod = CRS.from_proj4("+proj=stere +lat_0=90 +lat_ts=60 +lon_0=58 "
                            "+x_0=3900000 +y_0=2570000 +ellps=WGS84 +units=m +no_defs")

# Create a transformer to transform from wgs84 (geographic) to SINMOD
transformer_wgs84_to_sinmod = Transformer.from_crs(crs_wgs84, crs_sinmod, always_xy=True)

xc_grid, yc_grid = np.meshgrid(xc, yc)
lon_grid, lat_grid = transformer_wgs84_to_sinmod.transform(xc_grid, yc_grid)

# Performing projection on coral reef data
print("coral_lons range before:", coral_location_df['lengdegrad'].min(), coral_location_df['lengdegrad'].max())
print("coral_lats range before:", coral_location_df['breddegrad'].min(), coral_location_df['breddegrad'].max())
print("--------------------------------")

# Transform coral coordinates from EUREF89 to SINMOD
coral_x, coral_y = transformer_wgs84_to_sinmod.transform(coral_location_df['lengdegrad'], coral_location_df['breddegrad'])

# We need to DIVIDE by the resolution of the SINMOD grid to get the grid coordinates
# In the case of gin this is 20km
coral_x /= 20000
coral_y /= 20000

# The values after are NOT lat lon, but rather the SINMOD grid coordinates 
print("coral_lons grid-coordinates after:", coral_x.min(), coral_x.max())
print("coral_lats grid-coordinates before:", coral_y.min(), coral_y.max())

# So we expect them to be in the range of the SINMOD grid:
print(f"Grid dimensions (xc): {xc.shape[0]}")
print(f"Grid dimensions (yc): {yc.shape[0]}")


coral_lons range before: 7.84797 11.32001
coral_lats range before: 63.4133 65.00816
--------------------------------
coral_lons grid-coordinates after: 87.50081214535975 98.74281827314783
coral_lats grid-coordinates before: 33.6640977812021 39.715041231478516
Grid dimensions (xc): 300
Grid dimensions (yc): 235
