In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt 

# Extracting necessary SINMOD data

Extracting more than a few time steps will take a while and potentially cause crashing. Then we will need to run on IDUN. Can set up via VS code ssh.

We will now:
- Look at a few data sets to check the numbers make sense
- Standardise and normalise the data

In [2]:
import netCDF4 as nc
from netCDF4 import Dataset

# Importing SINMOD data
filename_PhysStates = 'PhysStates.nc'

PhysStates_data = Dataset(filename_PhysStates, 'r')

# Looking at our list of available variables
variables_list = list(PhysStates_data.variables.keys())
for variable in variables_list:
    units = PhysStates_data.variables[variable].units if 'units' in PhysStates_data.variables[variable].ncattrs() else 'No units'
    print(f"{variable:<20} {units}")

time                 No units
grid_mapping         No units
LayerDepths          m
xc                   meter
yc                   meter
zc                   m
depth                m
DXxDYy               m2
u_velocity           m/s
v_velocity           m/s
elevation            m
temperature          degC
salinity             psu
ice_thickness        m
ice_compactness      -
salinity_ice         psu


In [3]:
# Extracting all the map dimensions, we can extract both ways, I don't think it matters
xc = PhysStates_data.variables['xc'][:]  # x-coordinates (meters)
yc = PhysStates_data.variables['yc'][:]  # y-coordinates (meters)
zc = PhysStates_data['LayerDepths'][:]  # z-coordinates (meters)

# Now checking temperature data matches what we expect from the dimensions
# Extracting temperature at t = 0, and check the dimensions
temperature_var = PhysStates_data.variables['temperature']
temperature = temperature_var[0,:,:,:]
print("SINMOD grid dimensions:")
print(f"xc shape: {xc.shape}")
print(f"yc shape: {yc.shape}")
print(f"zc shape: {zc.shape}")

print(f"\nTemperature data shape: {temperature.shape}")
# Now checking mean, max and min temperature to see if it makes sense
print(f"Mean temperature:\t{temperature.mean():.2f}")
print(f"Max temperature:\t{temperature.max():.2f}")
print(f"Min temperature:\t{temperature.min():.2f}")

# Repeating for salinity
salinity_var = PhysStates_data.variables['salinity']
salinity = salinity_var[0,:,:,:]

print(f"\nSalinity data shape:\t{salinity.shape}")
print(f"Mean salinity:\t\t{salinity.mean():.2f}")
print(f"Max salinity:\t\t{salinity.max():.2f}")
print(f"Min salinity:\t\t{salinity.min():.2f}")




SINMOD grid dimensions:
xc shape: (300,)
yc shape: (235,)
zc shape: (25,)

Temperature data shape: (25, 235, 300)
Mean temperature:	1.64
Max temperature:	10.86
Min temperature:	-1.80

Salinity data shape:	(25, 235, 300)
Mean salinity:		34.23
Max salinity:		37.26
Min salinity:		0.10


In [4]:
from sklearn.preprocessing import StandardScaler

# Many options for what sort of standardisation we want here, but a simple one:
# Initialize the StandardScaler
scaler = StandardScaler()

# Standardize the temperature and salinity data
temperature_standardized = scaler.fit_transform(temperature.reshape(-1, temperature.shape[-1])).reshape(temperature.shape)
salinity_standardized = scaler.fit_transform(salinity.reshape(-1, salinity.shape[-1])).reshape(salinity.shape)

# Print the mean and standard deviation of the standardized data to verify
print(f"Standardized Temperature - Mean: {temperature_standardized.mean():.2f}, Std Dev: {temperature_standardized.std():.2f}")
print(f"Standardized Salinity - Mean: {salinity_standardized.mean():.2f}, Std Dev: {salinity_standardized.std():.2f}")


Standardized Temperature - Mean: 0.00, Std Dev: 1.00
Standardized Salinity - Mean: 0.00, Std Dev: 1.00


Now repeating for Biostates

In [5]:
# Importing BioStates data
filename_BioStates = 'BioStates.nc'

BioStates_data = Dataset(filename_BioStates, 'r')


# Looking at our list of available variables
variables_list_bio = list(BioStates_data.variables.keys())
for variable in variables_list_bio:
    units = BioStates_data.variables[variable].units if 'units' in BioStates_data.variables[variable].ncattrs() else 'No units'
    print(f"{variable:<20} {units}")



time                 No units
grid_mapping         No units
LayerDepths          m
xc                   meter
yc                   meter
zc                   m
depth                m
DXxDYy               m2
nitrate              mmmol N m-3
silicate             mmmol N m-3
ammonium             mmmol N m-3
diatoms              mmmol N m-3
flagellates          mmmol N m-3
ciliates             mmmol N m-3
HNANO                mmmol N m-3
bacteria             mmmol N m-3
calanus_finmarchicus gC m-2
calanus_glacialis    gC m-2
detritus_slow        mmmol N m-3
detritus_fast        mmmol N m-3
DOC                  mmmol N m-3
cDOM                 m-1
silicate_detritus    mmmol N m-3
sediment_Si          mmmol Si m-2
sediment_N           mmmol N m-2


In [6]:
# Extracting all the map dimensions, we can extract both ways, I don't think it matters
xc_bio = BioStates_data.variables['xc'][:]  # x-coordinates (meters)
yc_bio = BioStates_data.variables['yc'][:]  # y-coordinates (meters)
zc_bio = BioStates_data['LayerDepths'][:]  # z-coordinates (meters)

print("SINMOD grid dimensions:")
print(f"xc shape: {xc_bio.shape}")
print(f"yc shape: {yc_bio.shape}")
print(f"zc shape: {zc_bio.shape}")

# Now checking chlorophyll data matches what we expect from the dimensions
# Extracting chlorophyll at t = 0, and check the dimensions
"""chlorophyll_var = BioStates_data.variables['chlorophyll']
chlorophyll = chlorophyll_var[0,:,:,:]
print("BioStates grid dimensions:")
print(f"xc shape: {xc_bio.shape}")
print(f"yc shape: {yc_bio.shape}")
print(f"zc shape: {zc_bio.shape}")

print(f"\nChlorophyll data shape: {chlorophyll.shape}")
# Now checking mean, max and min chlorophyll to see if it makes sense
print(f"Mean chlorophyll:\t{chlorophyll.mean():.2f}")
print(f"Max chlorophyll:\t{chlorophyll.max():.2f}")
print(f"Min chlorophyll:\t{chlorophyll.min():.2f}")"""

# Repeating for nitrate
nitrate_var = BioStates_data.variables['nitrate']
nitrate = nitrate_var[0,:,:,:]

print(f"\nNitrate data shape:\t{nitrate.shape}")
print(f"Mean nitrate:\t\t{nitrate.mean():.2f}")
print(f"Max nitrate:\t\t{nitrate.max():.2f}")
print(f"Min nitrate:\t\t{nitrate.min():.2f}")

# Repeating for silicate
silicate_var = BioStates_data.variables['silicate']
silicate = silicate_var[0,:,:,:]

print(f"\nSilicate data shape:\t{silicate.shape}")
print(f"Mean silicate:\t\t{silicate.mean():.2f}")
print(f"Max silicate:\t\t{silicate.max():.2f}")
print(f"Min silicate:\t\t{silicate.min():.2f}")

SINMOD grid dimensions:
xc shape: (300,)
yc shape: (235,)
zc shape: (25,)

Nitrate data shape:	(25, 235, 300)
Mean nitrate:		11.75
Max nitrate:		76.16
Min nitrate:		0.10

Silicate data shape:	(25, 235, 300)
Mean silicate:		7.53
Max silicate:		99.98
Min silicate:		0.10


In [7]:
#Standardizing

# Standardize the nitrate and silicate data
nitrate_standardized = scaler.fit_transform(nitrate.reshape(-1, nitrate.shape[-1])).reshape(nitrate.shape)
silicate_standardized = scaler.fit_transform(silicate.reshape(-1, silicate.shape[-1])).reshape(silicate.shape)

# Print the mean and standard deviation of the standardized data to verify
print(f"Standardized Nitrate - Mean: {nitrate_standardized.mean():.2f}, Std Dev: {nitrate_standardized.std():.2f}")
print(f"Standardized Silicate - Mean: {silicate_standardized.mean():.2f}, Std Dev: {silicate_standardized.std():.2f}")


Standardized Nitrate - Mean: -0.00, Std Dev: 1.00
Standardized Silicate - Mean: 0.00, Std Dev: 1.00


In [8]:
# Importing coral reef data

# gml_file_coral_reefs = "./KystOgFiskeri_50_Trondelag_25832_Korallrev_GML.gml"
#gml_file_coral_reefs = "KystOgFiskeri_50_Trondelag_25833_Korallrev_GML.gml"
gml_file_coral_reefs = "KystOgFiskeri_50_Trondelag_25832_Korallrev_GML.gml"

gdf_coral_reefs = gpd.read_file(gml_file_coral_reefs)

# Not sure this is necessary since all the naturtypeNavn are the same
# unique_naturtypeNavn = gdf_coral_reefs['naturtypeNavn'].unique()
# print(unique_naturtypeNavn)
coral_data = gdf_coral_reefs[gdf_coral_reefs['naturtypeNavn'] == 'Korallforekomster']

# I think it's already a dataframe, so also unnecessary
#coral_df = pd.DataFrame(coral_data)


#coral_location_df = coral_df[['lengdegrad', 'breddegrad']]

In [9]:
import geopandas as gpd
import pandas as pd

In [10]:
# Importing coral reef data from more areas
gml_file_coral_reefs_trondelag = "KystOgFiskeri_50_Trondelag_25832_Korallrev_GML.gml"
gml_file_coral_reefs_more_romsdal = "KystOgFiskeri_15_More_og_Romsdal_25832_Korallrev_GML.gml"
gml_file_coral_reefs_vestland = "KystOgFiskeri_46_Vestland_25832_Korallrev_GML.gml"

gml_file_coral_reefs_nordland = "KystOgFiskeri_18_Nordland_25833_Korallrev_GML.gml"
gml_file_coral_reefs_finnmark = "KystOgFiskeri_56_Finnmark_25833_Korallrev_GML.gml"
gml_file_coral_reefs_troms = "KystOgFiskeri_55_Troms_25833_Korallrev_GML.gml"
gml_file_coral_reefs_barents = "KystOgFiskeri_64_Barentshavet_vest_25833_Korallrev_GML.gml"
gml_file_coral_reefs_norskehavet = "KystOgFiskeri_63_Norskehavet_25832_Korallrev_GML.gml"

gdf_coral_reefs_trondelag = gpd.read_file(gml_file_coral_reefs_trondelag)
gdf_coral_reefs_more_romsdal = gpd.read_file(gml_file_coral_reefs_more_romsdal)
gdf_coral_reefs_vestland = gpd.read_file(gml_file_coral_reefs_vestland)

gdf_coral_reefs_nordland = gpd.read_file(gml_file_coral_reefs_nordland)
gdf_coral_reefs_finnmark = gpd.read_file(gml_file_coral_reefs_finnmark)
gdf_coral_reefs_troms = gpd.read_file(gml_file_coral_reefs_troms)
gdf_coral_reefs_barents = gpd.read_file(gml_file_coral_reefs_barents)
gdf_coral_reefs_norskehavet = gpd.read_file(gml_file_coral_reefs_norskehavet)


# Combine them into one GeoDataFrame
coral_data_EPSG_25832 = pd.concat(
    [gdf_coral_reefs_trondelag, gdf_coral_reefs_more_romsdal, gdf_coral_reefs_vestland, gdf_coral_reefs_norskehavet],
    ignore_index=True
)

coral_data_EPSG_25832 = gpd.GeoDataFrame(coral_data_EPSG_25832, geometry='geometry')

coral_data_EPSG_25833 = pd.concat(
    [gdf_coral_reefs_nordland, gdf_coral_reefs_finnmark, gdf_coral_reefs_troms, gdf_coral_reefs_barents],
    ignore_index=True
)

coral_data_EPSG_25833 = gpd.GeoDataFrame(coral_data_EPSG_25833, geometry='geometry')

print("Coral data crs:")
print(coral_data.crs)
print("---------")
print("Coral data shape total:")
print(coral_data.shape)
print("Coral data Trøndelag shape:")
print(gdf_coral_reefs_trondelag.shape)
print("Coral data Rogaland shape:")
print(gdf_coral_reefs_more_romsdal.shape)
print("Coral data Vestland shape:")
print(gdf_coral_reefs_vestland.shape)
print("-----------")
print("Coral data Nordland shape:")
print(gdf_coral_reefs_nordland.shape)
print("Coral reef Finnmark shape:")
print(gdf_coral_reefs_finnmark.shape)
print("Coral reef Troms shape:")
print(gdf_coral_reefs_troms.shape)
print("Coral reef Barents shape:")
print(gdf_coral_reefs_barents.shape)
print("Coral reef Norskehavet shape:")
print(gdf_coral_reefs_norskehavet.shape)
print("-----------")
print(gdf_coral_reefs_nordland.crs)
print(gdf_coral_reefs_finnmark.crs)
print(gdf_coral_reefs_troms.crs)
print(gdf_coral_reefs_barents.crs)
print(gdf_coral_reefs_norskehavet.crs)

coral_data_variables = coral_data.columns
print("Variables in coral data:")
for variable in coral_data_variables:
    print(variable)

print(len(coral_data_variables))

Coral data crs:
EPSG:25832
---------
Coral data shape total:
(104, 21)
Coral data Trøndelag shape:
(104, 21)
Coral data Rogaland shape:
(91, 21)
Coral data Vestland shape:
(232, 21)
-----------
Coral data Nordland shape:
(461, 21)
Coral reef Finnmark shape:
(49, 21)
Coral reef Troms shape:
(36, 21)
Coral reef Barents shape:
(98, 21)
Coral reef Norskehavet shape:
(647, 21)
-----------
EPSG:25833
EPSG:25833
EPSG:25833
EPSG:25833
EPSG:25832
Variables in coral data:
gml_id
lokalId
navnerom
verifiseringsdato
produkt
versjon
målemetode
nøyaktighet
medium
opphav
minimumsdybde
maksimumsdybde
naturtype
naturtypeNavn
kildeNavn
observasjonsMetode
observasjonsSted
observasjonsSlutt
lengdegrad
breddegrad
geometry
21


In [11]:
coral_data_variables = coral_data.columns
print("Variables in coral data:")
for variable in coral_data_variables:
    print(variable)

    coral_df_combined = pd.concat([coral_data_EPSG_25832[["gml_id", "nøyaktighet", "minimumsdybde", "maksimumsdybde", "lengdegrad", "breddegrad"]],
                            coral_data_EPSG_25833[["gml_id", "nøyaktighet", "minimumsdybde", "maksimumsdybde", "lengdegrad", "breddegrad"]]],
                           ignore_index=True)
    print(coral_df_combined.shape)

Variables in coral data:
gml_id
(1718, 6)
lokalId
(1718, 6)
navnerom
(1718, 6)
verifiseringsdato
(1718, 6)
produkt
(1718, 6)
versjon
(1718, 6)
målemetode
(1718, 6)
nøyaktighet
(1718, 6)
medium
(1718, 6)
opphav
(1718, 6)
minimumsdybde
(1718, 6)
maksimumsdybde
(1718, 6)
naturtype
(1718, 6)
naturtypeNavn
(1718, 6)
kildeNavn
(1718, 6)
observasjonsMetode
(1718, 6)
observasjonsSted
(1718, 6)
observasjonsSlutt
(1718, 6)
lengdegrad
(1718, 6)
breddegrad
(1718, 6)
geometry
(1718, 6)


In [12]:
coral_df_combined.to_csv(path_or_buf="/Users/maikentomren/Documents/prosjektoppgave/plotting git/Illuminating-the-deep---projections-/combined_coral_data.csv")

In [13]:
# Now to transform the coordinates to the same projection as the SINMOD data

from pyproj import CRS, Transformer

# Print grid mapping to see what the horizontal resolution is
# In the case of gin it is 20km, for nor4km it is 4km
grid_mapping = PhysStates_data.variables['grid_mapping']
print(grid_mapping)



<class 'netCDF4._netCDF4.Variable'>
int32 grid_mapping()
    grid_mapping_name: polar_stereographic
    straight_vertical_longitude_from_pole: 58.0
    horizontal_resolution: 20000.0
    latitude_of_projection_origin: 90.0
    longitude_of_projection_origin: 58.0
    standard_parallel: 60.0
    origoRef: [0. 0.]
    semi_minor_axis: 6370000.0
    semi_major_axis: 6370000.0
    false_easting: 3900000.0
    false_northing: 2570000.0
    scale_factor_at_projection_origin: 1.0
unlimited dimensions: 
current shape = ()
filling on, default _FillValue of -2147483647 used


In [14]:
# Specifying projection to SINMOD format

# Define the Coordinate Reference Systems (CRS) for the coral data
# It could be any of these, need to double check: checked, it is "EUREF89 UTM sone 32, 2d" (you could choose others as well)
crs_wgs84 = CRS.from_epsg(4326)  # WGS84 (lat/lon coordinates)

crs_euref89_utm32 = CRS.from_epsg(25832)  # EUREF89 / UTM zone 32N
crs_euref89_utm33 = CRS.from_epsg(25833)  # EUREF89 / UTM zone 33N
# crs_euref89 = CRS.from_epsg(4258)  # EUREF89 (geographic lat/lon)

# SINMOD projection parameters as a custom projection
#crs_sinmod = CRS.from_proj4("+proj=stere +lat_0=90 +lat_ts=60 +lon_0=58 "
#                            "+x_0=3900000 +y_0=2570000 +ellps=WGS84 +units=m +no_defs")

crs_sinmod = CRS.from_proj4(
    "+proj=stere +lat_0=90 +lat_ts=60 +lon_0=58 "
    "+x_0=3900000 +y_0=2570000 +a=6370000 +b=6370000 +units=m +no_defs"
)

# Create a transformer to transform from wgs84 (geographic) to SINMOD
transformer_wgs84_to_sinmod = Transformer.from_crs(crs_wgs84, crs_sinmod, always_xy=True)

#Create a transformer to transform from euref89 to SINMOD
transformer_euref89_to_sinmod = Transformer.from_crs(crs_euref89_utm32, crs_sinmod, always_xy=True)

transformer_euref89_to_sinmod_33 = Transformer.from_crs(crs_euref89_utm33, crs_sinmod, always_xy=True)


#Create a transformer to transform from SINMOD to wgs84
transformer_sinmod_to_wgs84 = Transformer.from_crs( crs_sinmod, crs_wgs84, always_xy=True)

xc_grid, yc_grid = np.meshgrid(xc, yc)
lon_grid, lat_grid = transformer_wgs84_to_sinmod.transform(xc_grid, yc_grid)


xc_lon_min, yc_lat_min = transformer_sinmod_to_wgs84.transform(xc.min()/20000, yc.min()/20000)
xc_lon_max, yc_lat_max = transformer_sinmod_to_wgs84.transform(xc.max()/20000, yc.max()/20000)

print("Bottom left corner:", xc_lon_min, yc_lat_min)
print("Bottom right corner:", xc_lon_max, yc_lat_min)
print("Top left corner:", xc_lon_min, yc_lat_max)
print("Top right corner:", xc_lon_max, yc_lat_max)


# Performing projection on coral reef data
print("coral_lons range before:", coral_data_EPSG_25832['lengdegrad'].min(), coral_data_EPSG_25832['lengdegrad'].max())
print("coral_lats range before:", coral_data_EPSG_25832['breddegrad'].min(), coral_data_EPSG_25832['breddegrad'].max())

print("coral_lons range before 33:", coral_data_EPSG_25833['lengdegrad'].min(), coral_data_EPSG_25833['lengdegrad'].max())
print("coral_lats range before 33:", coral_data_EPSG_25833['breddegrad'].min(), coral_data_EPSG_25833['breddegrad'].max())
print("--------------------------------")

# Transform coral coordinates from EUREF89 to SINMOD
coral_x, coral_y = transformer_wgs84_to_sinmod.transform(coral_data_EPSG_25832['lengdegrad'], coral_data_EPSG_25832['breddegrad'])

coral_x_33, coral_y_33 = transformer_wgs84_to_sinmod.transform(coral_data_EPSG_25833['lengdegrad'], coral_data_EPSG_25833['breddegrad'])

# We need to DIVIDE by the resolution of the SINMOD grid to get the grid coordinates
# In the case of gin this is 20km
coral_x /= 20000
coral_y /= 20000

coral_x_33 /= 20000
coral_y_33 /= 20000


# The values after are NOT lat lon, but rather the SINMOD grid coordinates 
print("coral_lons grid-coordinates after:", coral_x.min(), coral_x.max())
print("coral_lats grid-coordinates after:", coral_y.min(), coral_y.max())

print("coral_lons grid-coordinates after 33:", coral_x_33.min(), coral_x_33.max())
print("coral_lats grid-coordinates after 33:", coral_y_33.min(), coral_y_33.max())

# So we expect them to be in the range of the SINMOD grid:
print(f"Grid dimensions (xc): {xc.shape[0]}")
print(f"Grid dimensions (yc): {yc.shape[0]}")


print(xc.min(), xc.max())
print(yc.min(), yc.max())


Bottom left corner: 1.3838543498304823 47.09690769440129
Bottom right corner: 1.3834756632228211 47.09690769440129
Top left corner: 1.3838543498304823 47.10006795515003
Top right corner: 1.3834756632228211 47.10006795515003
coral_lons range before: 2.6833 11.32001
coral_lats range before: 59.57281 67.0133
coral_lons range before 33: 8.75218 22.7332
coral_lats range before 33: 65.47768 70.9287
--------------------------------
coral_lons grid-coordinates after: 66.24751269785357 107.00574385691127
coral_lats grid-coordinates after: 29.832777330761463 50.65579211631436
coral_lons grid-coordinates after 33: 100.06158866585542 136.58459222140908
coral_lats grid-coordinates after 33: 38.961115458723505 50.82802951927174
Grid dimensions (xc): 300
Grid dimensions (yc): 235
20000.0 6000000.0
20000.0 4700000.0


In [15]:
import pandas as pd

# Combine the two GeoDataFrames
coral_data_combined = pd.concat([coral_data_EPSG_25832, coral_data_EPSG_25833], ignore_index=True)

# Drop duplicates based on 'breddegrad' and 'lengdegrad'
coral_data_unique = coral_data_combined.drop_duplicates(subset=['breddegrad', 'lengdegrad'])

# Convert the resulting DataFrame back to a GeoDataFrame if needed
coral_data_unique = gpd.GeoDataFrame(coral_data_unique, geometry='geometry')

# Print the unique coral data
print(coral_data_unique)

ValueError: Cannot determine common CRS for concatenation inputs, got ['ETRS89 / UTM zone 32N', 'ETRS89 / UTM zone 33N']. Use `to_crs()` to transform geometries to the same CRS before merging.

In [16]:

grid_xc_old = 400
grid_yc_old = 350

grid_xc_new = 300
grid_yc_new = 235

xc_old_range_min = 800
xc_old_range_max = 320000
yc_old_range_min = 800
yc_old_range_max = 280000

xc_new_range_min = 20000
xc_new_range_max = 6000000
yc_new_range_min = 20000
yc_new_range_max = 4700000

sinmod_lon_min_old = 4.316784
sinmod_lon_max_old = 13.138062
sinmod_lat_min_old = 62.272575
sinmod_lat_max_old = 66.156746

