### Import packages

In [1]:
import os
import netCDF4
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

### Set NetCDF folder path and import site CSV

In [2]:
# Directory that holds the max-annual-DHW NetCDF files (one per year, see the
# extraction cell). The trailing slash is part of the value.
ncdf_folder = '../DHW/Max_annual_DHW/'

In [3]:
# Load the site table and parse its 'date' column into datetimes
df = pd.read_csv('../Data/site_coord_geoenrich2_LM.csv')
df['date'] = pd.to_datetime(df['date'])

# Calendar year of every row, computed once and reused for each filter below
sample_years = df['date'].dt.year

# year -> rows of `df` dated in that year, for 2008 through 2021
# (a year with no matching rows maps to an empty DataFrame)
dfs_by_year = {}
for year in range(2008, 2022):
    df_year = df.loc[sample_years == year]
    dfs_by_year[year] = df_year

# Expose each yearly subset under its own name as well
(df_2008, df_2009, df_2010, df_2011, df_2012, df_2013, df_2014,
 df_2015, df_2016, df_2017, df_2018, df_2019, df_2020, df_2021) = (
    dfs_by_year[y] for y in range(2008, 2022)
)

### Check NetCDF files

In [4]:
# Path of one sample file (2008), built from the shared data folder so this
# check always inspects the same directory the extraction step reads from
file_path = os.path.join(ncdf_folder, "ct5km_dhw-max_v3.1_2008.nc")

# List the variable names stored in the file. The context manager closes the
# dataset even if an error is raised while it is open.
with netCDF4.Dataset(file_path) as nc_file:
    print("Variables in the NetCDF file:")
    for var_name in nc_file.variables:
        print(var_name)


Variables in the NetCDF file:
time
lat
lon
degree_heating_week
date
mask
crs


### Extract max annual DHW 5km

In [5]:
# One small DataFrame per site is collected here and concatenated once at the end
results_list = []

# Iterate over the years from 2008 to 2021
for year in range(2008, 2022):
    # Sites sampled in this year
    df_year = dfs_by_year[year]

    # Nothing to look up for this year: skip opening and loading its file
    if df_year.empty:
        continue

    # NetCDF file for the year
    ncdf_file = f"ct5km_dhw-max_v3.1_{year}.nc"
    ncdf_path = os.path.join(ncdf_folder, ncdf_file)

    # The context manager closes the dataset even if a lookup below raises
    with netCDF4.Dataset(ncdf_path) as ncdf_dataset:
        # Load the full 'degree_heating_week' array for this file
        degree_heating_week = ncdf_dataset.variables['degree_heating_week'][:]

        # Coordinate axes are the same for every site in this file, so read
        # them once here instead of once per site
        grid_lat = ncdf_dataset.variables['lat'][:]
        grid_lon = ncdf_dataset.variables['lon'][:]

        # Iterate over the sites of this year
        for _, point in df_year.iterrows():
            lat = point['latitude']
            lon = point['longitude']

            # Index of the grid cell whose coordinate is closest to the site
            lat_index = abs(grid_lat - lat).argmin()
            lon_index = abs(grid_lon - lon).argmin()

            # The leading axis is kept, so this is a 1-D array and the
            # DataFrame below gets one row per entry along that axis
            # (presumably time — TODO confirm)
            dhw_value = degree_heating_week[:, lat_index, lon_index]

            # Scalars are broadcast against the length of 'max_DHW'
            point_df = pd.DataFrame({
                'id': point['id'],
                'latitude': lat,
                'longitude': lon,
                'year': year,
                'max_DHW': dhw_value
            })
            results_list.append(point_df)

# Concatenate all the results into a single DataFrame; pd.concat raises on an
# empty list, so fall back to an empty frame with the expected columns
if results_list:
    results_df = pd.concat(results_list, ignore_index=True)
else:
    results_df = pd.DataFrame(
        columns=['id', 'latitude', 'longitude', 'year', 'max_DHW']
    )

# Save the results to a CSV file
# NOTE(review): the cells under "Map" read this CSV, so it must already exist
# on disk while the line below stays commented out
#results_df.to_csv('../Results/MaxDHW_all_1year.csv', index=False)

In [23]:
# Show the extracted table: one row per site with its id, coordinates, year and max_DHW
print(results_df)

        id  latitude  longitude  year  max_DHW
0       c1       -20        149  2008     0.16
1      c15       -20        150  2008     0.15
2      c29       -21        151  2008     0.15
3      c43       -21        152  2008     1.14
4      c57       -21        153  2008     0.60
..     ...       ...        ...   ...      ...
127  c1779       -44        147  2008     0.68
128  c1793       -42        145  2008     1.06
129  c1807       -41        148  2008      NaN
130  c1821       -41        145  2008      NaN
131  c1835       -40        148  2008      NaN

[132 rows x 5 columns]


### Map

#### All points

In [47]:
# Load the saved max-DHW table
# (presumably written by the commented-out to_csv in the extraction cell — TODO confirm)
data = pd.read_csv('../Results/MaxDHW_all_1year.csv')

# Geographic scatter of every site, coloured by its max_DHW value,
# with the map view fitted to the plotted locations
fig = px.scatter_geo(
    data_frame=data,
    lat='latitude',
    lon='longitude',
    color='max_DHW',
).update_geos(fitbounds="locations")
fig.show()

#### Missing points

In [49]:
# Import data
df = pd.read_csv('../Results/MaxDHW_all_1year.csv')

# Replace NaN in the 'max_DHW' column by a value outside the DHW range.
# The fill is limited to that column so the sentinel cannot leak into
# other columns such as the coordinates.
data = df.fillna({'max_DHW': 1000})

# Function to assign specific color
def SetColor(data, missing_value=1000):
    """Label every 'max_DHW' entry as missing or valid, for use as a plot colour.

    Parameters
    ----------
    data : DataFrame-like
        Object whose ``data['max_DHW']`` supports ``.tolist()``.
    missing_value : scalar, default 1000
        Sentinel that marks a missing value (the value used to fill NaN
        before calling this function).

    Returns
    -------
    list of str
        One label per entry, in order: "Missing values" when the entry equals
        ``missing_value`` or is NaN, otherwise "max_DHW values".
    """
    return [
        # A NaN that was never replaced by the sentinel is also missing
        "Missing values"
        if (pd.isna(value) or value == missing_value)
        else "max_DHW values"
        for value in data['max_DHW'].tolist()
    ]


# One label per row: "Missing values" or "max_DHW values"
point_labels = SetColor(data)

# Geographic scatter of the sites coloured by that label,
# with the map view fitted to the plotted locations
fig = px.scatter_geo(
    data_frame=data,
    lat='latitude',
    lon='longitude',
    color=point_labels,
).update_geos(fitbounds="locations")
fig.show()