In [None]:

import numpy as np

import random
import matplotlib.pyplot as plt
import xarray as xr


In [None]:
# Hourly 2D bottom-current fields for 2019, stored as three consecutive files.
# The datasets are opened in file order: Jan-Jun, Jun-Aug, Sep-Dec.
jan_jun, jun_aug, sep_dec = (
    xr.open_dataset(nc_path)
    for nc_path in (
        "/cluster/projects/itk-SINMOD/coral-mapping/midnor/samp_2D_jan_jun.nc",
        "/cluster/projects/itk-SINMOD/coral-mapping/midnor/samp_2D_jun-aug.nc",
        "/cluster/projects/itk-SINMOD/coral-mapping/midnor/samp_2D_sep-dec.nc",
    )
)

In [None]:
# Remove overlap: drop the trailing time steps of each file that are repeated
# at the start of the next one (presumably 360 h and 192 h of hourly data --
# TODO confirm against the files' time axes).
# NOTE: this cell rebinds its own inputs, so running it twice trims twice.
jan_jun = jan_jun.isel(time=slice(None, -360))
jun_aug = jun_aug.isel(time=slice(None, -192))

In [None]:
# Merge the three datasets along time.
# WARNING: concatenating the full datasets did not fit in memory on this machine
# (original note: "not possible with this memory size.. Can do this later if necessary").
# The cells below concatenate only the u/v bottom-current variables instead.
# The datasets opened above are named jan_jun / jun_aug / sep_dec; the previous
# current_* names were never defined and raised NameError.
current_2019 = xr.concat([jan_jun, jun_aug, sep_dec], dim="time")

In [None]:
# List the variable names available in the Jan-Jun dataset.
print([variable_name for variable_name in jan_jun.variables])

In [44]:
# Pull the bottom-current components out of each period's dataset,
# keeping the file order Jan-Jun, Jun-Aug, Sep-Dec.
u_bottom_jan_jun, u_bottom_jun_aug, u_bottom_sep_dec = (
    period.variables['u-bottom'] for period in (jan_jun, jun_aug, sep_dec)
)
v_bottom_jan_jun, v_bottom_jun_aug, v_bottom_sep_dec = (
    period.variables['v-bottom'] for period in (jan_jun, jun_aug, sep_dec)
)

In [45]:
# Wrap each extracted variable in a DataArray so the pieces can be concatenated.
u_bottom_jan_jun, u_bottom_jun_aug, u_bottom_sep_dec = map(
    xr.DataArray, (u_bottom_jan_jun, u_bottom_jun_aug, u_bottom_sep_dec)
)
v_bottom_jan_jun, v_bottom_jun_aug, v_bottom_sep_dec = map(
    xr.DataArray, (v_bottom_jan_jun, v_bottom_jun_aug, v_bottom_sep_dec)
)

# Combine the three periods into one series per component along the time axis.
u_pieces = [u_bottom_jan_jun, u_bottom_jun_aug, u_bottom_sep_dec]
v_pieces = [v_bottom_jan_jun, v_bottom_jun_aug, v_bottom_sep_dec]
u_bottom_2019 = xr.concat(u_pieces, dim='time')
v_bottom_2019 = xr.concat(v_pieces, dim='time')


In [12]:
# Bundle both combined components into one dataset and write it to disk.
output_path = "/cluster/home/maikents/bottom_currents_2019.nc"

bottom_currents = xr.Dataset(
    data_vars={"u_bottom": u_bottom_2019, "v_bottom": v_bottom_2019}
)
bottom_currents.to_netcdf(path=output_path)


In [None]:
# Coral data: processed coral observations; the 'x' and 'y' columns are used
# further down to locate each observation on the model grid.
import pandas as pd

coral_data = pd.read_parquet(
    path='/cluster/home/maikents/midnor_total_coral_data_processed_v2.parquet'
)


In [None]:
# Sanity check: compare the grid extent with the largest coral coordinate after
# converting to grid-index units.
# The grid parameters are derived here so the cell also runs on a fresh kernel;
# previously they were only defined by the histogram cell further down.
hor_res = jan_jun.grid_mapping.attrs['horizontal_resolution']
xc_max = jan_jun.variables['xc'].shape[0]
yc_max = jan_jun.variables['yc'].shape[0]

# Series.max() is vectorised and skips NaN, unlike the builtin max() on a Series.
coral_x_max = (coral_data['x'] / hor_res).max()
coral_y_max = (coral_data['y'] / hor_res).max()

print(f"xc_max: {xc_max}, yc_max: {yc_max}")
print(f"Max x in coral data: {coral_x_max}, Max y: {coral_y_max}")

In [None]:
#Histogram for bottom current speed at the coral locations (Jan-Jun file)
hor_res = jan_jun.grid_mapping.attrs['horizontal_resolution']
u_bottom_var = jan_jun.variables['u-bottom']
v_bottom_var = jan_jun.variables['v-bottom']
xc_max = jan_jun.variables['xc'].shape[0]
yc_max = jan_jun.variables['yc'].shape[0]

#Loop over the time steps of the file that is actually sampled. The variables
#are indexed as [time, y, x], so axis 0 is time. (Using the length of the
#full-year series here ran past the end of the Jan-Jun file.)
t_start, t_stop = (0, u_bottom_var.shape[0])

#Nearest grid cell of every coral point, computed once. Points outside the grid
#are clipped to the closest edge cell.
coral_x_idx = np.clip(np.round(coral_data['x'].to_numpy() / hor_res).astype(int), 0, xc_max - 1)
coral_y_idx = np.clip(np.round(coral_data['y'].to_numpy() / hor_res).astype(int), 0, yc_max - 1)

coral_values_jan_jun = []
for time_idx in range(t_start, t_stop):
    #Load one 2D field per component and pick the coral cells point-wise.
    #Indexing the numpy arrays with two index arrays pairs them element by
    #element; indexing the xarray variable that way selects the full
    #(len(y), len(x)) block instead.
    u_field = np.asarray(u_bottom_var[time_idx].values, dtype=float)
    v_field = np.asarray(v_bottom_var[time_idx].values, dtype=float)
    speed = np.hypot(u_field[coral_y_idx, coral_x_idx], v_field[coral_y_idx, coral_x_idx])
    #xarray represents missing cells as NaN rather than as a numpy mask
    coral_values_jan_jun.append(speed[np.isfinite(speed)])

#Combine all time steps into one flat array
if coral_values_jan_jun:
    coral_values_jan_jun = np.concatenate(coral_values_jan_jun)
else:
    coral_values_jan_jun = np.array([], dtype=float)
coral_values_jan_jun_flat = coral_values_jan_jun.flatten()

if coral_values_jan_jun_flat.size == 0:
    raise ValueError("No valid bottom-current values found at the coral data points")

# Calculate statistics for coral values
stats_jan_jun = {
    'mean': np.mean(coral_values_jan_jun_flat),
    'max': np.max(coral_values_jan_jun_flat),
    'min': np.min(coral_values_jan_jun_flat),
    '90th_percentile': np.percentile(coral_values_jan_jun_flat, 90),
    '10th_percentile': np.percentile(coral_values_jan_jun_flat, 10)
}

print(f"Statistics for coral values: {stats_jan_jun}")

#Determine the bin edges for the histograms
bin_edges = np.histogram_bin_edges(coral_values_jan_jun_flat, bins=20)

#Plot histogram of the combined variable values
plt.hist(coral_values_jan_jun_flat, bins=bin_edges, edgecolor='black')
plt.xlabel('Current speed [m/s]', fontsize=14)
plt.ylabel('Frequency', fontsize=14)
plt.title('Histogram of current speeds at coral data points', fontsize=16)
plt.grid(True)
plt.savefig('/cluster/home/maikents/current_speed_histogram_coral_points_jan_jun.png')

plt.show()




In [None]:
          """
            #Ensure the coordinates are within the grid bounds
            if 0 <= x <= xc_max and 0 <= y <= yc_max:
                #value = np.sqrt(u_bottom_2019[time, x, y]**2 + v_bottom_2019[time, x, y]**2)
                #value = np.sqrt(u_bottom_2019.isel(xc=int(x), yc=int(y), time=time)**2 +v_bottom_2019.isel(xc=int(x), yc=int(y), time=time)**2)                #value = netcdf_data.variables[variable_name][time, layer_index, int(y), int(x)]
                value = np.sqrt(jan_jun.variables['u-bottom'][time, int(y), int(x)]**2 + jan_jun.variables['v-bottom'][time, int(y), int(x)]**2)
"""
                

In [None]:
#Plot histograms
import random
import numpy as np
import matplotlib.pyplot as plt

def _summarize_values(values):
    """Return mean, max, min and the 90th/10th percentiles of ``values`` as a dict."""
    return {
        'mean': np.mean(values),
        'max': np.max(values),
        'min': np.min(values),
        '90th_percentile': np.percentile(values, 90),
        '10th_percentile': np.percentile(values, 10)
    }


def _layer_index_at(netcdf_data, cumulative_depth, yi, xi):
    """Return the vertical layer index matching the 'depth' value at grid cell (yi, xi)."""
    depth_at_point = netcdf_data.variables['depth'][yi, xi]
    layer_index = int(np.searchsorted(cumulative_depth, depth_at_point))
    # searchsorted returns len(cumulative_depth) when the depth exceeds the last
    # cumulative value; clamp so the lookup stays on the deepest layer.
    return min(layer_index, len(cumulative_depth) - 1)


def _value_at(netcdf_data, variable_name, time, layer_index, yi, xi):
    """Read ``variable_name`` (or the derived 'current_speed') at one time/layer/cell."""
    variables = netcdf_data.variables
    if variable_name == 'current_speed':
        u = variables['u_velocity'][time, layer_index, yi, xi]
        v = variables['v_velocity'][time, layer_index, yi, xi]
        return np.sqrt(u**2 + v**2)
    return variables[variable_name][time, layer_index, yi, xi]


def plot_histogram_at_coral_points(netcdf_data, coral_data, variable_name, t_range, generate_random=False, save=False, save_path="midnor", random_depth_range=(150, 275)):
    """Plot a histogram of a model variable sampled at the coral locations.

    For every time step in ``t_range`` and every coral point inside the grid,
    the value of ``variable_name`` is read at the layer found by searching the
    cumulative 'LayerDepths' for the local 'depth' value. Masked values are
    skipped. Optionally the same number of randomly placed points is sampled
    for comparison, using the same histogram bins.

    Parameters
    ----------
    netcdf_data : object with a ``variables`` mapping
        Must provide 'LayerDepths', 'xc', 'yc', 'time', 'depth', 'grid_mapping'
        (with a ``getncattr('horizontal_resolution')`` method) and the requested
        variable indexed as [time, layer, y, x]. 'temperature' is also needed
        when ``generate_random`` is True. Presumably a netCDF4 dataset -- confirm
        against the caller.
    coral_data : mapping / DataFrame
        Needs 'x' and 'y' columns; they are divided by the horizontal
        resolution to obtain grid indices.
    variable_name : str
        Variable to sample, or 'current_speed' to derive the speed from
        'u_velocity' and 'v_velocity'.
    t_range : tuple(int, int)
        (start, stop) time indices; a stop of -1 means "to the end".
    generate_random : bool
        Also sample random locations and return their values and statistics.
    save : bool
        Save the figure(s) under 'plots/variable_histograms/'.
    save_path : str
        Label used in the plot title and the file name.
    random_depth_range : tuple(float, float)
        Inclusive (min, max) 'depth' range that random locations are drawn from.

    Returns
    -------
    (coral_values, stats), or (coral_values, stats, random_values, random_stats)
    when ``generate_random`` is True.

    Raises
    ------
    ValueError
        If no unmasked value is found for the coral points or the random points.
    """
    zc = netcdf_data.variables['LayerDepths'][:]
    cumulative_depth = np.cumsum(zc, axis=0)

    xc_max = netcdf_data.variables['xc'].shape[0]
    yc_max = netcdf_data.variables['yc'].shape[0]

    hor_res = netcdf_data.variables['grid_mapping'].getncattr('horizontal_resolution')

    t_start, t_stop = t_range

    if t_stop == -1:
        t_stop = netcdf_data.variables['time'].shape[0]

    # Grid cell and layer of every coral point. These do not depend on time, so
    # they are resolved once instead of once per time step.
    coral_cells = []
    for x, y in zip(coral_data['x']/hor_res, coral_data['y']/hor_res):
        # Strict upper bound: index xc_max / yc_max is one past the end of the grid.
        if 0 <= x < xc_max and 0 <= y < yc_max:
            yi, xi = int(y), int(x)
            coral_cells.append(((x, y), yi, xi, _layer_index_at(netcdf_data, cumulative_depth, yi, xi)))

    # Extract variable values at coral data points
    coral_values = []
    valid_coordinates = set()  # set gives O(1) membership instead of a list scan

    for time in range(t_start, t_stop):
        for coordinate, yi, xi, layer_index in coral_cells:
            value = _value_at(netcdf_data, variable_name, time, layer_index, yi, xi)
            if not np.ma.is_masked(value):
                coral_values.append(value)
                valid_coordinates.add(coordinate)

    # Convert list to numpy array and flatten to combine all time steps
    coral_values = np.array(coral_values)
    coral_values_flat = coral_values.flatten()

    if coral_values_flat.size == 0:
        raise ValueError(f"No valid '{variable_name}' values found at the coral data points")

    # Calculate statistics for coral values
    stats = _summarize_values(coral_values_flat)

    print(f"Statistics for coral values: {stats}")

    # Determine the bin edges for the histograms
    bin_edges = np.histogram_bin_edges(coral_values_flat, bins=20)

    # Plot histogram of the combined variable values
    plt.hist(coral_values_flat, bins=bin_edges, edgecolor='black')
    plt.xlabel(f'{variable_name.capitalize()}')
    plt.ylabel('Frequency')
    plt.title(f'Histogram of {variable_name.capitalize()} at Coral Data Points - {save_path.capitalize()}')

    if save:
        plt.savefig(f'plots/variable_histograms/{variable_name}_histogram_coral_points_{save_path}.png')

    plt.show()

    print(f"Number of valid coordinates: {len(valid_coordinates)} out of {len(coral_data)}")

    if not generate_random:
        return coral_values, stats

    # Random comparison sample: as many points per time step as there are valid coral points
    no_points = len(valid_coordinates)
    min_depth, max_depth = random_depth_range

    # Candidate cells: unmasked in the first temperature field AND inside the
    # depth band. The three conditions are combined with `&`; passing three
    # arrays to np.logical_and treats the third one as the output buffer, so
    # the lower depth bound was never applied.
    depth = netcdf_data.variables['depth'][:]
    unmasked = ~np.ma.getmaskarray(netcdf_data.variables['temperature'][0, 0])
    in_depth_band = np.ma.filled((depth >= min_depth) & (depth <= max_depth), False)
    valid_xc_yc_indices = np.argwhere(unmasked & in_depth_band)

    random_values = []

    for time in range(t_start, t_stop):
        chosen_rows = np.random.choice(valid_xc_yc_indices.shape[0], size=no_points, replace=False)

        # argwhere only yields in-grid (y, x) index pairs, so no bounds check is needed
        for y, x in valid_xc_yc_indices[chosen_rows]:
            yi, xi = int(y), int(x)
            layer_index = _layer_index_at(netcdf_data, cumulative_depth, yi, xi)
            value = _value_at(netcdf_data, variable_name, time, layer_index, yi, xi)
            if not np.ma.is_masked(value):
                random_values.append(value)

    # Convert list to numpy array and flatten to combine all time steps
    random_values = np.array(random_values)
    random_values_flat = random_values.flatten()

    if random_values_flat.size == 0:
        raise ValueError(f"No valid '{variable_name}' values found at the random points")

    # Plot histogram of the combined random variable values (same bins as the coral histogram)
    plt.hist(random_values_flat, bins=bin_edges, edgecolor='black')
    plt.xlabel(f'Random {variable_name.capitalize()}')
    plt.ylabel('Frequency')
    plt.title(f'Histogram of Random {variable_name.capitalize()} Values - {save_path.capitalize()}')

    if save:
        plt.savefig(f'plots/variable_histograms/{variable_name}_histogram_coral_points_{save_path}_random.png')

    plt.show()

    random_stats = _summarize_values(random_values_flat)

    print(f"Statistics for random values: {random_stats}")

    return coral_values, stats, random_values, random_stats

# Example usage
# plot_histogram_at_coral_points(PhysStates_data, coral_data, 'temperature', (0, 50))
# plot_histogram_at_coral_points(nor4km_PhysStates_data, nor4km_coral_data, 'temperature', (0, 50))
# Temperature at the midnor coral points for the first 10 time steps, with the
# random comparison sample enabled (hence four return values).
midnor_temp, midnor_stats, midnor_temp_random, midnor_random_stats = plot_histogram_at_coral_points(
    midnor_PhysStates_data,
    midnor_coral_data,
    variable_name='temperature',
    t_range=(0, 10),
    generate_random=True,
)
# plot_histogram_at_coral_points(midnor_PhysStates_data, midnor_coral_data, 'salinity', (0, 50))