# Introduction: 
## Extract the path of particle tracking outputs
The following code reads the particle tracking outputs and extracts the latitude and longitude.

# Steps
1- Release the particles (2_PT_seseflux)
2- Read the particles for each month
3- Make a dataframe for the particles and add group_number to the columns
4- Reduce to the first intersection of each particle with the coastline, and delete the particles that do not interact with the coastline
5- Make a dataframe that only includes the particles' first intersection with the shoreline for each month
- Another thing we need to do is add a group_number to the particles, in addition to the group_id (like 11, 12, 13)

### Part1: Required Imports

In [7]:
# Importing libraries
from netCDF4 import Dataset
from netCDF4 import MFDataset
import netCDF4 as nc
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
import matplotlib.dates as mdates
from matplotlib import rcParams
from matplotlib.colors import LinearSegmentedColormap, LogNorm
import re 
import pylag
from shapely.geometry import Point
import contextily as ctx
import datetime
import dask
import dask.dataframe as dd
import gc 
import concurrent.futures
import cftime  # Import cftime to handle cftime.DatetimeGregorian objects
from datetime import datetime


#### Part1.1: Define the path to the PyLag output NetCDF files

In [1]:
# Directory that holds the particle-tracking output NetCDF files; the glob
# patterns in the following cells are resolved relative to this path.
data_dir = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output'

In [4]:
def sort_key(file):
    """Return the integer that follows the first ``__`` in the file's base name.

    Intended as the ``key`` for sorting output files numerically, so that
    ``name__2.nc`` comes before ``name__10.nc``.
    """
    pieces = os.path.basename(file).split('__')
    # Text right after the first double underscore, cut at the first dot.
    digits, _, _ = pieces[1].partition('.')
    return int(digits)
# Collect the winter output files and order them by their numeric suffix.
files = sorted(
    glob.glob(data_dir + "/updated_Fvcome_huron_estuary_2023_Winter_*.nc"),
    key=sort_key,
)

#### Part1.2: Reading the output files

In [6]:
def sort_key(file):
    """Sort key: the integer between the first ``__`` of the base name and the next dot."""
    after_marker = os.path.split(file)[1].split('__', 2)[1]
    return int(after_marker.split('.', 1)[0])

# Locate the winter output files and order them by the number after "__".
pattern = os.path.join(data_dir, 'updated_Fvcome_huron_estuary_2023_Winter_*.nc')
files = sorted(glob.glob(pattern), key=sort_key)

# Open each file as its own xarray dataset (one list entry per file, in the
# same order as `files`).
datasets = [xr.open_mfdataset(file) for file in files]

# (Optional) Echo the sorted file names to verify the ordering.
for file in files:
    print(file)

/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Jan__1.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Feb__2.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Mar__3.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Apr__4.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_May__5.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Jun__6.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_July__7.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Aug__8.nc
/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Sep__9.nc
/home/abo

# Part2: Extract variable

#### Part2-1: Test building a DataFrame. Prepare the dataset and ensure we have the combined GeoDataFrame

In [5]:
# Work with the last dataset in the list.
ds = datasets[-1]

# Pull the raw arrays out of that dataset. Each `.values` call returns every
# value stored for the variable, not a single time step.
group_id, lat, lon, time = (
    ds[var_name].values
    for var_name in ('group_id', 'latitude', 'longitude', 'time')
)


## Read the variables for all the data

In [None]:
# Build one pandas DataFrame per dataset and stack them.
#
# Each dataset is already fully materialised by `to_dataframe()`, so wrapping the
# result in Dask and computing it straight away gave no chunking benefit; the
# frames are concatenated with pandas directly instead.
variables = ['latitude', 'longitude', 'time', 'group_id', 'WetLoad_TN_kg2', 'WetLoad_TP_kg2']

dataframes = []
for dataset in datasets:
    # Extract the required variables into a flat DataFrame
    df = dataset[variables].to_dataframe().reset_index()

    # Shift positive longitudes by -360
    positive_lon = df['longitude'] > 0
    df.loc[positive_lon, 'longitude'] -= 360

    dataframes.append(df)

# Concatenate all per-dataset frames; each frame keeps its own 0..n-1 index
final_df = pd.concat(dataframes)

# Display the result
print(final_df.head())

# Read the data using netCDF4 Dataset, create a DataFrame, and then a GeoDataFrame for each unique group_id

In [15]:
# Initialize an empty list to store DataFrames from each file
df_list = []


def _as_1d(values):
    """Return *values* flattened when it has more than one dimension."""
    return values.flatten() if values.ndim > 1 else values


def process_file(file):
    """Read one particle-tracking NetCDF file into a pandas DataFrame.

    Parameters
    ----------
    file : str
        Path of the NetCDF file to read.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``latitude``, ``longitude``, ``time``, ``group_id``,
        ``WetLoad_TN_kg2``, ``WetLoad_TP_kg2`` and ``geometry`` (shapely Points).
        ``None`` when a required variable is missing or any error occurs; the
        reason is printed in both cases.
    """
    required_vars = ['latitude', 'longitude', 'time', 'group_id', 'WetLoad_TN_kg2', 'WetLoad_TP_kg2']

    try:
        # Step 1: Open the NetCDF file. The context manager closes it on every
        # exit path, including the early return and any exception below.
        with nc.Dataset(file, 'r') as dataset:
            # Step 2: Ensure the necessary variables exist in the file
            if not all(var in dataset.variables for var in required_vars):
                print(f"Skipping file {file}: Missing required variables.")
                return None

            # Step 3: Read the variables into memory while the file is open
            raw = {var: dataset.variables[var][:] for var in required_vars}
            time_units = dataset.variables['time'].units

        # Diagnostic step: report the number of unique group_ids in this file
        unique_group_ids_file = set(raw['group_id'].flatten())
        print(f"File: {file} | Unique group_ids: {len(unique_group_ids_file)}")

        # Step 4: Convert the numeric time axis to date objects
        time = nc.num2date(raw['time'], units=time_units)

        # Keep values that are already `datetime` instances; format anything else
        # (e.g. cftime objects) as a string.
        # NOTE(review): relies on `datetime` being the class from
        # `from datetime import datetime`, not the module — confirm import order.
        time = pd.Series(
            [t if isinstance(t, datetime) else t.strftime('%Y-%m-%d %H:%M:%S') for t in time],
            name='time',
        )

        # Step 5: Make every column 1-dimensional
        columns = {
            'latitude': _as_1d(raw['latitude']),
            'longitude': _as_1d(raw['longitude']),
            'time': time,
            'group_id': _as_1d(raw['group_id']),
            'WetLoad_TN_kg2': _as_1d(raw['WetLoad_TN_kg2']),
            'WetLoad_TP_kg2': _as_1d(raw['WetLoad_TP_kg2']),
        }

        # Truncate ALL columns (WetLoad_TP_kg2 included) to the shortest one so
        # the DataFrame constructor never sees arrays of unequal length.
        # NOTE(review): truncation pairs element i of each flattened array. If
        # latitude/longitude are 2-D while time/group_id are 1-D, this keeps only
        # the first `min_length` positions and the rows may not describe the same
        # particle/time — confirm the variable dimensions in these files.
        min_length = min(len(values) for values in columns.values())

        # Step 6: Create a pandas DataFrame with the extracted data
        df = pd.DataFrame({name: values[:min_length] for name, values in columns.items()})

        # Step 7: Shift positive longitudes by -360
        positive_lon = df['longitude'] > 0
        df.loc[positive_lon, 'longitude'] -= 360

        # Step 8: Create the geometry column using longitude and latitude
        df['geometry'] = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]

        return df

    except Exception as e:
        # Best-effort processing: report the failure and let the caller skip this file
        print(f"Error processing file {file}: {e}")
        return None

# Process all the files one by one
for file in files:
    df = process_file(file)
    if df is not None:
        df_list.append(df)
    else:
        print(f"File {file} returned no data.")

    # Manually free memory
    gc.collect()

# Step 9: Only continue when at least one file produced a DataFrame
if df_list:
    # Concatenate all DataFrames into one
    final_df = pd.concat(df_list, ignore_index=True)

    # Debug: Check the number of unique group_ids extracted across all files
    unique_group_ids = final_df['group_id'].unique()
    print(f"Total unique group_id values found across all files: {len(unique_group_ids)}")
    print(f"Unique group_ids across all files: {unique_group_ids}")

    # Step 10: Convert the pandas DataFrame to a GeoDataFrame
    gdf = gpd.GeoDataFrame(final_df, geometry='geometry')

    # Step 11: Set the Coordinate Reference System (CRS) to WGS 84 (EPSG:4326)
    gdf.set_crs(epsg=4326, inplace=True)

    # Step 12: Shorten column names to fit within Shapefile's 10-character limit
    gdf.rename(columns={
        'WetLoad_TN_kg2': 'TN_Load',
        'WetLoad_TP_kg2': 'TP_Load',
        'geometry': 'geom'
    }, inplace=True)

    # Step 13: Set the active geometry column to 'geom' after renaming
    gdf = gdf.set_geometry('geom')

    # Step 14: Convert the 'time' column to string format to avoid issues with datetime in shapefiles
    gdf['time'] = gdf['time'].astype(str)

    # Step 15: Check again for unique group_ids and verify data integrity
    unique_group_ids = gdf['group_id'].unique()
    print(f"Total unique group_ids in GeoDataFrame: {len(unique_group_ids)}")
    print(f"Unique group_ids in GeoDataFrame: {unique_group_ids}")

    # Step 16: Build one GeoDataFrame per group_id.
    # Each iteration rebinds `group_gdf`, so only the last group's frame is
    # still available after the loop (the export below is commented out).
    for gid in unique_group_ids:
        # Filter rows for the current group_id and select specific columns
        subset = gdf[gdf['group_id'] == gid][['time','group_id','longitude', 'latitude', 'TN_Load', 'TP_Load','geom']]

        # Create a GeoDataFrame for the current group_id
        group_gdf = gpd.GeoDataFrame(subset, geometry='geom', crs=gdf.crs)

        # Save each group GeoDataFrame as a shapefile or GeoJSON with shortened column names
        #group_gdf.to_file(f"group_{gid}_data.shp")

    print("Individual GeoDataFrames with geometry and shortened column names created successfully for each group_id.")
else:
    # Without this branch an empty run was silent and left gdf/group_gdf undefined
    print("No valid DataFrames were created from the NetCDF files.")
    
    
    

Error processing file /home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Jan__1.nc: MFNetCDF4 only works with NETCDF3_* and NETCDF4_CLASSIC formatted files, not NETCDF4
File /home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Jan__1.nc returned no data.
Error processing file /home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Feb__2.nc: MFNetCDF4 only works with NETCDF3_* and NETCDF4_CLASSIC formatted files, not NETCDF4
File /home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Feb__2.nc returned no data.
Error processing file /home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Mar__3.nc: MFNetCDF4 only works with NETCDF3_* and NETCDF4_CLASSIC formatted files, not NETCDF4
File /home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/updated_Fvcome_h

In [12]:
# Show the last rows of `group_gdf`, i.e. the GeoDataFrame left over from the
# final iteration of the per-group loop
print (group_gdf.tail())



                     time  group_id  longitude   latitude       TN_Load  \
1330  2023-12-30 00:00:00         3 -84.230347  45.641350  1.767471e-08   
1331  2023-12-30 06:00:00         3 -84.226349  45.641350  1.767471e-08   
1332  2023-12-30 12:00:00         3 -84.242371  45.643349  1.767471e-08   
1333  2023-12-30 18:00:00         3 -84.228363  45.639351  1.767471e-08   
1334  2023-12-31 00:00:00         3 -84.238373  45.641350  1.767471e-08   

           TP_Load                        geom  
1330  1.156378e-09  POINT (-84.23035 45.64135)  
1331  1.156378e-09  POINT (-84.22635 45.64135)  
1332  1.156378e-09  POINT (-84.24237 45.64335)  
1333  1.156378e-09  POINT (-84.22836 45.63935)  
1334  1.156378e-09  POINT (-84.23837 45.64135)  


#### Part2-2: Create a geodataframe from particle tracking outputs

In [None]:
# Convert one dataset to a DataFrame.
# `datasets` is a Python list of xarray Datasets, so indexing it with a list of
# variable names raises TypeError; a single dataset has to be picked first.
# -1 (the last file) matches the `ds = datasets[-1]` selection used earlier in
# this notebook; change the index to process a different file.
month_ds = datasets[-1]
df = month_ds[['latitude', 'longitude', 'group_id', 'time','WetLoad_TN_kg2','WetLoad_TP_kg2']].to_dataframe().reset_index()

# Ensure 'time' is in the DataFrame
if 'time' not in df.columns:
    raise KeyError("The 'time' column is missing from the DataFrame.")

# Shift positive longitudes by -360
df.loc[df['longitude'] > 0, 'longitude'] -= 360

# Inspect the DataFrame to check alignment
print(df.head(100))

In [None]:
# Initialize an empty GeoDataFrame whose index is the set of unique timestamps in final_df.
# NOTE(review): this rebinds `gdf`; any GeoDataFrame previously stored under that
# name is replaced by this index-only frame — confirm that is intended before
# running cells that read columns from `gdf`.
time_index = pd.to_datetime(final_df['time'].unique())
gdf = gpd.GeoDataFrame(index=time_index)

In [None]:
# Build one GeoDataFrame per group_id, then stack them into `combined_gdf`
gdfs = []

unique_group_ids = df['group_id'].unique()

for gid in unique_group_ids:
    # Rows of the current group_id. `.copy()` makes the edits below act on an
    # independent frame instead of writing through a slice of `df`.
    subset = df.loc[df['group_id'] == gid, ['time', 'longitude', 'latitude', 'group_id']].copy()

    # Shift positive longitudes by -360
    subset.loc[subset['longitude'] > 0, 'longitude'] -= 360

    # Normalise 'time' through datetime, then store it as text
    subset['time'] = pd.to_datetime(subset['time']).astype(str)

    # Point geometry from longitude/latitude.
    # NOTE(review): no CRS is attached here; later cells assign EPSG:4326 after
    # reading the saved shapefiles back in.
    subset_gdf = gpd.GeoDataFrame(subset, geometry=gpd.points_from_xy(subset.longitude, subset.latitude))

    gdfs.append(subset_gdf)

# Combine all individual GeoDataFrames into one
combined_gdf = pd.concat(gdfs)

# Use 'time' as the index
combined_gdf.set_index('time', inplace=True)

# The former self-assignments of 'group_id' and 'geometry' and the drop of
# 'geometry_*' columns were no-ops (no such columns are created above) and were
# removed; the row-wise geometry `apply` was also slow on large frames.

# convert datetime field to string
#gdf['time'] = gdf.index.strftime('%Y-%m-%d %H:%M:%S')
# Ensure the final GeoDataFrame is correct
print(combined_gdf.tail(100))

#### Part2-3: save the geodataframe as a shapefile

In [None]:
# Save the current combined_gdf as the January shapefile (particle_tracking_Jan.shp).
# NOTE(review): Part 3-1 reads 'combined_gdf_Jan.shp' from the parent output
# directory, which is a different directory and file name — confirm which is current.
#combined_gdf_Feb = combined_gdf.copy()
combined_gdf.to_file('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/Extracted_latlongeodataframe_Pylag/particle_tracking_Jan.shp')

In [None]:
# Save the current combined_gdf as the February shapefile (particle_tracking_Feb.shp).
# NOTE(review): this writes whatever `combined_gdf` currently holds; it presumably
# has to be rebuilt from the February dataset first — confirm.
combined_gdf.to_file('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/Extracted_latlongeodataframe_Pylag/particle_tracking_Feb.shp')

In [None]:
# Save the current combined_gdf as a CSV file for March (particle_tracking_Mar.csv).
# NOTE(review): Part 3-1 reads a March *shapefile* ('combined_gdf_Mar.shp'), while
# only a CSV is written here — confirm where that shapefile comes from.
combined_gdf.to_csv('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/Extracted_latlongeodataframe_Pylag/particle_tracking_Mar.csv')

#### Part2-4: Plotting Particle Tracing versus Group ID

In [None]:
import matplotlib.pyplot as plt

# One tall figure holding a single axis
fig, ax = plt.subplots(1, 1, figsize=(12, 18))

# Draw the points of every group_id as a separate layer (one colour per group)
for gid in unique_group_ids:
    in_group = combined_gdf['group_id'] == gid
    subset = combined_gdf[in_group]
    subset.plot(ax=ax, marker='>', label=f'Group {gid}', markersize=2)

# Optional legend and title, currently disabled
#plt.legend(fontsize=15)
#plt.title('Particle Trajectories along the Huron Jan', fontsize=20)

# Axis labels
ax.set_xlabel('Longitude', fontsize=15)
ax.set_ylabel('Latitude', fontsize=15)

# Grid lines on a grey background
ax.grid(True)
ax.set_facecolor('grey')

# Optional export, currently disabled
#plt.savefig('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/results/particle_tracking_Jan.png')

# Render the figure
plt.show()

In [None]:
# Step 18: Plot the data for visualization, one layer per group_id.
# NOTE(review): this reads `gdf`, which more than one cell assigns; make sure it
# holds the particle GeoDataFrame (with a 'group_id' column) when this runs.
fig, ax = plt.subplots(figsize=(10, 10))
for gid in unique_group_ids:
    subset = gdf[gdf['group_id'] == gid]
    # Skip groups without any rows
    if not subset.empty:
        subset.plot(ax=ax, marker='>', label=f'Group {gid}', markersize=2)

# Set the aspect ratio manually
ax.set_aspect('auto')

# Add a legend and labels
plt.legend(fontsize=15)
plt.title('Particle Trajectories along the Huron Jan', fontsize=20)
plt.xlabel('Longitude', fontsize=15)
plt.ylabel('Latitude', fontsize=15)

# Show the plot
plt.show()
# Leftover (disabled) fallback branch from an earlier version of this cell:
#else:
    #print("No valid DataFrames were created from the NetCDF files.")

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, LogNorm
from netCDF4 import Dataset
import cartopy.crs as ccrs
from pylag.processing.plot import FVCOMPlotter
from pylag.processing.plot import create_figure, colourmap

# Custom pink colormap
pink_shades = ['#fff5f7', '#ffebf0', '#ffd6e1', '#ffbfd4', '#ff99c1', '#ff6ea9', '#ff4c92', '#ff2171', '#b50d4e']
pink_cmap = LinearSegmentedColormap.from_list('custom_pink', pink_shades)

# Custom blue colormap, reversed so the darkest shade comes first
blue_shades = ['#f7fbff', '#deebf7', '#c6dbef', '#9ecae1', '#6baed6', '#4292c6', '#2171b5', '#084594']
blue_shades_reversed = blue_shades[::-1]
blue_cmap_reversed = LinearSegmentedColormap.from_list('custom_blue_reversed', blue_shades_reversed)

# Custom green colormap
green_shades = ['#e8f5e9', '#c8e6c9', '#a5d6a7', '#81c784', '#66bb6a', '#4caf50', '#43a047', '#388e3c', '#2e7d32']
green_cmap = LinearSegmentedColormap.from_list('custom_green', green_shades)

font_size = 15
cmap = colourmap('h_r')

# Create the figure and axis with FVCOM plotter
fig, ax = create_figure(figure_size=(26.,26.),projection=ccrs.PlateCarree(), font_size=font_size, bg_color='gray')

# Load bathymetry data: the negated 'h' variable of the grid metrics file.
# The context manager closes the file even if reading fails.
grid_metrics_file_name = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/input/gridfile/grid_metrics_huron_senseflux_Seasonal_winter.nc'
with Dataset(grid_metrics_file_name, 'r') as ds:
    bathy = -ds.variables['h'][:]

# Configure plotter
plotter = FVCOMPlotter(grid_metrics_file_name,
                       geographic_coords=True,
                       font_size=font_size)

# Plot bathymetry
# Alternative extents kept for reference:
#extents = np.array([-84.10,-84.20, 45.58,45.65], dtype=float)
#extensts = np.array([-84,-81.3, 43,46], dtype=float)
#Lake Huron Ausable Point
extents = np.array([275, 277.69, 43, 46.3], dtype=float)
ax, plot = plotter.plot_field(ax, bathy, extents=extents, add_colour_bar=True, cb_label='Depth (m)',
                              cmap=blue_cmap_reversed, zorder= 0, vmin=-60, vmax=0)

# Overlay grid
plotter.draw_grid(ax, linewidth=1.0)

# Plot each group_id with a different color
for gid in unique_group_ids:
    subset = combined_gdf[combined_gdf['group_id'] == gid]
    subset.plot( ax = ax, marker='<', label=f'Group {gid}', markersize=2, zorder=40)

# Write the figure once, after every group has been drawn. Saving inside the
# loop rewrote the same 300-dpi file for every group.
plt.savefig('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/results/particle_tracking_Jan.jpeg', dpi=300)

# Part3: Find the Intersection of Coastal Wetland and Particle Tracking

### Part 3-1: Read the shapefiles for coastal wetlands, Lake shore line buffer, particle tracking geo data frame

In [None]:
# Directory of the shoreline-buffer and wetland shapefiles.
# NOTE(review): this rebinds `data_dir`, which earlier cells use for the
# particle-tracking output directory.
data_dir = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/results/shapefiles'
# Directory the monthly particle shapefiles are read from
path = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output'
#buffer_lh = gpd.read_file(os.path.join(data_dir, 'lh_shore_ESRI_100k_Buffer1000m_NAD1983.shp'))
# The buffer shapefile uses a 240 m buffer, mainly because the littoral zone is
# around 200 m from the shore and the littoral zone is the area of interest.
# Shoreline buffer shapefile
buffer_lh = gpd.read_file(os.path.join(data_dir, 'lh_shore_ESRI_100k_Buffer240m_NAD1983_US.shp'))
# Coastal wetlands shapefile
CW = gpd.read_file(os.path.join(data_dir, 'wetland_connected_avg_inundation_Albers.shp'))
# Monthly particle-tracking shapefiles.
# NOTE(review): the save cells in Part 2-3 write 'particle_tracking_<Mon>' files
# into the 'Extracted_latlongeodataframe_Pylag' sub-directory (March as CSV), not
# these names — confirm these files exist.
par_jan = gpd.read_file(os.path.join(path, 'combined_gdf_Jan.shp'))
par_Feb = gpd.read_file(os.path.join(path, 'combined_gdf_Feb.shp'))
par_Mar = gpd.read_file(os.path.join(path, 'combined_gdf_Mar.shp'))

In [None]:
# Wrap the shoreline-buffer and wetland layers in GeoDataFrame objects
buffer_gdf, CW_geo = (gpd.GeoDataFrame(layer) for layer in (buffer_lh, CW))

In [None]:
# Wrap each monthly particle layer in a GeoDataFrame object
combined_gdf_Jan, combined_gdf_Feb, combined_gdf_Mar = (
    gpd.GeoDataFrame(month_layer) for month_layer in (par_jan, par_Feb, par_Mar)
)

### Part3-2: Convert the geometry of shapefiles 

In [None]:
# The particle positions are longitude/latitude, so declare EPSG:4326 on every
# monthly layer first; the reprojection to Great Lakes Albers (EPSG:3174) is a
# separate step.
combined_gdf_Jan, combined_gdf_Feb, combined_gdf_Mar = (
    month_gdf.set_crs(epsg=4326)  # particle data
    for month_gdf in (combined_gdf_Jan, combined_gdf_Feb, combined_gdf_Mar)
)

In [None]:
# Reproject every monthly particle layer to Great Lakes Albers (EPSG:3174)
combined_gdf_Jan, combined_gdf_Feb, combined_gdf_Mar = (
    month_gdf.to_crs(epsg=3174)
    for month_gdf in (combined_gdf_Jan, combined_gdf_Feb, combined_gdf_Mar)
)

In [None]:
# Report the CRS of every layer so they can be compared at a glance
for label, layer in (
    ("Coastal wetland", CW_geo),
    ("Buffer", buffer_gdf),
    ("combined_gdf_Jan", combined_gdf_Jan),
    ("combined_gdf_Feb", combined_gdf_Feb),
    ("combined_gdf_Mar", combined_gdf_Mar),
):
    print(label, layer.crs)

### Part 3-3: Reading Grid file from Pylag 

In [None]:
import os

# FVCOM data directory.
# The former `.format(os.environ['HOME'])` was dropped: the string has no
# placeholder, so it changed nothing and only risked a KeyError when HOME is unset.
#data_dir = '/home/samin/data/FVCOME_OUTPUT/Gldata/FVCOMEDATA/202301'.format(os.environ['HOME'])
data_dir = '/home/abolmaal/data/FVCOME_OUTPUT/Gldata/FVCOMEDATA/202301'

# Current working directory (kept for reference; the paths below are absolute)
cwd = os.getcwd()

# Simulation directory and its input sub-directory
simulation_dir = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron'
input_dir = f'{simulation_dir}/input'

# Create both directories if they do not exist yet; makedirs also creates the
# parent, and exist_ok=True replaces the try/except FileExistsError pairs.
os.makedirs(input_dir, exist_ok=True)

# Grid metrics file used by the plotting cells
grid_metrics_file_name = f'{input_dir}/gridfile/grid_metrics_huron_senseflux_Seasonal_winter.nc'

### Part 3-4: Find the intersection of polygon and Point 

In the following section we use a geopandas spatial join to find the intersections of the particle tracking output:
1- between the lh_shore_ESRI_100k_Buffer1000m_WGS84 buffer and the particle tracking points
2- between wetland_connected_avg_Buffer (a 500 meter buffer around the coastal wetlands) and the particle tracking points

### Find the average number of trajectories entering the shoreline for the first three months of winter

### Calculate the average number of particles that enter each polygon for the first time: compute the average number of particles entering the shoreline buffer and print what percentage of the total trajectories remain in the shoreline

 To calculate how many of all the points first interact with the shoreline, we need to consider all individual points, not just those grouped by group_id. We determine the earliest interaction for each point within the points_within_shoreline DataFrame, and then calculate how many of these points first interact with the shoreline.

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, LogNorm

# Monthly particle GeoDataFrames
monthly_data = {
    'Jan': combined_gdf_Jan,
    'Feb': combined_gdf_Feb,
    'Mar': combined_gdf_Mar,
    # Add more months as needed
}

# DataFrame to store particle counts for each polygon across all months
all_particle_counts = pd.DataFrame(columns=['index_right', 'count'])

# Custom blue colormap
blue_shades = ['#f7fbff', '#deebf7', '#c6dbef', '#9ecae1', '#6baed6', '#4292c6', '#2171b5', '#084594']
blue_cmap = LinearSegmentedColormap.from_list('custom_blue', blue_shades)

total_particles_intersecting_buffer = 0
total_points_first_interaction = 0
total_points_tracked = 0

# Iterate over each month's data.
# Note: the loop variable rebinds the module-level name `combined_gdf`, and the
# monthly frames are modified in place (index reset, 'time' converted).
for month, combined_gdf in monthly_data.items():
    # Ensure 'time' is a column and convert to datetime
    if 'time' in combined_gdf.index.names:
        combined_gdf.reset_index(inplace=True)
    combined_gdf['time'] = pd.to_datetime(combined_gdf['time'])

    # Spatial join: keep the points that intersect a shoreline-buffer polygon
    points_within_shoreline = gpd.sjoin(combined_gdf, buffer_gdf[['geometry']], how='inner', predicate='intersects')

    # Ensure 'time' is a column in the intersections and convert to datetime
    if 'time' in points_within_shoreline.index.names:
        points_within_shoreline.reset_index(inplace=True)
    points_within_shoreline['time'] = pd.to_datetime(points_within_shoreline['time'])

    # Order the intersections chronologically
    points_within_shoreline = points_within_shoreline.sort_values(by='time')

    # Number of distinct point geometries inside the buffer this month
    total_points_first_interaction += points_within_shoreline.drop_duplicates(subset=['geometry']).shape[0]

    # Number of distinct group_ids with at least one point inside the buffer
    total_particles_intersecting_buffer += points_within_shoreline['group_id'].nunique()

    # Number of tracked points this month (denominator of the percentage)
    total_points_tracked += combined_gdf.shape[0]

# Average number of group_ids entering the shoreline buffer per month
average_particles_entering_buffer = total_particles_intersecting_buffer / len(monthly_data)

# Percentage printed below. Its assignment used to be commented out, so the
# print raised NameError.
# NOTE(review): defined here as distinct in-buffer points over all tracked
# points; the disabled formula divided by the number of intersecting group_ids
# instead — confirm which ratio is wanted.
if total_points_tracked:
    percentage_points_first_interaction = 100 * total_points_first_interaction / total_points_tracked
else:
    percentage_points_first_interaction = float('nan')

print(f'Average number of particles entering the shoreline buffer: {average_particles_entering_buffer}')
print(f'Percentage of points that first interact with the shoreline buffer: {percentage_points_first_interaction:.2f}%')

# Hexbin of the in-buffer points.
# NOTE(review): `points_within_shoreline` holds only the LAST month of the loop
# at this point, so the figure shows that month's counts, not a multi-month average.
fig, ax = plt.subplots(figsize=(10, 8))
hb = ax.hexbin(points_within_shoreline['longitude'], points_within_shoreline['latitude'], gridsize=100, cmap=blue_cmap, norm=LogNorm())

ax.set_title('Average number of trajectories returning to shoreline over the winter of 2023')

# Add a colorbar
cbar = fig.colorbar(hb)
cbar.set_label('Average Number of Trajectories')
plt.gca().set_facecolor('lightsteelblue')

# Show the plot
plt.show()


In [None]:
# Display the shoreline-buffer GeoDataFrame for inspection
buffer_gdf

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, LogNorm
import numpy as np
from pylag.processing.plot import FVCOMPlotter, create_figure, colourmap
import cartopy.crs as ccrs
from netCDF4 import Dataset
from pylag.processing.utils import get_grid_bands

# Monthly particle GeoDataFrames to average over
monthly_data = {
    'Jan': combined_gdf_Jan,
    'Feb': combined_gdf_Feb,
    'Mar': combined_gdf_Mar,
    # Add more months as needed
}
n_months = len(monthly_data)

# CRS of the shoreline buffer, reused for the per-month intersection frames
buffer_gdf_crs = buffer_gdf.crs

# DataFrame to store particle counts for each polygon across all months
all_particle_counts = pd.DataFrame(columns=['index_right', 'count'])

# Custom pink colormap
pink_shades = ['#fff5f7', '#ffebf0', '#ffd6e1', '#ffbfd4', '#ff99c1', '#ff6ea9', '#ff4c92', '#ff2171', '#b50d4e']
pink_cmap = LinearSegmentedColormap.from_list('custom_pink', pink_shades)

# Custom blue colormap, reversed
blue_shades = ['#f7fbff', '#deebf7', '#c6dbef', '#9ecae1', '#6baed6', '#4292c6', '#2171b5', '#084594']
blue_shades_reversed = blue_shades[::-1]
blue_cmap_reversed = LinearSegmentedColormap.from_list('custom_blue_reversed', blue_shades_reversed)

total_particles_intersecting_buffer = 0
total_particles_tracked = 0
points_within_shoreline_list = []
total_particles_list = []
all_bin_counts = []

# (longitude, latitude) of every in-buffer point, pooled over all months
all_coords = []

# Single pass over the months: the spatial join is expensive, so the statistics
# and the coordinates for the hexbin are gathered together.
# Note: the loop variable rebinds the module-level name `combined_gdf`, and the
# monthly frames are modified in place (index reset, 'time' converted).
for month, combined_gdf in monthly_data.items():
    # Ensure 'time' is a column and convert to datetime
    if 'time' in combined_gdf.index.names:
        combined_gdf.reset_index(inplace=True)
    combined_gdf['time'] = pd.to_datetime(combined_gdf['time'])

    # Spatial join: keep the points that intersect the coastal buffer
    points_within_shoreline = gpd.sjoin(combined_gdf, buffer_gdf[['geometry']], how='inner', predicate='intersects')

    # Ensure 'time' is a column in the intersections and convert to datetime
    if 'time' in points_within_shoreline.index.names:
        points_within_shoreline.reset_index(inplace=True)
    points_within_shoreline['time'] = pd.to_datetime(points_within_shoreline['time'])

    # Order the intersections chronologically
    points_within_shoreline = points_within_shoreline.sort_values(by='time')

    # Rebuild the geometry from the longitude/latitude columns.
    # NOTE(review): these are tagged with the buffer CRS although the values come
    # from the 'longitude'/'latitude' columns — only .x/.y are used below; confirm
    # before using this frame for spatial operations.
    geometry = gpd.points_from_xy(points_within_shoreline['longitude'], points_within_shoreline['latitude'], crs=buffer_gdf_crs)
    first_intersections_shoreline = gpd.GeoDataFrame(points_within_shoreline, geometry=geometry, crs=buffer_gdf_crs)

    all_coords.extend(zip(first_intersections_shoreline.geometry.x, first_intersections_shoreline.geometry.y))

    # Distinct group_ids with at least one point inside the buffer this month
    total_particles_intersecting_buffer += first_intersections_shoreline['group_id'].nunique()

    # Distinct group_ids tracked this month
    total_particles_tracked += combined_gdf['group_id'].nunique()
    total_particles_list.append(combined_gdf.shape[0])
    points_within_shoreline_list.append(first_intersections_shoreline.shape[0])

# Average number of group_ids entering the shoreline buffer per month
average_particles_entering_shoreline = total_particles_intersecting_buffer / n_months

# Share of tracked group_ids that reach the shoreline buffer (NaN when nothing was tracked)
if total_particles_tracked:
    percentage_particles_remain_in_shoreline = (total_particles_intersecting_buffer / total_particles_tracked) * 100
else:
    percentage_particles_remain_in_shoreline = float('nan')

print(f'Average number of particles entering the shoreline: {average_particles_entering_shoreline}')
print(f'Percentage of particles from total trajectories that remain in the shoreline: {percentage_particles_remain_in_shoreline:.2f}%')

font_size = 15
cmap = colourmap('h_r')

# Create the figure and axis with FVCOM plotter
fig, ax = create_figure(figure_size=(26., 26.), projection=ccrs.PlateCarree(), font_size=font_size, bg_color='gray')

# Load bathymetry data; the context manager closes the file even if reading fails
with Dataset(grid_metrics_file_name, 'r') as ds:
    bathy = -ds.variables['h'][:]

# Configure plotter and draw the bathymetry
plotter = FVCOMPlotter(grid_metrics_file_name, geographic_coords=True, font_size=font_size)
extents = np.array([275, 277.69, 43, 46.3], dtype=float )
ax, plot = plotter.plot_field(ax, bathy, extents=extents, add_colour_bar=True, cb_label='Depth (m)', vmin=-60., vmax=0., cmap=blue_cmap_reversed, zorder= 0)

# Overlay grid
plotter.draw_grid(ax, linewidth=1.0)

if all_coords:
    xs, ys = (np.asarray(axis_values, dtype=float) for axis_values in zip(*all_coords))

    # Monthly average per bin = (sum of the monthly counts) / n_months. Binning
    # the pooled points once, each weighted 1/n_months and summed per bin, yields
    # exactly that on ONE grid. The previous approach binned every month on its
    # own data-dependent grid and added the counts by array index, which mixes
    # unrelated locations, and left the temporary layers drawn over the result.
    month_weight = np.full(xs.shape, 1.0 / n_months)
    hb = ax.hexbin(xs, ys, C=month_weight, reduce_C_function=np.sum,
                   gridsize=100, cmap=pink_cmap, norm=LogNorm(), zorder=40)

    # Bin centres and their average / summed counts (non-empty bins only)
    hexbin_grid = hb.get_offsets()
    average_bin_counts = np.ma.filled(hb.get_array(), 0.0)
    accumulated_bin_counts = average_bin_counts * n_months

    # Print the average number of particles in each bin
    print("Average number of particles in each bin:")
    for i, count in enumerate(average_bin_counts):
        print(f"Bin {i}: {count:.2f} particles")

    # Add a colorbar
    cbar = fig.colorbar(hb, ax=ax, pad=0.09)
    cbar.set_label('Average Number of Particles in each bin', fontsize=15)
else:
    # hexbin cannot be called without data; keep the bathymetry-only figure
    average_bin_counts = np.zeros(0)
    print('No particle positions fall inside the shoreline buffer; skipping the hexbin overlay.')

# Optional title, currently disabled
#ax.set_title('Average number of particles returning to the shoreline over Winter 2023')

# Add x and y labels
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Adjust layout to make space for colorbar
plt.subplots_adjust(right=1)

# Add legend
plt.legend()

# Show the plot
plt.show()

#### Part 3-3-1: In this part we are going to calculate when the particles first hit the shoreline

In [None]:
# Read the 'h' variable from the grid-metrics file and negate it.
# The context manager closes the file even if the read fails; `ds` stays
# bound to the (closed) dataset afterwards, as it was before.
with Dataset(grid_metrics_file_name, 'r') as ds:
    bathy = -ds.variables['h'][:]

In [None]:
# Display the path of the grid-metrics file currently in use
grid_metrics_file_name

In [None]:
# Print the column names of points_within_shoreline
print(points_within_shoreline.columns)


In [None]:
# Sort the shoreline intersections by group_id, then by time within each group_id
points_within_shoreline = points_within_shoreline.sort_values(by=['group_id', 'time'])

In [None]:
# Group by (group_id, time) and keep the first row of each group, i.e. one
# row per group_id at each time step.
# NOTE(review): because 'time' is part of the grouping key, this does not
# reduce each group_id to its single earliest intersection. If that is the
# goal, grouping by 'group_id' alone on the time-sorted table would do it —
# confirm the intent.
first_intersections = points_within_shoreline.groupby(['group_id','time']).first().reset_index()

In [None]:
# Display the first_intersections table
first_intersections

In [None]:
# Save first_intersections to a CSV file (index=False is not passed, so the
# index is written as the first column)
first_intersections.to_csv('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/output/earliest_intersections.csv')

In [None]:
# Select the rows of points_within_shoreline with group_id 2
# (the result of this bare expression is not assigned to anything)
points_within_shoreline[points_within_shoreline['group_id'] == 2]
# Plot the rows of points_within_shoreline with group_id 2
points_within_shoreline[points_within_shoreline['group_id'] == 2].plot()


In [None]:
# Alias the table of points within the shoreline. Despite its name, this
# variable holds the whole table, not a count; the count is .shape[0] below.
num_points_within_shoreline = points_within_shoreline

# Print the number of rows (points) within the shoreline
print(f"Number of points within shoreline: {num_points_within_shoreline.shape[0]}")
# Print the total number of rows in combined_gdf
print(f"Total number of particle points: {combined_gdf.shape[0]}")
# NOTE(review): this prints the entire table rather than a number — confirm intended
print(f"points within shoreline: {num_points_within_shoreline}")

# Print longitude, latitude and group_id of the points within the shoreline
print(points_within_shoreline[['longitude', 'latitude', 'group_id']])

In [None]:
# Alias the earliest-intersection table. Despite its name, this variable
# holds the whole table, not a count; the count is .shape[0] below.
# NOTE(review): `earliest_intersection` is not defined in the visible part of
# this notebook (the nearby cell creates `first_intersections`) — confirm
# that it exists before running this cell.
num_points_within_shoreline = earliest_intersection

# Print the number of rows in the earliest-intersection table
print(f"Number of points within shoreline: {num_points_within_shoreline.shape[0]}")
# Print the total number of rows in combined_gdf
print(f"Total number of particle points: {combined_gdf.shape[0]}")
# NOTE(review): this prints the entire table rather than a number — confirm intended
print(f"points within shoreline: {num_points_within_shoreline}")

# Print longitude, latitude and group_id of the points within the shoreline
print(points_within_shoreline[['longitude', 'latitude', 'group_id']])

In [None]:
# Bar chart comparing the number of rows in combined_gdf with the number of
# rows that fall within the shoreline (linear y-axis)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5, 5))
bar_labels = ['Total Trajectories', 'Trajectories within Shoreline']
bar_heights = [combined_gdf.shape[0], num_points_within_shoreline.shape[0]]
ax.bar(bar_labels, bar_heights)
ax.set_ylabel('Total number of trajectories')
ax.set_title('Number of trajectories within Shoreline Jan 2023')
plt.show()

In [None]:
# Make a seaborn plot showing all the data points versus the points that fall within the polygons
import seaborn as sns
import matplotlib.pyplot as plt

# Overlay two scatter layers on one large axes: every row of combined_gdf,
# then the points within the shoreline drawn as larger 'x' markers
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 20))
scatter_style = dict(x='longitude', y='latitude', hue='group_id', palette='tab20', ax=ax)
sns.scatterplot(data=combined_gdf, **scatter_style)
sns.scatterplot(data=points_within_shoreline, marker='x', s=100, **scatter_style)

# Title of the comparison plot
ax.set_title('DParticle tracking Points vs Points Within Polygons')

# Two-column legend anchored at the upper left
ax.legend(loc='upper left', ncol=2)

plt.show()

### Part 3-5: Finding the intersection of particle tracking and coastal wetlands

In [None]:
# Load the Lake Huron shapefile (the basin shapefile read is left disabled)
#basin = gpd.read_file('/home/abolmaal/data/FVCOME_OUTPUT/shapefiles/Basins/glbasins_gen.shp')
lake = gpd.read_file('/home/abolmaal/data/FVCOME_OUTPUT/shapefiles/Basins/hydro_p_LakeHuron/hydro_p_LakeHuron_84.shp')
# Print the CRS of the lake layer (basin CRS print is disabled with its read)
#print(basin.crs)
print(lake.crs)

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, LogNorm

# Monthly particle GeoDataFrames keyed by month label (extend as needed)
monthly_data = dict(Jan=combined_gdf_Jan, Feb=combined_gdf_Feb, Mar=combined_gdf_Mar)

# Empty per-polygon particle-count table (columns: index_right, count)
all_particle_counts = pd.DataFrame(columns=['index_right', 'count'])

# Blue colour ramp and the colormap interpolated from it
blue_shades = [
    '#f7fbff', '#deebf7', '#c6dbef', '#9ecae1',
    '#6baed6', '#4292c6', '#2171b5', '#084594',
]
blue_cmap = LinearSegmentedColormap.from_list('custom_blue', blue_shades)

# Running totals and per-month tallies filled in by the monthly loop
total_particles_intersecting_CW = total_particles_tracked = 0
points_within_CW_list, total_particles_list = [], []

# For each month: find the points that intersect a coastal-wetland geometry,
# collapse them with groupby('geometry').first(), and update the running totals
for month, combined_gdf in monthly_data.items():
    # Ensure 'time' is a column and convert to datetime
    # (reset_index(inplace=True) modifies the frame stored in monthly_data)
    if 'time' in combined_gdf.index.names:
        combined_gdf.reset_index(inplace=True)
    combined_gdf['time'] = pd.to_datetime(combined_gdf['time'])
    
    # Perform spatial join to find which points fall within the coastal wetland
    # (inner join: only points intersecting a CW_geo geometry are kept)
    points_within_CW = gpd.sjoin(combined_gdf, CW_geo[['geometry']], how='inner', predicate='intersects')
    
    # Ensure 'time' is a column in the intersections and convert to datetime
    if 'time' in points_within_CW.index.names:
        points_within_CW.reset_index(inplace=True)
    points_within_CW['time'] = pd.to_datetime(points_within_CW['time'])
    
    # Sort the intersections by time so that .first() below sees rows in time order
    points_within_CW= points_within_CW.sort_values(by='time')
    
    # Keep the first row for every distinct point geometry
    # NOTE(review): the grouping key is the geometry, not group_id, so this
    # does not reduce each particle to its first wetland intersection —
    # confirm the intent
    first_intersections_CW = points_within_CW.groupby('geometry').first().reset_index()
    
    # Count the retained rows per value of index_right (the joined wetland polygon)
    # NOTE(review): particle_counts is not used again in this cell and
    # all_particle_counts is never filled — confirm
    particle_counts = first_intersections_CW['index_right'].value_counts().reset_index()
    particle_counts.columns = ['index_right', 'count']
    
    # Add this month's number of distinct group_id values that intersect a coastal wetland
    total_particles_intersecting_CW += first_intersections_CW['group_id'].nunique()
    
    # Add this month's number of distinct group_id values tracked, and record
    # the row counts of the full table and of the retained intersections
    total_particles_tracked += combined_gdf['group_id'].nunique()
    total_particles_list.append(combined_gdf.shape[0])
    points_within_CW_list.append(first_intersections_CW.shape[0])
# Mean, per month, of the distinct group_id values that intersect a coastal wetland
average_particles_entering_CW = total_particles_intersecting_CW / len(monthly_data)

# Share of distinct tracked group_id values that intersect a coastal wetland,
# as a percentage. The ratio has to be scaled by 100 because it is printed
# with a '%' suffix below; without the scaling a 25% share printed as "0.25%".
percentage_particles_remain_in_CW = (total_particles_intersecting_CW / total_particles_tracked) * 100

print(f'Average number of particles entering the Coastal Wetland: {average_particles_entering_CW}')
print(f'Percentage of particles from total trajectories that remain in the Coastal Wetland: {percentage_particles_remain_in_CW:.2f}%')


# Plot a hexbin density map of the coastal-wetland intersection points
fig, ax = plt.subplots(figsize=(10, 8))

# Create the hexbin plot (point counts per hexagon on a log colour scale)
# NOTE(review): first_intersections_CW is whatever the last loop iteration
# left behind, so only the final month is drawn even though the title and
# colorbar say "Average" — confirm the intent
hb = ax.hexbin(first_intersections_CW['longitude'], first_intersections_CW['latitude'], gridsize=30, cmap=blue_cmap, norm=LogNorm())
# Optional overlay of the buffer boundary (disabled)
#buffer_gdf.boundary.plot(ax=ax, edgecolor='black')
# plot the lake_gdf based on latitude and longitude in the geoDataFrame
  
# Set the plot title
ax.set_title('Average number of particles returning to the coastal wetland over Winter 2023')

# Add a colorbar
cbar = fig.colorbar(hb)
cbar.set_label('Average Number of Particles')
# Set the background colour of the current axes
# NOTE(review): this targets plt.gca(), which is presumably `ax` — confirm
# that creating the colorbar has not changed the current axes
plt.gca().set_facecolor('gainsboro')
# Save the figure to the results directory before showing it
plt.savefig('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/results/average_number_of_particles_entering_coastal_wetland.png')
# Show the plot
plt.show()


In [None]:
import os

# Directory holding the FVCOM data for 202301. The path is a plain literal:
# the former `.format(os.environ['HOME'])` had no placeholder to fill and
# only made the cell fail with KeyError when HOME was unset.
data_dir = '/home/abolmaal/data/FVCOME_OUTPUT/Gldata/FVCOMEDATA/202301'

# Current working directory (not used by the absolute paths below)
cwd = os.getcwd()

# Run directory of the Lake Huron simulation
simulation_dir = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron'

# Input sub-directory. makedirs creates the missing parents too, so this one
# call also creates simulation_dir; exist_ok=True makes re-running the cell
# a no-op when the directories already exist.
input_dir = '{}/input'.format(simulation_dir)
os.makedirs(input_dir, exist_ok=True)

# Grid-metrics file used by the bathymetry plots
grid_metrics_file_name = f'{input_dir}/gridfile/grid_metrics_huron_senseflux_Seasonal_winter.nc'


In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, LogNorm
import numpy as np
from pylag.processing.plot import FVCOMPlotter
from pylag.processing.plot import create_figure, colourmap
import cartopy.crs as ccrs
from netCDF4 import Dataset
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from pylag.regrid import regridder
from pylag.processing.ncview import Viewer
from pylag.processing.plot import FVCOMPlotter
from pylag.processing.plot import create_figure
from pylag.processing.utils import get_grid_bands

# Monthly particle GeoDataFrames keyed by month label (extend as needed)
monthly_data = dict(Jan=combined_gdf_Jan, Feb=combined_gdf_Feb, Mar=combined_gdf_Mar)

# CRS of the coastal-wetland layer, reused when point geometries are rebuilt
CW_geo_crs = CW_geo.crs

# Empty per-polygon particle-count table (columns: index_right, count)
all_particle_counts = pd.DataFrame(columns=['index_right', 'count'])

# Pink colour ramp and the colormap interpolated from it
pink_shades = [
    '#fff5f7', '#ffebf0', '#ffd6e1', '#ffbfd4', '#ff99c1',
    '#ff6ea9', '#ff4c92', '#ff2171', '#b50d4e',
]
pink_cmap = LinearSegmentedColormap.from_list('custom_pink', pink_shades)

# Blue colour ramp, its reversed copy, and the colormap built from the reversed copy
blue_shades = [
    '#f7fbff', '#deebf7', '#c6dbef', '#9ecae1',
    '#6baed6', '#4292c6', '#2171b5', '#084594',
]
blue_shades_reversed = list(reversed(blue_shades))
blue_cmap_reversed = LinearSegmentedColormap.from_list('custom_blue_reversed', blue_shades_reversed)

# Running totals and per-month tallies filled in by the monthly loops
total_particles_intersecting_CW = total_particles_tracked = 0
points_within_CW_list, total_particles_list = [], []

# Per-month hexbin counts, and the coordinates of all months used to build
# one common hexbin grid
all_bin_counts, all_coords = [], []

# First pass over the months: gather the coordinates of every point that
# intersects a coastal wetland, so that one hexbin grid can cover all months
for month, combined_gdf in monthly_data.items():
    # Ensure 'time' is a column and convert to datetime
    # (reset_index(inplace=True) modifies the frame stored in monthly_data)
    if 'time' in combined_gdf.index.names:
        combined_gdf.reset_index(inplace=True)
    combined_gdf['time'] = pd.to_datetime(combined_gdf['time'])
    
    # Perform spatial join to find which points fall within the coastal wetland
    # (inner join: only points intersecting a CW_geo geometry are kept)
    points_within_CW = gpd.sjoin(combined_gdf, CW_geo[['geometry']], how='inner', predicate='intersects')
    
    # Ensure 'time' is a column in the intersections and convert to datetime
    if 'time' in points_within_CW.index.names:
        points_within_CW.reset_index(inplace=True)
    points_within_CW['time'] = pd.to_datetime(points_within_CW['time'])
    
    # Sort the intersecting points by time
    points_within_CW = points_within_CW.sort_values(by='time')
    
    # Rebuild the point geometry from the longitude/latitude columns in the
    # coastal-wetland CRS.
    # NOTE(review): despite the name, no "first intersection" selection happens
    # here — every intersecting row is kept; confirm the intent
    geometry = gpd.points_from_xy(points_within_CW['longitude'], points_within_CW['latitude'], crs=CW_geo_crs)
    first_intersections_CW = gpd.GeoDataFrame(points_within_CW, geometry=geometry, crs=CW_geo_crs)
    
    # Collect this month's (x, y) pairs for the common hexbin grid
    all_coords.extend(zip(first_intersections_CW.geometry.x, first_intersections_CW.geometry.y))

font_size = 15
# NOTE: this colour map is not used below; the bathymetry is drawn with
# blue_cmap_reversed
cmap = colourmap('h_r')

# Create the figure and axis with FVCOM plotter
fig, ax = create_figure(figure_size=(26., 26.), projection=ccrs.PlateCarree(), font_size=font_size, bg_color='gray')

# Grid-metrics file that holds the bathymetry
grid_metrics_file_name = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/input/gridfile/grid_metrics_huron_senseflux_Seasonal_winter.nc'

# Read the 'h' variable and negate it. The context manager closes the file
# even if the read fails; `ds` stays bound to the (closed) dataset afterwards.
with Dataset(grid_metrics_file_name, 'r') as ds:
    bathy = -ds.variables['h'][:]

# Configure plotter and draw the bathymetry field beneath everything else (zorder=0)
plotter = FVCOMPlotter(grid_metrics_file_name, geographic_coords=True, font_size=font_size)
extents = np.array([275, 277.69, 43, 46.3], dtype=float)
ax, plot = plotter.plot_field(ax, bathy, extents=extents, add_colour_bar=True, cb_label='Depth (m)', vmin=-60., vmax=0., cmap=blue_cmap_reversed, zorder=0)

# Overlay grid
plotter.draw_grid(ax, linewidth=1.0)

# Reference hexbin over the coordinates of all months; its hexagon centres
# (offsets) define the grid on which the monthly counts are accumulated
hb = ax.hexbin(*zip(*all_coords), gridsize=100, cmap=pink_cmap, norm=LogNorm())
hexbin_grid = hb.get_offsets()

# One accumulator slot per hexagon of the reference grid
accumulated_bin_counts = np.zeros(len(hexbin_grid))

# Pin every monthly hexbin to the extent of the reference grid. hexbin takes
# its limits from the plotted data when no extent is given, so without this
# each month would get its own lattice and bin i would not be the same
# hexagon from one month to the next. The extent is read back from the
# reference hexagon centres (hexbin_grid) so that it is expressed in the same
# coordinates the reference hexbin actually used.
reference_centres = np.asarray(hexbin_grid)
shared_extent = (
    reference_centres[:, 0].min(), reference_centres[:, 0].max(),
    reference_centres[:, 1].min(), reference_centres[:, 1].max(),
)

# Second pass over the months: bin each month's intersecting points on the
# shared grid and add the per-bin counts to the accumulator
for month, combined_gdf in monthly_data.items():
    # Ensure 'time' is a column and convert to datetime
    if 'time' in combined_gdf.index.names:
        combined_gdf.reset_index(inplace=True)
    combined_gdf['time'] = pd.to_datetime(combined_gdf['time'])

    # Perform spatial join to find which points fall within the coastal wetland
    points_within_CW = gpd.sjoin(combined_gdf, CW_geo[['geometry']], how='inner', predicate='intersects')

    # Ensure 'time' is a column in the intersections and convert to datetime
    if 'time' in points_within_CW.index.names:
        points_within_CW.reset_index(inplace=True)
    points_within_CW['time'] = pd.to_datetime(points_within_CW['time'])

    # Sort the intersecting points by time
    points_within_CW = points_within_CW.sort_values(by='time')

    # Rebuild the point geometry from longitude/latitude in the wetland CRS
    geometry = gpd.points_from_xy(points_within_CW['longitude'], points_within_CW['latitude'], crs=CW_geo_crs)
    first_intersections_CW = gpd.GeoDataFrame(points_within_CW, geometry=geometry, crs=CW_geo_crs)

    # Bin this month's points on the shared grid
    hb = ax.hexbin(first_intersections_CW.geometry.x, first_intersections_CW.geometry.y, gridsize=100, extent=shared_extent, cmap=pink_cmap, norm=LogNorm(), reduce_C_function=np.sum, zorder=40)
    bin_counts = np.asarray(hb.get_array(), dtype=float)

    # Fail loudly instead of summing misaligned bins
    if bin_counts.shape != accumulated_bin_counts.shape:
        raise ValueError(
            f"Hexbin for {month} has {bin_counts.shape[0]} bins, "
            f"expected {accumulated_bin_counts.shape[0]}"
        )
    accumulated_bin_counts += bin_counts

    # Add this month's number of distinct group_id values that intersect a coastal wetland
    total_particles_intersecting_CW += first_intersections_CW['group_id'].nunique()

    # Add this month's number of distinct group_id values tracked, and record
    # the row counts of the full table and of the intersecting points
    total_particles_tracked += combined_gdf['group_id'].nunique()
    total_particles_list.append(combined_gdf.shape[0])
    points_within_CW_list.append(first_intersections_CW.shape[0])

# Average the accumulated per-bin counts over the number of months
average_bin_counts = accumulated_bin_counts / len(monthly_data)

# Mean, per month, of the distinct group_id values that intersect a coastal wetland
average_particles_entering_CW = total_particles_intersecting_CW / len(monthly_data)

# Share of distinct tracked group_id values that intersect a coastal wetland, in percent
# NOTE(review): raises ZeroDivisionError if total_particles_tracked is 0
percentage_particles_remain_in_CW = (total_particles_intersecting_CW / total_particles_tracked) * 100

print(f'Average number of particles entering the Coastal Wetland: {average_particles_entering_CW}')
print(f'Percentage of particles from total trajectories that remain in the Coastal Wetland: {percentage_particles_remain_in_CW:.2f}%')

# Draw a new hexbin layer from the reference hexagon centres (hexbin_grid),
# weighted by the average bin counts (C=...), on a log colour scale
hb = ax.hexbin(*zip(*hexbin_grid), gridsize=100, C=average_bin_counts, cmap=pink_cmap, norm=LogNorm(), zorder=40)

# Print the average number of particles in each bin (one line per bin)
print("Average number of particles in each bin:")
for i, count in enumerate(average_bin_counts):
    print(f"Bin {i}: {count:.2f} particles")

# Title is currently disabled
#ax.set_title('Average number of particles returning to the coastal wetland over Winter 2023')

# Add x and y labels
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Add a colorbar for the averaged hexbin layer
cbar = fig.colorbar(hb, ax=ax, pad=0.09)
cbar.set_label('Average Number of Particles in each bin', fontsize=15)

# Extend the subplot area to the right edge of the figure
plt.subplots_adjust(right=1)

# Add legend
# NOTE(review): no artist in this cell is given a label, so this legend may
# come out empty — confirm it is needed
plt.legend()

# Show the plot
plt.show()


In [None]:
# Blue colour ramp and the colormap interpolated from it
blue_shades = ['#f7fbff', '#deebf7', '#c6dbef', '#9ecae1', '#6baed6', '#4292c6', '#2171b5', '#084594']
blue_cmap = LinearSegmentedColormap.from_list('custom_blue', blue_shades)

# Create the axes explicitly and hand it to the plotting accessor. Without
# ax=..., the DataFrame plotting accessor opens a figure of its own, so a
# figure created beforehand with plt.figure(figsize=...) stays empty and the
# requested 20x18 size is never applied to the hexbin.
fig, ax = plt.subplots(figsize=(20, 18))

# Hexbin density of the coastal-wetland start positions (log colour scale)
CW_geo.plot.hexbin(x='Start_Lon', y='Start_Lat', gridsize=30, cmap=blue_cmap, norm=colors.LogNorm(), ax=ax)

# Style the hexbin axes directly instead of whatever axes happens to be current
ax.set_facecolor('lightsteelblue')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# Save the figure to the results directory, then display it
fig.savefig('/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/results/CoastalWetlandsDensity.png')
plt.show()


In [None]:
# Open the grid-metrics file and display the dataset
# NOTE(review): ds is not closed in this cell — confirm that leaving it open
# for interactive inspection is intended
grid_metrics_file_name = f'{input_dir}/gridfile/grid_metrics_huron_senseflux_Seasonal_winter.nc'
ds = Dataset(grid_metrics_file_name, 'r')
ds

In [None]:
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, LogNorm
from netCDF4 import Dataset
import cartopy.crs as ccrs

# Pink colour ramp and the colormap interpolated from it
pink_shades = [
    '#fff5f7', '#ffebf0', '#ffd6e1', '#ffbfd4', '#ff99c1',
    '#ff6ea9', '#ff4c92', '#ff2171', '#b50d4e',
]
pink_cmap = LinearSegmentedColormap.from_list('custom_pink', pink_shades)

# Blue colour ramp, its reversed copy, and the colormap built from the reversed copy
blue_shades = [
    '#f7fbff', '#deebf7', '#c6dbef', '#9ecae1',
    '#6baed6', '#4292c6', '#2171b5', '#084594',
]
blue_shades_reversed = list(reversed(blue_shades))
blue_cmap_reversed = LinearSegmentedColormap.from_list('custom_blue_reversed', blue_shades_reversed)

# Green colour ramp and the colormap interpolated from it
green_shades = [
    '#e8f5e9', '#c8e6c9', '#a5d6a7', '#81c784', '#66bb6a',
    '#4caf50', '#43a047', '#388e3c', '#2e7d32',
]
green_cmap = LinearSegmentedColormap.from_list('custom_green', green_shades)

# Font size for the figure and the pylag 'h_r' colour map
font_size = 15
cmap = colourmap('h_r')

# Create the figure and axis with FVCOM plotter
fig, ax = create_figure(figure_size=(26., 26.), projection=ccrs.PlateCarree(), font_size=font_size, bg_color='gray')

# Grid-metrics file that holds the bathymetry
grid_metrics_file_name = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/input/gridfile/grid_metrics_huron_senseflux_Seasonal_winter.nc'

# Read the 'h' variable and negate it. The context manager closes the file
# even if the read fails; `ds` stays bound to the (closed) dataset afterwards.
with Dataset(grid_metrics_file_name, 'r') as ds:
    bathy = -ds.variables['h'][:]

# Configure plotter
plotter = FVCOMPlotter(grid_metrics_file_name,
                       geographic_coords=True,
                       font_size=font_size)

# Plot bathymetry beneath everything else (zorder=0)
# Alternative extents kept for reference:
#extents = np.array([-84.10,-84.20, 45.58,45.65], dtype=float)
#extents = np.array([-84,-81.3, 43,46], dtype=float)
# Lake Huron Ausable Point
extents = np.array([275, 277.69, 43, 46.3], dtype=float)
ax, plot = plotter.plot_field(ax, bathy, extents=extents, add_colour_bar=True, cb_label='Depth (m)',
                              vmin=-60., vmax=0., cmap=blue_cmap_reversed, zorder=0)

# Overlay grid
plotter.draw_grid(ax, linewidth=1.0)

# Hexbin density of the coastal-wetland start positions (log colour scale),
# drawn above the bathymetry (zorder=40)
hb = ax.hexbin(CW_geo['Start_Lon'], CW_geo['Start_Lat'], gridsize=100, cmap=green_cmap, norm=LogNorm(), zorder=40)

# Add a colorbar for the wetland density
cbar = fig.colorbar(hb, ax=ax, pad=0.09)
cbar.set_label('Coastal Wetland Density', fontsize=15)

# Extend the subplot area to the right edge of the figure
plt.subplots_adjust(right=1)

# Add legend
# NOTE(review): no artist in this cell is given a label, so this legend may
# come out empty — confirm it is needed
plt.legend()

# Show the plot
plt.show()
