# Introduction 

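- The following code reads the FVCOM particle-tracking outputs and extracts the `group_id` of every particle.
- Within each `group_id`, the particles are numbered in file order (0, 1, 2, ...).
- A new variable called `group_number` is added to the data; it combines the `group_id` with the particle's number inside that group.
- In `group_number`, the first digits are the `group_id` (zero-padded to at least 2 digits) and the remaining digits are the particle number (zero-padded to at least 3 digits).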


## Import Libraries

In [None]:
# Importing libraries
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr


# Input Files

In [None]:
# Directory that holds the FVCOM model outputs.
# Set the FVCOM_DIR environment variable to run this notebook on another machine;
# when it is unset, the original hard-coded location is used.
FVCOM_dir = os.environ.get('FVCOM_DIR', '/home/abolmaal/modelling/FVCOM/Huron/output')


# Helper Functions 

In [7]:
# Sort key for output files: the integer index embedded in the file name
# (e.g. '..._Jan__1.nc' -> 1, '..._Oct__10.nc' -> 10)
def sort_key(file):
    """
    Return the integer found between the first '__' and the following '.'
    in the base name of `file`.

    Raises IndexError when the base name contains no '__', and ValueError
    when the text after it is not an integer.
    """
    name_parts = os.path.basename(file).split('__')
    index_text, _, _ = name_parts[1].partition('.')
    return int(index_text)

In [18]:
# Helpers that add a per-particle `group_number` label to FVCOM particle-tracking output.
# The label is the particle's group_id followed by its running index inside that group
# (0, 1, 2, ... in file order), each part zero-padded to a minimum width.

def _format_group_numbers(group_ids, id_width=2, number_width=3):
    """
    Build the group_number label for every particle.

    Parameters:
    - group_ids (array-like): group_id of each particle, in file order.
    - id_width (int): Minimum number of digits of the group_id part.
    - number_width (int): Minimum number of digits of the within-group index part.

    Returns:
    - numpy.ndarray (dtype=object): One string label per particle.
    """
    raw_ids = pd.Series(np.ravel(group_ids))
    # cumcount() numbers the rows of each group 0..n-1 in order of appearance;
    # it is a running index, not the size of the group.
    index_in_group = raw_ids.groupby(raw_ids).cumcount()
    # Vectorized zero-padding (replaces a slow row-by-row DataFrame.apply).
    # The widths are minimums: longer values are not truncated, so labels can
    # be longer than id_width + number_width and are then not uniquely decodable.
    id_part = raw_ids.astype(int).astype(str).str.zfill(id_width)
    number_part = index_in_group.astype(str).str.zfill(number_width)
    return (id_part + number_part).to_numpy(dtype=object)


def write_groupnumber(files, data_dir, id_width=2, number_width=3):
    """
    Writes a copy of each NetCDF file with an added 'group_number' variable derived from 'group_id'.
    Each copy is saved in `data_dir` under the original file name prefixed with 'updated_'.

    Parameters:
    - files (list of str): List of paths to NetCDF files to be processed.
    - data_dir (str): Directory where the updated NetCDF files will be saved
      (created if it does not exist).
    - id_width (int): Minimum number of digits used for the group_id part (default 2).
    - number_width (int): Minimum number of digits used for the particle index part (default 3).
    """
    # An empty data_dir means "current directory" for os.path.join, so only
    # create the directory when a real path was given.
    if data_dir:
        os.makedirs(data_dir, exist_ok=True)

    for file in files:
        # The context manager closes the source file even if processing fails
        with xr.open_dataset(file) as ds:
            labels = _format_group_numbers(ds['group_id'].values, id_width, number_width)

            # Attach the labels along the 'particles' dimension
            ds['group_number'] = ('particles', labels)

            output_file_path = os.path.join(data_dir, f"updated_{os.path.basename(file)}")
            ds.to_netcdf(output_file_path)

        print(f"Updated NetCDF file saved: {output_file_path}")


# Main Functions

In [19]:
# Collect the FVCOM output files and order them by the index in their file names
files = sorted(
    glob.glob(f"{FVCOM_dir}/Fvcome_huron_estuary_2023_Winter_*.nc"),
    key=sort_key,
)
print(files)


['/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Jan__1.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Feb__2.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Mar__3.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Apr__4.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_May__5.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Jun__6.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_July__7.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Aug__8.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Sep__9.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Oct__10.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcome_huron_estuary_2023_Winter_Nov__11.nc', '/home

In [20]:
# Write an "updated_" copy of every input file, with group_number added, into FVCOM_dir
write_groupnumber(files=files, data_dir=FVCOM_dir)


Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Jan__1.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Feb__2.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Mar__3.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Apr__4.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_May__5.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Jun__6.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_July__7.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcome_huron_estuary_2023_Winter_Aug__8.nc
Updated NetCDF file sav

In [None]:
# Sanity check: confirm that group_number was written to the updated NetCDF files
# and that its values appear in the expected order.
updated_files = sorted(
    glob.glob(f"{FVCOM_dir}/updated_Fvcome_huron_estuary_2023_Winter_*.nc"),
    key=sort_key,
)
# Alternative that opens every file at once:
# ds = xr.open_mfdataset(updated_files, combine='by_coords')

# Open only the first updated file; `ds` is reused by the cells below
ds = xr.open_dataset(updated_files[0])
# Show the stored group_number values
print(ds['group_number'].values)

['00000' '00001' '00002' ... '147045' '147046' '147047']


In [23]:
# Show the dataset opened above as the cell output
ds

In [22]:
# Export the group_id / group_number pairs of the opened dataset to a CSV file.

# Flatten group_id into a table with one row per particle
particle_table = ds['group_id'].to_dataframe().reset_index()

# Attach the stored labels, then re-format each one as an integer
# zero-padded to at least 5 digits
particle_table['group_number'] = ds['group_number'].values
particle_table['group_number'] = particle_table['group_number'].map(
    lambda label: f"{int(label):05}"
)

# Preview for debugging (still includes the particle index column)
print("First few rows of netcdf_df:")
print(particle_table.head())

# Keep only the two columns that go into the CSV
netcdf_df = particle_table[['group_id', 'group_number']]

# Write the table next to the model outputs, without the row index
csv_path = os.path.join(FVCOM_dir, 'group_id_group_number.csv')
netcdf_df.to_csv(csv_path, index=False)

print("CSV file saved with formatted group numbers")



First few rows of netcdf_df:
   particles  group_id group_number
0          0         0        00000
1          1         0        00001
2          2         0        00002
3          3         0        00003
4          4         0        00004
CSV file saved with formatted group numbers
