# Introduction 

- The following code reads FVCOM particle-tracking outputs and extracts each particle's `group_id`.
- It numbers the particles within each `group_id` (0, 1, 2, ...).
- It adds a new variable called `group_number` to the data that combines the `group_id` with the particle's number within that group.
- In `group_number`, the leading digits are the `group_id` and the remaining digits are the particle number.


## Import Libraries

In [2]:
# Importing libraries
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr


# Input Files

In [None]:
# Directory that holds the FVCOM model outputs
FVCOM_dir = '/home/abolmaal/modelling/FVCOM/Huron/output'
# Collect every FVCOM output file in that directory that matches the naming pattern
# (the order returned by glob is not guaranteed; the files are sorted later)
files = list(
    glob.iglob(os.path.join(FVCOM_dir, 'Fvcom_Huron_23_*.nc'))
)


['/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Mar__3.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_May__5.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Jun__6.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Oct__10.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Jan__1.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Sep__9.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Aug__8.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Nov__11.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_DecJan_1.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Feb__2.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Jul__7.nc']


# Output Files

In [14]:
# Month abbreviations used to build the expected output filenames
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


# One "updated_" filename per month (numbered 1-12), followed by the
# December/January file, which is added explicitly because its name uses a
# single underscore before the number
output_files = [
    os.path.join(FVCOM_dir, f'updated_Fvcom_Huron_23_{month_name}__{month_number}.nc')
    for month_number, month_name in enumerate(months, start=1)
] + [os.path.join(FVCOM_dir, 'updated_Fvcom_Huron_23_DecJan_1.nc')]


In [15]:
# Show every expected output path, one per line
for output_path in output_files:
    print(output_path)

/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Jan__1.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Feb__2.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Mar__3.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Apr__4.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_May__5.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Jun__6.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Jul__7.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Aug__8.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Sep__9.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Oct__10.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Nov__11.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Dec__12.nc
/home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_DecJan

# Helper Functions 

In [None]:
# # Earlier draft of sort_key, kept for reference only; it raises on filenames without '__' (e.g. the DecJan file)
# def sort_key(file):
#     filename = os.path.basename(file)
#     number = int(filename.split('__')[1].split('.')[0])
#     return number

In [8]:
# Define the sort key: order the files by the month number embedded in the filename
def sort_key(file):
    """
    Return the sort position of an FVCOM output file.

    The position is the integer found between the double underscore and the
    first following dot in the file's base name (e.g. ``..._Mar__3.nc`` -> 3).
    File names that have no double underscore, or whose text at that position
    is not an integer, get ``float('inf')`` so they sort after all numbered files.
    """
    pieces = os.path.basename(file).split('__')
    if len(pieces) < 2:
        # No double underscore in the name: place the file last
        return float('inf')

    # Text between the double underscore and the first dot (normally the month number)
    number_text = pieces[1].split('.')[0]
    try:
        return int(number_text)
    except ValueError:
        # Not a plain integer: place the file last
        return float('inf')

In [None]:
# Function to load each FVCOM output file and read its group_id variable,
# number the particles within each group_id, and add a new variable called group_number to the NetCDF file
# that stores the group_id followed by the particle's number within that group

def write_groupnumber(files, data_dir, id_width=2, index_width=3):
    """
    Add a ``group_number`` variable to each NetCDF file and save an updated copy.

    For every particle, ``group_number`` is a string made of the particle's
    ``group_id`` followed by the particle's running index within that group
    (0 for the first particle with that ``group_id``, 1 for the second, ...).
    Both parts are zero-padded to a minimum width; with the defaults, group 3 /
    index 7 becomes ``"03007"``. Padding never truncates, so larger values give
    longer codes (group 14 / index 7086 -> ``"147086"``). Pass larger widths if
    fixed-length, unambiguous codes are required.

    The updated files are written to ``data_dir`` as ``updated_<original name>``;
    the input files are not modified.

    Parameters:
    - files (list of str): List of paths to NetCDF files to be processed.
    - data_dir (str): Directory where the updated NetCDF files will be saved.
      It is created if it does not exist.
    - id_width (int, optional): Minimum number of digits for the group_id part.
      Defaults to 2.
    - index_width (int, optional): Minimum number of digits for the
      within-group index part. Defaults to 3.

    Returns:
    - None. The path of each saved file is printed.
    """
    # Make sure the destination exists before the (potentially long) loop starts
    if data_dir:
        os.makedirs(data_dir, exist_ok=True)

    for file in files:
        # The context manager closes the file handle even if a step below fails
        with xr.open_dataset(file) as ds:
            # Convert the NetCDF 'group_id' variable to a DataFrame for processing
            netcdf_df = ds['group_id'].to_dataframe().reset_index()

            # Step 1: Running index of each particle within its group_id
            within_group_index = netcdf_df.groupby('group_id').cumcount()

            # Step 2: Combine group_id and index with leading zeros.
            # Iterating over the two columns (instead of row-wise apply) keeps
            # each column's own dtype and avoids building a Series per row.
            netcdf_df['group_number'] = [
                f"{int(group_id):0{id_width}d}{int(index):0{index_width}d}"
                for group_id, index in zip(netcdf_df['group_id'], within_group_index)
            ]

            # Step 3: Add the codes to the dataset along the 'particles' dimension
            ds['group_number'] = ('particles', netcdf_df['group_number'].values)

            # Step 4: Save the updated NetCDF file while the source is still open
            output_file_path = os.path.join(data_dir, f"updated_{os.path.basename(file)}")
            ds.to_netcdf(output_file_path)

        print(f"Updated NetCDF file saved: {output_file_path}")


# Main Functions

In [16]:
# Gather the FVCOM output files and order them by the month number in their names
files = sorted(
    glob.glob(FVCOM_dir + "/Fvcom_Huron_23_*.nc"),
    key=sort_key,
)
print(files)


['/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Jan__1.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Feb__2.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Mar__3.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_May__5.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Jun__6.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Jul__7.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Aug__8.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Sep__9.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Oct__10.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_Nov__11.nc', '/home/abolmaal/modelling/FVCOM/Huron/output/Fvcom_Huron_23_DecJan_1.nc']


In [11]:
# Add the group_number variable to every FVCOM output file and save the updated copies

write_groupnumber(files=files, data_dir=FVCOM_dir)


Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Jan__1.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Feb__2.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Mar__3.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_May__5.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Jun__6.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Jul__7.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Aug__8.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Sep__9.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron/output/updated_Fvcom_Huron_23_Oct__10.nc
Updated NetCDF file saved: /home/abolmaal/modelling/FVCOM/Huron

In [12]:
# Sanity check: confirm that group_number was added to the updated NetCDF files
# and that its values are in the expected order
updated_files = sorted(
    glob.glob(FVCOM_dir + "/updated_Fvcom_Huron_23_*.nc"),
    key=sort_key,
)
# Alternative that opens all files at once:
#ds = xr.open_mfdataset(updated_files, combine='by_coords')

# Open only the first updated file (ds is reused by the following cell)
ds = xr.open_dataset(updated_files[0])
# Show the stored group_number values
print(ds['group_number'].values)

['00000' '00001' '00002' ... '147086' '147087' '147088']


In [14]:
# Build a group_id / group_number table from the opened NetCDF dataset
netcdf_df = ds['group_id'].to_dataframe().reset_index()

# Step 1: Attach the codes exactly as they are stored in the file.
# They are deliberately NOT round-tripped through int(): that drops leading
# zeros and re-pads to five digits, so a longer stored code such as '001000'
# would turn into '01000' and no longer match the NetCDF variable.
netcdf_df['group_number'] = ds['group_number'].values.astype(str)

# Step 2: Print values for debugging
print("First few rows of netcdf_df:")
print(netcdf_df.head())

# Step 3: Select relevant columns for saving to CSV
netcdf_df = netcdf_df[['group_id', 'group_number']]

# Save the result as a CSV file.
# NOTE: read it back with dtype={'group_number': str}, otherwise pandas parses
# the codes as integers and the leading zeros are lost.
netcdf_df.to_csv(os.path.join(FVCOM_dir, 'group_id_group_number.csv'), index=False)

print("CSV file saved with formatted group numbers")



First few rows of netcdf_df:
   particles  group_id group_number
0          0         0        00000
1          1         0        00001
2          2         0        00002
3          3         0        00003
4          4         0        00004
CSV file saved with formatted group numbers
