In [1]:
# Author: Kyeong Pil Kong (USC)
# kyeongpi@usc.edu
# This code converts chl data downloaded from the Copernicus GlobColour database into simplified csv files. 
# https://data.marine.copernicus.eu/product/OCEANCOLOUR_GLO_BGC_L4_MY_009_104/files?subdataset=cmems_obs-oc_glo_bgc-plankton_my_l4-gapfree-multi-4km_P1D_202311
# The CSV files are then used to match chl values onto locations along parcel back trajectories.

import os
import netCDF4
import numpy as np
import pandas as pd
from datetime import datetime

# Convert each daily chl NetCDF file in a directory into a CSV of
# (Latitude, Longitude, Chlorophyll-a) rows restricted to a lon/lat box.

# Geographical extent to keep, as [lon_min, lon_max, lat_min, lat_max]
extent_to_keep = [-180, -120, -10, 40]

# Directory containing the .nc files; the CSV files are written next to them.
# Placeholder value -- replace it with your own path before running.
nc_files_directory = 'FILEPATH'  # your filepath here

# All .nc files in the directory, sorted so they are processed in a reproducible order
nc_files = sorted(file for file in os.listdir(nc_files_directory) if file.endswith('.nc'))

lon_min, lon_max, lat_min, lat_max = extent_to_keep

# Iterate through each .nc file
for nc_file_name in nc_files:
    # The text before the first '_' (or '.') of the filename must be the date as YYYYMMDD
    date_str = nc_file_name.split('_')[0].split('.')[0]
    date_obj = datetime.strptime(date_str, '%Y%m%d')

    # Format the date string as YYYY_MM_DD for the output file name
    formatted_date_str = date_obj.strftime('%Y_%m_%d')

    # Read coordinates and the first time step of CHL; the `with` block closes
    # the file even if one of the reads raises
    nc_file_path = os.path.join(nc_files_directory, nc_file_name)
    with netCDF4.Dataset(nc_file_path, 'r') as nc_file:
        latitude = nc_file.variables['lat'][:]
        longitude = nc_file.variables['lon'][:]
        chlor_a_data = nc_file.variables['CHL'][0, :, :]

    # Select along each 1-D coordinate axis instead of building a full-grid
    # meshgrid and 2-D mask; this yields the same cells in the same
    # (latitude-major) order with far less memory.
    lat_values = np.asarray(latitude)
    lon_values = np.asarray(longitude)
    lat_in_extent = (lat_values >= lat_min) & (lat_values <= lat_max)
    lon_in_extent = (lon_values >= lon_min) & (lon_values <= lon_max)

    # CHL is indexed as (lat, lon) once the leading axis has been fixed to 0 above
    chlor_a_data_filtered = chlor_a_data[np.ix_(lat_in_extent, lon_in_extent)]

    # Coordinate grids covering only the selected box
    lon_grid, lat_grid = np.meshgrid(lon_values[lon_in_extent], lat_values[lat_in_extent])

    # One row per grid cell; pandas turns masked (missing) CHL values into NaN
    df = pd.DataFrame({
        'Latitude': lat_grid.ravel(),
        'Longitude': lon_grid.ravel(),
        'Chlorophyll-a': chlor_a_data_filtered.ravel()
    })

    # Remove rows where 'Chlorophyll-a' is missing
    df = df.dropna(subset=['Chlorophyll-a'])

    # Save the DataFrame as a CSV file in the same directory as the .nc files
    csv_file_name = f'chl_sorted_{formatted_date_str}.csv'
    csv_file_path = os.path.join(nc_files_directory, csv_file_name)
    df.to_csv(csv_file_path, index=False)

print('Processing completed.')

ModuleNotFoundError: No module named 'netCDF4'

In [None]:
# Convert MIMI NetCDF file outputs to a DataFrame
# Data source: https://doi.org/10.5194/gmd-12-3835-2019
# Data Author: Douglas Hamilton, NCSU

import pandas as pd
import netCDF4
import numpy as np

# Load the MIMI NetCDF output and tabulate it as one row per (date, lat, lon) cell.
#
# NOTE: `filepath` is not defined in this cell; it must be set to the path of
# the MIMI NetCDF file before this code runs.

# Open read-only (nothing is written back) and let the `with` block close the
# file even if a variable lookup raises
with netCDF4.Dataset(filepath, 'r') as nc_file:
    # Extract relevant variables
    dates = nc_file.variables['date'][:]  # Assuming 'date' is in a suitable format
    latitude = nc_file.variables['lat'][:]
    longitude = nc_file.variables['lon'][:]

    # NOTE(review): each field below used to be read twice, first under a
    # mixed-case name ('FEsolSRF', 'FEANsolSRF', 'FEBBsolSRF', 'FEDUsolSRF') and
    # then under the upper-case name, with the second read overwriting the
    # first. Only the upper-case read ever reached the DataFrame, so the
    # discarded reads were removed. Confirm which spelling the file really uses.
    fesolsrf_data = nc_file.variables['FESOLSRF'][:,:,:]
    feansolsrf_data = nc_file.variables['FEANSOLSRF'][:,:,:]
    febbsolsrf_data = nc_file.variables['FEBBSOLSRF'][:,:,:]
    fedusolsrf_data = nc_file.variables['FEDUSOLSRF'][:,:,:]

# NOTE(review): each "all" total adds a field to itself (i.e. doubles it).
# Presumably two different components were meant to be summed, as in the
# dry + wet example below -- confirm. These totals are not used in this cell.
feallsrf_data = fesolsrf_data + fesolsrf_data
# dusoldry_data = nc_file.variables['DUsolDRY'][:,:,:]
# dusolwet_data = nc_file.variables['DUsolWET'][:,:,:]
# dusolall_data = dusoldry_data + dusolwet_data
feanallsrf_data = feansolsrf_data + feansolsrf_data
febballsrf_data = febbsolsrf_data + febbsolsrf_data
feduallsrf_data = fedusolsrf_data + fedusolsrf_data

# Reshape the data for tabulation.
# With indexing='ij' the coordinate grids have shape (dates, lat, lon); this
# assumes the data variables are stored in that same axis order -- TODO confirm.
dates_grid, latitude_grid, longitude_grid = np.meshgrid(dates, latitude, longitude, indexing='ij')
dates_flat = dates_grid.flatten()
latitude_flat = latitude_grid.flatten()
longitude_flat = longitude_grid.flatten()
fesolsrf_flat = fesolsrf_data.flatten()
feansolsrf_flat = feansolsrf_data.flatten()
febbsolsrf_flat = febbsolsrf_data.flatten()
fedusolsrf_flat = fedusolsrf_data.flatten()

# Create a DataFrame
# NOTE(review): the column set is kept exactly as before. Several columns hold
# the same values under two names (e.g. 'FEsolSRF' and 'FESOLSRF'), and
# 'FEDUTOSRF' holds the 'FEDUSOLSRF' field -- confirm the intended schema.
df_mimi = pd.DataFrame({
    'Date': dates_flat,
    'Latitude': latitude_flat,
    'Longitude': longitude_flat,
    'FEsolSRF': fesolsrf_flat,
    'FESOLSRF': fesolsrf_flat,
    'FEANsolSRF': feansolsrf_flat,
    'FEBBsolSRF': febbsolsrf_flat,
    'FEDUTOSRF': fedusolsrf_flat,
    'FEANSOLSRF': feansolsrf_flat,
    'FEBBSOLSRF': febbsolsrf_flat,
    'FEDUSOLSRF': fedusolsrf_flat,
})

# Display the DataFrame
print(df_mimi)

# Write the MIMI output as individual CSV files, one per day

import pandas as pd

# Split df_mimi into one CSV file per date.
# Note: df_mimi is modified in place ('Date' and 'Longitude' are overwritten).

# Convert the 'Date' column to datetime format for better handling
df_mimi['Date'] = pd.to_datetime(df_mimi['Date'], format='%Y%m%d')

# Convert longitude values to the standard range of -180 to 180
df_mimi['Longitude'] = (df_mimi['Longitude'] + 180) % 360 - 180

# Sort the DataFrame by Date. This is done once, after the conversion; an
# earlier sort of the unconverted column was overwritten and has been removed.
df_sorted = df_mimi.sort_values(by='Date')

# Group by Date and iterate over groups to save each group to a separate CSV file
for date, group in df_sorted.groupby('Date'):
    # File name format is 'mimi_srf_YYYY_MM_DD.csv'; "FILEPATH" is a placeholder
    # for the output directory
    output_filepath = f"FILEPATH/mimi_srf_{date.strftime('%Y_%m_%d')}.csv"

    # Save the group to the CSV file
    group.to_csv(output_filepath, index=False)
