In [6]:
import numpy as np
import pandas as pd
import netCDF4 as nc
import os
from datetime import datetime

# Define a function to read data from NetCDF files
def read_data(file_path, variable_name):
    with nc.Dataset(file_path) as dataset:
        data = dataset.variables[variable_name][:]
        time = dataset.variables['time'][:]
        longitude = dataset.variables['longitude'][:]
        latitude = dataset.variables['latitude'][:]
        
        # Convert time to a datetime format
        time_units = dataset.variables['time'].units
        time_calendar = dataset.variables['time'].calendar if 'calendar' in dataset.variables['time'].ncattrs() else 'standard'
        time_dates = nc.num2date(time, units=time_units, calendar=time_calendar)
        
        # Average over latitude and longitude
        if data.ndim > 2:
            data = np.mean(data, axis=(-1, -2))  # Average over longitude and latitude dimensions

        return data, time_dates

# File paths for the data
file_path_sst = 'CMEMS_GLO-halmahera_sst_0_2009_2020_luas_ok.nc'
file_path_chl = 'CMEMS_GLO-halmahera_CHL_0_2009_2020.nc'
file_path_salinity = 'CMEMS_GLO-halmahera_sss_0_2009_2020_luas_ok.nc'
file_path_zooplankton = 'CMEME_phyto_2009_2020.nc'
file_path_ssh = 'CMEMS_GLO-halmahera_ssh_0_2009_2020_luas_ok.nc'

# Read the data from each file
sea_surface_temp, time_sst = read_data(file_path_sst, 'thetao')
chlorophyll, time_chl = read_data(file_path_chl, 'CHL')
salinity, time_s = read_data(file_path_salinity, 'so')
zooplankton, time_z = read_data(file_path_zooplankton, 'zooc')
sea_surface_height, time_ssh = read_data(file_path_ssh, 'zos')

# Create a DataFrame for each variable
def create_dataframe(data, time):
    df = pd.DataFrame(data, columns=['Value'])
    # Convert cftime to Python datetime
    time = [datetime.fromisoformat(str(t)) for t in time]  # Convert to Python datetime
    df['time'] = time  # Ensure time is in datetime format
    return df

df_sst = create_dataframe(sea_surface_temp, time_sst)
df_chl = create_dataframe(chlorophyll, time_chl)
df_salinity = create_dataframe(salinity, time_s)
df_zooplankton = create_dataframe(zooplankton, time_z)
df_ssh = create_dataframe(sea_surface_height, time_ssh)

# Ensure all dataframes have the same time range and frequency
start_date = '2009-01-01'
end_date = '2020-12-31'
date_range = pd.date_range(start=start_date, end=end_date, freq='M')

# Resampling to monthly mean
monthly_mean_sst = df_sst.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_chl = df_chl.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_salinity = df_salinity.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_zooplankton = df_zooplankton.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_ssh = df_ssh.set_index('time').resample('M').mean().reindex(date_range)

# Combine all monthly means into a single DataFrame
monthly_means_combined = pd.DataFrame({
    'SST': monthly_mean_sst['Value'],
    'CHL': monthly_mean_chl['Value'],
    'Salinity': monthly_mean_salinity['Value'],
    'Zooplankton': monthly_mean_zooplankton['Value'],
    'SSH': monthly_mean_ssh['Value'],
}, index=date_range)

# Exporting to CSV
output_file_path = os.path.join('monthly_means_2009_2020.csv')
monthly_means_combined.to_csv(output_file_path)

print(f'Monthly mean data exported to {output_file_path}')


Monthly mean data exported to monthly_means_2009_2020.csv


  date_range = pd.date_range(start=start_date, end=end_date, freq='M')
  monthly_mean_sst = df_sst.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_chl = df_chl.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_salinity = df_salinity.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_zooplankton = df_zooplankton.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_ssh = df_ssh.set_index('time').resample('M').mean().reindex(date_range)


In [7]:
import numpy as np
import pandas as pd
import netCDF4 as nc
import os
from datetime import datetime

# Define a function to read data from NetCDF files
def read_data(file_path, variable_name):
    with nc.Dataset(file_path) as dataset:
        data = dataset.variables[variable_name][:]
        time = dataset.variables['time'][:]
        longitude = dataset.variables['longitude'][:]
        latitude = dataset.variables['latitude'][:]
        
        # Convert time to a datetime format
        time_units = dataset.variables['time'].units
        time_calendar = dataset.variables['time'].calendar if 'calendar' in dataset.variables['time'].ncattrs() else 'standard'
        time_dates = nc.num2date(time, units=time_units, calendar=time_calendar)
        
        # Average over latitude and longitude
        if data.ndim > 2:
            data = np.mean(data, axis=(-1, -2))  # Average over longitude and latitude dimensions

        return data, time_dates

# File paths for the data
file_path_sst = 'CMEMS_GLO-halmahera_sst_0_2009_2020_luas_ok.nc'
file_path_chl = 'CMEMS_GLO-halmahera_CHL_0_2009_2020.nc'
file_path_salinity = 'CMEMS_GLO-halmahera_sss_0_2009_2020_luas_ok.nc'
file_path_zooplankton = 'CMEME_phyto_2009_2020.nc'
file_path_ssh = 'CMEMS_GLO-halmahera_ssh_0_2009_2020_luas_ok.nc'

# Read the data from each file
sea_surface_temp, time_sst = read_data(file_path_sst, 'thetao')
chlorophyll, time_chl = read_data(file_path_chl, 'CHL')
salinity, time_s = read_data(file_path_salinity, 'so')
zooplankton, time_z = read_data(file_path_zooplankton, 'zooc')
sea_surface_height, time_ssh = read_data(file_path_ssh, 'zos')

# Create a DataFrame for each variable
def create_dataframe(data, time):
    df = pd.DataFrame(data, columns=['Value'])
    # Convert cftime to Python datetime
    time = [datetime.fromisoformat(str(t)) for t in time]  # Convert to Python datetime
    df['time'] = time  # Ensure time is in datetime format
    
    # Normalize the data (min-max normalization)
    df['Normalized Value'] = (df['Value'] - df['Value'].min()) / (df['Value'].max() - df['Value'].min())
    
    return df

df_sst = create_dataframe(sea_surface_temp, time_sst)
df_chl = create_dataframe(chlorophyll, time_chl)
df_salinity = create_dataframe(salinity, time_s)
df_zooplankton = create_dataframe(zooplankton, time_z)
df_ssh = create_dataframe(sea_surface_height, time_ssh)

# Ensure all dataframes have the same time range and frequency
start_date = '2009-01-01'
end_date = '2020-12-31'
date_range = pd.date_range(start=start_date, end=end_date, freq='M')

# Resampling to monthly mean
monthly_mean_sst = df_sst.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_chl = df_chl.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_salinity = df_salinity.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_zooplankton = df_zooplankton.set_index('time').resample('M').mean().reindex(date_range)
monthly_mean_ssh = df_ssh.set_index('time').resample('M').mean().reindex(date_range)

# Combine all monthly means into a single DataFrame
monthly_means_combined = pd.DataFrame({
    'SST': monthly_mean_sst['Value'],
    'Normalized SST': monthly_mean_sst['Normalized Value'],
    'CHL': monthly_mean_chl['Value'],
    'Normalized CHL': monthly_mean_chl['Normalized Value'],
    'Salinity': monthly_mean_salinity['Value'],
    'Normalized Salinity': monthly_mean_salinity['Normalized Value'],
    'Zooplankton': monthly_mean_zooplankton['Value'],
    'Normalized Zooplankton': monthly_mean_zooplankton['Normalized Value'],
    'SSH': monthly_mean_ssh['Value'],
    'Normalized SSH': monthly_mean_ssh['Normalized Value'],
}, index=date_range)

# Exporting to CSV
output_file_path = os.path.join('monthly_means_normalized_2009_2020.csv')
monthly_means_combined.to_csv(output_file_path)

print(f'Monthly mean normalized data exported to {output_file_path}')


Monthly mean normalized data exported to monthly_means_normalized_2009_2020.csv


  date_range = pd.date_range(start=start_date, end=end_date, freq='M')
  monthly_mean_sst = df_sst.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_chl = df_chl.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_salinity = df_salinity.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_zooplankton = df_zooplankton.set_index('time').resample('M').mean().reindex(date_range)
  monthly_mean_ssh = df_ssh.set_index('time').resample('M').mean().reindex(date_range)
