In [7]:
import pandas as pd
import numpy as np
from scipy.spatial import cKDTree
import netCDF4
from noaa_coops import Station
from sklearn.metrics import mean_squared_error
from math import sqrt
import numpy as np

# Function to calculate correlation coefficient
def calculate_correlation(true_values, predicted_values):
    # Ensure there are enough data points
    if len(true_values) > 48 and len(predicted_values) > 48:
        # Ignore the first 48 values
        true_values = true_values[48:]
        predicted_values = predicted_values[48:]
    else:
        # Not enough data points to ignore the first 48 values
        return np.nan  # Return NaN or handle this case as needed

    # Handle cases where the lengths are different due to missing data
    valid_mask = ~np.isnan(true_values) & ~np.isnan(predicted_values)
    true_values, predicted_values = true_values[valid_mask], predicted_values[valid_mask]
    
    # Calculate Pearson correlation coefficient if there are still enough data points
    if len(true_values) > 0 and len(predicted_values) > 0:
        correlation_matrix = np.corrcoef(true_values, predicted_values)
        correlation_coefficient = correlation_matrix[0, 1]  # Extract the correlation coefficient
        return correlation_coefficient
    else:
        # Not enough data points after filtering for NaNs
        return np.nan  # Return NaN or handle this case as needed

# Read the station details CSV file
station_details = pd.read_csv('station_details.csv')

# Load the NetCDF file
nc_file = netCDF4.Dataset('C:/Users/soele/OneDrive - George Mason University - O365 Production/Documents/FINESST/Paper-2/swot_fast/fort.63.nc', 'r')

# Extract the latitude and longitude from the NetCDF file
latitudes = nc_file.variables['y'][:]  # Replace with your actual variable name
longitudes = nc_file.variables['x'][:]  # Replace with your actual variable name
zeta = nc_file.variables['zeta'][:]

# Normalize longitudes from 0-360 to -180 to +180 if needed
longitudes = np.where(longitudes > 180, longitudes - 360, longitudes)

# Create an array of tuples with the latitude and longitude
points = np.column_stack((latitudes, longitudes))

# Create a KDTree for quick nearest-neighbor lookup
kdtree = cKDTree(points)

# Prepare a list to hold RMSE values
correlation_values = []

# Loop through each station, find the closest point, and retrieve water level data
for index, row in station_details.iterrows():
    try:
        station_id = row['Station ID']
        
        # Find the closest point for the current station's coordinates
        _, closest_point_idx = kdtree.query([row['Latitude'], row['Longitude']])
        
        # Adjust the index for 0-based Python indexing
        closest_point_idx_zero_based = closest_point_idx - 1
        
        # Retrieve 'zeta' variable data for the closest point
        nc_zeta = zeta[:, closest_point_idx_zero_based]
        
        # Delete the first 4 values and shift the remaining values up
        nc_zeta_adjusted = np.delete(nc_zeta, np.s_[0:4])
        
        # Initialize the station object
        station = Station(station_id)

        # Try to retrieve water level data for the station
        try:
            station_data = station.get_data(
                begin_date="20230618",
                end_date="20230709",
                product="hourly_height",
                datum="MSL",
                units="metric",
                time_zone="lst"
            )
        except Exception as e:
            print(f"Error retrieving data for station {station_id}: {e}")
            correlation_values.append(np.nan)  # Append a NaN value for stations with errors
            continue  # Skip to the next iteration of the loop

        # The index 't' is already in datetime format, so no conversion is necessary
        station_df = pd.DataFrame(station_data)
        
        # Ensure nc_zeta_adjusted aligns with the time frame, accounting for the deleted entries
        nc_zeta_series = pd.Series(nc_zeta_adjusted.flatten(), index=pd.date_range(start="2023-06-18 00:00:00", periods=len(nc_zeta_adjusted), freq='H'))

        # Resample or interpolate to ensure both series align on the same time index
        station_df_resampled = station_df['v'].reindex(nc_zeta_series.index, method='nearest')
        
        correlation = calculate_correlation(station_df_resampled.values, nc_zeta_series.values)
        correlation_values.append(correlation)  # Consider renaming this list to something more appropriate, like correlation_values

    except Exception as e:
        # Catch any other exception that might occur
        print(f"An unexpected error occurred for station {station_id}: {e}")
        correlation_values.append(np.nan)  # Append a NaN value for stations with errors
        continue  # Skip to the next iteration of the loop

# Add the RMSE values to the station_details DataFrame
station_details['correlation_swot_fast_without'] = correlation_values

# Save the updated dataframe as "rmse.csv"
station_details.to_csv('correlation_swot_fast_mod.csv', index=False)

# Close the NetCDF file
nc_file.close()

Error retrieving data for station 8311030: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 8311062: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 8518962: CO-OPS API returned an error: No data was found. This product may not be offered at this station at the requested time.


  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]
  c /= stddev[:, None]
  c /= stddev[None, :]


Error retrieving data for station 9014070: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9014080: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9014087: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9014090: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9014095: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9014098: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9034052: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9044020: CO-OPS API returned an error. Status Code: 400. Reason: Bad Request

Error retrieving data for station 9044030: CO-OPS API returned an error. Status Code: 400. Reason: Bad R