In [1]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import os


In [2]:
pwd

'/Users/saima/Documents/6.Graduate Assistant/Dr.Paul,GA/Thesis_Work/oisst/scripts'

In [9]:
import netCDF4 as nc
import pandas as pd
import numpy as np
import os

# Latitude/longitude bounds of the region to extract (both ends inclusive).
# Longitudes are given in the 0-360 convention used when filtering the 'lon'
# variable below, NOT in the -180..180 system: 260-360 here corresponds to
# -100..0 after subtracting 360.
lat_range = (0.0, 35.0)  # Example range
lon_range = (260.0, 360.0)  # Adjusted range for 0-360 system

def _flatten_grid(time, lat, lon, sst):
    """Flatten a (time, lat, lon) SST cube into aligned long-format columns.

    A C-order reshape of ``sst`` makes longitude vary fastest, then latitude,
    then time, so the coordinate columns are expanded in that same order to
    keep every SST value on the row of its own (time, lat, lon) triple.

    Returns:
        Tuple ``(flat_time, flat_lat, flat_lon, flat_sst)`` of 1-D arrays, each
        of length ``len(time) * len(lat) * len(lon)``.
    """
    num_time_steps, num_lat, num_lon = len(time), len(lat), len(lon)

    flat_sst = sst.reshape(num_time_steps * num_lat * num_lon)
    flat_time = np.repeat(time, num_lat * num_lon)                # slowest axis
    flat_lat = np.tile(np.repeat(lat, num_lon), num_time_steps)   # middle axis
    flat_lon = np.tile(lon, num_time_steps * num_lat)             # fastest axis
    return flat_time, flat_lat, flat_lon, flat_sst


def process_netcdf_to_dataframe(netcdf_file, lat_bounds=None, lon_bounds=None):
    """Extract SST values inside a lat/lon box from one NetCDF file.

    Args:
        netcdf_file: Path of the NetCDF file to read. It must provide the
            variables 'time', 'lat', 'lon' and 'sst'; 'sst' is indexed as
            (time, level, lat, lon) and only level 0 is kept.
        lat_bounds: Optional ``(min, max)`` latitude bounds, inclusive.
            Defaults to the module-level ``lat_range``.
        lon_bounds: Optional ``(min, max)`` longitude bounds, inclusive, in the
            same convention as the file's 'lon' variable. Defaults to the
            module-level ``lon_range``.

    Returns:
        A DataFrame with columns 'time', 'lat', 'lon', 'sst', one row per grid
        cell and time step with a valid SST value. An empty DataFrame is
        returned (after printing a message) when no grid point falls inside
        the box or when the filtered SST array has an unexpected shape.
    """
    lat_min, lat_max = lat_range if lat_bounds is None else lat_bounds
    lon_min, lon_max = lon_range if lon_bounds is None else lon_bounds

    # The context manager closes the file handle even if reading fails; the
    # [:] reads copy the data into memory, so the arrays outlive the dataset.
    with nc.Dataset(netcdf_file) as dataset:
        time_var = dataset.variables['time']
        time = nc.num2date(
            time_var[:],
            units=time_var.units,
            calendar=getattr(time_var, 'calendar', 'standard'),
        )
        lat = dataset.variables['lat'][:]
        lon = dataset.variables['lon'][:]
        sst = dataset.variables['sst'][:]  # (time, level, lat, lon)

    # Plain boolean masks (a masked coordinate never counts as inside the box)
    lat_filter = np.ma.filled((lat >= lat_min) & (lat <= lat_max), False)
    lon_filter = np.ma.filled((lon >= lon_min) & (lon <= lon_max), False)

    if not lat_filter.any() or not lon_filter.any():
        print("No data found within the specified lat/lon ranges.")
        return pd.DataFrame()

    filtered_lat = lat[lat_filter]
    filtered_lon = lon[lon_filter]
    # Keep the first level only, then cut out the lat/lon box one axis at a time
    filtered_sst = sst[:, 0, lat_filter, :][:, :, lon_filter]

    expected_shape = (len(time), len(filtered_lat), len(filtered_lon))
    if filtered_sst.shape != expected_shape:
        print(f"Mismatch in filtered SST shape: {filtered_sst.shape}")
        return pd.DataFrame()

    flat_time, flat_lat, flat_lon, flat_sst = _flatten_grid(
        time, filtered_lat, filtered_lon, filtered_sst
    )

    # netCDF4 hands back masked arrays for cells equal to the fill value; turn
    # those into NaN explicitly so they can be dropped below.
    if np.ma.isMaskedArray(flat_sst) and np.issubdtype(flat_sst.dtype, np.floating):
        flat_sst = flat_sst.filled(np.nan)

    df = pd.DataFrame({
        'time': flat_time,
        'lat': flat_lat,
        'lon': flat_lon,
        'sst': flat_sst
    })

    # Drop missing values: NaN (masked cells) as well as a literal -9999 sentinel
    is_valid = df['sst'].notna() & (df['sst'] != -9999)
    df = df[is_valid].copy()  # copy so the rounding below writes to an owned frame

    # Format lat and lon to have the required precision
    df['lat'] = df['lat'].round(3)
    df['lon'] = df['lon'].round(3)

    return df

# Folder containing NetCDF files
netcdf_folder = './oisst_data'  # Replace with your folder path

# Destination of the combined table; the folder is created if it is missing so
# the run does not fail at the very end after all files have been processed.
output_csv = './dataset/TEST_combined_sst_data.csv'

# Process each NetCDF file in the folder. os.listdir() returns entries in
# arbitrary order, so sort them to make the row order of the CSV reproducible.
frames = []
for filename in sorted(os.listdir(netcdf_folder)):
    if filename.endswith('.nc'):
        file_path = os.path.join(netcdf_folder, filename)
        df = process_netcdf_to_dataframe(file_path)
        if not df.empty:
            frames.append(df)

# Concatenate once instead of growing a DataFrame inside the loop (quadratic)
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Save all data to a single CSV file
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
all_data.to_csv(output_csv, index=False)


In [13]:
import pandas as pd

# Read back the combined SST table written by the extraction step
combined_csv_path = './dataset/TEST_combined_sst_data.csv'
df = pd.read_csv(combined_csv_path)

# Longitude helper: re-express a 0-360 longitude in the -180 to 180 system
def convert_lon(lon):
    """Return ``lon`` shifted down by 360 when it is greater than 180.

    Values of 180 or less (including 0 and 180 themselves) are returned
    unchanged, so 360 maps to 0 and 270 maps to -90.
    """
    if lon > 180:
        return lon - 360
    return lon

# Convert every longitude to the -180..180 system
converted_lon = df['lon'].apply(convert_lon)

# Swap the original 'lon' column for the converted one (it ends up as the last
# column), then order the rows by 'time', 'lat', and 'lon'
df = (
    df.drop(columns=['lon'])
    .assign(lon=converted_lon)
    .sort_values(by=['time', 'lat', 'lon'])
)

# Write the final table to a new CSV file
df.to_csv('./dataset/FINAL_dataset.csv', index=False)
