This Jupyter Notebook loads CMIP6 NetCDF files, extracts climate variables for selected locations, converts the data into pandas DataFrames, and exports the results as CSV files for further analysis.

In [None]:
from pathlib import Path
import xarray as xr
import logging

# Emit timestamped INFO-level (and above) messages for progress reporting
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Directory holding the input NetCDF files; the CSV outputs are written here too
DATA_DIR = Path("datasets")

# One city per climate model: model name -> city name and its coordinates.
# Rows are (model, city, latitude, longitude); longitudes are signed.
LOCATIONS = {
    model_name: {"city": city_name, "lat": latitude, "lon": longitude}
    for model_name, city_name, latitude, longitude in (
        ("UKESM1-0-LL", "London", 51.5, -0.1),
        ("MIROC6", "Tokyo", 35.7, 139.7),
        ("CESM2-WACCM", "Los Angeles", 34.05, -118.25),
    )
}

# Experiment identifiers (ssp245, ssp585) used to build the input file names
EXPERIMENTS = ["ssp245", "ssp585"]

# Variable identifiers (tas, pr) used to build the input file names
VARIABLES = ["tas", "pr"]

# Helper to express a target longitude in the grid's own convention
def _to_grid_longitude(lon, grid_min, grid_max):
    """Return *lon* expressed in the longitude convention of the grid.

    Longitudes can be stored either as signed values (-180..180) or as
    0..360. A nearest-neighbour lookup compares raw numbers, so a signed
    target on a 0..360 grid (or the reverse) would snap to the wrong edge
    of the grid instead of the intended location.

    Args:
        lon: Target longitude in degrees, in either convention.
        grid_min: Smallest longitude value present on the grid.
        grid_max: Largest longitude value present on the grid.

    Returns:
        The longitude shifted by a multiple of 360 degrees when the grid
        uses the other convention, otherwise *lon* unchanged.
    """
    if lon < 0 and grid_max > 180:
        # Grid is 0..360 but the target is signed: wrap into 0..360.
        return lon % 360
    if lon > 180 and grid_min < 0:
        # Grid is signed but the target is 0..360: wrap into -180..180.
        return (lon + 180) % 360 - 180
    return lon


# Function to extract data for a given model and variable
def extract_location_data(model, experiment, variable):
    """Extract the time series at the grid point nearest to the model's city.

    Opens ``DATA_DIR / cmip6_{variable}_{experiment}_{model}.nc``, selects
    the grid point nearest to the coordinates registered for *model* in
    ``LOCATIONS``, converts the selection to a DataFrame and writes it to
    ``DATA_DIR / {model}_{variable}_{experiment}_{city}.csv``.

    Args:
        model: Model name; must be a key of ``LOCATIONS``.
        experiment: Experiment identifier used in the input file name.
        variable: Variable identifier used in the input file name.

    Returns:
        The extracted data as a pandas DataFrame, or ``None`` (after logging
        a warning) when the input file does not exist.

    Raises:
        KeyError: If the input file exists but *model* is not in
            ``LOCATIONS``.
    """
    file_path = DATA_DIR / f"cmip6_{variable}_{experiment}_{model}.nc"

    if not file_path.exists():
        logging.warning(f"File not found: {file_path}")
        return None

    # Extract location details
    location = LOCATIONS[model]
    city = location["city"]
    target_lat = location["lat"]
    target_lon = location["lon"]

    logging.info(f"Processing {file_path} for {city}...")

    # Open via a context manager so the file handle is released even if the
    # selection or conversion fails.
    with xr.open_dataset(file_path) as ds:
        # Align the target longitude with the grid's convention before the
        # nearest-neighbour lookup; otherwise negative longitudes on a
        # 0..360 grid silently resolve to the wrong grid column.
        lon_coord = ds["lon"]
        grid_lon = _to_grid_longitude(
            target_lon, float(lon_coord.min()), float(lon_coord.max())
        )

        # Find the nearest grid point
        ds_nearest = ds.sel(lat=target_lat, lon=grid_lon, method="nearest")

        # Materialise the data while the file is still open
        df = ds_nearest.to_dataframe().reset_index()

    # Save extracted data
    output_file = DATA_DIR / f"{model}_{variable}_{experiment}_{city}.csv"
    df.to_csv(output_file, index=False)
    logging.info(f"Saved extracted data to {output_file}")

    return df

# Run the extraction once for every (model, experiment, variable) combination,
# iterating models outermost and variables innermost
for model, experiment, variable in (
    (m, e, v) for m in LOCATIONS for e in EXPERIMENTS for v in VARIABLES
):
    extract_location_data(model, experiment, variable)

logging.info("Extraction complete.")