In [2]:
import numpy as np
from netCDF4 import Dataset
from datetime import datetime

# Input and output file paths
species_list = [
    "c2f6", "c3f8", "c4f10", "c5f12", "c6f14", "c7f16", "c8f18", "cc4f8", "cf4",
    "cfc11", "cfc11eq", "cfc113", "cfc114", "cfc115", "cfc12", "cfc12eq", "ccl4",
    "ch2cl2", "ch3br", "ch3ccl3", "ch3cl", "ch4", "chcl3", "co2", "halon1211",
    "halon1301", "halon2402", "hcfc141b", "hcfc142b", "hcfc22", "hfc125", "hfc134a",
    "hfc134aeq", "hfc143a", "hfc152a", "hfc227ea", "hfc236fa", "hfc245fa", "hfc23",
    "hfc32", "hfc365mfc", "hfc4310mee", "n2o", "nf3", "sf6", "so2f2"
]

base_path = "/glade/campaign/cesm/cesmdata/input4MIPs_raw/input4MIPs/CMIP7/CMIP/CR/CR-CMIP-1-0-0/atmos/mon/"
output_file = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_c250326.nc"
reference_file = "/glade/campaign/acom/acom-climate/dkin/inputs/lbc/LBC_17500116-2501216_CMIP6_0p5degLat_1998EESC_c211115.nc"

# Function to check if a year is a leap year
def is_leap_year(year):
    return (year % 4 == 0 and (year % 100 != 0 or year % 400 == 0))

# Generate all dates between 1750-01-16 and 2023-01-16
start_date = datetime(1750, 1, 16)
end_date = datetime(2022, 12, 16)
dates = []

current_date = start_date
while current_date <= end_date:
    year = current_date.year
    month = current_date.month
    
    # Use 15th for February in leap years, 16th otherwise
    if month == 2:
        day = 15 if is_leap_year(year) else 16
    else:
        day = 16
    
    # Format the date as YYYYMMDD
    dates.append(int(f"{year}{month:02d}{day:02d}"))
    
    # Move to the next month
    if month == 12:
        current_date = current_date.replace(year=year + 1, month=1)
    else:
        current_date = current_date.replace(month=month + 1)

dates = np.array(dates)  # Convert to numpy array

# Read latitude and longitude from the reference file
with Dataset(reference_file, "r") as ref:
    lat = ref.variables["lat"][:]
    lon = ref.variables["lon"][:]

# Create the output file
with Dataset(output_file, "w", format="NETCDF4") as dst:
    # Define dimensions
    dst.createDimension("lat", len(lat))
    dst.createDimension("lon", len(lon))
    dst.createDimension("time", None)  # Unlimited time dimension
    
    # Define variables for lat, lon, time, and date
    lat_var = dst.createVariable("lat", "f4", ("lat",))
    lon_var = dst.createVariable("lon", "f4", ("lon",))
    time_var = dst.createVariable("time", "f4", ("time",))
    date_var = dst.createVariable("date", "i4", ("time",))
    
    # Assign attributes to lat, lon, time, and date
    lat_var.long_name = "latitude"
    lat_var.units = "degrees_north"
    lon_var.long_name = "longitude"
    lon_var.units = "degrees_east"
    time_var.long_name = "time"
    time_var.units = "days since 1750-01-01 00:00:00"
    time_var.calendar = "gregorian"
    time_var.bounds = "time_bnds"
    date_var.long_name = "current date (YYYYMMDD)"

    
    # Process each species
    for species in species_list:
        print(f"Processing {species}...")
        species_file = f"{base_path}{species}/gnz/v20250228/{species}_input4MIPs_GHGConcentrations_CMIP_CR-CMIP-1-0-0_gnz_175001-202212.nc"
        
        # Open the species file
        with Dataset(species_file, "r") as src:
            # Read the species data and its attributes
            species_data = src.variables[species][:]
            species_units = src.variables[species].units.lower()
            species_lat = src.variables["lat"][:]  # Read the 12 latitudes

            time = src.variables["time"][:]
            
            # Determine the conversion factor based on the units
            if species_units == "ppm":
                conversion_factor = 1e-6
            elif species_units == "ppb":
                conversion_factor = 1e-9
            elif species_units == "ppt":
                conversion_factor = 1e-12
            else:
                raise ValueError(f"Unknown units for {species}: {species_units}")
            
            # Convert units to mol/mol
            species_data_molmol = species_data * conversion_factor

        
        # Interpolate species data from 12 latitudes to 360 latitudes
        species_interpolated = np.array([
            np.interp(lat, species_lat, species_data_molmol[time_idx])
            for time_idx in range(species_data_molmol.shape[0])
        ])

        
        # Define the species variable in the output file
        species_var = dst.createVariable(f"{species.upper()}_LBC", "f4", ("time", "lat"), zlib=True)
        species_var.long_name = f"{species.upper()} lower boundary condition"
        species_var.units = "mol/mol"
        
        # Write interpolated data to the species variable
        species_var[:, :] = species_interpolated
        
    
    # Extend time by adding one additional time step
    time_extended = np.append(time, time[-1] + (time[-1] - time[-2]))
    dates_extended = np.append(dates, 20230116)

    # Write lat, lon, time, and date data
    lat_var[:] = lat
    lon_var[:] = lon
    time_var[:] = time_extended
    date_var[:] = dates_extended

  # Add global attributes
    dst.data_title = "Time dependent mole fraction surface boundary conditions"
    dst.data_source = "File created by Jun Zhang"
    dst.creation_date = datetime.now().strftime("%Y-%m-%d")
    dst.data_script = "Python code (contact Jun Zhang, jzhan166@ucar.edu)"
    dst.data_summary = "All lower boundary condition fields are taken from the CMIP7 forcing website."

Processing c2f6...
Processing c3f8...
Processing c4f10...
Processing c5f12...
Processing c6f14...
Processing c7f16...
Processing c8f18...
Processing cc4f8...
Processing cf4...
Processing cfc11...
Processing cfc11eq...
Processing cfc113...
Processing cfc114...
Processing cfc115...
Processing cfc12...
Processing cfc12eq...
Processing ccl4...
Processing ch2cl2...
Processing ch3br...
Processing ch3ccl3...
Processing ch3cl...
Processing ch4...
Processing chcl3...
Processing co2...
Processing halon1211...
Processing halon1301...
Processing halon2402...
Processing hcfc141b...
Processing hcfc142b...
Processing hcfc22...
Processing hfc125...
Processing hfc134a...
Processing hfc134aeq...
Processing hfc143a...
Processing hfc152a...
Processing hfc227ea...
Processing hfc236fa...
Processing hfc245fa...
Processing hfc23...
Processing hfc32...
Processing hfc365mfc...
Processing hfc4310mee...
Processing n2o...
Processing nf3...
Processing sf6...
Processing so2f2...


In [3]:
import numpy as np
from netCDF4 import Dataset

# File path
file_y = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_c250326.nc"

# Open the file for appending
with Dataset(file_y, "a") as dst:
    print(f"Filling missing values for the last timestep in {file_y}...")

    # Loop through all variables ending with _LBC
    for var_name in dst.variables:
        if var_name.endswith("_LBC"):
            print(f"Processing variable: {var_name}")
            dst_var = dst.variables[var_name]

            # Check if the variable has a time dimension
            if "time" in dst_var.dimensions:
                # Get the current data
                species_data = dst_var[:]
                
                # Check for NaN or missing values in the last timestep
                last_timestep = species_data[-1, :]
                if np.isnan(last_timestep).any() or not last_timestep.any():
                    print(f"Filling last timestep of {var_name} with values from the previous timestep...")
                    species_data[-1, :] = species_data[-2, :]
                    dst_var[:] = species_data
                else:
                    print(f"Last timestep of {var_name} already has valid values. Skipping.")
            else:
                print(f"Variable {var_name} does not have a time dimension. Skipping.")

print(f"File {file_y} has been successfully updated with filled data.")


Filling missing values for the last timestep in /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_c250326.nc...
Processing variable: C2F6_LBC
Filling last timestep of C2F6_LBC with values from the previous timestep...
Processing variable: C3F8_LBC
Filling last timestep of C3F8_LBC with values from the previous timestep...
Processing variable: C4F10_LBC
Filling last timestep of C4F10_LBC with values from the previous timestep...
Processing variable: C5F12_LBC
Filling last timestep of C5F12_LBC with values from the previous timestep...
Processing variable: C6F14_LBC
Filling last timestep of C6F14_LBC with values from the previous timestep...
Processing variable: C7F16_LBC
Filling last timestep of C7F16_LBC with values from the previous timestep...
Processing variable: C8F18_LBC
Filling last timestep of C8F18_LBC with values from the previous timestep...
Processing variable: CC4F8_LBC
Filling last timestep of CC4F8_LBC with values from the previous timestep...

In [None]:
from netCDF4 import Dataset
import os

# File paths
file_x = "/glade/u/home/jzhan166/scripts/LBC/LBC_17500116-20230116_CMIP6_0p5degLat_truncated_then_matching_add_nonmatch_var_compressed.nc"
file_y = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_c250326.nc"
output_file = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_combine_var_c250326.nc"

# Open files
nc_x = Dataset(file_x, 'r')
nc_y = Dataset(file_y, 'a')  # Modify file_y in place or use 'w' to create a new file

# Get variables in both files
vars_x = set(nc_x.variables.keys())
vars_y = set(nc_y.variables.keys())

# Find variables to add
vars_to_add = vars_x - vars_y

print(f"Variables to add: {vars_to_add}")

# Add variables from file_x to file_y
for var_name in vars_to_add:
    var_x = nc_x.variables[var_name]

    # Create the variable in file_y
    new_var = nc_y.createVariable(
        var_name,  # Variable name
        var_x.datatype,  # Data type
        var_x.dimensions  # Dimensions
    )

    # Copy attributes
    for attr_name in var_x.ncattrs():
        setattr(new_var, attr_name, getattr(var_x, attr_name))

    # Copy data
    new_var[:] = var_x[:]

print("Variables added successfully.")

# Close files
nc_x.close()
nc_y.close()

# Optional: Create a copy of the modified file
os.rename(file_y, output_file)
print(f"Updated file saved as: {output_file}")


Variables to add: {'CHBR3_LBC', 'CH2BR2_LBC', 'OCS_LBC', 'CF2CL2_LBC', 'TOT_CLOY', 'CFC11eq_LBC', 'H2_LBC', 'TOT_BROY', 'TOT_EECL', 'CF2CLBR_LBC', 'H1202_LBC', 'CFCL3_LBC', 'CF3BR_LBC', 'TOT_FOY', 'H2402_LBC'}
Variables added successfully.
Updated file saved as: /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_combine_var_c250326.nc


In [None]:
from netCDF4 import Dataset

# File paths
file_x = "LBC_17500116-20221216_CMIP7_0p5degLat_c250212_testtime.nc"
file_y = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_combine_var_c250326.nc"

# Open both files
with Dataset(file_x, "r") as src, Dataset(file_y, "a") as dst:
    print(f"Setting the time variable in {file_y} to match {file_x}...")

    # Get the time variable from file_x
    time_x = src.variables["time"][:]

    # Update the time variable in file_y
    if "time" in dst.variables:
        dst.variables["time"][:] = time_x
        print(f"Time variable in {file_y} has been updated.")
    else:
        print(f"Error: 'time' variable not found in {file_y}. Update aborted.")

print("Process complete.")



Setting the time variable in /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_combine_var_c250326.nc to match LBC_17500116-20221216_CMIP7_0p5degLat_c250212_testtime.nc...
Time variable in /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_combine_var_c250326.nc has been updated.
Process complete.


In [None]:

from netCDF4 import Dataset
import shutil

# File paths
input_file = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_combine_var_c250326.nc"
output_file = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_cleaned_c250326.nc"

# Variables to remove
variables_to_remove = [
    "H2402_LBC", "CF2CLBR_LBC", 
    "CF3BR_LBC", "CFCL3_LBC", "CF2CL2_LBC", "CFC11eq_LBC"
]

# Open the input file
with Dataset(input_file, 'r') as src:
    # Create a new file
    with Dataset(output_file, 'w') as dst:
        # Copy global attributes
        dst.setncatts({attr: src.getncattr(attr) for attr in src.ncattrs()})
        
        # Copy dimensions
        for name, dimension in src.dimensions.items():
            dst.createDimension(
                name, (len(dimension) if not dimension.isunlimited() else None)
            )
        
        # Copy variables, excluding those to remove
        for name, variable in src.variables.items():
            if name not in variables_to_remove:
                new_var = dst.createVariable(
                    name, variable.datatype, variable.dimensions
                )
                # Copy variable attributes
                new_var.setncatts({attr: variable.getncattr(attr) for attr in variable.ncattrs()})
                # Copy variable data
                new_var[:] = variable[:]

print(f"New file created without specified variables: {output_file}")

# Replace the input file with the new file if desired
replace_input = input("Replace the original file with the cleaned file? (y/n): ").strip().lower()
if replace_input == 'y':
    shutil.move(output_file, input_file)
    print(f"The original file has been replaced with the cleaned file.")
else:
    print(f"The cleaned file is saved as: {output_file}")


New file created without specified variables: /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_cleaned_c250326.nc


Replace the original file with the cleaned file? (y/n):  n


The cleaned file is saved as: /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_cleaned_c250326.nc


In [None]:

from netCDF4 import Dataset
import shutil

# File paths
input_file = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_cleaned_c250326.nc"
output_file = "/glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_renamed_c250326.nc"

# Dictionary for renaming variables
rename_dict = {
    "HALON1211_LBC": "CF2CLBR_LBC",
    "HALON1301_LBC": "CF3BR_LBC",
    "CFC11_LBC": "CFCL3_LBC",
    "CFC12_LBC": "CF2CL2_LBC",
    # "HCC140A_LBC": "CH3CCL3_LBC",
    "HALON2402_LBC": "H2402_LBC",
    "CFC11EQ_LBC": "CFC11eq_LBC",
}

# Open the input file
with Dataset(input_file, 'r') as src:
    # Create a new file for output
    with Dataset(output_file, 'w') as dst:
        # Copy global attributes
        dst.setncatts({attr: src.getncattr(attr) for attr in src.ncattrs()})
        
        # Copy dimensions
        for name, dimension in src.dimensions.items():
            dst.createDimension(name, len(dimension) if not dimension.isunlimited() else None)
        
        # Copy variables with renamed variables
        for name, variable in src.variables.items():
            new_name = rename_dict.get(name, name)  # Rename variable if it exists in the dict
            if new_name != name:  # If it's renamed, create the new variable
                new_var = dst.createVariable(new_name, variable.datatype, variable.dimensions)
                # Copy variable attributes
                new_var.setncatts({attr: variable.getncattr(attr) for attr in variable.ncattrs()})
                # Copy variable data
                new_var[:] = variable[:]
            else:  # Copy without renaming
                new_var = dst.createVariable(name, variable.datatype, variable.dimensions)
                # Copy variable attributes
                new_var.setncatts({attr: variable.getncattr(attr) for attr in variable.ncattrs()})
                # Copy variable data
                new_var[:] = variable[:]

print(f"New file created with renamed variables: {output_file}")

# Optional: Replace the original file with the new file
replace_input = input("Replace the original file with the renamed file? (y/n): ").strip().lower()
if replace_input == 'y':
    shutil.move(output_file, input_file)
    print(f"The original file has been replaced with the renamed file.")
else:
    print(f"The renamed file is saved as: {output_file}")


New file created with renamed variables: /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_renamed_c250326.nc


Replace the original file with the renamed file? (y/n):  n


The renamed file is saved as: /glade/u/home/jzhan166/scripts/LBC/clean/LBC_17500116-20221216_CMIP7_0p5degLat_renamed_c250326.nc
