# Remove All Additional Variables

In [1]:
from datetime import datetime, timedelta

# First and last day of the processing window (both included)
start_date = datetime(2015, 12, 1)
end_date = datetime(2016, 1, 31)

# One 'YYYYMMDD' string per calendar day between start_date and end_date
n_days = (end_date - start_date).days + 1
date_list = [
    (start_date + timedelta(days=offset)).strftime('%Y%m%d')
    for offset in range(n_days)
]
# Cursor value after the last day, i.e. one day past end_date
current_date = start_date + timedelta(days=n_days)

In [2]:
import netCDF4 as nc
def modify_filedesc(dataset, num_vars=56):
    """Rewrite the variable count stored in the ``FILEDESC`` global attribute.

    Looks for the text ``NUMBER OF VARIABLES/`` followed by an integer inside
    ``FILEDESC``, prints the integer currently recorded there, and replaces it
    with ``num_vars`` right-aligned in a four-character field. When the marker
    is missing, a message is printed and the attribute is written back
    unchanged.

    Parameters
    ----------
    dataset
        Object exposing ``getncattr(name)`` and ``setncattr(name, value)``.
    num_vars : int, optional
        Variable count to write into the description (default 56).

    Returns
    -------
    None
    """
    import re

    filedesc = dataset.getncattr('FILEDESC')

    # Group 1 is the marker text, group 2 the digits that follow it.
    pattern = r'(NUMBER OF VARIABLES/)\s*(\d+)'

    # Report the count currently stored in the description.
    match = re.search(pattern, filedesc)
    if match:
        # Read the digits from the capture group rather than from the repr of
        # the match object, so any number of digits is parsed correctly.
        current_number_of_variables = int(match.group(2))
        print(f"Current number of variables: {current_number_of_variables}")
    else:
        print("Number of variables not found in filedesc.")

    # A callable replacement avoids any ambiguity between the group reference
    # and the digits that follow it.
    new_filedesc = re.sub(
        pattern,
        lambda found: f"{found.group(1)}{num_vars:>4}",
        filedesc,
    )
    dataset.setncattr('FILEDESC', new_filedesc)

def remove_variables(dataset, vars_to_delete):
    """Drop the given names from the dataset's ``VAR-LIST`` global attribute.

    Each name in ``vars_to_delete`` is removed from the ``VAR-LIST`` string
    together with the whitespace that follows it. Names are matched as whole
    words, so removing ``GLY`` leaves ``GLYD`` in place. The attribute value is
    printed before and after the edit and then written back to ``dataset``.

    Parameters
    ----------
    dataset
        Object exposing ``getncattr(name)`` and ``setncattr(name, value)``.
    vars_to_delete : iterable of str
        Variable names to remove from the list.

    Returns
    -------
    None
    """
    import re
    input_string = dataset.getncattr('VAR-LIST')
    print(input_string)
    for var in vars_to_delete:
        # Escape the name so regex metacharacters in it are matched literally;
        # \b keeps the match to whole names, \s* also removes the padding.
        pattern = rf'\b{re.escape(var)}\b\s*'
        input_string = re.sub(pattern, '', input_string)
    # NOTE(review): nine spaces are appended unconditionally — presumably
    # padding expected by whatever reads VAR-LIST; confirm the required width.
    input_string +=  "         "
    print(input_string)
    dataset.setncattr('VAR-LIST', input_string)
    return

def modify_nvars(dataset, num=56):
    """Set the dataset's ``NVARS`` global attribute to ``num``.

    Parameters
    ----------
    dataset
        Object exposing ``getncattr(name)`` and ``setncattr(name, value)``.
    num : int, optional
        Variable count to store (default 56).

    Returns
    -------
    None
    """
    # Store as a 32-bit integer to prevent an "L" suffix, i.e. "55L" instead of 55.
    num_short = np.int32(num)
    dataset.setncattr('NVARS', num_short)
    print(f"Modified NVARS {dataset.getncattr('NVARS')}")
    return

def modify_nc_file(input_file, output_file, vars_to_delete):
    """Copy a netCDF file to ``output_file`` without the listed variables.

    The function
      1. copies every global attribute (printing each attribute name),
      2. rewrites ``FILEDESC``, ``VAR-LIST`` and ``NVARS`` on the output via
         ``modify_filedesc``, ``remove_variables`` and ``modify_nvars``,
      3. copies the dimensions, resizing ``VAR`` to 56,
      4. copies ``TFLAG`` truncated/padded to 56 entries along its second axis,
      5. copies every other variable whose name is not in ``vars_to_delete``,
         together with its attributes.

    Parameters
    ----------
    input_file : str
        Path of the netCDF file to read.
    output_file : str
        Path of the netCDF file to create (opened in ``'w'`` mode).
    vars_to_delete : iterable of str
        Names of the variables to leave out of the output.

    Returns
    -------
    None

    Warns
    -----
    UserWarning
        If the number of variables actually copied (excluding ``TFLAG``)
        differs from the 56 written to the header, VAR dimension and TFLAG.
    """
    import warnings

    # Count written to the output header. modify_filedesc() and modify_nvars()
    # are called below without an explicit count and write 56, so the VAR
    # dimension and TFLAG must use the same number.
    n_vars_out = 56
    drop = set(vars_to_delete)

    with nc.Dataset(input_file, 'r') as src, nc.Dataset(output_file, 'w') as dst:

        # Flag a header/content mismatch instead of silently writing it.
        n_kept = sum(
            1 for name in src.variables
            if name != "TFLAG" and name not in drop
        )
        if n_kept != n_vars_out:
            warnings.warn(
                f"{n_kept} data variables will be copied but the header "
                f"attributes, VAR dimension and TFLAG are written for "
                f"{n_vars_out}",
                stacklevel=2,
            )

        # Copy global attributes
        for name in src.ncattrs():
            print(name)
            dst.setncattr(name, src.getncattr(name))

        # set file description
        modify_filedesc(dst)

        # set var list
        remove_variables(dst, vars_to_delete)

        # set nvars
        modify_nvars(dst)

        # Copy dimensions; VAR is resized, unlimited dimensions stay unlimited
        for name, dimension in src.dimensions.items():
            if name == "VAR":
                dst.createDimension(name, n_vars_out)
            else:
                dst.createDimension(
                    name, None if dimension.isunlimited() else len(dimension)
                )

        # Copy variables, except those listed in vars_to_delete
        for name, variable in src.variables.items():
            if name == "TFLAG":
                # TFLAG carries one entry per variable along its second axis,
                # so it has to be cut down to the new VAR size. The number of
                # time steps and the size of the last axis are taken from the
                # source instead of being hard-coded.
                src_flags = variable[:, :n_vars_out, :]
                data = np.zeros(
                    (src_flags.shape[0], n_vars_out, src_flags.shape[2]),
                    dtype=variable.dtype,
                )
                data[:, :src_flags.shape[1], :] = src_flags
            elif name in drop:
                continue
            else:
                data = variable[:]

            out_var = dst.createVariable(name, variable.datatype, variable.dimensions)
            out_var[:] = data
            # Copy variable attributes
            for attr_name in variable.ncattrs():
                out_var.setncattr(attr_name, variable.getncattr(attr_name))
    

In [3]:
import numpy as np
import netCDF4 as nc
# Directory that holds the premerged rwc emission files
file_dir = r"/projects/b1045/SMOKE.EMF/2016_beta/2016ff_16j/premerged/rwc/"

# Variables to leave out of the rewritten file
vars_to_delete = ['AACD', 'APIN', 'FACD', 'GLY', 'GLYD', 'ISPD', 'IVOC', 'MGLY', 'NMOG', 'PACD']

# Input is the "_cp" copy for 20160102; output is the same name without "_cp"
file_stem = "mod_emis_mole_rwc_20160102_CONUS4K_d02_cmaq_cb6_2016ff_16j"
file_path = f"{file_dir}{file_stem}_cp.ncf"
original_file_path = file_path
modified_file_path = f"{file_dir}{file_stem}.ncf"

# Write the reduced file
modify_nc_file(original_file_path, modified_file_path, vars_to_delete)

IOAPI_VERSION
EXEC_ID
FTYPE
CDATE
CTIME
WDATE
WTIME
SDATE
STIME
TSTEP
NTHIK
NCOLS
NROWS
NLAYS
NVARS
GDTYP
P_ALP
P_BET
P_GAM
XCENT
YCENT
XORIG
YORIG
XCELL
YCELL
VGTYP
VGTOP
VGLVLS
GDNAM
UPNAM
VAR-LIST
FILEDESC
HISTORY
Current number of variables: 66
FORM            FORM_PRIMARY    NMOG            BENZ            ALD2            ALD2_PRIMARY    NAPH            BUTADIENE13     ACROLEIN        CO              NH3             NH3_FERT        AACD            ACET            ALDX            APIN            CH4             ETH             ETHA            ETHY            ETOH            FACD            GLY             GLYD            IOLE            ISOP            ISPD            IVOC            KET             MEOH            MGLY            NVOL            OLE             PACD            PAR             PRPA            SOAALK          TERP            TOL             UNR             XYLMN           HONO            NO              NO2             PAL             PCA             PCL            

In [None]:
nccopy -k '64-bit offset' mod_emis_mole_rwc_20160102_CONUS4K_d02_cmaq_cb6_2016ff_16j.ncf emis_mole_rwc_20160102_CONUS4K_d02_cmaq_cb6_2016ff_16j.ncf



In [10]:
import numpy as np
import netCDF4 as nc

# Directory that holds the daily rwc emission files
file_dir = r"/projects/b1045/SMOKE.EMF/2016_beta/2016ff_16j/premerged/rwc/"
# Variables to leave out of every rewritten file
vars_to_delete = ['AACD', 'APIN', 'FACD', 'GLY', 'GLYD', 'ISPD', 'IVOC', 'MGLY', 'NMOG', 'PACD']

# Rewrite the file of every day listed in date_list
for date in date_list:
    daily_name = f"emis_mole_rwc_{date}_CONUS4K_d02_cmaq_cb6_2016ff_16j.ncf"

    # Source file for this day and the "mod_"-prefixed file created beside it
    file_path = file_dir + daily_name
    original_file_path = file_path
    modified_file_path = file_dir + "mod_" + daily_name

    # Write the reduced file
    modify_nc_file(original_file_path, modified_file_path, vars_to_delete)

IOAPI_VERSION
EXEC_ID
FTYPE
CDATE
CTIME
WDATE
WTIME
SDATE
STIME
TSTEP
NTHIK
NCOLS
NROWS
NLAYS
NVARS
GDTYP
P_ALP
P_BET
P_GAM
XCENT
YCENT
XORIG
YORIG
XCELL
YCELL
VGTYP
VGTOP
VGLVLS
GDNAM
UPNAM
VAR-LIST
FILEDESC
HISTORY
Current number of variables: 66
FORM            FORM_PRIMARY    NMOG            BENZ            ALD2            ALD2_PRIMARY    NAPH            BUTADIENE13     ACROLEIN        CO              NH3             NH3_FERT        AACD            ACET            ALDX            APIN            CH4             ETH             ETHA            ETHY            ETOH            FACD            GLY             GLYD            IOLE            ISOP            ISPD            IVOC            KET             MEOH            MGLY            NVOL            OLE             PACD            PAR             PRPA            SOAALK          TERP            TOL             UNR             XYLMN           HONO            NO              NO2             PAL             PCA             PCL            

# Reading and modifying binary file

In [42]:
# File whose raw bytes are inspected
grdmat_path = '/projects/b1045/SMOKE.EMF/2016_beta/smoke4.6/Linux2_x86_64ifort.beta/grdmat'

# Interpret the file's bytes as a flat float32 array.
# float32 is only an assumption about the contents; change dtype as needed.
with open(grdmat_path, 'rb') as f:
    data = np.fromfile(f, dtype=np.float32)

<_io.BufferedReader name='/projects/b1045/SMOKE.EMF/2016_beta/smoke4.6/Linux2_x86_64ifort.beta/grdmat'>


In [43]:
# Show the array read from the binary file (numpy abbreviates long arrays)
data

array([1.3073374e+04, 9.2197031e-41, 0.0000000e+00, ..., 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00], dtype=float32)

In [48]:
# Largest element of the array; the result is nan when any element is NaN
np.max(data)

nan

## Checking the original totals of the removed species

In [None]:
# Open the original file for inspection.
# NOTE(review): `xr` (presumably xarray) is not imported in any visible cell —
# confirm the import exists before running this cell.
rwc_2020_original = xr.open_dataset("emis_mole_rwc_201601_original_2020_RWC.nc")

In [None]:
emis_mole_rwc_201601_original_2020_RWC.nc