In [23]:
import os
import re
import numpy as np
import xarray as xr
from pyproj import Proj, Transformer

def read_snowice_bin(filepath, rows=720, cols=720):
    """Read one flat-binary snow/ice grid into a masked 2-D array.

    The file is interpreted as ``rows * cols`` unsigned 8-bit values laid
    out row by row. Cells whose value is 255 are masked as missing.

    Parameters
    ----------
    filepath : str or os.PathLike
        Path of the ``.bin`` file to read.
    rows, cols : int, optional
        Grid dimensions. The defaults (720 x 720) keep the behaviour of the
        original fixed-size reader.

    Returns
    -------
    numpy.ma.MaskedArray
        ``(rows, cols)`` array of dtype ``uint8`` with 255-valued cells masked.

    Raises
    ------
    ValueError
        If the file does not hold exactly ``rows * cols`` values. The message
        names the offending file so a bad download is easy to locate.
    """
    with open(filepath, 'rb') as f:
        raw = np.fromfile(f, dtype=np.uint8)

    # Check the size up front: a bare reshape error would not say which file
    # out of a long batch is truncated or belongs to a different grid.
    expected = rows * cols
    if raw.size != expected:
        raise ValueError(
            f"{filepath}: expected {expected} values for a {rows}x{cols} grid, "
            f"found {raw.size}"
        )

    grid = raw.reshape((rows, cols))

    # Mask missing values (255 = missing)
    return np.ma.masked_where(grid == 255, grid)

def convert_bin_to_netcdf(bin_files, output_nc_path):
    """Combine snow/ice ``.bin`` grids into a single NetCDF file.

    Every input file name must contain a ``.YYYYMMDD-YYYYMMDD.`` date range
    (e.g. ``EASE2_N25km.snowice.19661003-19661009.v04.bin``). The start date
    places the grid on the time axis. Files without such a date range are
    reported and skipped without being read.

    The written dataset contains:

    * ``snow_ice_classification`` with dims ``(time, y, x)``, stored as
      ``uint8`` with ``_FillValue`` 255 for missing cells;
    * ``lat`` / ``lon`` with dims ``(y, x)``: cell-centre coordinates of a
      25 km north-polar Lambert azimuthal equal-area grid centred on the pole;
    * ``time``: integer days since 1980-03-01 (``int32`` on disk).

    Parameters
    ----------
    bin_files : iterable of str or os.PathLike
        Paths of the ``.bin`` files. They are processed in sorted order.
    output_nc_path : str or os.PathLike
        Destination of the NetCDF file.

    Raises
    ------
    ValueError
        If no input file carries a usable date range (nothing to write), or
        if a date in a file name is not a valid calendar date.
    """
    # Regex to extract start date YYYYMMDD from filenames like:
    # EASE2_N25km.snowice.19661003-19661009.v04.bin
    date_pattern = re.compile(r'\.(\d{8})-\d{8}\.')

    data_list = []
    times = []
    for fname in sorted(bin_files):
        # Match on the base name only, so digits in a directory name can
        # never be mistaken for the date range.
        match = date_pattern.search(os.path.basename(fname))
        if not match:
            print(f"Skipping file (no date found): {fname}")
            continue

        # np.datetime64 expects ISO 8601 with dashes; a bare 'YYYYMMDD'
        # string is not reliably read as year-month-day, so build the ISO
        # form explicitly.
        ymd = match.group(1)
        times.append(np.datetime64(f"{ymd[:4]}-{ymd[4:6]}-{ymd[6:]}", 'D'))

        # Read only after the date check so skipped files cost nothing and
        # cannot abort the run.
        data_list.append(read_snowice_bin(fname))

    if not data_list:
        raise ValueError(
            "No input files with a YYYYMMDD-YYYYMMDD date range were found; "
            "nothing to convert"
        )

    # Stack into 3D array: (time, y, x)
    data_array = np.ma.stack(data_list)
    _, ny, nx = data_array.shape

    # Time coordinate as whole days relative to the reference date
    ref_date = np.datetime64('1980-03-01')
    time_nums = (np.array(times) - ref_date).astype('timedelta64[D]').astype('int64')

    data_xr = xr.DataArray(
        data_array,
        coords={'time': time_nums, 'y': np.arange(ny), 'x': np.arange(nx)},
        dims=['time', 'y', 'x'],
        name='snow_ice_classification'
    )

    # Add lat/lon coordinates for EASE2 25km grid
    res = 25000  # 25 km grid spacing in meters

    # The grid is centred on the projection origin, so its outer edges sit at
    # +/- (n / 2) * res. Cell centres lie half a cell inside those edges;
    # row 0 is the top of the grid (largest y).
    x = (np.arange(nx) - nx / 2 + 0.5) * res
    y = (ny / 2 - np.arange(ny) - 0.5) * res
    xg, yg = np.meshgrid(x, y)

    proj_ease = Proj('+proj=laea +lat_0=90 +lon_0=0 +datum=WGS84 +units=m')
    transformer = Transformer.from_proj(proj_ease, 'epsg:4326', always_xy=True)
    lon, lat = transformer.transform(xg, yg)

    ds = xr.Dataset({
        'snow_ice_classification': data_xr,
        'lat': (('y', 'x'), lat),
        'lon': (('y', 'x'), lon),
    })

    # Set attributes for the time coordinate (variable)
    ds['time'].attrs['units'] = 'days since 1980-03-01 00:00:00'
    ds['time'].attrs['calendar'] = 'standard'

    # Encoding dictionary only with valid keys for netCDF4 backend
    encoding = {
        'time': {
            'dtype': 'int32',
            '_FillValue': None
        },
        'snow_ice_classification': {
            # xarray turns masked cells into NaN (promoting the data to
            # float); pin the on-disk type back to uint8 so the file stores
            # the original byte codes, with 255 marking missing cells.
            'dtype': 'uint8',
            '_FillValue': 255  # mask missing values
        }
    }

    ds.to_netcdf(output_nc_path, encoding=encoding)
    print(f"Saved NetCDF to: {output_nc_path}")


if __name__ == "__main__":
    # Example usage: convert every .bin file found in the data folder into
    # one NetCDF file.
    data_dir = './data/nsidc0046_data'  # your folder with .bin files
    output_nc = './snow_ice_data.nc'

    # Keep only the binary grids; anything else in the folder is ignored.
    bin_files = [
        os.path.join(data_dir, entry)
        for entry in os.listdir(data_dir)
        if entry.endswith('.bin')
    ]
    convert_bin_to_netcdf(bin_files, output_nc)


Saved NetCDF to: ./snow_ice_data.nc


In [26]:
import xarray as xr

# Open the file at the path the conversion cell writes to ('./snow_ice_data.nc'),
# so a fresh Restart & Run All inspects the file that was just produced.
# decode_times=False leaves `time` as the stored integer day offsets.
ds = xr.open_dataset('./snow_ice_data.nc', decode_times=False)
ds