To regrid and resample:
 - Rutgers (lat/lon regrid)
 - Albedo (lat/lon regrid)

In [None]:
from ftplib import FTP
import os

# ---------- USER SETTINGS ----------
ftp_host = "stratus.ssec.wisc.edu"
ftp_user = "anonymous"
ftp_pass = "skycgale@uw.edu"
base_dir = "pub/xuanjiw/APPx_For_SkyGale"  # remote directory, walked one component at a time after login
download_dir = "APPx_data"  # local directory that receives every download

# Files that live directly in base_dir.
main_files = [
    "nlat_APPx_25km.dat",
    "nlon_APPx_25km.dat",
    "nlat_APPx_25km.dat.README",
    "nlon_APPx_25km.dat.README",
    "read_extracted_caspr_params_months.pro"
]

# Sub-directories of base_dir whose entire contents are downloaded.
data_subdirs = [
    "APPx_25km_Arctic_1982-2024_Monthly_1400_broadalb",
    "APPx_25km_Arctic_1982-2024_Monthly_1400_swdnsrf",
    "APPx_25km_Arctic_1982-2024_Monthly_1400_swupsrf",
    "APPx_25km_Arctic_1982-2024_Monthly_1400_landmask2"
]

# ---------- CONNECT & LOGIN ----------
# The context manager sends QUIT and closes the socket even if a download fails.
with FTP(ftp_host) as ftp:
    ftp.login(user=ftp_user, passwd=ftp_pass)
    print(f"Connected to {ftp_host}")

    # ---------- DOWNLOAD MAIN FILES ----------
    os.makedirs(download_dir, exist_ok=True)
    # Walk into base_dir component by component so the setting above is the
    # single source of truth for the remote location.
    for path_part in base_dir.split("/"):
        ftp.cwd(path_part)
    print(f"Changed directory to: {base_dir}")
    print(f"Contents of {base_dir}:")

    # List directory contents
    files = ftp.nlst()
    for name in files:
        print("  -", name)

    for filename in main_files:
        local_path = os.path.join(download_dir, filename)
        with open(local_path, "wb") as f:
            ftp.retrbinary(f"RETR {filename}", f.write)
            print(f"Downloaded: {filename}")

    # ---------- DOWNLOAD SUBFOLDER CONTENTS ----------
    for subdir in data_subdirs:
        local_subdir_path = os.path.join(download_dir, subdir)
        os.makedirs(local_subdir_path, exist_ok=True)

        # Enter the sub-directory relative to base_dir (the current directory).
        ftp.cwd(subdir)
        # Some servers include the "." and ".." entries in NLST output; skip them.
        files = [name for name in ftp.nlst() if name not in (".", "..")]

        print(f"\nDownloading {len(files)} files from: {subdir}")
        for file in files:
            local_file_path = os.path.join(local_subdir_path, file)
            with open(local_file_path, "wb") as f:
                ftp.retrbinary(f"RETR {file}", f.write)
                print(f"  → {file}")

        # Return to base_dir so the next sub-directory name resolves correctly.
        ftp.cwd("..")

# ---------- CLEANUP ----------
# The connection was already closed by the `with` block above.
print("\nAll downloads complete.")

In [7]:
import xarray as xr
import numpy as np
import xesmf as xe
import pyproj
import pandas as pd

In [9]:
# Open the raw input files. The ERA5 wind and MODIS Terra inputs are
# currently disabled and kept here only for reference.
# u10_data = xr.open_dataset('./raw_data/ERA5_winds_data.nc').u10
# v10_data = xr.open_dataset('./raw_data/ERA5_winds_data.nc').v10
# SCE_data2 = xr.open_dataset('/pscratch/sd/s/skygale/snow/MODIS_Terra_SCE.nc')  # .snow_cover
SCE_data1 = xr.open_dataset('/pscratch/sd/s/skygale/snow/Rutgers_SCE.nc')  # .snow_cover_extent
ALB_data = xr.open_dataset('/pscratch/sd/s/skygale/snow/CLARA_ALB.nc')  # .white_sky_albedo_all_mean

# Print the dimension sizes of every opened dataset, one per line.
datasets = [SCE_data1, ALB_data]
print(*(opened.dims for opened in datasets), sep="\n")



In [10]:
# Display the albedo dataset opened in the loading cell so its variables and
# coordinates can be inspected before regridding.
ALB_data

### Lat & Lon Regrid

In [19]:
# --- Open the albedo file and pick the variable to regrid ---
ds = xr.open_dataset('/pscratch/sd/s/skygale/snow/CLARA_ALB.nc')
data_var_name = "white_sky_albedo_all_mean"
data_var = ds[data_var_name]

# --- Convert the time coordinate to datetime64 if it is stored as another dtype ---
time_is_datetime = np.issubdtype(ds.time.dtype, np.datetime64)
if not time_is_datetime:
    ds["time"] = pd.to_datetime(ds.time.values)

# --- Keep only March and April of the years 1980 through 2022 ---
years = ds.time.dt.year
months = ds.time.dt.month
in_period = (years >= 1980) & (years <= 2022)
in_season = months.isin([3, 4])
ds_sel = ds.sel(time=in_period & in_season)

# --- Target grid: 2.5° x 2.5° cell centres ---
lat_out = np.arange(-88.75, 88.751, 2.5)
lon_out = np.arange(1.25, 358.751, 2.5)
ds_out = xr.Dataset({"lat": (["lat"], lat_out), "lon": (["lon"], lon_out)})

# --- Regrid one time step at a time ---
# A fresh source grid and regridder are built per step because the latitude
# array is selected by time (original note: lat is time-varying, lon is static).
regridded_list = []
for stamp in ds_sel.time.values:
    source_grid = xr.Dataset({
        "lat": (["y", "x"], ds["lat"].sel(time=stamp).values),
        "lon": (["y", "x"], ds["lon"].values),
        "var": (["y", "x"], ds_sel[data_var_name].sel(time=stamp).values),
    })

    # Nearest-neighbour (source-to-destination) remapping.
    # NOTE(review): the earlier comment justified this method as "safe for
    # categorical/flagged snow data", but the variable here is albedo —
    # confirm that nearest_s2d is the intended method.
    remap = xe.Regridder(source_grid, ds_out, method="nearest_s2d", periodic=True, reuse_weights=False)
    remapped = remap(source_grid["var"])
    regridded_list.append(remapped.expand_dims(time=[stamp]))

# --- Stack the per-step results along time and name the array ---
regridded_all = xr.concat(regridded_list, dim="time")
regridded_all.name = data_var_name

# --- Attach the target-grid and time coordinates explicitly ---
for axis_name, axis_values in (("lat", lat_out), ("lon", lon_out), ("time", ds_sel.time.values)):
    regridded_all.coords[axis_name] = axis_values

regridded_all

In [None]:
# Optional: save the regridded albedo produced by the "Lat & Lon Regrid" cell.
# `regridded_all` is that cell's result; `da_regridded` belongs to the
# snow-cover regrid further down and must not be written to the albedo file.
regridded_all.to_netcdf("./processed_data/ALB_gridded.nc")

### Simple Regrid

In [3]:
# --- Open your original dataset ---
ds = xr.open_dataset('/pscratch/sd/s/skygale/snow/MODIS_Terra_SCE.nc')
data_var = ds["snow_cover"]

# --- Ensure time is in datetime format ---
if not np.issubdtype(data_var.time.dtype, np.datetime64):
    data_var["time"] = pd.to_datetime(data_var.time.values)

# --- Select only March & April from 1980–2022 ---
da = data_var.sel(
    time=((data_var.time.dt.year >= 1980) & (data_var.time.dt.year <= 2022)) &
         (data_var.time.dt.month.isin([3, 4]))
)

# --- Clean & decode flag values ---
# Convert flag 254 and 255 to NaN (missing)
da_clean = da.where(~da.isin([254, 255]))

# Convert snow flags to binary snow (1) / no snow (0)
# Example: 100 = snow, 0 = no snow
# Every value other than 100 or 0 becomes NaN.
# You may want to preserve other flags if needed
da_clean = xr.where(da_clean == 100, 1.0, xr.where(da_clean == 0, 0.0, np.nan))

# --- Ensure coordinate names ---
if "latitude" in da_clean.coords and "longitude" in da_clean.coords:
    da_clean = da_clean.rename({"latitude": "lat", "longitude": "lon"})

# --- Create global 2.5° grid within valid lat range ---
# Read the bounds from da_clean, not da: the rename above was applied only to
# da_clean, so `da.lat` would fail when the file uses latitude/longitude.
lat_min, lat_max = float(da_clean.lat.min()), float(da_clean.lat.max())

lats_full = np.arange(-88.75, 88.751, 2.5)
lons_full = np.arange(1.25, 358.751, 2.5)

# Keep only target latitudes that fall inside the source latitude range.
lats_valid = lats_full[(lats_full >= lat_min) & (lats_full <= lat_max)]

ds_out = xr.Dataset({
    "lat": (["lat"], lats_valid),
    "lon": (["lon"], lons_full),
})

# --- Regrid safely ---
# Try bilinear first; if building the regridder raises ValueError, fall back
# to nearest-neighbour (source-to-destination).
try:
    regridder = xe.Regridder(da_clean, ds_out, method="bilinear", periodic=True, reuse_weights=False)
except ValueError:
    print("Bilinear failed, falling back to nearest_s2d.")
    regridder = xe.Regridder(da_clean, ds_out, method="nearest_s2d", periodic=True, reuse_weights=False)

da_regridded = regridder(da_clean)

# --- Finalize ---
# NOTE(review): xr.where may not carry attributes over, so da_clean.attrs can
# be empty here — confirm whether the source attributes should be copied instead.
da_regridded.name = da_clean.name
da_regridded.attrs.update(da_clean.attrs)

# --- Done! ---
da_regridded

Bilinear failed, falling back to nearest_s2d.


In [6]:
# Optional: write the regridded snow-cover array to a NetCDF file.
sce_output_path = "./processed_data/SCE2_gridded.nc"
da_regridded.to_netcdf(sce_output_path)