## ASTE Plots

### Imports

In [11]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import csv
import os
import pandas as pd
import re # Import for regex
from matplotlib import gridspec

In [12]:
!pip install geopandas
import geopandas as gpd



In [3]:
#from shapely.geometry import Point

### Get all the OMG data

In [13]:
def get_all_nc_files(folder):
    """Recursively collect the paths of all ``.nc`` files below ``folder``.

    Parameters
    ----------
    folder : str
        Directory to walk (sub-directories are included).

    Returns
    -------
    list of str
        One path per file whose name ends with ``.nc``, each built by joining
        the directory it was found in with the file name. The list is empty
        when nothing matches.
    """
    return [
        os.path.join(dirpath, name)
        for dirpath, _subdirs, names in os.walk(folder)
        for name in names
        if name.endswith(".nc")
    ]

In [14]:
# One list of CTD file paths per OMG month folder
omg_09_2016 = get_all_nc_files(
    '/Users/sherine_aldrin/Downloads/CoOL/ASTE_Greenland_Model/Monthly_OMG_2016/09_September'
)
omg_10_2016 = get_all_nc_files(
    '/Users/sherine_aldrin/Downloads/CoOL/ASTE_Greenland_Model/Monthly_OMG_2016/10_October'
)
omg_10_2017 = get_all_nc_files(
    '/Users/sherine_aldrin/Downloads/CoOL/ASTE_Greenland_Model/Monthly_OMG_2017/10_October'
)

#omg_09_2016[:3] for testing

# Pair each month's file list with the index used on the ASTE time axis
# for that month (176, 177 and 189 respectively).
omg_data = list(zip(
    (omg_09_2016, omg_10_2016, omg_10_2017),
    (176, 177, 189),
))

### All the ASTE data

In [15]:
aste_base_path = "/Users/sherine_aldrin/Downloads/CoOL/ASTE_Greenland_Model"

# Numeric suffixes of the ASTE files; every suffix exists once per variable.
aste_file_suffixes = ["0005", "0011", "0012", "0014", "0015", "0024", "0027"]

# All THETA files first, then all SALT files, each in suffix order.
asteData = [
    os.path.join(aste_base_path, f"{var_name}.{suffix}.nc")
    for var_name in ("THETA", "SALT")
    for suffix in aste_file_suffixes
]


### Making the CSV file with all the data

In [16]:
# returns distances between my point and all the other points, from Prof
def great_circle_distance(lon_ref, lat_ref, Lon, Lat):
    """Haversine great-circle distance from a reference point to other points.

    Parameters
    ----------
    lon_ref, lat_ref : float
        Longitude and latitude of the reference point, in degrees.
    Lon, Lat : float or array-like
        Longitudes and latitudes of the target point(s), in degrees. Arrays
        are handled element-wise by NumPy broadcasting.

    Returns
    -------
    float or numpy.ndarray
        Distance(s) in the unit of ``earth_radius`` (metres), with the same
        shape as the broadcast of ``Lon`` and ``Lat``.
    """
    earth_radius = 6371000  # metres
    lon_ref_radians = np.radians(lon_ref)
    lat_ref_radians = np.radians(lat_ref)
    lons_radians = np.radians(Lon)
    lats_radians = np.radians(Lat)
    lat_diff = lats_radians - lat_ref_radians
    lon_diff = lons_radians - lon_ref_radians
    d = (
        np.sin(lat_diff * 0.5) ** 2
        + np.cos(lat_ref_radians) * np.cos(lats_radians) * np.sin(lon_diff * 0.5) ** 2
    )
    # Mathematically d lies in [0, 1], but rounding can push it marginally
    # above 1 for (near-)antipodal points, which would make arcsin return NaN.
    d = np.clip(d, 0.0, 1.0)
    return 2 * earth_radius * np.arcsin(np.sqrt(d))

In [17]:
# Rows collected here become the lines of the output CSV.
summary_rows = []

for omg_month, time_index in omg_data:
    # get each omg file itself
    for path in omg_month:
        # Only the cast position (global attributes) is needed from the CTD
        # file, so it is closed again immediately.
        with xr.open_dataset(path) as omg:
            lat_omg = omg.attrs["latitude"]
            lon_omg = omg.attrs["longitude"]

        # for each variable in the omg file
        for var_type in ["THETA", "SALT"]:
            closest_distance = 1e22  # placeholder, larger than any real distance
            closest_lat = None
            closest_lon = None
            closest_tile = None
            closest_profile = None

            for aste_file in asteData:
                # The part of the file name before the first dot is the variable
                file_var = os.path.basename(aste_file).split('.')[0]

                if file_var != var_type:
                    continue

                # The context manager closes the file once the needed values
                # have been read into memory with `.values`.
                with xr.open_dataset(aste_file) as aste:
                    # Rename dimensions to standard ones (based on aste data)
                    aste_renamed = aste.rename({'i1': 'time', 'i2': 'depth', 'i3': 'row', 'i4': 'col'})

                    if var_type not in aste_renamed.variables:
                        print(f"Variable {var_type} not found in {aste_file}")
                        continue

                    var = aste_renamed[var_type]  # the variables, temperatures and salinities
                    aste_lon = aste["lon"].values
                    aste_lat = aste["lat"].values

                    dists = great_circle_distance(lon_omg, lat_omg, aste_lon, aste_lat)
                    tile_min_distance = dists.min()

                    if tile_min_distance < closest_distance:
                        closest_row, closest_col = np.unravel_index(np.argmin(dists), dists.shape)

                        profile = var[time_index, :, closest_row, closest_col].values

                        # An all-zero profile is rejected. The running minimum
                        # is only updated together with the accepted profile,
                        # so "Distance" always belongs to the stored point and
                        # a rejected candidate cannot shadow a farther valid one.
                        if np.any(profile != 0):
                            closest_distance = tile_min_distance
                            closest_profile = profile
                            closest_lat = aste_lat[closest_row, closest_col]
                            closest_lon = aste_lon[closest_row, closest_col]
                            closest_tile = aste_file

            if closest_profile is not None:
                ctd_filename = os.path.basename(path)

                # The second "_"-separated token of the file name starts with
                # the date: characters 0-3 are the year, 4-5 the month.
                date_part = ctd_filename.split("_")[1]
                year = date_part[:4]
                month = date_part[4:6]

                summary_rows.append({
                    "CTD_file": ctd_filename,
                    "Year": year,
                    "Month": month,
                    "ASTE_tile": closest_tile,
                    "ASTE_lat": closest_lat,
                    "Distance": closest_distance / 1000,  # metres -> kilometres
                    "ASTE_lon": closest_lon,
                    "Var_type": var_type,
                    "Profile": closest_profile.tolist()
                })

# Save results to CSV
with open("first_ASTE_profiles.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["CTD_file", "Year", "Month", "ASTE_tile", "ASTE_lat", "Distance", "ASTE_lon", "Var_type", "Profile"])
    writer.writeheader()
    writer.writerows(summary_rows)


print("Saved: first_ASTE_profiles.csv")

Saved: first_ASTE_profiles.csv


### At this point, I used QGIS's vector layer feature to determine which of the OMG/ASTE data points fall within each region. I merged them into a "joined" layer and added the resulting "Region" column to the CSV (saved as "aste_regional.csv").

In [18]:
# Make the ASTE model folder the working directory and report where we are.
new_dir = "/Users/sherine_aldrin/Downloads/CoOL/ASTE_Greenland_Model"
os.chdir(new_dir)

current_dir = os.getcwd()
print("Current directory is now:", current_dir)

Current directory is now: /Users/sherine_aldrin/Downloads/CoOL/ASTE_Greenland_Model


In [19]:
# Lookup from CTD file name (without directory) to the time index paired
# with that file's month in `omg_data`.
ctd_to_time_index = {
    os.path.basename(ctd_path): month_index
    for month_files, month_index in omg_data
    for ctd_path in month_files
}

def get_aste_date(time_index):
    """Convert an index on the monthly time axis into ``(year, month)``.

    The axis consists of 300 month-start dates beginning at January 2002,
    so index 0 is (2002, 1).

    Parameters
    ----------
    time_index : int
        Zero-based position on the monthly axis.

    Returns
    -------
    tuple
        ``(year, month)`` as integers, or ``(nan, nan)`` when ``time_index``
        lies outside the axis (negative, or 300 and above).
    """
    # Generate monthly dates from Jan 2002
    dates = pd.date_range(start='2002-01-01', periods=300, freq='MS')
    # A negative index would otherwise wrap around to the end of the axis
    # and silently return a wrong date.
    if time_index < 0 or time_index >= len(dates):
        return (np.nan, np.nan)
    date = dates[time_index]
    return date.year, date.month


# Load existing summary CSV (made from merging data in QGIS)
df = pd.read_csv("aste_regional.csv")

# Make a mapping from basename to full path for ASTE files
aste_files = {
    os.path.basename(f): f
    for f in asteData  # reuse original list of ASTE file paths
}

# Add new columns; they start as NaN and stay NaN for rows whose CTD file
# has no known time index.
df["ASTE_year"] = np.nan
df["ASTE_month"] = np.nan

# Fill new columns using the lookup. Only the CTD file name is needed per
# row, so just that column is iterated (the ASTE tile is not read here, so a
# missing tile entry cannot break the loop).
for i, ctd_file in df["CTD_file"].items():
    time_index = ctd_to_time_index.get(ctd_file)

    if time_index is None:
        continue

    year, month = get_aste_date(time_index)
    df.at[i, "ASTE_year"] = year
    df.at[i, "ASTE_month"] = month


df.to_csv("FINAL_aste_regional.csv", index=False)
print("Updated CSV saved with ASTE_year and ASTE_month columns called FINAL_aste_regional.")


Updated CSV saved with ASTE_year and ASTE_month columns called FINAL_aste_regional.


### So now the CSV with all the data is called "FINAL_aste_regional.csv".

## Now the code below will make the plots

In [24]:
# Input CSV, per-year CTD folders (relative to the working directory) and
# the output folder for the figures.
csv_path = "FINAL_aste_regional.csv"
ctd_base_paths = {year: f"./Monthly_OMG_{year}" for year in ("2016", "2017")}
plot_folder = "separated_gridspec_histograms"

# Create the output folder up front; an existing folder is fine.
os.makedirs(plot_folder, exist_ok=True)

# Load the CSV; its "Profile" column is cleaned in a later step.
df = pd.read_csv(csv_path)

# Parse ASTE profile strings and drop the 0.0 entries
def clean_profile(profile_string):
    """Parse a stringified list of numbers into a list of non-zero floats.

    Parameters
    ----------
    profile_string : str or NaN
        Text such as ``"[1.5, 2.0, 0.0]"``. Numbers may use scientific
        notation (``3.2e-05``), which is how Python prints small floats.

    Returns
    -------
    list of float
        Every number found in the text, in order, except values equal to
        0.0 (all zeros are removed, not only trailing ones). A missing
        value (NaN/None) gives an empty list.
    """
    if pd.isna(profile_string):
        return []
    # The optional exponent keeps "3.2e-05" as one number instead of
    # splitting it into 3.2 and -5.0.
    number_pattern = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
    parsed = (float(token) for token in re.findall(number_pattern, profile_string))
    return [value for value in parsed if value != 0.0]

# Replace every stored profile string with its parsed list of non-zero floats
df['Profile'] = df['Profile'].map(clean_profile)

# Function to read the OMG CTD file
def read_ctd_profile(path, var_type):
    """Read depth and one variable from an OMG CTD file.

    Parameters
    ----------
    path : str
        Path of the CTD file to open with xarray.
    var_type : str
        ``"THETA"`` reads ``potential_temperature``; ``"SALT"`` reads
        ``practical_salinity``.

    Returns
    -------
    tuple
        ``(depths, values)`` as NumPy arrays, or ``(None, None)`` when
        ``var_type`` is not one of the two names above or the file could not
        be read (a short message is printed in that case).
    """
    ctd_variable_names = {
        "THETA": "potential_temperature",
        "SALT": "practical_salinity",
    }
    variable_name = ctd_variable_names.get(var_type)
    if variable_name is None:
        # Unknown variable: nothing to read, so do not open the file at all.
        return None, None

    try:
        # The context manager closes the file even if a variable is missing.
        with xr.open_dataset(path) as ds:
            values = ds[variable_name].values
            depths = ds["depth"].values
        return depths, values
    except Exception as err:
        # Best effort: callers skip the file, but the reason is reported.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print(f"Could not read {var_type} from {path}: {err}")
        return None, None

# Compute Differences (this is what will be on the histograms)
# diffs_by_group maps (region, var_type) -> flat list of ASTE - CTD values.
diffs_by_group = {}
regions = sorted(df["Region"].dropna().unique()) # get each region

for (region, var_type), group in df.groupby(["Region", "Var_type"]):
    key = (region, var_type)
    region_diffs = []

    for _, row in group.iterrows():
        # Rebuild the CTD path as <year folder>/<MM>_<MonthName>/<file name>
        year_str = str(int(row["Year"]))
        month_num = int(row["Month"])
        month_name = pd.to_datetime(f"{year_str}-{month_num:02d}-01").strftime('%B')
        ctd_path = os.path.join(ctd_base_paths[year_str], f"{month_num:02d}_{month_name}", row["CTD_file"])

        if not os.path.exists(ctd_path):
            continue

        aste_profile = np.asarray(row["Profile"], dtype=float)
        if aste_profile.size == 0:
            continue

        ctd_depths, ctd_values = read_ctd_profile(ctd_path, var_type)
        if ctd_depths is None or ctd_values is None:
            continue

        # np.interp does not validate its sample points: they must be finite
        # and increasing. Drop non-finite samples and sort by depth; for a
        # clean, already increasing cast this changes nothing.
        # NOTE(review): assumes depths and values are 1-D and equally long,
        # as np.interp itself requires - confirm for the OMG files.
        ctd_depths = np.asarray(ctd_depths, dtype=float)
        ctd_values = np.asarray(ctd_values, dtype=float)
        finite = np.isfinite(ctd_depths) & np.isfinite(ctd_values)
        ctd_depths = ctd_depths[finite]
        ctd_values = ctd_values[finite]
        if ctd_depths.size == 0:
            continue
        depth_order = np.argsort(ctd_depths, kind="stable")
        ctd_depths = ctd_depths[depth_order]
        ctd_values = ctd_values[depth_order]

        # Both profiles are resampled onto evenly spaced depths spanning the
        # CTD cast, using the shorter of the two lengths.
        # NOTE(review): the ASTE samples are spread evenly over the CTD depth
        # range because the CSV carries no ASTE depth levels - confirm that
        # this approximation is intended.
        interp_len = min(len(aste_profile), len(ctd_values))
        common_depths = np.linspace(ctd_depths.min(), ctd_depths.max(), interp_len)
        ctd_interp = np.interp(common_depths, ctd_depths, ctd_values)
        aste_interp = np.interp(common_depths,
                                np.linspace(ctd_depths.min(), ctd_depths.max(), len(aste_profile)),
                                aste_profile)

        region_diffs.extend(aste_interp - ctd_interp)

    diffs_by_group[key] = region_diffs # the list of ASTE - CTD differences



# Region codes in top-to-bottom order for each column of the figure grid.
left_column = ["N", "NW", "CW", "SW"]
right_column = ["NE", "CE", "SE"]

# Map every region code to its (row, col) cell in the grid.
region_positions = {
    **{code: (grid_row, 0) for grid_row, code in enumerate(left_column)},
    **{code: (grid_row, 1) for grid_row, code in enumerate(right_column)},
}

# Plot order follows the insertion order of the mapping.
regions = [*region_positions]


# Plotting with GridSpec: one figure per variable, one histogram per region.
outlier_limit = 5     # differences with |value| >= this are dropped as outliers
histogram_bins = 50   # more bins! See the differences clearer

for var_type in ["THETA", "SALT"]:
    fig = plt.figure(figsize=(20, 10))
    fig.suptitle(f"{var_type} Differences Across Regions", fontsize=16)
    gs = gridspec.GridSpec(4, 2, figure=fig)

    for region in regions:
        row, col = region_positions[region]
        ax = fig.add_subplot(gs[row, col])
        key = (region, var_type)
        diffs = diffs_by_group.get(key, [])

        if len(diffs) == 0:
            ax.set_title(f"{region} (No Data)")
            ax.axis("off")
            continue

        # Clean diffs: remove NaNs and extreme outliers
        diffs = np.array(diffs)
        diffs = diffs[~np.isnan(diffs)]
        diffs = diffs[np.abs(diffs) < outlier_limit]

        if len(diffs) == 0:
            ax.set_title(f"{region} (No Valid Data)")
            ax.axis("off")
            continue

        mean_diff = np.mean(diffs)

        ax.hist(diffs, bins=histogram_bins, color="green", edgecolor="black")
        ax.axvline(mean_diff, color="red", linestyle="--", label=f"Mean = {mean_diff:.2f}")
        ax.set_title(region)
        ax.set_xlabel("ASTE - CTD")
        ax.set_ylabel("Count")
        ax.grid(True)
        ax.legend()

    # Lay out once per figure, after all panels exist; the rect leaves room
    # at the top for the suptitle.
    fig.tight_layout(rect=[0, 0, 1, 0.95])

    # Saving the figure
    filename = f"{var_type}_gridspec_cleaned.png".replace(" ", "_")
    fig.savefig(os.path.join(plot_folder, filename), dpi=300)
    # Close this specific figure so figures do not accumulate in memory.
    plt.close(fig)

print(f"Saved plots to folder: {plot_folder}")


Saved plots to folder: separated_gridspec_histograms
