In [1]:
from pathlib import Path
import numpy as np
import xarray as xr
import geopandas as gp
import pylab as plt
import pandas as pd
from matplotlib import colors, cm
from matplotlib.colors import LightSource

from pypism import profiles
from pypism.profiles import extract_profile, process_profile
from pypism.utils import preprocess_nc
from pypism.hillshade import hillshade
from pypism.utils import qgis2cmap

In [2]:
# Flux-gate profiles used for the comparison. For a quick run against the
# bundled test data, point this at
# Path("../tests/data/greenland-flux-gates-29_500m.gpkg") instead.
profiles_path = Path("../data/greenland-flux-gates.gpkg")

# Maximum vertex spacing handed to `segmentize` (expressed in the units of the
# profile CRS), so every gate is sampled densely along its length.
profile_resolution = 500

# The "id" column is renamed so it matches the "profile_id" key used downstream.
profiles_gp = gp.read_file(profiles_path).rename(columns={"id": "profile_id"})
geom = profiles_gp.segmentize(profile_resolution)
profiles_gp = gp.GeoDataFrame(profiles_gp, geometry=geom)

In [3]:
# Observed surface velocities; `obs_ds["v"]` and `obs_ds["v_err"]` are read further down.
# NOTE(review): absolute, user-specific path — this cell only runs on the author's
# machine; consider deriving it from a configurable data directory.
obs_file = Path("/Users/andy/Google Drive/My Drive/data/ITS_LIVE/GRE_G0240_0000.nc")
obs_ds = xr.open_dataset(obs_file)

In [4]:
# Simulation output to compare against the observations. For a quick run against
# the bundled test data use Path("../tests/data") with the pattern "vel*.nc".
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
pism_dir = Path("/Users/andy/Google Drive/My Drive/Projects/gris-calib/data")

# glob() yields files in arbitrary order; sorting makes the order in which the
# files are concatenated reproducible from run to run.
pism_files = sorted(pism_dir.glob("velsurf_mag_gris*.nc"))

In [5]:
# Open every simulation file lazily, run `preprocess_nc` on each one, and
# concatenate them (in list order) along the "exp_id" dimension.
sim_ds = xr.open_mfdataset(
    pism_files,
    combine="nested",
    concat_dim="exp_id",
    preprocess=preprocess_nc,
    parallel=True,
)

In [6]:
# Display the segmentized flux-gate table as the cell output.
profiles_gp

Unnamed: 0,profile_id,name,length,clon,clat,flightline,gtype,ftype,basin,geometry
0,148,Kangerdlugssuaq Gletscher,7208.0,-30.0285,67.50683,2.0,0.0,0,3.3,"LINESTRING (483605.659 -2294795.528, 483889.74..."
1,20,Helheimgletscher,7798.0,-32.2970,66.05382,2.0,0.0,0,4.1,"LINESTRING (304477.940 -2581625.466, 304515.55..."
2,15,Nioghalvfjerdsfiorden (79North),33179.0,-16.3719,79.97490,1.0,2.0,1,2.1,"LINESTRING (452477.632 -1081965.747, 452049.74..."
3,16,Zachariæ Isstrøm,26328.0,-16.0252,78.60965,1.0,2.0,1,2.1,"LINESTRING (491812.972 -1112203.670, 491609.74..."
4,12,Humboldt Gletscher,109909.0,-69.2475,79.58482,1.0,1.0,1,1.1,"LINESTRING (-342282.835 -1040260.781, -342617...."
...,...,...,...,...,...,...,...,...,...,...
175,175,Qooqqup Sermia,,,,0.0,4.0,0,5.0,"LINESTRING (-10354.025 -3181549.827, -9992.162..."
176,176,Qajuuttap Sermia,,,,0.0,,0,5.0,"LINESTRING (-43677.623 -3164116.053, -43364.00..."
177,177,Sermiligaarssuk Bræ,,,,1.0,,0,5.0,"LINESTRING (-173830.797 -3127615.042, -173467...."
178,178,Avannarleq Bræ,,,,1.0,,0,5.0,"LINESTRING (-210716.174 -3055524.016, -210450...."


In [7]:
import contextlib
import joblib
from joblib import Parallel, delayed
from tqdm.auto import tqdm

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Temporarily make joblib report finished batches to ``tqdm_object``.

    While the ``with`` block is active, ``joblib.parallel.BatchCompletionCallBack``
    is swapped for a subclass that advances the progress bar by the batch size
    each time a batch completes. On exit (normal or via an exception) the
    original callback class is put back and the progress bar is closed.

    Yields:
        The same ``tqdm_object`` that was passed in.
    """
    parallel_module = joblib.parallel
    original_callback = parallel_module.BatchCompletionCallBack

    class _ProgressReportingCallback(original_callback):
        """Batch-completion callback that also ticks the progress bar."""

        def __call__(self, *args, **kwargs):
            # Advance the bar first, then let joblib do its normal bookkeeping.
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    parallel_module.BatchCompletionCallBack = _ProgressReportingCallback
    try:
        yield tqdm_object
    finally:
        # Always undo the monkey patch, even if the parallel run failed.
        parallel_module.BatchCompletionCallBack = original_callback
        tqdm_object.close()


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
n_jobs = 8

# One delayed `process_profile` call per flux gate; `p` is the row label of the
# profile in `profiles_gp`. The generator is consumed lazily by `Parallel`.
profile_tasks = (
    delayed(process_profile)(profile, p, obs_ds, sim_ds)
    for p, profile in profiles_gp.iterrows()
)

progress = tqdm(desc="Processing profiles", total=len(profiles_gp))
with tqdm_joblib(progress) as progress_bar:
    result = Parallel(n_jobs=n_jobs)(profile_tasks)

  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
  rmsd = np.sqrt(np.n

In [9]:
from typing import Tuple

# def calculate_stats(df: pd.DataFrame, col1: str, col2: str) -> pd.DataFrame:
#     """
#     Calculate Pearson correlation and root mean square difference between two DataFrame columns.
#     """
#     pearson_r = df[col2].corr(df[col1])
#     rmsd = np.sqrt(np.nanmean((df[col1] - df[col2])**2))
#     return pd.DataFrame(data=[[pearson_r, rmsd]], columns=["pearson_r", "rmsd"])

# def process_profile(profile, p: int, obs_ds: xr.Dataset, sim_ds: xr.Dataset, crs: str = "epsg:3413") -> Tuple[xr.Dataset, xr.Dataset, pd.DataFrame]:

#     x, y = map(np.asarray, profile["geometry"].xy)
#     def extract_and_prepare(ds: xr.Dataset) -> xr.Dataset:
#         ds_profile = ds.profiles.extract_profile(x, y)
#         ds_profile = ds_profile.expand_dims(dim="profile_id")
#         ds_profile["profile_id"] = [p]
#         return ds_profile

#     obs_profile = extract_and_prepare(obs_ds)
#     sims_profile = extract_and_prepare(sim_ds)

#     obs_df = obs_profile.to_dataframe().reset_index()
#     sims_df = sims_profile.to_dataframe().reset_index()
    
#     profile_gp = gp.GeoDataFrame([profile], geometry=[profile.geometry], crs=crs)
#     intersection_keys = list(set(obs_df.columns) & set(sims_df.columns))
#     obs_sims_df = pd.merge(obs_df, sims_df, on=intersection_keys)
#     stats = obs_sims_df.groupby(by=['exp_id', 'profile_id']).apply(calculate_stats, col1="velsurf_mag", col2="v", include_groups=False)
#     stats_profile = stats.reset_index().assign(**profile_gp.iloc[0])
#     return obs_profile, sims_profile, stats_profile


In [10]:
    # Run a single profile serially as a smoke test and show its statistics
    # (the last element of what `process_profile` returns).
    p = 0
    profile = profiles_gp.iloc[p]
    m = process_profile(profile, p, obs_ds, sim_ds)
    print(m[-1])

   profile_id                       name  length     clon      clat  \
0         148  Kangerdlugssuaq Gletscher  7208.0 -30.0285  67.50683   
1         148  Kangerdlugssuaq Gletscher  7208.0 -30.0285  67.50683   

   flightline  gtype  ftype  basin  \
0         2.0    0.0      0    3.3   
1         2.0    0.0      0    3.3   

                                            geometry exp_id  level_2  \
0  LINESTRING (483605.659 -2294795.528, 483889.74...  GRIMP        0   
1  LINESTRING (483605.659 -2294795.528, 483889.74...  RAGIS        0   

   pearson_r         rmsd  
0   0.959318  1731.330296  
1   0.930480  2241.289522  


In [11]:
# Split each per-profile result into its parts: first element -> observations,
# second -> simulations, last -> statistics table.
obs_profiles = [r[0] for r in result]
sims_profiles = [r[1] for r in result]
stats_frames = [r[-1] for r in result]

# Stack the per-profile statistics into one table with a fresh integer index
# (the previous index is kept as a column by reset_index()).
stats_profiles = pd.concat(stats_frames).reset_index()


In [None]:
# Colour ramp for the speed image.
qgis_colormap = Path("../tests/data/test_qgis_colorramp.txt")
cmap = qgis2cmap(qgis_colormap, name="speeds")

# Surface elevation, interpolated onto the grid of the observed velocities so it
# can serve as the relief that shades the speed image.
# NOTE(review): absolute, user-specific path — consider a configurable data directory.
ds_dem = xr.open_dataset(Path("/Users/andy/Google Drive/My Drive/data/MCdataset/BedMachineGreenland-v5.nc"))
gris_surface = ds_dem["surface"].interp_like(obs_ds).to_numpy()
gris_speed = obs_ds["v"].to_numpy()

# Map speeds to RGBA colours with a fixed colour range.
norm = colors.Normalize(vmin=10.0, vmax=1500.0)
mapper = cm.ScalarMappable(norm=norm, cmap=cmap)
speed_img = mapper.to_rgba(gris_speed)

# Image bounds as [x_first, x_last, y_last, y_first], taken from the observation grid.
extent =[obs_ds["x"][0].to_numpy(), obs_ds["x"][-1].to_numpy(), obs_ds["y"][-1].to_numpy(), obs_ds["y"][0].to_numpy()]

# Shade from the northwest, with the sun 45 degrees from horizontal.
ls = LightSource(azdeg=315, altdeg=45)
rgb = ls.shade_rgb(speed_img, elevation=gris_surface, vert_exag=0.5, blend_mode='overlay')

fig, ax = plt.subplots(figsize=(12, 8))
ax.imshow(rgb, extent=extent, origin="upper")
# Overlay the profiles, coloured by their RMSD.
# NOTE(review): `column=` is the GeoDataFrame plotting keyword — confirm that
# `stats_profiles` is a GeoDataFrame at this point.
stats_profiles.plot(column="rmsd", cmap="RdYlGn_r", lw=2, ax=ax)
ax.set_title("Observed surface speed with per-profile RMSD");

In [None]:
# Inspect the CRS attached to the combined statistics table.
# NOTE(review): `.crs` is a GeoDataFrame attribute — confirm that `stats_profiles`
# (built with pd.concat) is a GeoDataFrame, otherwise this raises AttributeError.
stats_profiles.crs

In [None]:
# Half-width of the grey band around the observations, in multiples of `v_err`.
sigma = 25

# One figure per flux gate: observations with their error band in grey/black,
# simulations on top. `k` is the position of the profile in the result lists.
for k, (_, profile) in enumerate(profiles_gp.iterrows()):
    glacier_name = profile["name"]
    print(glacier_name)

    obs_profile = obs_profiles[k].squeeze()
    sims_profile = sims_profiles[k].squeeze()

    fig, ax = plt.subplots()
    error_band = sigma * obs_profile["v_err"]
    ax.fill_between(
        obs_profile["profile_axis"],
        obs_profile["v"] - error_band,
        obs_profile["v"] + error_band,
        color="0.75",
    )
    obs_profile["v"].plot.line(x="profile_axis", marker="o", ms=2, color="0.0", ax=ax)
    sims_profile["velsurf_mag"].plot.line(x="profile_axis", marker="o", ms=2, ax=ax)
    ax.set_title(glacier_name)

In [None]:
# Wrap the statistics table of the first profile in a GeoDataFrame, using its own
# "geometry" column and declaring the CRS explicitly.
first_stats = result[0][2]
gp.GeoDataFrame(first_stats, geometry=first_stats["geometry"], crs="epsg:3413")

In [None]:
# Display `obs_profile`, i.e. whatever the last iteration of the profile plotting
# loop left behind. Depends on that loop having been run first.
obs_profile

In [None]:
# Display the flux-gate table again (scratch inspection).
profiles_gp

In [None]:
# Per-experiment statistics between simulated ("velsurf_mag") and observed ("v") speeds.
# NOTE(review): fails on a fresh kernel — `obs_sims_df` is only built in a cell further
# down, and `calculate_stats` appears in this notebook only as commented-out code.
stats = obs_sims_df.groupby(by=['exp_id']).apply(calculate_stats, col1="velsurf_mag", col2="v", include_groups=False)

In [None]:
# Scratch experiment: merge observations and simulations on their shared columns
# with dask and apply a per-group statistic.
# NOTE(review): `obs`, `sims`, `dd` and `calculate_rmsd` are not defined or imported
# in any cell visible in this notebook, so this cell cannot run from a fresh kernel.
intersection_keys = list(set(obs.columns) & set(sims.columns))
merged = dd.merge(obs, sims, on=intersection_keys)
# NOTE(review): import placed mid-cell; `progress` is imported but not used here.
from dask.distributed import Client, progress
client = Client(threads_per_worker=4, n_workers=1)

s = merged.groupby(by=['profile_id', 'profile', 'exp_id']).apply(calculate_rmsd, col1="velsurf_mag", col2="v", meta=("float"))

In [None]:
# Flatten the observation and simulation profiles (left over from the plotting
# loop) into tables, then join them on every column name the two tables share.
obs_df, sims_df = (
    profile_ds.to_dataframe().reset_index()
    for profile_ds in (obs_profile, sims_profile)
)
shared_columns = set(obs_df.columns) & set(sims_df.columns)
intersection_keys = list(shared_columns)
obs_sims_df = pd.merge(obs_df, sims_df, on=intersection_keys)

In [None]:
# NOTE(review): scratch cell — `stats_df` is not defined in any cell visible in this notebook.
stats_df[0]

In [None]:
# NOTE(review): scratch cell — `calculate_pearson_r` is not defined in any visible cell,
# and `df` only exists as the loop variable of a cell further down.
calculate_pearson_r(df, "v", "velsurf_mag")

In [None]:
# NOTE(review): leftover incomplete expression (looks like an abandoned tab-completion);
# running it raises AttributeError. Candidate for deletion.
pd.Da

In [None]:
# Scratch inspection of the simulated-speed column.
# NOTE(review): `df` only exists as the loop variable of a cell further down.
df["velsurf_mag"]

In [None]:
# NOTE(review): leftover IPython help lookup (`?`); remove before sharing the notebook.
np.corrcoef?

In [None]:
# NOTE(review): scratch cell — `obs` is not defined in any cell visible in this notebook.
obs.keys()

In [None]:
# NOTE(review): scratch cell — `calculate_rmsd` is not defined in any visible cell,
# and `df` only exists as the loop variable of the cell below.
calculate_rmsd(df, "velsurf_mag", "v")

In [None]:
# Print every (profile_id, exp_id) group of `m`.
# NOTE(review): needs `m` to be the merged table created by pd.merge in a cell further
# down; in cell In [10] `m` is the return value of process_profile, which is indexed
# with m[-1] rather than grouped.
for _, df in m.groupby(by=["profile_id", "exp_id"]):
    print(df)

In [None]:

def rmse(partition):
    """Root-mean-square difference between simulated and observed speeds.

    Args:
        partition: table (e.g. one groupby partition) with the numeric columns
            "velsurf_mag" (simulated speed) and "v" (observed speed).

    Returns:
        float: ``sqrt(mean((velsurf_mag - v) ** 2))`` over the rows where both
        values are present. NaN when no row has both values (including an
        empty partition).
    """
    simulated = partition["velsurf_mag"].to_numpy(dtype=float)
    observed = partition["v"].to_numpy(dtype=float)
    squared_diff = (simulated - observed) ** 2
    # Avoid numpy's "mean of empty slice" warning when nothing can be compared.
    if np.isnan(squared_diff).all():
        return float("nan")
    return float(np.sqrt(np.nanmean(squared_diff)))

# Apply `rmse` to every (profile_id, exp_id) group and flatten the group keys back into columns.
# NOTE(review): needs `m` to be the merged table created by pd.merge in the next cell.
a =  m.groupby(by=["profile_id", "exp_id"]).apply(rmse).reset_index()

In [None]:
# Join observations and simulations on their shared columns.
# NOTE(review): `obs` and `sims` are not defined in any visible cell; this also rebinds
# `m`, which cell In [10] uses for the return value of process_profile.
m = pd.merge(obs, sims, on=intersection_keys)

In [None]:
# NOTE(review): leftover IPython help lookup (`?`); remove before sharing the notebook.
gp.GeoDataFrame?

In [None]:
# Pick the first flux gate for the GeoDataFrame experiments in the cells below.
profile = profiles_gp.iloc[0]

# NOTE(review): `merged` is only created in the dask scratch cell above, which itself
# depends on names that are undefined in the visible cells.
merged

In [None]:
# Experiment: build a one-row GeoDataFrame from a single profile row (no CRS given).
gp.GeoDataFrame([profile], geometry=[profile.geometry])

In [None]:
# Experiment: same idea via a transposed one-row DataFrame without the geometry column,
# passing the geometry and CRS separately.
# NOTE(review): `geometry=` receives a single geometry object here rather than a list — confirm this is accepted.
gp.GeoDataFrame(pd.DataFrame(profile.drop("geometry")).T, geometry=profile.geometry, crs="epsg:3413")

In [None]:
# Experiment: construct a GeoDataFrame via `from_records` from a single profile row.
# NOTE(review): `profile` is one row (a Series), not a sequence of records — verify the result is what was intended.
gp.GeoDataFrame.from_records(profile)