# In contours with sample MPAS

In [None]:
import itertools
import sys; sys.path.append("../")
import warnings

import geopandas as gpd
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
from shapely.errors import ShapelyDeprecationWarning
from shapely.geometry import MultiPolygon
from shapely.ops import unary_union

import tams

## Experimenting with a single time step

In [None]:
# Take one time step (index 1) of the example MPAS dataset
ds = tams.load_example_mpas().isel(time=1)
tb, precip = ds.tb, ds.precip

# Show only the positive precip values, using a log color scale
log_norm = mpl.colors.LogNorm(vmin=1e-5)
positive_precip = precip.where(precip > 0)
positive_precip.plot(norm=log_norm)

In [None]:
res = tams.identify(tb)
cs235, cs219 = res[0][0], res[1][0]

In [None]:
tams.data_in_contours(precip, cs219[:5], merge=True)

In [None]:
cs235.head()

In [None]:
cs235.set_geometry("cs219").dissolve().cs219.geometry.values[0]

In [None]:
# Compare three counts related to the 219 K polygons attached to the 235 K contours
dissolved_219 = cs235.set_geometry("cs219").dissolve().cs219.geometry.values[0]

n1 = sum(len(multi.geoms) for multi in cs235.cs219)  # polygons summed over each row's multi-polygon
n2 = len(dissolved_219.geoms)  # polygons in the single dissolved multi-polygon
n_inds = cs235.inds219.apply(len).sum()  # summed lengths of the `inds219` entries
print(n1, n2, n_inds)

In [None]:
cs235.cs219.explode()

In [None]:
cs235.cs219.explode(index_parts=True)  # 235 index, 219 contour ind within that

In [None]:
# Rebuild each row's 219 K multi-polygon from its `inds219` positions in `cs219`,
# then compute precip stats inside those rebuilt geometries
cs219_from_inds = cs235.inds219.apply(
    lambda inds: MultiPolygon(cs219.geometry.iloc[inds].values)
).rename("cs219")
tams.data_in_contours(
    precip,
    gpd.GeoDataFrame(crs="EPSG:4326", geometry=cs219_from_inds),
)

In [None]:
tams.data_in_contours(tb, gpd.GeoDataFrame(geometry=cs235.set_geometry("cs219").geometry))

In [None]:
tams.data_in_contours(tb, cs235.set_geometry("cs219", drop=True)).add_suffix("219")
# Without `drop=True` we get `AttributeError: 'DataFrame' object has no attribute 'geometry'`
# (seems like a bug)

In [None]:
_, ax = plt.subplots(figsize=(12, 5))
cs235.plot(ax=ax)
cs235.cs219.plot(facecolor="none", ax=ax)

In [None]:
# A check to see that the 235s have more count
tams.data_in_contours(tb, cs235, merge=True).count_tb

In [None]:
# 219 K stats inside MCSs

## Now with more time steps (adding `classify`)

In [None]:
# Same example dataset, now with time indices 1 through 9
ds = tams.load_example_mpas().isel(time=slice(1, 10))
tb, precip = ds.tb, ds.precip
times = ds.time.values

# Demonstrate some 219 holes
contour_style = dict(
    levels=[214, 219, 224],
    colors=["b", "0.3", "r"],
    linewidths=[0.7, 1.5, 0.7],
)
tb.isel(time=0).plot.contour(size=10, **contour_style)
plt.gca().set(xlim=(90, 110), ylim=(-1, 6))

# Possible ways to address the holes:
# - smear over them (essentially dropping the holes); the original note names `unary_region`,
#   presumably meaning `shapely.ops.unary_union` -- TODO confirm
# - or check the temperature inside to decide whether it really should be a hole,
#   and figure out how to set it that way

In [None]:
# Identify contours at each time, link them through time, then classify the resulting MCSs
css235, css219 = tams.identify(tb)
cs = tams.classify(tams.track(css235, times))
cs.head()

In [None]:
cs.cs219.crs

In [None]:
cs.index

In [None]:
cs["mcs_class"].value_counts()

### Selecting single MCS and time

In [None]:
mcs = cs.query("mcs_id == 9 and itime == 2")
mcs

In [None]:
mcs[["geometry", "cs219"]].dissolve().set_geometry("cs219", drop=True).explode().plot(fc="none")

In [None]:
mcs[["geometry", "cs219"]].set_geometry("cs219", drop=True).dissolve().plot(fc="none")

In [None]:
%timeit mcs[["geometry", "cs219"]].set_geometry("cs219", drop=True).dissolve()

In [None]:
mcs.crs  # preserved since we used .dissolve

In [None]:
MultiPolygon(itertools.chain.from_iterable((mp.geoms for mp in mcs.cs219.values)))

In [None]:
%timeit MultiPolygon(itertools.chain.from_iterable((mp.geoms for mp in mcs.cs219.values)))

In [None]:
unary_union(MultiPolygon(itertools.chain.from_iterable((mp.geoms for mp in mcs.cs219.values))))

In [None]:
%timeit unary_union(MultiPolygon(itertools.chain.from_iterable((mp.geoms for mp in mcs.cs219.values))))

### Selecting single MCS (multiple times)

To test out methods for constructing the MCS (time-resolved) and MCS-summary datasets.

In [None]:
mcs = cs.query("mcs_id == 9")
mcs

In [None]:
mcs.groupby("time").apply(lambda g: MultiPolygon(g.geometry.values))

In [None]:
# Print the dissolved geometry for each time of the selected MCS
for t, g in mcs.groupby("time"):
    print(t, g[["geometry"]].dissolve())

In [None]:
mcs.groupby("time")[["geometry"]].apply(lambda g: g.dissolve())

In [None]:
gpd.GeoSeries(mcs[["time", "geometry"]].groupby("time").apply(lambda g: g.geometry.unary_union))

In [None]:
gpd.GeoSeries(mcs[["time", "geometry"]].groupby("time").apply(lambda g: unary_union(MultiPolygon(g.geometry.values))))

In [None]:
gpd.GeoSeries(mcs[["time", "geometry"]].groupby("time").apply(lambda g: MultiPolygon(g.geometry.values)))

### Agg over MCS CEs and times

In [None]:
# CE -> MCS
#
# Aggregate the rows of `cs` up to the MCS level, filling three dicts keyed by `mcs_id`:
# - dfs_t:  time-resolved DataFrame (one row per time)
# - res_t:  the dict of time-indexed series that DataFrame was built from
# - res_nt: dict of per-MCS summary values (no time dimension)

# TODO: inds219 isn't relevant anymore here, should remove in track

dfs_t = {}
res_t = {}
res_nt = {}
# NOTE: the loop-local names `mcs_ces` and `mcs_times` are deliberately distinct from the
# notebook-level `mcs` and `times`, so running this cell doesn't overwrite those variables
# (`times` is what gets passed to `tams.track`).
for mcs_id, mcs_ces in cs.groupby("mcs_id"):

    # Time-varying
    time_group = mcs_ces.groupby("time")
    d = {}

    with warnings.catch_warnings():
        # ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the  number of parts of a multi-part geometry.
        warnings.filterwarnings(
            "ignore",
            category=ShapelyDeprecationWarning,
            message="__len__ for multi-part geometries is deprecated",
        )
        # Variants tried earlier, kept for reference (the first two are not time-resolved):
        # d["cs235"] = MultiPolygon(mcs_ces.geometry.values)
        # d["cs219"] = MultiPolygon(itertools.chain.from_iterable((mp.geoms for mp in mcs_ces.cs219.values)))
        # d["cs235"] = gpd.GeoSeries(mcs_ces[["time", "geometry"]].groupby("time").apply(lambda g: MultiPolygon(g.geometry.values)))

        # One multi-polygon per time, collecting all of that time's 235 K polygons
        d["cs235"] = gpd.GeoSeries(time_group.apply(lambda g: MultiPolygon(g.geometry.values)))
        # Each `cs219` entry is itself a multi-polygon, so flatten their parts before combining
        d["cs219"] = gpd.GeoSeries(
            time_group.apply(
                lambda g: MultiPolygon(itertools.chain.from_iterable(mp.geoms for mp in g.cs219.values))
            )
        )

    d["area_km2"] = time_group.area_km2.sum()
    d["area219_km2"] = time_group.area219_km2.sum()

    df = pd.DataFrame(d).reset_index()  # time -> column
    df["mcs_id"] = mcs_id
    # All rows of one MCS are expected to carry the same class
    assert mcs_ces.mcs_class.unique().size == 1
    mcs_class = mcs_ces.mcs_class.values[0]
    df["mcs_class"] = mcs_class

    # Summary stuff
    mcs_times = mcs_ces.time.unique()
    first_time = mcs_times.min()
    last_time = mcs_times.max()
    d2 = {
        "first_time": first_time,
        "last_time": last_time,
        # + 1 h, so an MCS present at a single time gets a duration of 1 h, not 0
        # NOTE(review): presumably this assumes hourly data -- confirm
        "duration": last_time - first_time + pd.Timedelta(hours=1),
        "mcs_id": mcs_id,
        "mcs_class": mcs_class,
    }

    dfs_t[mcs_id] = df
    res_t[mcs_id] = d
    res_nt[mcs_id] = d2

In [None]:
pd.DataFrame(res_t[7])

In [None]:
# Stack the per-MCS time-resolved frames into one GeoDataFrame whose active geometry is `cs235`
stacked = pd.concat(dfs_t.values()).reset_index(drop=True)
mcs = gpd.GeoDataFrame(stacked).set_geometry("cs235", crs="EPSG:4326").convert_dtypes()

# `cs219` is a second geometry column; give it its CRS separately
mcs["cs219"] = mcs["cs219"].set_crs("EPSG:4326")
mcs["mcs_class"] = mcs["mcs_class"].astype("category")
mcs.head()

In [None]:
mcs.info()

In [None]:
mcs.query("mcs_id == 9")

### Add gridded data stats

In [None]:
# Add stats on gridded data for the different times
dfs = []
for t, g in mcs.groupby("time"):
    # Precip within the active geometry, merged back onto the rows of `g`
    in_active = tams.data_in_contours(precip.sel(time=t), g, merge=True)

    # Precip and Tb within the `cs219` geometry; stats only (`merge=False`), suffixed with "219"
    precip_in_219 = tams.data_in_contours(
        precip.sel(time=t), g.set_geometry("cs219", drop=True), merge=False
    ).add_suffix("219")
    tb_in_219 = tams.data_in_contours(
        tb.sel(time=t), g.set_geometry("cs219", drop=True), merge=False
    ).add_suffix("219")

    combined = in_active.join(precip_in_219).join(tb_in_219)
    # Keep a single pixel count per geometry: the precip one for the active geometry,
    # the Tb one for `cs219` (the precip `cs219` count is dropped)
    combined = combined.drop(columns=["count_precip219"])
    combined = combined.rename(columns={"count_precip": "npixel", "count_tb219": "npixel219"})
    dfs.append(combined)

mcs = pd.concat(dfs)
mcs.head()

In [None]:
mcs.info()

In [None]:
mcs.geometry.boundary.plot()

In [None]:
mcs.cs219.boundary.plot(ec="r")

In [None]:
mcs.query("mcs_id == 9").set_index("time").area_km2.plot()

In [None]:
mcs.query("mcs_id == 9").set_index("time").mean_precip219.plot()

In [None]:
gpd.GeoDataFrame(res_t.values()).info()

In [None]:
mcs2 = pd.DataFrame(res_nt.values()).reset_index(drop=True)
mcs2.info()

In [None]:
# Project to EPSG:32663 before taking the centroid, then convert the result back to EPSG:4326.
# Looks like it makes no difference whether we project first using this projection,
# but the warning is avoided!
cs235_projected = mcs.cs235.to_crs("EPSG:32663")
cs235_projected.centroid.to_crs("EPSG:4326")

In [None]:
# Take the centroid directly in the geographic CRS, silencing the warning geopandas raises for that:
# UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
geographic_centroid_msg = "Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect."
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", message=geographic_centroid_msg, category=UserWarning)
    p = mcs.cs235.centroid

p

### Add stats to MCS-summary df

In [None]:
mcs2.join(mcs.groupby("mcs_id")[["mean_precip", "mean_precip219", "mean_tb219", "area_km2", "area219_km2"]].mean())

In [None]:
mcs.attrs.update({"info": "this is the MCS dataset"})
mcs.info()

In [None]:
mcs.convert_dtypes().info()

In [None]:
mcs.assign(mcs_class=mcs.mcs_class.astype("category")).convert_dtypes().info()

### Add first and last loc to MCS-summary df

In [None]:
# Add initial and ending coords (from time-resolved dataset centroids)

def f(g):
    """Return the first and last centroid of one group of time-resolved geometries.

    Parameters
    ----------
    g
        One group of rows (here from ``mcs.groupby("mcs_id")``) with a ``time`` column
        and an active geometry column.

    Returns
    -------
    geopandas.GeoSeries
        Two entries, labelled ``"first_centroid"`` and ``"last_centroid"``: the centroids
        of the geometries at the earliest and latest ``time`` in `g`, in EPSG:4326.
    """
    # `sort_values` returns a new, sorted object instead of sorting in place, so the result
    # must be bound for the ordering to take effect (the caller's group is left untouched).
    g = g.sort_values(by="time")  # should be already but just in case...
    # Compute the centroids in a projected CRS, then convert them back to EPSG:4326
    cen = g.geometry.to_crs("EPSG:32663").centroid.to_crs("EPSG:4326")
    return gpd.GeoSeries({"first_centroid": cen.iloc[0], "last_centroid": cen.iloc[-1]})
    # return pd.Series({"first_centroid": cen.iloc[0], "last_centroid": cen.iloc[-1]})

# One row per MCS, holding its first and last centroid
centroids = mcs.groupby("mcs_id").apply(f)
# points = centroids.astype("geometry")
# ^ Initially we have GeoDataFrame but the columns don't have dtype geometry
# `.astype("geometry")` makes that conversion but we lose GeoDataFrame
points = gpd.GeoDataFrame(centroids.astype("geometry"))

# `.set_crs()` only works on a geometry column in a GeoDataFrame
for col in ("first_centroid", "last_centroid"):
    points[col] = points[col].set_crs("EPSG:4326")

assert points.first_centroid.crs == points.last_centroid.crs == "EPSG:4326"

points.info()

In [None]:
p = points.last_centroid.difference(points.first_centroid).to_crs("EPSG:32663")
p

In [None]:
# Distance between first and last centroid, measured in EPSG:32663 and divided by 1000
# (named km, so the projected distance is presumably in meters -- TODO confirm)
projected_crs = "EPSG:32663"
first_projected = points.first_centroid.to_crs(projected_crs)
last_projected = points.last_centroid.to_crs(projected_crs)
points["distance_km"] = first_projected.distance(last_projected) / 10**3
points

In [None]:
gpd.GeoDataFrame(mcs2).join(points).set_geometry("first_centroid").plot()

In [None]:
gpd.GeoDataFrame(mcs2).join(points).info()

In [None]:
gpd.GeoDataFrame(mcs2).join(points)