In [1]:
%reload_ext swagpy.jupyter

In [2]:
from typing import Optional

import numpy as np
import pandas as pd
from swagpy.api import extract_features

idx = pd.IndexSlice

In [5]:
def make_gridspace(
    x1: float,
    y1: float,
    x2: float,
    y2: float,
    x_size: int = 972,
    y_size: int = 635,
    seed: Optional[int] = None,
) -> pd.DataFrame:
    """Build a synthetic lat/lon grid carrying two random integer channels.

    Parameters
    ----------
    x1, x2
        Longitude of the first and last grid column; ``x_size`` evenly spaced
        float32 values are generated between them (both ends included).
    y1, y2
        Latitude of the first and last grid row; ``y_size`` evenly spaced
        float32 values are generated between them (both ends included).
    x_size, y_size
        Number of grid points along longitude and latitude.
    seed
        When given, the channels are drawn from a private
        ``np.random.RandomState(seed)`` so the result is reproducible.
        When ``None`` (default) the global ``np.random`` state is used,
        exactly as before.

    Returns
    -------
    pd.DataFrame
        One row per grid cell, indexed by a ``("lat", "lon")`` MultiIndex
        sorted by lat then lon, with integer columns ``water_vapor`` and
        ``long_wave_ir`` drawn from the half-open range ``[0, 255)``.
    """
    lon = np.linspace(x1, x2, x_size, dtype=np.float32)
    lat = np.linspace(y1, y2, y_size, dtype=np.float32)

    # lon-major / lat-minor ordering: this is the row order the previous
    # implementation produced, so random draws land on the same cells for a
    # given RNG state.
    cells = pd.MultiIndex.from_product([lon, lat], names=["lon", "lat"])

    # ``np.random`` (module) and ``RandomState`` expose the same ``randint``.
    rng = np.random if seed is None else np.random.RandomState(seed)
    grid = pd.DataFrame(
        {
            "water_vapor": rng.randint(0, 255, size=len(cells)),
            "long_wave_ir": rng.randint(0, 255, size=len(cells)),
        },
        index=cells,
    )
    return grid.reorder_levels(["lat", "lon"]).sort_index(level=["lat", "lon"])


def make_features(
    x1: int,
    y1: int,
    x2: int,
    y2: int,
    n_features: int = 76_020,
    seed: Optional[int] = None,
) -> pd.DataFrame:
    """Generate random axis-aligned bounding boxes inside a rectangle.

    The rectangle is described by two opposite corners ``(x1, y1)`` and
    ``(x2, y2)``.  The corners may be given in any order; the bounds are
    normalised before sampling.

    Parameters
    ----------
    x1, y1, x2, y2
        Integer corner coordinates of the sampling rectangle.
    n_features
        Number of boxes to generate.
    seed
        When given, coordinates are drawn from a private
        ``np.random.RandomState(seed)``.  When ``None`` (default) the global
        ``np.random`` state is used, exactly as before.

    Returns
    -------
    pd.DataFrame
        ``n_features`` rows with integer columns ``minx``, ``maxx``,
        ``miny``, ``maxy`` where ``minx <= maxx`` and ``miny <= maxy``.
        Coordinates are drawn from half-open ranges, so the upper bound of
        each axis is never produced.

    Raises
    ------
    ValueError
        Propagated from ``randint`` when the rectangle has zero extent along
        an axis (``x1 == x2`` or ``y1 == y2``).
    """
    # ``randint`` needs low < high; accept the corners in either order.
    west, east = sorted((x1, x2))
    south, north = sorted((y1, y2))

    rng = np.random if seed is None else np.random.RandomState(seed)
    # Draw order (x, x, y, y) is kept so a given RNG state yields the same boxes.
    xa = rng.randint(west, east, n_features)
    xb = rng.randint(west, east, n_features)
    ya = rng.randint(south, north, n_features)
    yb = rng.randint(south, north, n_features)

    return pd.DataFrame(
        {
            "minx": np.minimum(xa, xb),
            "maxx": np.maximum(xa, xb),
            "miny": np.minimum(ya, yb),
            "maxy": np.maximum(ya, yb),
        }
    )


# 1-D array aliases.  ``np.ndarray`` is parameterised as ``ndarray[shape, dtype]``
# and the dtype slot takes an ``np.dtype[...]``, not a bare scalar type.
floating1DArray = np.ndarray[tuple[int], np.dtype[np.floating]]
unsignedinteger1DArray = np.ndarray[tuple[int], np.dtype[np.unsignedinteger]]
# ``np.argmin`` yields ``np.intp`` (a signed integer) indices.
intp1DArray = np.ndarray[tuple[int], np.dtype[np.intp]]

# Upper bound on the number of cells in the temporary distance matrix built by
# ``min_diff`` (1 << 22 float64 cells is 32 MiB).
_MIN_DIFF_MAX_CELLS = 1 << 22


def min_diff(target: floating1DArray, values: floating1DArray) -> intp1DArray:
    """Return, for every entry of ``values``, the index of the nearest ``target``.

    Distance is ``abs(target[i] - value)``; on ties the smallest index wins
    (``np.argmin`` semantics).

    Parameters
    ----------
    target
        1-D array of candidate positions.
    values
        1-D array of query values.

    Returns
    -------
    np.ndarray
        ``np.intp`` array with one index into ``target`` per entry of
        ``values``.

    Raises
    ------
    ValueError
        If ``target`` is empty.
    """
    target = np.asarray(target)
    values = np.asarray(values)

    if target.ndim != 1 or values.ndim != 1:
        # Outside the documented 1-D contract: keep the one-shot broadcast so
        # the result is whatever the previous implementation returned.
        return np.argmin(np.abs(target[:, np.newaxis] - values), axis=0)
    if target.size == 0:
        raise ValueError("min_diff requires a non-empty target array")

    # A single broadcast allocates len(target) * len(values) cells (twice: the
    # difference and its absolute value).  Work through ``values`` in slices so
    # the temporaries stay bounded; the per-column argmin is unaffected.
    nearest = np.empty(values.shape[0], dtype=np.intp)
    step = max(1, _MIN_DIFF_MAX_CELLS // target.size)
    for start in range(0, values.shape[0], step):
        stop = start + step
        gaps = np.abs(target[:, np.newaxis] - values[start:stop])
        nearest[start:stop] = gaps.argmin(axis=0)
    return nearest


def main(seed: Optional[int] = None) -> pd.DataFrame:
    """Build a synthetic grid and random boxes, then extract per-box channel data.

    Steps:

    1. Create the lat/lon grid with ``make_gridspace``.
    2. Create random bounding boxes with ``make_features``.
    3. Map every box edge to the index of the nearest grid coordinate with
       ``min_diff``.
    4. Call ``extract_features`` once per channel with the channel as a
       float32 lat-by-lon array plus the four index arrays, and store the
       result as a new column of the feature table.

    Parameters
    ----------
    seed
        When given, ``np.random.seed(seed)`` is called first so the random
        grid and boxes are reproducible.  Note this reseeds NumPy's global
        RNG.  ``None`` (default) leaves the RNG state untouched.

    Returns
    -------
    pd.DataFrame
        The box table (``minx``, ``maxx``, ``miny``, ``maxy``) with added
        ``water_vapor`` and ``long_wave_ir`` columns holding whatever
        ``extract_features`` returns for each box.
        NOTE(review): ``extract_features`` lives in ``swagpy.api``; its exact
        return layout is not visible here -- confirm against that module.
    """
    if seed is not None:
        np.random.seed(seed)

    # setting up the gridspace
    xy_1 = -129, 54  # west, north
    xy_2 = -60, 20  # east, south
    x_size, y_size = 972, 635
    X1, Y1 = xy_1
    X2, Y2 = xy_2
    gs = make_gridspace(X1, Y1, X2, Y2, x_size=x_size, y_size=y_size)
    lat, lon = (gs.index.unique(crd).to_numpy() for crd in ("lat", "lon"))

    # FEATURES
    n_features = 76_020
    features = make_features(X1, Y1, X2, Y2, n_features=n_features)

    # Grid indices closest to each box edge.
    min_lon = min_diff(lon, features.minx.to_numpy())
    max_lon = min_diff(lon, features.maxx.to_numpy())
    min_lat = min_diff(lat, features.miny.to_numpy())
    max_lat = min_diff(lat, features.maxy.to_numpy())

    # Pivot once (rows: lat, columns: lon per channel) and reuse for both channels.
    by_lon = gs.unstack("lon")
    for channel in ("water_vapor", "long_wave_ir"):
        features[channel] = extract_features(
            by_lon[channel].to_numpy(dtype=np.float32),
            min_lon,
            max_lon,
            min_lat,
            max_lat,
        )
    return features


# Run the pipeline only when this code is the entry point.
if __name__ == "__main__":
    features = main()
# Bare trailing expression so Jupyter renders the DataFrame as the cell output.
# NOTE(review): `features` is only bound here when the guard above executed.
features

Unnamed: 0,minx,maxx,miny,maxy,water_vapor,long_wave_ir
0,-115,-66,38,43,"[[218.0, 137.0, 206.0, 161.0, 97.0, 70.0, 157....","[[20.0, 182.0, 177.0, 93.0, 52.0, 182.0, 21.0,..."
1,-116,-67,39,50,"[[251.0, 254.0, 237.0, 184.0, 134.0, 107.0, 69...","[[136.0, 231.0, 85.0, 2.0, 108.0, 55.0, 247.0,..."
2,-123,-84,44,53,"[[210.0, 92.0, 81.0, 234.0, 60.0, 23.0, 229.0,...","[[209.0, 57.0, 121.0, 147.0, 122.0, 15.0, 237...."
3,-82,-62,25,49,"[[98.0, 226.0, 230.0, 195.0, 208.0, 37.0, 42.0...","[[203.0, 138.0, 4.0, 197.0, 195.0, 163.0, 2.0,..."
4,-110,-85,34,46,"[[85.0, 117.0, 97.0, 205.0, 185.0, 125.0, 239....","[[17.0, 170.0, 21.0, 226.0, 52.0, 232.0, 246.0..."
...,...,...,...,...,...,...
76015,-114,-79,27,32,"[[69.0, 210.0, 49.0, 7.0, 45.0, 38.0, 60.0, 84...","[[124.0, 78.0, 37.0, 170.0, 3.0, 184.0, 127.0,..."
76016,-81,-69,28,45,"[[47.0, 193.0, 161.0, 176.0, 226.0, 156.0, 94....","[[119.0, 173.0, 150.0, 114.0, 129.0, 88.0, 44...."
76017,-104,-82,35,53,"[[112.0, 214.0, 118.0, 241.0, 154.0, 127.0, 32...","[[183.0, 159.0, 241.0, 248.0, 239.0, 204.0, 16..."
76018,-128,-101,47,52,"[[160.0, 157.0, 76.0, 128.0, 228.0, 154.0, 196...","[[10.0, 177.0, 72.0, 162.0, 233.0, 92.0, 55.0,..."
