In [None]:
import os

# Process-wide settings, assigned before `mirrorverse` is imported below.
# NOTE(review): presumably these select the AWS credentials and the target
# database used by the data-access helpers — confirm against mirrorverse.
# NOTE(review): the profile name is hard-coded to 'admin'; consider supplying
# it from outside the notebook so a shared copy does not assume admin access.
os.environ['AWS_PROFILE'] = 'admin'
os.environ['HAVEN_DATABASE'] = 'haven'

from mirrorverse.utils import read_data_w_cache

import h3
import plotly.express as px
from tqdm import tqdm
from collections import defaultdict
import pandas as pd
import numpy as np

In [None]:
# Collapse `elevation_uploads` to one row per (lat, lon) pair rounded to one
# decimal place, carrying the mean elevation of the rows that share that pair.
sql = ''' 
select 
    round(lat, 1) as lat,
    round(lon, 1) as lon,
    avg(elevation) as elevation
from 
    elevation_uploads
group by 
    1, 2
'''
# NOTE(review): presumably runs the query and caches the result so re-runs are
# cheap — confirm in mirrorverse.utils.
data = read_data_w_cache(sql)
print(data.shape)
data.head()

In [None]:
# Plot a random subset of the cells with positive elevation, coloured by
# elevation. The sample size is capped at the number of available rows so the
# cell cannot raise when fewer than 10000 rows match, and the draw is seeded
# so the figure is identical on every Restart & Run All.
above_sea_level = data[data['elevation'] > 0]
px.scatter_geo(
    above_sea_level.sample(n=min(10000, len(above_sea_level)), random_state=0),
    lat='lat', lon='lon',
    color='elevation',
)

In [None]:
# Split the grid into water (elevation < 0) and land (elevation >= 0).
# Rows whose elevation is NaN satisfy neither test and are skipped.
#
# The columns are pulled out as plain lists and zipped, instead of using
# DataFrame.iterrows(), which builds a Series for every row and is slow on
# large frames. Passing `total` lets tqdm show a real progress bar.
lats = data['lat'].tolist()
lons = data['lon'].tolist()
elevations = data['elevation'].tolist()

water = defaultdict(list)  # lat -> lons of the water cells on that latitude
land = []                  # (lat, lon) of every land cell
for lat, lon, elevation in tqdm(zip(lats, lons, elevations), total=len(lats)):
    if elevation < 0:
        water[lat].append(lon)
    elif elevation >= 0:
        land.append((lat, lon))

In [None]:
# A land cell is "coastal" when any cell of its 3x3 grid neighbourhood is water.
#
# Neighbours are matched on grid indices (coordinate / increment, rounded to
# the nearest whole number), not on raw float coordinates. `lat - 0.1` and
# `lat + 0.1` frequently differ from the stored neighbour coordinate in the
# last bit (e.g. 0.3 - 0.1 != 0.2), so exact float lookups silently miss
# adjacent cells. Whole-number indices compare exactly, and a set gives
# constant-time membership tests.
increment = 0.1  # grid spacing of the coordinates in `land` / `water`

water_points = [
    (water_lat, water_lon)
    for water_lat, water_lons in water.items()
    for water_lon in water_lons
]
water_cells = set(
    map(tuple, np.rint(np.asarray(water_points, dtype=float).reshape(-1, 2) / increment).tolist())
)
land_cells = np.rint(np.asarray(land, dtype=float).reshape(-1, 2) / increment).tolist()
offsets = [(d_lat, d_lon) for d_lat in (-1, 0, 1) for d_lon in (-1, 0, 1)]

coastal = [
    point
    for point, (row, col) in tqdm(zip(land, land_cells), total=len(land))
    if any((row + d_lat, col + d_lon) in water_cells for d_lat, d_lon in offsets)
]

coastal_df = pd.DataFrame(coastal, columns=['lat', 'lon'])
print(coastal_df.shape)
coastal_df.head()

In [None]:
# Map of every point in coastal_df.
px.scatter_geo(data_frame=coastal_df, lat='lat', lon='lon')

In [None]:
# Plot a random subset of the cells deeper than -2000, coloured by elevation.
# The sample size is capped at the number of available rows so the cell cannot
# raise when fewer than 10000 rows match, and the draw is seeded so the figure
# is identical on every Restart & Run All.
below_2000 = data[data['elevation'] < -2000]
px.scatter_geo(
    below_2000.sample(n=min(10000, len(below_2000)), random_state=0),
    lat='lat', lon='lon',
    color='elevation',
)

In [None]:
# Split the water cells into deep basin (elevation < -2000) and the shallower
# band between -2000 (inclusive) and 0 (exclusive), stored in `coast`.
# Land cells and rows with NaN elevation fall into neither group.
#
# The columns are pulled out as plain lists and zipped, instead of using
# DataFrame.iterrows(), which builds a Series for every row and is slow on
# large frames. Passing `total` lets tqdm show a real progress bar.
lats = data['lat'].tolist()
lons = data['lon'].tolist()
elevations = data['elevation'].tolist()

basin = defaultdict(list)  # lat -> lons of the basin cells on that latitude
coast = []                 # (lat, lon) of every shallow-water cell
for lat, lon, elevation in tqdm(zip(lats, lons, elevations), total=len(lats)):
    if elevation < -2000:
        basin[lat].append(lon)
    elif -2000 <= elevation < 0:
        coast.append((lat, lon))

In [None]:
# A shallow-water cell is a "drop-off" when any cell of its 3x3 grid
# neighbourhood is deep basin.
#
# Neighbours are matched on grid indices (coordinate / increment, rounded to
# the nearest whole number), not on raw float coordinates. `lat - 0.1` and
# `lat + 0.1` frequently differ from the stored neighbour coordinate in the
# last bit (e.g. 0.3 - 0.1 != 0.2), so exact float lookups silently miss
# adjacent cells. Whole-number indices compare exactly, and a set gives
# constant-time membership tests.
increment = 0.1  # grid spacing of the coordinates in `coast` / `basin`

basin_points = [
    (basin_lat, basin_lon)
    for basin_lat, basin_lons in basin.items()
    for basin_lon in basin_lons
]
basin_cells = set(
    map(tuple, np.rint(np.asarray(basin_points, dtype=float).reshape(-1, 2) / increment).tolist())
)
coast_cells = np.rint(np.asarray(coast, dtype=float).reshape(-1, 2) / increment).tolist()
offsets = [(d_lat, d_lon) for d_lat in (-1, 0, 1) for d_lon in (-1, 0, 1)]

drop = [
    point
    for point, (row, col) in tqdm(zip(coast, coast_cells), total=len(coast))
    if any((row + d_lat, col + d_lon) in basin_cells for d_lat, d_lon in offsets)
]

drop_df = pd.DataFrame(drop, columns=['lat', 'lon'])
print(drop_df.shape)
drop_df.head()

In [None]:
# Map of every point in drop_df.
px.scatter_geo(data_frame=drop_df, lat='lat', lon='lon')

In [None]:
# Tag each boundary frame with a label, stack the two frames, and draw them on
# a single map coloured by that label.
for frame, label in ((coastal_df, 'coastline'), (drop_df, 'dropoff')):
    frame['case'] = label
boundaries = pd.concat(objs=[coastal_df, drop_df])
px.scatter_geo(data_frame=boundaries, lat='lat', lon='lon', color='case')

In [None]:
# For every drop-off point, collect three sets of other drop-off points, keyed
# by the point's (lat, lon) tuple:
#   neighbors - the NUM_NEIGHBORS closest points within MAX_DISTANCE
#   allowed   - every point within MAX_DISTANCE
#   to_close  - every point within MIN_DISTANCE
# The point itself (distance 0) is never included.
#
# Distances are straight-line distances in the lat/lon coordinate units.
# They are computed on plain arrays, so drop_df is no longer given a scratch
# 'distance' column that is overwritten on every iteration and left holding
# only the distances to the last point. The in-range candidates are filtered
# and sorted once per point; the stable sort makes tie-breaking deterministic.
neighbors = {}
allowed = {}
to_close = {}

NUM_NEIGHBORS = 5
MAX_DISTANCE = 2
MIN_DISTANCE = 0.5

points = [
    [lat, lon] for lat, lon in zip(drop_df['lat'], drop_df['lon'])
]
point_keys = [tuple(point) for point in points]
coords = np.asarray(points, dtype=float).reshape(-1, 2)

for lat, lon in tqdm(points):
    distance = ((coords[:, 0] - lat) ** 2 + (coords[:, 1] - lon) ** 2) ** 0.5
    in_range = np.flatnonzero((distance > 0) & (distance <= MAX_DISTANCE))
    # Nearest first, so the head of the array is the k-nearest set.
    in_range = in_range[np.argsort(distance[in_range], kind='stable')]
    nearby = in_range[distance[in_range] <= MIN_DISTANCE]

    neighbors[(lat, lon)] = {point_keys[i] for i in in_range[:NUM_NEIGHBORS].tolist()}
    allowed[(lat, lon)] = {point_keys[i] for i in in_range.tolist()}
    to_close[(lat, lon)] = {point_keys[i] for i in nearby.tolist()}

In [None]:
# Grow each point's neighbour set breadth-first: follow neighbour-of-neighbour
# links level by level, keeping only points that lie in the origin's `allowed`
# set, until a level adds nothing new.
expanded_neighbors = {}
for origin, seeds in tqdm(neighbors.items()):
    reachable = allowed[origin]
    visited = set(seeds)

    frontier = seeds
    while frontier:
        discovered = set()
        for node in frontier:
            for candidate in neighbors[node]:
                if candidate in reachable and candidate not in visited:
                    visited.add(candidate)
                    discovered.add(candidate)
        frontier = discovered

    expanded_neighbors[origin] = visited

In [None]:
# Estimate the local orientation at each drop-off point from the directions
# to its expanded neighbours, ignoring neighbours listed in `to_close`.
#
# Result: `shelf_angle`, one row per point that has at least one remaining
# neighbour, with columns lat, lon and angle (radians, in (-pi/2, pi/2]).
#
# A direction and its opposite describe the same orientation, so the angles
# are axial (period pi) and must not be averaged arithmetically: +pi/2 and
# -pi/2 are the same north-south line, yet their arithmetic mean is 0
# (east-west). The standard remedy is used instead: double each angle, average
# the unit vectors, and halve the angle of the mean vector.
rows = []
for (lat, lon), neighbor_set in tqdm(expanded_neighbors.items()):
    # Set difference builds a new set, so expanded_neighbors is left intact
    # and this cell can be re-run without changing its inputs.
    for lat_n, lon_n in neighbor_set - to_close[(lat, lon)]:
        rows.append((lat, lon, lat_n, lon_n))

# dtype=float keeps the columns numeric even when `rows` is empty.
pairs = pd.DataFrame(rows, columns=['lat', 'lon', 'lat_n', 'lon_n'], dtype=float)

# arctan2 depends only on direction, so the offsets need no normalising.
theta = np.arctan2(pairs['lat'] - pairs['lat_n'], pairs['lon'] - pairs['lon_n'])
pairs['cos2'] = np.cos(2 * theta)
pairs['sin2'] = np.sin(2 * theta)

mean_vector = pairs.groupby(['lat', 'lon'])[['cos2', 'sin2']].mean().reset_index()
mean_vector['angle'] = 0.5 * np.arctan2(mean_vector['sin2'], mean_vector['cos2'])
shelf_angle = mean_vector[['lat', 'lon', 'angle']]

In [None]:
# Map of every point in shelf_angle, coloured by its angle.
px.scatter_geo(data_frame=shelf_angle, lat='lat', lon='lon', color='angle')