In [None]:
# ----------------------------------------------------------------- #
#                              MODULES                              #

# Standard Modules
import os
import glob

# Third-Party Modules
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.ops import snap, linemerge, unary_union
from shapely.geometry import (
    box,
    LineString,
    MultiLineString,
    Point,
    MultiPoint,
    Polygon,
    MultiPolygon,
)
from scipy.spatial import cKDTree
import networkx as nx

#                                                                   #
# ----------------------------------------------------------------- #

# ----------------------------------------------------------------- #
#                             FUNCTIONS                             #


# Identify Pacific Border Waters
def build_us_waters_boundary(us_waters, clip_lon=-116.5):
    """
    Select the Pacific-side maritime boundary lines and dissolve them into one row.

    A row is kept when REGION is "Alaska", "US-Canada" or "Pacific Coast" AND
    at least one of the following holds: TS == 1, REGION == "US-Canada", or
    NOTE contains the text "Georgia". The surviving geometries are dissolved
    and then clipped to the part lying WEST of `clip_lon`.

    Parameters:
        us_waters (GeoDataFrame): Must have REGION, TS, NOTE and geometry columns.
        clip_lon (float): Eastern edge of the clipping box, expressed in the
            units of the layer's own CRS (assumed to be degrees of longitude —
            TODO confirm against the source layer). Defaults to the value
            that used to be hard-coded.

    Returns:
        GeoDataFrame produced by the intersection overlay with the clip box.
    """
    in_pacific_region = us_waters.REGION.isin(["Alaska", "US-Canada", "Pacific Coast"])
    is_border_row = (
        (us_waters.TS == 1)
        | (us_waters.REGION == "US-Canada")
        | us_waters.NOTE.astype(str).str.contains("Georgia")
    )
    border = us_waters[in_pacific_region & is_border_row]
    border = border[["geometry"]].dissolve()

    # total_bounds is [minx, miny, maxx, maxy]
    minx, miny, _maxx, maxy = border.total_bounds

    # Clipping box spanning the full latitude range, from the western edge of
    # the data up to clip_lon: everything WEST of clip_lon is kept.
    clip_box = box(minx, miny, clip_lon, maxy)
    clip_gdf = gpd.GeoDataFrame(geometry=[clip_box], crs=border.crs)

    return gpd.overlay(border, clip_gdf, how="intersection")


def build_us_coastline(us_coastline):
    """
    Keep the coastline features with NAME == "Pacific" and MTFCC == "L4150",
    dissolved into a single row that carries only the geometry column.

    Parameters:
        us_coastline (GeoDataFrame): Must have NAME, MTFCC and geometry columns.

    Returns:
        One-row GeoDataFrame with the dissolved geometry.
    """
    named_pacific = us_coastline.NAME == "Pacific"
    is_l4150 = us_coastline.MTFCC == "L4150"

    pacific_rows = us_coastline[named_pacific & is_l4150]
    return pacific_rows[["geometry"]].dissolve()


def get_us_waters(us_waters_line, area_zone):
    """
    Clip a layer to one of two fixed rectangular zones.

    Parameters:
        us_waters_line (GeoDataFrame): Layer to clip. The box limits below are
            plain numbers compared against the layer's coordinates, so the
            layer is assumed to be in lon/lat degrees — TODO confirm at call sites.
        area_zone (str): "CONTIGUOUS" or "ALASKA".

    Returns:
        GeoDataFrame produced by the intersection overlay with the zone box.

    Raises:
        ValueError: If `area_zone` is not one of the supported names.
    """
    # total_bounds is [minx, miny, maxx, maxy]
    minx, miny, maxx, maxy = us_waters_line.total_bounds

    if area_zone == "CONTIGUOUS":
        clip_lon_w = -140.5
        clip_lon_e = -115
        clip_lat_n = 50

        # Fixed west/east/north limits; the southern edge follows the data.
        clip_box = box(clip_lon_w, miny, clip_lon_e, clip_lat_n)
    elif area_zone == "ALASKA":
        clip_lat_s = 50

        # Everything north of clip_lat_s, across the full extent of the data.
        clip_box = box(minx, clip_lat_s, maxx, maxy)
    else:
        # Previously this only printed and then failed further down on the
        # unbound `clip_box`; fail immediately with a clear error instead.
        raise ValueError(f"Area Zone Not Supported: {area_zone}")

    # Wrap the box in a GeoDataFrame sharing the input CRS so overlay accepts it.
    clip_gdf = gpd.GeoDataFrame(geometry=[clip_box], crs=us_waters_line.crs)

    return gpd.overlay(us_waters_line, clip_gdf, how="intersection")


# Convert any closed LineStrings into Polygons temporarily
def line_to_poly_if_closed(geom):
    """
    Return a Polygon built from `geom` when it is a LineString that forms a
    ring; return `geom` itself in every other case.
    """
    is_closed_line = isinstance(geom, LineString) and geom.is_ring
    return Polygon(geom) if is_closed_line else geom


def clean_connect_lines_fast(
    gdf, tolerance=0.0001, drop_polygons=True, find_polys=True
):
    """
    Snap, merge, and clean line geometries in a GeoDataFrame.

    All geometries are unioned into one geometry, snapped onto themselves with
    `tolerance`, and line-merged so that touching segments become single lines.

    Parameters:
        gdf (GeoDataFrame): Line geometries to clean.
        tolerance (float): Snap distance, in the units of the frame's CRS.
        drop_polygons (bool): When `find_polys` is True, drop the rows that
            were converted to Polygons (i.e. the closed rings). Ignored when
            `find_polys` is False.
        find_polys (bool): Convert closed LineStrings to Polygons.

    Returns:
        GeoDataFrame with one row per merged line, index reset to 0..n-1.

    Raises:
        ValueError: If the merged result is neither a LineString nor a
            MultiLineString.
    """
    # Merge all geometries at once (no need to dissolve)
    merged_geom = unary_union(gdf.geometry)

    # Snap the merged geometry onto itself (once)
    snapped = snap(merged_geom, merged_geom, tolerance)

    if snapped.geom_type == "LineString":
        # A lone LineString has nothing left to merge, and shapely's linemerge
        # is written for multi-part / sequence input, so pass it straight on.
        merged_lines = snapped
    else:
        # Linemerge to connect touching line segments
        merged_lines = linemerge(snapped)

    # Normalise the result to a flat list of LineStrings
    if merged_lines.geom_type == "LineString":
        geoms = [merged_lines]
    elif merged_lines.geom_type == "MultiLineString":
        geoms = list(merged_lines.geoms)
    else:
        raise ValueError("Unexpected geometry type after linemerge")

    result_gdf = gpd.GeoDataFrame(geometry=geoms, crs=gdf.crs)

    if find_polys:
        # Only apply polygon conversion if requested
        result_gdf["geometry"] = result_gdf["geometry"].apply(line_to_poly_if_closed)

        if drop_polygons:
            result_gdf = result_gdf[result_gdf.geometry.geom_type != "Polygon"]

    return result_gdf.reset_index(drop=True)


def round_geometry_coords(geom, precision=4):
    """
    Return a copy of a shapely geometry with every ordinate rounded.

    Handles Point, LineString, Polygon and their Multi* counterparts. Empty
    geometries are handed back unchanged.

    Parameters:
        geom (shapely geometry): The geometry to round.
        precision (int): Number of decimal places to keep.

    Returns:
        shapely geometry of the same type with rounded coordinates.

    Raises:
        ValueError: For any other geometry type.
    """
    if geom.is_empty:
        return geom

    def _rounded(sequence):
        # Round each ordinate of each coordinate tuple.
        rounded = []
        for coord in sequence:
            rounded.append(tuple(round(value, precision) for value in coord))
        return rounded

    def _rounded_polygon(poly):
        # Rebuild a polygon from its rounded shell and rounded holes.
        shell = _rounded(poly.exterior.coords)
        holes = [_rounded(hole.coords) for hole in poly.interiors]
        return Polygon(shell, holes)

    kind = geom.geom_type

    if kind == "Point":
        (xy,) = _rounded([geom.coords[0]])
        return Point(*xy)

    if kind == "LineString":
        return LineString(_rounded(geom.coords))

    if kind == "Polygon":
        return _rounded_polygon(geom)

    if kind == "MultiPoint":
        member_coords = [member.coords[0] for member in geom.geoms]
        return MultiPoint([Point(*xy) for xy in _rounded(member_coords)])

    if kind == "MultiLineString":
        return MultiLineString(
            [LineString(_rounded(member.coords)) for member in geom.geoms]
        )

    if kind == "MultiPolygon":
        return MultiPolygon([_rounded_polygon(member) for member in geom.geoms])

    raise ValueError(f"Geometry type {geom.geom_type} not supported.")


def connect_lines_via_endpoints_fast(gdf, endpoint_col="endpoints", tolerance=0.0001):
    """
    Group lines whose endpoints lie within `tolerance` of each other and
    line-merge each group.

    Lines are identified by their row POSITION rather than their index label,
    so the grouping stays correct when the index contains duplicate labels
    (which can happen, for instance, after an `explode()` that keeps the
    parent index).

    Note that the merge itself is a union + linemerge, so only lines that
    actually touch end up as one LineString; lines that are merely close
    stay separate rows in the output.

    Parameters:
        gdf (GeoDataFrame): Must have:
            - geometry: LineStrings
            - endpoint_col: tuple of (start_point, end_point)
        endpoint_col (str): Name of the column holding the endpoint tuples.
        tolerance (float): Distance threshold for connection (degrees if WGS84).

    Returns:
        GeoDataFrame of merged LineStrings with a fresh 0..n-1 index.
    """
    n_lines = len(gdf)
    if n_lines == 0:
        # Nothing to connect; avoid building a KD-tree on an empty array.
        return gpd.GeoDataFrame(geometry=[], crs=gdf.crs)

    # --- Step 1: Flatten endpoints; point k belongs to the line at position k // 2 ---
    endpoint_xy = [
        (pt.x, pt.y) for start, end in gdf[endpoint_col] for pt in (start, end)
    ]

    # --- Step 2: KDTree to find all endpoint pairs within tolerance ---
    tree = cKDTree(endpoint_xy)
    pairs = tree.query_pairs(r=tolerance)

    # --- Step 3: Build graph of line positions from endpoint connections ---
    G = nx.Graph()
    G.add_nodes_from(range(n_lines))

    for i, j in pairs:
        pos_i, pos_j = i // 2, j // 2
        if pos_i != pos_j:  # ignore a line's own two endpoints
            G.add_edge(pos_i, pos_j)

    # --- Step 4: Merge LineStrings per connected component ---
    geometries = gdf["geometry"]
    merged_lines = []
    for component in nx.connected_components(G):
        unioned = unary_union(geometries.iloc[sorted(component)])

        if unioned.geom_type == "LineString":
            # A lone line needs no merging, and linemerge is written for
            # multi-part / sequence input, so keep it as is.
            merged_lines.append(unioned)
            continue

        merged = linemerge(unioned)
        if merged.geom_type == "MultiLineString":
            merged_lines.extend(merged.geoms)
        else:
            merged_lines.append(merged)

    return gpd.GeoDataFrame(geometry=merged_lines, crs=gdf.crs).reset_index(drop=True)


def extract_endpoints(line):
    """Return the first and last vertex of `line` as a (Point, Point) tuple."""
    vertices = line.coords
    first_vertex, last_vertex = vertices[0], vertices[-1]
    return Point(first_vertex), Point(last_vertex)


def round_point(pt, decimals=4):
    """Return a new 2D Point whose x and y are `pt`'s, rounded to `decimals` places."""
    x_rounded, y_rounded = (round(value, decimals) for value in (pt.x, pt.y))
    return Point(x_rounded, y_rounded)


def get_smoothed_coastline(us_coastline_pacific_contiguous, tolerance=0.001):
    """
    Round, merge and measure the line geometries of a GeoDataFrame.

    Steps: round every coordinate to 5 decimals, union + linemerge all lines,
    split the result into single lines, group lines whose (4-decimal) endpoints
    lie within `tolerance` and merge each group, then add a "length" column
    measured in EPSG:5070.

    The input frame is copied first, so the caller's object is left untouched
    (it was previously modified in place).

    Parameters:
        us_coastline_pacific_contiguous (GeoDataFrame): Line geometries.
        tolerance (float): Endpoint distance threshold passed on to
            connect_lines_via_endpoints_fast.

    Returns:
        GeoDataFrame of single LineStrings with a "length" column, always
        re-projected to EPSG:4326, with a fresh 0..n-1 index.
    """
    lines = us_coastline_pacific_contiguous.copy()

    lines["geometry"] = lines["geometry"].apply(
        lambda g: round_geometry_coords(g, precision=5)
    )

    lines = lines.explode()
    lines = lines.dissolve()

    # Union all geometries and merge touching segments. A lone LineString has
    # nothing to merge and linemerge is written for multi-part input, so it is
    # kept as is.
    unioned = unary_union(lines.geometry)
    lines["geometry"] = (
        unioned if unioned.geom_type == "LineString" else linemerge(unioned)
    )

    lines = lines.explode()

    # Endpoints are rounded with round_point's default precision before matching.
    lines["endpoints"] = lines["geometry"].apply(
        lambda g: tuple(round_point(pt) for pt in extract_endpoints(g))
    )

    lines = connect_lines_via_endpoints_fast(lines, tolerance=tolerance)
    lines = lines.dissolve().explode().reset_index(drop=True)

    lines = lines.to_crs(epsg=5070)  # Albers Equal Area (USA-wide)
    lines["length"] = lines.length
    lines = lines.to_crs(epsg=4326)

    return lines


def find_closed_lines(gdf):
    """
    Return a copy of the rows of `gdf` whose geometry reports `is_ring`.

    The original index labels of the selected rows are preserved.
    """

    def _is_ring(geometry):
        return geometry.is_ring

    ring_mask = gdf["geometry"].apply(_is_ring)
    closed_rows = gdf[ring_mask]
    return closed_rows.copy()


def connect_lines_to_polygon(line1, line2, tolerance=1e-9):
    """
    Join two LineStrings end-to-end and return the Polygon they outline.

    The four endpoint pairings are tried in this order: start1/end2,
    start1/start2, start2/end1, end2/end1. The first pairing whose two
    endpoints lie closer than `tolerance` is taken as the point where the
    ring closes, and the lines are chained so that this point is both the
    first and the last vertex.

    At the opposite junction the duplicated vertex is dropped only when the
    two lines really coincide there (within `tolerance`); otherwise both
    vertices are kept, so no input vertex is silently lost and the gap is
    bridged by a straight segment.

    Parameters:
        line1, line2: Objects exposing `.coords` (LineStrings).
        tolerance (float): Maximum planar XY distance for two endpoints to
            count as the same point.

    Returns:
        Polygon built from the chained coordinates.

    Raises:
        ValueError: If no pair of endpoints lies within `tolerance`.
    """
    coords1 = list(line1.coords)
    coords2 = list(line2.coords)
    reversed2 = coords2[::-1]

    def gap(a, b):
        # Planar XY distance between two coordinate tuples.
        return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) ** 0.5

    # (head, tail): the ring closes where head[0] meets tail[-1].
    candidates = (
        (coords1, coords2),  # start1 meets end2
        (coords1, reversed2),  # start1 meets start2
        (coords2, coords1),  # start2 meets end1
        (reversed2, coords1),  # end2 meets end1
    )

    for head, tail in candidates:
        if gap(head[0], tail[-1]) < tolerance:
            junction_shared = gap(head[-1], tail[0]) < tolerance
            return Polygon(head + (tail[1:] if junction_shared else tail))

    raise ValueError("Cannot connect lines into a closed polygon.")


def linestrings_to_polygons_if_closed(gdf):
    """
    Return a copy of `gdf` in which every LineString that forms a ring has
    been replaced by the Polygon it outlines. All other geometries, and the
    input frame itself, are left untouched.

    Parameters:
        gdf (GeoDataFrame): Must contain a 'geometry' column.

    Returns:
        GeoDataFrame with closed loops turned into Polygons.
    """

    def _polygonize_ring(geometry):
        if not isinstance(geometry, LineString):
            return geometry
        if not geometry.is_ring:
            return geometry
        return Polygon(geometry)

    converted = gdf.copy()
    converted["geometry"] = converted["geometry"].apply(_polygonize_ring)
    return converted


#                                                                   #
# ----------------------------------------------------------------- #

In [None]:
# Root folder of the raw GIS inputs; every input path in this notebook is built from it.
base_path = "/Users/tylerstevenson/Documents/CODE/FindMyWhale/data/processed/GIS/RAW/NECESSARY"

In [None]:
# --- Input layer locations (all relative to base_path) ---

# Maritime limits, coastline and 12 NM zone layers
us_waters_path = (
    f"{base_path}/USMaritimeLimitsAndBoundariesSHP/USMaritimeLimitsNBoundaries.shp"
)
us_coastline_path = f"{base_path}/tl_2022_us_coastline/tl_2022_us_coastline.shp"
tz_file_path = f"{base_path}/World_12NM_v4_20231025/eez_12nm_v4.shp"

# Marine Area Geometries
ca_regions_path = f"{base_path}/FederalMarineBioregions_SHP/FederalMarineBioregions.shp"
ca_bc_marine_geometries_path = f"{base_path}/ia_geography_sog.gdb"

# Shoreline layers
# NOTE: `wsdot_shorelines` holds a path here and is later rebound to the loaded layer.
wsdot_shorelines = f"{base_path}/WSDOT_-_Major_Shorelines/WSDOT_-_Major_Shorelines.shp"
ws_marine_shoreline_type_path = f"{base_path}/shstmp-ps-marine-shorelines-2018-nwfsc/SHSTMP_PS_Marine_Shorelines_2018.shp"

In [None]:
# Get International Waters Line - US Maritime Limits and Boundaries (NOAA)
us_waters = gpd.read_file(us_waters_path)
# Coastline layer; the Pacific part is extracted later by build_us_coastline.
us_coastline = gpd.read_file(us_coastline_path)

### Build US Waters Polygons 

- Coastline to Territorial Waters
- Coastline to EEZ Waters

Using: 
- US Census TIGER/Line Coastline Shapefile, 2022 (Census): https://catalog.data.gov/dataset/tiger-line-shapefile-2022-nation-u-s-coastline
- US Maritime Limits + Nautical Boundaries (NOAA)

#### Coastline to Territorial Waters

In [None]:
#########################################
# Get US Territorial Waters Border
## Full Pacific Borders
# Filter + dissolve the boundary layer, then re-project to EPSG:4326 so the
# fixed lon/lat clip boxes in get_us_waters apply.
us_waters_pacific_border = build_us_waters_boundary(us_waters)
us_waters_pacific_border = us_waters_pacific_border.to_crs("EPSG:4326")

## US - Pacific Border (Contiguous)
us_waters_pacific_border_contiguous = get_us_waters(
    us_waters_pacific_border, "CONTIGUOUS"
)
## US - Simplify Geometry and Clean Border
# Snaps and line-merges the border; with the default arguments any closed
# rings are dropped from the result.
us_waters_pacific_border_contiguous = clean_connect_lines_fast(
    us_waters_pacific_border_contiguous
)

## US - Pacific Border (Alaska)
us_waters_pacific_border_alaska = get_us_waters(us_waters_pacific_border, "ALASKA")

## US - Simplify Geometry and Clean Border
# Same cleaning as for the contiguous border above.
us_waters_pacific_border_alaska = clean_connect_lines_fast(
    us_waters_pacific_border_alaska
)

In [None]:
## Full Pacific Coastline
# Select + dissolve the Pacific coastline, then re-project to EPSG:4326 so the
# fixed lon/lat clip boxes in get_us_waters apply.
us_coastline_pacific = build_us_coastline(us_coastline)
us_coastline_pacific = us_coastline_pacific.to_crs("EPSG:4326")

## Get US - Territorial Zone (Contiguous United States)

In [None]:
#########################################
# Get US Coastline
## US - Pacific Border (Contiguous)
# Clip the Pacific coastline to the "CONTIGUOUS" box.
us_coastline_pacific_contiguous = get_us_waters(us_coastline_pacific, "CONTIGUOUS")

## US -  Simplify Geometry and Smooth (Contiguous)
# Rounds coordinates, merges touching lines and adds a "length" column.
us_coastline_pacific_contiguous = get_smoothed_coastline(
    us_coastline_pacific_contiguous
)

# Get Territorial Waters Polygon - Get Coast Line
us_coastline_pacific_contiguous = us_coastline_pacific_contiguous.explode()
# Keep only the longest line (suffix `_cs`).
# NOTE(review): `.length` here appears to be the geometry length in the
# frame's current CRS rather than the "length" column — confirm it is intended.
us_coastline_pacific_contiguous_cs = us_coastline_pacific_contiguous[
    us_coastline_pacific_contiguous.length
    == us_coastline_pacific_contiguous.length.max()
]

# Get Island Polygons from Coastlines
# Every line shorter than the longest one is a candidate (suffix `_is`) ...
us_coastline_pacific_contiguous_is = us_coastline_pacific_contiguous[
    us_coastline_pacific_contiguous.length
    < us_coastline_pacific_contiguous.length.max()
]
# ... of which only the closed rings are kept ...
us_coastline_pacific_contiguous_is = find_closed_lines(
    us_coastline_pacific_contiguous_is
)
# ... and converted to Polygons.
us_coastline_pacific_contiguous_is = linestrings_to_polygons_if_closed(
    us_coastline_pacific_contiguous_is
)

# Stack the longest coastline line with the cleaned territorial border lines,
# then collapse to one geometry and split it back into single parts.
# (`continguous` is the spelling used for this variable throughout the notebook.)
us_waters_continguous = pd.concat(
    [us_coastline_pacific_contiguous_cs, us_waters_pacific_border_contiguous]
)
us_waters_continguous = us_waters_continguous.dissolve()
us_waters_continguous = us_waters_continguous.explode()

# Re-merge the combined lines with a much larger endpoint tolerance (1, in CRS units).
us_waters_continguous = get_smoothed_coastline(us_waters_continguous, tolerance=1)
# Drop the single shortest line.
us_waters_continguous = us_waters_continguous[
    us_waters_continguous["length"] != us_waters_continguous["length"].min()
]
# Join the first two remaining lines into one polygon. Any further rows are
# ignored; presumably exactly two remain for the current data — verify.
us_waters_continguous = connect_lines_to_polygon(
    us_waters_continguous.iloc[0].geometry,
    us_waters_continguous.iloc[1].geometry,
    tolerance=1e-9,
)
# Wrap the polygon back into a one-row GeoDataFrame.
us_waters_continguous = gpd.GeoDataFrame(
    geometry=[us_waters_continguous], crs=us_coastline_pacific_contiguous_cs.crs
)


# Clip Out Islands
islands_union = unary_union(us_coastline_pacific_contiguous_is.geometry)
us_waters_continguous["geometry"] = us_waters_continguous.geometry.difference(
    islands_union
)

# NOTE: `wsdot_shorelines` is rebound from the file path to the loaded layer,
# so this line cannot be re-run without first re-running the path cell.
wsdot_shorelines = gpd.read_file(wsdot_shorelines)
wsdot_shorelines = wsdot_shorelines.to_crs("EPSG:4326")

# Get Bounds of Contiguous US to Fill In Areas
us_waters_continguous_exterior = us_waters_continguous.explode()
# Rebuild each part from its exterior ring only, i.e. without interior holes.
us_waters_continguous_exterior = us_waters_continguous_exterior["geometry"].apply(
    lambda x: Polygon(x.exterior)
)

# WSDOT shoreline features restricted to those hole-free outlines.
wsdot_shorelines_add = wsdot_shorelines.clip(us_waters_continguous_exterior)

# Bounding box for Point Roberts (the variables are named `point_edwards`,
# but the feature is labelled "Point Roberts" below).
point_edwards = box(-123.0950, 48.90, -123.00, 49.0021)
point_edwards = gpd.GeoDataFrame(
    {"name": ["Point Roberts"]}, geometry=[point_edwards], crs="EPSG:4326"
)
point_edwards_water = wsdot_shorelines.clip(point_edwards)
# NOTE(review): both `difference` calls below operate row-wise on two frames;
# confirm the row indexes line up the way this step expects.
point_edwards = point_edwards.difference(point_edwards_water).reset_index()
us_waters_continguous = us_waters_continguous.difference(point_edwards).reset_index()
# reset_index() leaves two columns; rename them and keep only the geometry.
us_waters_continguous.columns = ["", "geometry"]
us_waters_continguous = us_waters_continguous[["geometry"]]

# Each `*_bbox` variable below starts as a rectangle and is then rebound to
# the WSDOT shoreline features clipped to that rectangle.

# Bounding box for Sequim
sequim_bbox = box(-123.20, 47.53, -122.00, 48.15)
sequim_bbox = gpd.GeoDataFrame(geometry=[sequim_bbox], crs="EPSG:4326")
sequim_bbox = wsdot_shorelines.clip(sequim_bbox)

# Bounding box for Puget
puget_bbox = box(-123.90, 46.50, -120.60, 48.0)
puget_bbox = gpd.GeoDataFrame(geometry=[puget_bbox], crs="EPSG:4326")
puget_bbox = wsdot_shorelines.clip(puget_bbox)

# Bounding box for Deception Pass
# NOTE(review): the variable is named `everett_bbox` — confirm which label is right.
everett_bbox = box(-122.90, 47.9000, -120.1000, 49.000)
everett_bbox = gpd.GeoDataFrame(geometry=[everett_bbox], crs="EPSG:4326")
everett_bbox = wsdot_shorelines.clip(everett_bbox)

# Add WSDOT Corrections
# Stack the waters polygon with all clipped WSDOT pieces and dissolve into one row.
us_waters_continguous = pd.concat(
    [us_waters_continguous, wsdot_shorelines_add, puget_bbox, sequim_bbox, everett_bbox]
)
us_waters_continguous = gpd.GeoDataFrame(
    us_waters_continguous, geometry="geometry", crs="EPSG:4326"
)
us_waters_continguous = us_waters_continguous.dissolve()

# WSDOT features inside a further rectangle; used right after to clip the
# marine shoreline layer.
islands_puget_bbox = box(-122.80, 48.3000, -120.1000, 48.750)
islands_puget_bbox = gpd.GeoDataFrame(geometry=[islands_puget_bbox], crs="EPSG:4326")
islands_puget_bbox = wsdot_shorelines.clip(islands_puget_bbox)

# Marine shoreline layer: keep geometry only, merge, and split into single parts.
gf = gpd.read_file(ws_marine_shoreline_type_path)
gf = gf[["geometry"]].dissolve().explode()
gf = gf.to_crs("EPSG:4326")
# Restrict to the clipped WSDOT features of the islands rectangle.
gf = gf.clip(islands_puget_bbox)
gf = gf.dissolve().explode()
# Merge the shoreline lines (endpoint tolerance 2, in CRS units), turn closed
# rings into Polygons and keep only those Polygons as one dissolved row.
gf = get_smoothed_coastline(gf, tolerance=2)
gf = linestrings_to_polygons_if_closed(gf)
gf = gf[gf.geometry.type == "Polygon"]
gf = gf.dissolve()

# Cut those polygons out of the waters polygon, then label the result.
us_waters_continguous["geometry"] = us_waters_continguous.difference(gf)
us_waters_continguous["NAME"] = "UNITED_STATES"
us_waters_continguous["AREA"] = "CONTIGUOUS"
us_waters_continguous["TYPE"] = "TERRITORIAL"

## Get US - Territorial Zone (Alaska, United States)

In [None]:
####################

## US - Pacific Border (Alaska)
# Clip the Pacific coastline to the "ALASKA" box.
us_coastline_pacific_alaska = get_us_waters(us_coastline_pacific, "ALASKA")

## US -  Simplify Geometry and Smooth (Alaska)
# Rounds coordinates, merges touching lines and adds a "length" column.
us_coastline_pacific_alaska = get_smoothed_coastline(us_coastline_pacific_alaska)

# Get Territorial Waters Polygon - Get Coast Line
us_coastline_pacific_alaska = us_coastline_pacific_alaska.explode()
# Keep only the longest line (suffix `_cs`).
# NOTE(review): `.length` here appears to be the geometry length in the
# frame's current CRS rather than the "length" column — confirm it is intended.
us_coastline_pacific_alaska_cs = us_coastline_pacific_alaska[
    us_coastline_pacific_alaska.length == us_coastline_pacific_alaska.length.max()
]

# Get Island Polygons from Coastlines
# Shorter lines (suffix `_is`): keep the closed rings and convert them to Polygons.
us_coastline_pacific_alaska_is = us_coastline_pacific_alaska[
    us_coastline_pacific_alaska.length < us_coastline_pacific_alaska.length.max()
]
us_coastline_pacific_alaska_is = find_closed_lines(us_coastline_pacific_alaska_is)
us_coastline_pacific_alaska_is = linestrings_to_polygons_if_closed(
    us_coastline_pacific_alaska_is
)

In [None]:
# Rectangle (lon/lat, EPSG:4326) covering latitudes 65.5 to 72.0; border lines
# falling inside it are removed below.
min_lon = -180.0  # Wrapping antimeridian west of Alaska
max_lon = -130.0  # East border of Alaska / Yukon
min_lat = 65.5
max_lat = 72.0

# Make the box
beringia_box = box(min_lon, min_lat, max_lon, max_lat)
beringia_gdf = gpd.GeoDataFrame(
    {"name": ["Beringia + Alaska to Canada"]}, geometry=[beringia_box], crs="EPSG:4326"
)

# Filter to Southern Alaska
# Dissolve the Alaska border to a single row, then subtract the rectangle.
us_waters_pacific_border_alaska = us_waters_pacific_border_alaska.dissolve()
us_waters_pacific_border_alaska["geometry"] = (
    us_waters_pacific_border_alaska.difference(beringia_gdf.geometry)
)

In [None]:
# Stack the longest Alaska coastline line with the trimmed border, collapse to
# one geometry and split it back into single parts.
us_waters_alaska = pd.concat(
    [us_coastline_pacific_alaska_cs, us_waters_pacific_border_alaska]
)
us_waters_alaska = us_waters_alaska.dissolve()
us_waters_alaska = us_waters_alaska.explode()

# Re-merge with a large endpoint tolerance (5, in CRS units), then drop the
# single shortest line.
us_waters_alaska = get_smoothed_coastline(us_waters_alaska, tolerance=5)
us_waters_alaska = us_waters_alaska[
    us_waters_alaska["length"] != us_waters_alaska["length"].min()
]

# Hand-picked row label. NOTE(review): this and the drop below depend on the
# row numbering produced for the current data — re-check if the inputs change.
us_waters_alaska = us_waters_alaska[us_waters_alaska.index != 6]

# Record each line's first and last vertex as Point columns.
us_waters_alaska["start_point"] = us_waters_alaska.geometry.apply(
    lambda geom: Point(geom.coords[0])
)
us_waters_alaska["end_point"] = us_waters_alaska.geometry.apply(
    lambda geom: Point(geom.coords[-1])
)

# Remove two more hand-picked rows.
us_waters_alaska = us_waters_alaska.drop([0, 4])

In [None]:
from shapely.geometry import LineString, Point
from shapely.geometry import LineString


def linestring_to_polygon(line, tolerance=1e-9):
    """
    Build a Polygon from the vertices of `line`.

    When the first and last vertex are more than `tolerance` apart, the first
    vertex is appended again at the end so the ring is explicitly closed.
    """
    ring_coords = list(line.coords)

    first_vertex = Point(line.coords[0])
    last_vertex = Point(line.coords[-1])

    if first_vertex.distance(last_vertex) > tolerance:
        # Add the start point at the end to close it
        ring_coords.append(line.coords[0])

    return Polygon(ring_coords)


# Chain four hand-picked rows (labels 3, 2, 1, 5) into one long line, in three
# identical steps. In each step `line3` is the line built so far and `line2`
# is the next line to append.
# NOTE(review): the row labels are specific to the current data.

# --- Step 1: rows 3 and 2 ---
line3 = us_waters_alaska.loc[3, "geometry"]
line2 = us_waters_alaska.loc[2, "geometry"]

l3_start, l3_end = Point(line3.coords[0]), Point(line3.coords[-1])
l2_start, l2_end = Point(line2.coords[0]), Point(line2.coords[-1])

# If line3's start is closer to line2's start than line3's end is, reverse
# line3 so that it ends near where line2 begins. line2 itself is never reversed.
if l3_end.distance(l2_start) > l3_start.distance(l2_start):
    print("Reversing line 3 to match line 2 start")
    line3 = LineString(list(line3.coords)[::-1])

# Concatenate the vertex lists as they are (no de-duplication at the join).
combined_coords = list(line3.coords) + list(line2.coords)
combined_line = LineString(combined_coords)

# Store the result as a new row. NOTE(review): the single value is assigned to
# the whole row; presumably only its geometry is used afterwards — verify.
us_waters_alaska.loc["3+2"] = combined_line

# Drop the two source rows
us_waters_alaska = us_waters_alaska.drop([2, 3])

##############

# --- Step 2: append row 1 to "3+2" ---
line3 = us_waters_alaska.loc["3+2", "geometry"]
line2 = us_waters_alaska.loc[1, "geometry"]

l3_start, l3_end = Point(line3.coords[0]), Point(line3.coords[-1])
l2_start, l2_end = Point(line2.coords[0]), Point(line2.coords[-1])


# Same orientation check as in step 1.
if l3_end.distance(l2_start) > l3_start.distance(l2_start):
    print("Reversing line 3 to match line 2 start")
    line3 = LineString(list(line3.coords)[::-1])

combined_coords = list(line3.coords) + list(line2.coords)
combined_line = LineString(combined_coords)

us_waters_alaska.loc["3+2+1"] = combined_line

# Drop the two source rows
us_waters_alaska = us_waters_alaska.drop([1, "3+2"])


# --- Step 3: append row 5 to "3+2+1" ---
line3 = us_waters_alaska.loc["3+2+1", "geometry"]
line2 = us_waters_alaska.loc[5, "geometry"]

l3_start, l3_end = Point(line3.coords[0]), Point(line3.coords[-1])
l2_start, l2_end = Point(line2.coords[0]), Point(line2.coords[-1])

# Same orientation check as in step 1.
if l3_end.distance(l2_start) > l3_start.distance(l2_start):
    print("Reversing line 3 to match line 2 start")
    line3 = LineString(list(line3.coords)[::-1])

combined_coords = list(line3.coords) + list(line2.coords)
combined_line = LineString(combined_coords)

us_waters_alaska.loc["5+3+2+1"] = combined_line

# Drop the two source rows, plus hand-picked row 8
us_waters_alaska = us_waters_alaska.drop([5, "3+2+1", 8])

us_waters_alaska = gpd.GeoDataFrame(
    us_waters_alaska, geometry="geometry", crs="EPSG:4326"
)


# Close the first remaining line into a polygon and start a fresh one-row frame.
alaska_poly = linestring_to_polygon(us_waters_alaska.geometry.iloc[0])
us_waters_alaska = gpd.GeoDataFrame(geometry=[alaska_poly], crs="EPSG:4326")

# Add Back in Islands
# Merge the island polygons into one row; buffer(0) is applied to both layers
# before overlaying (presumably to repair invalid geometry — confirm).
us_coastline_pacific_alaska_is = us_coastline_pacific_alaska_is[["geometry"]].dissolve()
us_coastline_pacific_alaska_is["geometry"] = us_coastline_pacific_alaska_is.buffer(0)

us_waters_alaska_ = us_waters_alaska.copy()
us_waters_alaska_["geometry"] = us_waters_alaska_.buffer(0)

# Island area lying outside the Alaska polygon.
us_coastline_pacific_alaska_is_outside = gpd.overlay(
    us_coastline_pacific_alaska_is, us_waters_alaska_, how="difference"
)

In [None]:
# Subtract the dissolved island polygons from the dissolved Alaska polygon.
us_waters_alaska_["geometry"] = us_waters_alaska_.dissolve().difference(
    us_coastline_pacific_alaska_is.dissolve()
)

In [None]:
# Bare expression: shows the frame as the cell's output for inspection.
us_waters_alaska_

In [None]:
# Stack the island-subtracted Alaska polygon with the island area that lies
# outside it, and dissolve everything into a single row.
us_waters_alaska_ = pd.concat(
    [us_waters_alaska_, us_coastline_pacific_alaska_is_outside]
)
us_waters_alaska = gpd.GeoDataFrame(
    us_waters_alaska_, geometry="geometry", crs="EPSG:4326"
)
us_waters_alaska = us_waters_alaska.dissolve()
us_waters_alaska = us_waters_alaska[["geometry"]]

# Label the zone. AREA used to be set to "CONTIGUOUS" here (copied from the
# contiguous-US cell) and was only corrected to "ALASKA" further down.
us_waters_alaska["NAME"] = "UNITED_STATES"
us_waters_alaska["AREA"] = "ALASKA"
us_waters_alaska["TYPE"] = "TERRITORIAL"

# Drop the row if any of its values is missing.
us_waters_alaska = us_waters_alaska.dropna()

### Canada Waters - Territorial

In [None]:
# Open CA Files
# Bioregions layer, restricted to rows whose OCEAN_E is "Pacific".
ca_ = gpd.read_file(ca_regions_path)
bc_ca = ca_[ca_.OCEAN_E == "Pacific"]
bc_ca = bc_ca.to_crs("EPSG:4326")

# 12 NM zone layer, restricted to rows whose SOVEREIGN1 is "Canada".
tz = gpd.read_file(tz_file_path)
tz = tz[tz.SOVEREIGN1 == "Canada"]
tz = tz.to_crs("EPSG:4326")

# British Columbia full bounding box
minx, miny = -140.30, 40.25
maxx, maxy = -122.00, 60.50

bc_bbox = box(minx, miny, maxx, maxy)
bc_bbox = gpd.GeoDataFrame(
    {"name": ["British Columbia"]}, geometry=[bc_bbox], crs="EPSG:4326"
)

# Split into single parts, grow each by 0.01 (degrees, since the layer is in
# EPSG:4326), and keep what falls inside the bounding box.
tz_new = tz.explode()
tz_new["geometry"] = tz_new.buffer(0.01)
tz_new = tz_new.clip(bc_bbox)

# Small patch rectangle (0.4 x 0.2 degrees) added to the zone below; `minx`
# etc. are reused, so the bounding-box values above are overwritten.
minx, miny = -133.25, 54.4
maxx, maxy = -132.85, 54.6

bc_tt_bbox_add = box(minx, miny, maxx, maxy)
bc_tt_bbox_add = gpd.GeoDataFrame(
    {"name": ["British Columbia"]}, geometry=[bc_tt_bbox_add], crs="EPSG:4326"
)

# Zone = patch rectangle + buffered 12 NM parts, dissolved into one row.
tz_new = pd.concat([bc_tt_bbox_add, tz_new])
tz_new = tz_new.dissolve()

# Remove the zone from the bioregions, split the remainder into parts and
# discard the single largest part; the smaller parts are kept.
bc_ca = bc_ca.dissolve()
bc_ca["geometry"] = bc_ca.difference(tz_new.dissolve())
bc_ca = bc_ca.explode()
bc_ca = bc_ca[bc_ca.area != bc_ca.area.max()]
bc_ca = bc_ca.dissolve()

# Final BC waters = kept bioregion parts + zone, as one geometry-only row.
bc_waters = pd.concat([bc_ca, tz_new])
bc_waters = bc_waters.dissolve()
bc_waters = bc_waters[["geometry"]]

bc_waters["NAME"] = "CANADA"
bc_waters["AREA"] = "BRITISH_COLUMBIA"
bc_waters["TYPE"] = "TERRITORIAL"

In [None]:
# Remove overlaps between the zones: BC waters are cut out of the Alaska zone,
# and the contiguous-US zone is cut out of the BC waters.
us_waters_alaska = gpd.overlay(us_waters_alaska, bc_waters, how="difference")
bc_waters = gpd.overlay(bc_waters, us_waters_continguous, how="difference")

In [None]:
# Visual check: draw the three zones on one interactive map, each in its own colour.
m = us_waters_continguous.explore(color="#20ABAD", tiles="CartoDB positron")
bc_waters.explore(m=m, color="#8367C7")
us_waters_alaska.explore(m=m, color="#DB5461")

In [None]:
# Give all three frames the same column set and order before stacking them.
us_waters_continguous = us_waters_continguous[["NAME", "AREA", "TYPE", "geometry"]]
us_waters_alaska = us_waters_alaska[["NAME", "AREA", "TYPE", "geometry"]]
# Collapse Alaska to a single row and set its AREA label.
us_waters_alaska = us_waters_alaska.dissolve()
us_waters_alaska["AREA"] = "ALASKA"
bc_waters = bc_waters[["NAME", "AREA", "TYPE", "geometry"]]
In [None]:
# Stack the three zones into one frame (original row indexes are kept).
all_waters = pd.concat([bc_waters, us_waters_alaska, us_waters_continguous])

In [None]:
# Save them out
# Writes the combined zones to a Parquet file at a hard-coded absolute path.
all_waters.to_parquet(
    "/Users/tylerstevenson/Documents/CODE/FindMyWhale/data/processed/GIS/POLYGONS/TERRITORIAL_WATERS.parquet"
)

In [None]:
# Scratch cell: a bare string literal (it looks like a hex colour code). It is
# not assigned to anything and does not affect the outputs above.
"#C2F8CB"