# US Coastline Shapefile Visualization

This notebook demonstrates how to load, explore, and visualize the US coastline shapefile data using various geospatial libraries.

## Overview
- Load and examine the US coastline shapefile
- Display basic information about the dataset
- Create static and interactive visualizations
- Explore different mapping techniques

In [3]:
# Install the third-party packages imported below into the kernel's environment.
# NOTE(review): the recorded output of this cell reports "No module named pip",
# so the install did not run here -- confirm the packages are provided some other way.
%pip install geopandas matplotlib folium

/Users/liang.cheng/Documents/advana/databricks-geosptial-notebooks/.venv/bin/python: No module named pip
Note: you may need to restart the kernel to use updated packages.


In [4]:
# Import necessary libraries
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import folium
from folium import plugins
import numpy as np
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including any CRS or deprecation warnings raised by the libraries above.
warnings.filterwarnings('ignore')

# Set up plotting parameters.
# The style must be selected first: plt.style.use('default') resets rcParams to
# matplotlib's defaults, so calling it after the figsize assignment would
# discard that setting and figures would fall back to the default size.
plt.style.use('default')
plt.rcParams['figure.figsize'] = (12, 8)

print("Libraries imported successfully!")

Libraries imported successfully!




In [5]:
## 1. Load and Examine the Shapefile

# Path to the US coastline shapefile, relative to the notebook's working directory.
shapefile_path = "data/tl_2023_us_coastline/tl_2023_us_coastline.shp"

try:
    # Only the read itself is guarded, so the hint below is shown for load
    # failures and not for unrelated errors in the summary prints.
    gdf = gpd.read_file(shapefile_path)
except Exception as e:
    print(f"❌ Error loading shapefile: {e}")
    print("Make sure the shapefile exists and all required files (.shp, .shx, .dbf, .prj) are present.")
    # Re-raise so the cell visibly fails. Swallowing the error would let later
    # cells hit a NameError on `gdf`, or silently reuse a stale `gdf` that is
    # still in the kernel from an earlier run.
    raise

print("✅ Shapefile loaded successfully!")
print(f"Shape: {gdf.shape}")
print(f"CRS: {gdf.crs}")
print(f"Columns: {list(gdf.columns)}")

# Display basic information
print("\n📊 Dataset Information:")
print(f"Number of features: {len(gdf)}")
print(f"Geometry type: {gdf.geometry.geom_type.unique()}")
print(f"Bounds: {gdf.total_bounds}")

✅ Shapefile loaded successfully!
Shape: (4236, 3)
CRS: EPSG:4269
Columns: ['NAME', 'MTFCC', 'geometry']

📊 Dataset Information:
Number of features: 4236
Geometry type: ['LineString']
Bounds: [-179.147236  -14.548699  179.77847    71.39038 ]


## Create a simplified version of the coastline for faster plotting



In [9]:
# Create gdf_simple: a copy of gdf whose geometries are simplified for faster plotting.
#
# GeoSeries.simplify() interprets `tolerance` in the units of the layer's CRS.
# The load cell reports EPSG:4269, a geographic CRS measured in degrees, so
# passing 1000 directly would mean 1000 degrees -- more than the full extent of
# the data -- and would reduce every line to the fewest vertices allowed.
SIMPLIFY_TOLERANCE_M = 1000   # intended simplification distance; presumably metres -- TODO confirm
METERS_PER_DEGREE = 111_320   # approximate length of one degree of latitude

if gdf.crs is not None and gdf.crs.is_geographic:
    # Express the distance in degrees to match the CRS units.
    simplify_tolerance = SIMPLIFY_TOLERANCE_M / METERS_PER_DEGREE
else:
    # Projected (or unknown) CRS: assume its units already match the distance.
    simplify_tolerance = SIMPLIFY_TOLERANCE_M

gdf_simple = gdf.copy()
gdf_simple['geometry'] = gdf_simple['geometry'].simplify(
    tolerance=simplify_tolerance, preserve_topology=True
)

# Get the points (vertex coordinates) of each simplified geometry as a NumPy
# array; polygons contribute their exterior ring, other geometries their own coords.
gdf_simple['points'] = gdf_simple['geometry'].apply(
    lambda x: np.array(x.exterior.coords) if x.geom_type == 'Polygon' else np.array(x.coords)
)
gdf_simple.head()

Unnamed: 0,NAME,MTFCC,geometry,points
0,Atlántico,L4150,"LINESTRING (-66.01689 18.44877, -66.01667 18.4...","[[-66.016887, 18.448772], [-66.016672, 18.4480..."
1,Atlántico,L4150,"LINESTRING (-65.56333 18.37807, -65.56438 18.3...","[[-65.563334, 18.378069], [-65.564376, 18.3779..."
2,Atlántico,L4150,"LINESTRING (-65.59793 18.39765, -65.5982 18.39...","[[-65.597932, 18.397647], [-65.598201, 18.39823]]"
3,Atlántico,L4150,"LINESTRING (-65.55133 18.37372, -65.55158 18.3...","[[-65.551327, 18.373715], [-65.551576, 18.37383]]"
4,Atlántico,L4150,"LINESTRING (-65.56089 18.3754, -65.56096 18.37...","[[-65.560892, 18.375399], [-65.560956, 18.3754..."


In [10]:
# List every distinct "NAME" value present in gdf_simple
gdf_simple.loc[:, 'NAME'].unique()

array(['Atlántico', 'Caribe', 'Atlantic', 'Arctic', 'Caribbean',
       'Great Lakes', 'Pacific', 'Gulf'], dtype=object)