In [None]:
## Code to convert a KML file of IOOS glider path data (2009 - 2025) to a GIS layer
## Code to extract glider path data from the Rutgers THREDDS repository and save it as a shapefile
## For masters project
## Nusrat Noor - nusratjnoor@gmail.com

In [None]:
# Import packages
import arcpy
import geopandas as gpd
import requests
from bs4 import BeautifulSoup
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from datetime import datetime, timedelta
import pandas as pd
from shapely.geometry import LineString

In [None]:
# Point arcpy at the project folder and let tools replace existing outputs
arcpy.env.workspace = "N:/MP_Noor"
arcpy.env.overwriteOutput = True

In [None]:
# Relative path to the IOOS glider KML export.
# Written as a raw string so the Windows backslashes are taken literally rather
# than being parsed as (invalid) escape sequences such as "\D" and "\G".
kml_file = r".\Data\Glider\IOOS Gliders Map - Catalog - Catalog.kml"

In [None]:
# Convert the KML file into a GIS layer; arguments are the input KML, the
# output folder, and the output name
arcpy.conversion.KMLToLayer(
    kml_file,
    r'.\Data\Glider',
    r'ioos_glider_paths.lyrx',
)

In [None]:
# Base URL for the Rutgers Glider THREDDS catalog, and the prefix used to build
# a data URL for each dataset listed in it
catalog_url = "https://tds.marine.rutgers.edu/thredds/catalog/cool/glider/all/catalog.html"
base_data_url = "https://tds.marine.rutgers.edu/thredds/dodsC/cool/glider/all/"

# Substring that marks an href in the catalog page as a dataset link
dataset_marker = "dataset=cool/glider/all/"

# Fetch the catalog page. The timeout keeps a stalled server from hanging the
# notebook, and raise_for_status() stops here on an HTTP error instead of
# scraping an error page and writing an empty link file.
response = requests.get(catalog_url, timeout=60)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Write one full data URL per dataset link found in the catalog page
with open("N:/MP_Noor/Data/Glider/glider_links.txt", "w") as f:
    for link in soup.find_all("a", href=True):
        data = link.get('href')
        if dataset_marker in data:  # Filter for relevant dataset links
            # Everything after the marker is the dataset's file name
            file_name = data.split(dataset_marker)[-1]
            full_url = f"{base_data_url}{file_name}"
            f.write(full_url + "\n")


In [None]:
# Read the saved glider dataset URLs back from the text file, one per line,
# stripping the trailing newline (and any surrounding whitespace) from each
with open("N:/MP_Noor/Data/Glider/glider_links.txt", "r") as f:
    glider_links = [line.strip() for line in f]

# Collect one LineString per glider dataset together with its summary
# attributes; the two lists are appended to in lockstep so they stay aligned
geometries = []
attributes = []

# Infinite coordinate values are rejected along with NaNs
infinities = [float('inf'), -float('inf')]

# Loop through each glider link
for path in glider_links:
    try:
        # Open the remote dataset in a context manager so it is closed after
        # each glider instead of leaving handles open for the whole loop
        with xr.open_dataset(path, decode_times=False) as remote_data:
            # Convert to a DataFrame while the dataset is still open
            df = remote_data[['time', 'temperature', 'latitude', 'longitude', 'depth']].to_dataframe()

        # Keep only rows whose longitude and latitude are both present and finite
        coords = df[['longitude', 'latitude']]
        has_position = coords.notna().all(axis=1) & ~coords.isin(infinities).any(axis=1)
        df = df[has_position]

        # A line needs at least two vertices, so skip empty and single-point
        # tracks here rather than letting LineString raise and report a failure
        if len(df) < 2:
            continue

        # Build the track as (longitude, latitude) pairs in DataFrame row order
        line_geom = LineString(df[['longitude', 'latitude']].values)

        # Append the geometry and its attributes together
        geometries.append(line_geom)
        attributes.append({
            # NOTE(review): shapefile field names are typically limited to 10
            # characters, so 'temperature' may be truncated on write - confirm
            'temperature': df['temperature'].mean(),  # Average temperature along the track
            'num_points': len(df)  # Number of points in the line
        })

    except Exception as e:
        # Best effort: report the dataset that failed and carry on with the rest
        print(f"Failed to process {path}: {e}")

# Assemble the tracks and their attributes into a GeoDataFrame, declaring the
# coordinates as WGS84 (EPSG:4326) at construction time
gdf = gpd.GeoDataFrame(attributes, geometry=geometries, crs='EPSG:4326')

# Write the result out as a shapefile and report where it went
output_shapefile = "N:/MP_Noor/Data/Glider/Rutgers.shp"
gdf.to_file(output_shapefile)
print(f"Shapefile saved at: {output_shapefile}")