In [41]:
# Import necessary libraries
import geopandas as gpd
import pandas as pd
import requests
from io import BytesIO
from shapely.geometry import MultiLineString, LineString

In [42]:
# URL of the GeoJSON file
url = 'https://github.com/newzealandpaul/Shipping-Lanes/blob/main/data/Shipping_Lanes_v1.geojson?raw=true'

# Fetch the GeoJSON file
response = requests.get(url)
response.raise_for_status()  # Ensure the request was successful

# Read the GeoJSON data into a GeoDataFrame
geojson_data = BytesIO(response.content)
gdf = gpd.read_file(geojson_data)

# Display the first few rows of the GeoDataFrame
gdf.head()

Unnamed: 0,FID,OBJECTID,Type,geometry
0,0,1,Major,"MULTILINESTRING ((-34.8576 -8.13738, -34.67437..."
1,1,2,Middle,"MULTILINESTRING ((-52.14663 47.21993, -52.0640..."
2,2,3,Minor,"MULTILINESTRING ((100.43797 13.33198, 100.4257..."


In [43]:
# Explode the MultiLineString geometries into separate LineString geometries
gdf_exploded = gdf.explode(index_parts=False)
# Convert the exploded geometries to LineString
gdf_exploded['geometry'] = gdf_exploded['geometry'].apply(
    lambda geom: LineString(geom) if isinstance(geom, MultiLineString) else geom
)
# Display the first few rows of the exploded GeoDataFrame
gdf_exploded.head()

Unnamed: 0,FID,OBJECTID,Type,geometry
0,0,1,Major,"LINESTRING (-34.8576 -8.13738, -34.67437 -8.07..."
0,0,1,Major,"LINESTRING (-165.50085 53.93727, -165.06269 53..."
0,0,1,Major,"LINESTRING (-166.54962 54.00381, -166.74346 54..."
0,0,1,Major,"LINESTRING (179.99999 53.13424, 179.9854 53.13..."
0,0,1,Major,"LINESTRING (174.90066 52.38317, 174.57294 52.3..."


In [19]:
# Transform gdf so that there is a single row for each linestring in the coordinates array
# Create a new DataFrame by iterating over the rows and exploding the coordinates
exploded_data = []

for idx, row in gdf.iterrows():
    for line in row['coordinates']:
        exploded_data.append({
            'FID': row['FID'],
            'OBJECTID': row['OBJECTID'],
            'Type': row['Type'],
            'geometry': LineString(line) if isinstance(line, list) else None
        })

# Create a new GeoDataFrame from the exploded data
gdf_exploded = gpd.GeoDataFrame(exploded_data, geometry='geometry', crs=gdf.crs)

# Drop rows where geometry is None
gdf_exploded.dropna(subset=['geometry'], inplace=True)

# Display the first few rows of the exploded GeoDataFrame
gdf_exploded

Unnamed: 0,FID,OBJECTID,Type,geometry
0,0,1,Major,"LINESTRING (-34.8576 -8.13738, -34.67437 -8.07..."
1,0,1,Major,"LINESTRING (-165.50085 53.93727, -165.06269 53..."
2,0,1,Major,"LINESTRING (-166.54962 54.00381, -166.74346 54..."
3,0,1,Major,"LINESTRING (179.99999 53.13424, 179.9854 53.13..."
4,0,1,Major,"LINESTRING (174.90066 52.38317, 174.57294 52.3..."
...,...,...,...,...
234,2,3,Minor,"LINESTRING (-34.40892 -4.66871, -34.75814 -4.2..."
235,2,3,Minor,"LINESTRING (-34.40892 -4.66871, -34.74195 -4.1..."
236,2,3,Minor,"LINESTRING (138.05422 9.42472, 137.99273 9.324..."
237,2,3,Minor,"LINESTRING (-157.88571 21.23252, -157.94719 21..."


In [44]:
# Extract the coordinates from the exploded GeoDataFrame
gdf_exploded['coordinates'] = gdf_exploded['geometry'].apply(lambda geom: list(geom.coords))
# Display the first few rows of the updated exploded GeoDataFrame

# Ensure coordinates are shown to the 16th decimal place
gdf_exploded['coordinates'] = gdf_exploded['coordinates'].apply(lambda coords: [[round(coord, 16) for coord in pair] for pair in coords])

gdf_exploded.head()

Unnamed: 0,FID,OBJECTID,Type,geometry,coordinates
0,0,1,Major,"LINESTRING (-34.8576 -8.13738, -34.67437 -8.07...","[[-34.85759679699993, -8.13737959499997], [-34..."
0,0,1,Major,"LINESTRING (-165.50085 53.93727, -165.06269 53...","[[-165.50085262999997, 53.93727424300005], [-1..."
0,0,1,Major,"LINESTRING (-166.54962 54.00381, -166.74346 54...","[[-166.54962390799997, 54.003807401000074], [-..."
0,0,1,Major,"LINESTRING (179.99999 53.13424, 179.9854 53.13...","[[179.99998854000012, 53.134237075000044], [17..."
0,0,1,Major,"LINESTRING (174.90066 52.38317, 174.57294 52.3...","[[174.90065502200002, 52.38316970000005], [174..."


In [None]:
from math import radians, cos, sin, asin, sqrt, atan2, degrees
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees), returns kilometers.
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles.
    return c * r


def process_coordinates(row, row_index):
    """Process coordinates to create a dictionary with FID, OBJECTID, Type, and coordinates."""
    df = pd.DataFrame(columns=['source', 'target', 'weight', 'lane-type', 'lane-idx'])

    for i, coord in enumerate(row['coordinates'][:-1]):
        # Print the current coordinate
        print(f"Processing coordinate {i}: {coord}")

        # Compare with the next coordinate if not the last one
        if i < len(row['coordinates']) - 1:
            next_coord = row['coordinates'][i + 1]
            print(f"\tComparing with next coordinate: {next_coord}")
    
        # Calculate the distance bewteen coord and next_coord using the haversine formula
        dist = haversine(coord[0], coord[1], next_coord[0], next_coord[1]) if i < len(row['coordinates']) - 1 else None
        print(f"\tDistance to next coordinate: {dist} km")


        # Append a new row to the DataFrame
        df = pd.concat([
            df,
            pd.DataFrame({
                'source': [coord],
                'target': [next_coord],
                'weight': [dist],
                'lane-type': [row['Type']],
                'lane-idx': int(row_index),
            })
        ], ignore_index=True)

    return df

# Apply this function to all rows
processed_rows = []
for index, row in gdf_exploded.iterrows():
    processed_row = process_coordinates(row, index)
    processed_rows.append(processed_row)
# Concatenate all processed rows into a single DataFrame
final_df = pd.concat(processed_rows, ignore_index=True)

# Display the first few rows of the final DataFrame
final_df.head()
# Save the final DataFrame to a CSV file
final_df.to_csv('shipping_lanes.csv', index=False)


Processing coordinate 0: [-34.85759679699993, -8.13737959499997]
	Comparing with next coordinate: [-34.67436962399995, -8.070254808999948]
	Distance to next coordinate: 21.507177441064414 km
Processing coordinate 1: [-34.67436962399995, -8.070254808999948]
	Comparing with next coordinate: [-34.674368303999984, -8.070249185999955]
	Distance to next coordinate: 0.0006419154028373217 km
Processing coordinate 2: [-34.674368303999984, -8.070249185999955]
	Comparing with next coordinate: [-25.630149081999946, 16.957174956000074]
	Distance to next coordinate: 2955.2686346210367 km
Processing coordinate 3: [-25.630149081999946, 16.957174956000074]
	Comparing with next coordinate: [-25.483361960999957, 17.286788839000053]
	Distance to next coordinate: 39.832647359458875 km
Processing coordinate 4: [-25.483361960999957, 17.286788839000053]
	Comparing with next coordinate: [-25.337770409999962, 17.616515286000038]
	Distance to next coordinate: 39.78384456392597 km
Processing coordinate 5: [-25.33

  df = pd.concat([


Unnamed: 0,source,target,weight,lane-type,lane-idx
0,"[-34.85759679699993, -8.13737959499997]","[-34.67436962399995, -8.070254808999948]",21.507177,Major,0
1,"[-34.67436962399995, -8.070254808999948]","[-34.674368303999984, -8.070249185999955]",0.000642,Major,0
2,"[-34.674368303999984, -8.070249185999955]","[-25.630149081999946, 16.957174956000074]",2955.268635,Major,0
3,"[-25.630149081999946, 16.957174956000074]","[-25.483361960999957, 17.286788839000053]",39.832647,Major,0
4,"[-25.483361960999957, 17.286788839000053]","[-25.337770409999962, 17.616515286000038]",39.783845,Major,0
...,...,...,...,...,...
145,"[12.53692574300004, 54.98677156600007]","[15.651368257000058, 55.37689178900007]",202.417806,Major,0
146,"[15.651368257000058, 55.37689178900007]","[18.79559796700005, 56.75540469200007]",248.127452,Major,0
147,"[18.79559796700005, 56.75540469200007]","[21.55857336400004, 59.10152640700005]",307.624485,Major,0
148,"[21.55857336400004, 59.10152640700005]","[22.729170431000057, 59.594176221000055]",86.048137,Major,0
