## **Experiments for Polygon Data**

In [2]:
import geopandas as gpd
import math
import numpy as np

# Location of the polygon shapefile on the local machine.
shapefile_path = 'C:\\Users\\User\\Downloads\\GTAA_SHAPEFILES\\GTAA_POLYGON.shp'

# Read the layer, split multi-part geometries into one row per part, and
# flatten the resulting index into ordinary columns.
gdf = (
    gpd.read_file(shapefile_path)
    .explode(index_parts=True)
    .reset_index()
)


# Function to compute aspect ratio using the minimum bounding rectangle
def compute_aspect_ratio(geometry):
    """Return the long-side / short-side ratio of the minimum rotated rectangle.

    Args:
        geometry: A shapely geometry, or None.

    Returns:
        The ratio ``major / minor`` as a float, or ``np.nan`` when the
        geometry is None or empty, when its minimum rotated rectangle is not
        a polygon, or when the rectangle has a zero-length side.
    """
    if geometry is None or geometry.is_empty:
        return np.nan

    min_rect = geometry.minimum_rotated_rectangle

    # Degenerate input (a single point, collinear vertices) produces a Point
    # or LineString here, which has no ``exterior`` ring to measure.
    if min_rect.geom_type != "Polygon":
        return np.nan

    # The first three corners of the ring span the rectangle's two adjacent
    # sides; the remaining edges only repeat those lengths.
    corners = np.asarray(min_rect.exterior.coords, dtype=float)[:3]
    side_a, side_b = np.linalg.norm(np.diff(corners, axis=0), axis=1)

    major_axis = max(side_a, side_b)
    minor_axis = min(side_a, side_b)

    # Avoid division by zero
    if minor_axis == 0:
        return np.nan

    return major_axis / minor_axis

def compute_num_vertices(geometry):
    """Count the exterior-ring coordinates of a polygon or multipolygon.

    Only ``exterior.coords`` is measured, so interior rings (holes) do not
    contribute. The count is the raw length of the coordinate sequence.

    Args:
        geometry: A shapely geometry, or None.

    Returns:
        The coordinate count as an int (summed over all parts for a
        MultiPolygon), or None when the geometry is None or of another type.
    """
    if geometry is None:
        return None

    if geometry.geom_type == 'Polygon':
        return len(geometry.exterior.coords)
    if geometry.geom_type == 'MultiPolygon':
        return sum(len(part.exterior.coords) for part in geometry.geoms)
    return None
    
# Calculate convexity
def compute_convexity(geom):
    """Return the convex-hull perimeter divided by the geometry's own perimeter.

    Args:
        geom: A shapely geometry, or None.

    Returns:
        ``convex_hull.length / length`` for a Polygon or MultiPolygon, ``0``
        when that perimeter is not positive, and None for a missing geometry
        or any other geometry type.
    """
    # A missing geometry is treated like an unsupported type instead of
    # raising AttributeError on ``geom.geom_type``.
    if geom is None or geom.geom_type not in ('Polygon', 'MultiPolygon'):
        return None

    polygon_perimeter = geom.length
    if polygon_perimeter > 0:
        return geom.convex_hull.length / polygon_perimeter
    return 0


# Create a spatial index (R-tree index)
# NOTE: this is a module-level global. Any function below that refers to the
# bare name `sindex` queries this object, so it must be rebuilt whenever `gdf`
# is replaced or its geometries change.
sindex = gdf.sindex

# Function to compute neighbors
def compute_neighbors_optimized(gdf):
    """Count, for every row, how many other geometries touch it.

    Args:
        gdf: A GeoDataFrame with a ``'geometry'`` column. Its own ``sindex``
            is used to shortlist candidates by bounding box.

    Returns:
        A list with one int per row, in row order. Rows whose geometry is
        None or empty get 0.
    """
    geometries = list(gdf['geometry'])
    # Query the index of the frame that was passed in, not a module-level
    # global, so candidate positions always line up with ``geometries``.
    spatial_index = gdf.sindex

    neighbors_count = []
    for geom in geometries:
        if geom is None or geom.is_empty:
            neighbors_count.append(0)
            continue

        neighbors = 0
        for candidate_idx in spatial_index.intersection(geom.bounds):
            candidate = geometries[candidate_idx]
            try:
                # The bounding-box hit is only a shortlist; confirm with the
                # exact predicate and ignore geometries equal to this one.
                if geom.touches(candidate) and geom != candidate:
                    neighbors += 1
            except Exception:
                # Best effort: a pair whose predicate fails is skipped.
                # ``Exception`` (not a bare except) keeps Ctrl-C working
                # during long runs.
                continue

        neighbors_count.append(neighbors)

    return neighbors_count

def compute_overlapping_shapes_optimized(gdf):
    """Count, for every row, how many other geometries intersect it.

    Args:
        gdf: A GeoDataFrame with a ``'geometry'`` column. Its own ``sindex``
            is used to shortlist candidates by bounding box.

    Returns:
        A list with one int per row, in row order. Rows whose geometry is
        None or empty get 0.
    """
    geometries = list(gdf['geometry'])
    # Query the index of the frame that was passed in, not a module-level
    # global, so candidate positions always line up with ``geometries``.
    spatial_index = gdf.sindex

    neighbors_count = []
    for geom in geometries:
        if geom is None or geom.is_empty:
            neighbors_count.append(0)
            continue

        neighbors = 0
        for candidate_idx in spatial_index.intersection(geom.bounds):
            candidate = geometries[candidate_idx]
            try:
                # ``intersects`` is true for a geometry against itself, so
                # the inequality check is what excludes the row's own shape
                # (and any geometry equal to it).
                if geom.intersects(candidate) and geom != candidate:
                    neighbors += 1
            except Exception:
                # Best effort: a pair whose predicate fails is skipped.
                # ``Exception`` (not a bare except) keeps Ctrl-C working
                # during long runs.
                continue

        neighbors_count.append(neighbors)

    return neighbors_count


def calculate_shared_boundary(poly):
    """Sum the length of ``poly``'s intersection with touching/overlapping rows.

    Reads the module-level ``gdf`` and ``sindex``; both must describe the
    same rows when this is called.

    NOTE(review): for a pair that overlaps (rather than merely touches) the
    intersection is presumably an area, in which case ``.length`` is that
    area's perimeter rather than a shared edge. Confirm this is intended.

    Args:
        poly: A shapely geometry, or None.

    Returns:
        The accumulated intersection length, or ``0`` when ``poly`` is None,
        empty, or has no qualifying neighbours.
    """
    if poly is None or poly.is_empty:
        return 0

    res = 0
    # Use spatial index to find potential neighbors
    possible_matches_index = list(sindex.intersection(poly.bounds))

    for other in gdf.iloc[possible_matches_index].geometry:
        if poly == other:
            continue

        try:
            # Only compute the intersection once an exact predicate confirms
            # contact; the bounding-box hit alone is just a shortlist.
            if poly.touches(other) or poly.overlaps(other):
                res += poly.intersection(other).length
        except Exception:
            # Best effort: skip pairs whose geometry operation fails.
            # ``Exception`` (not a bare except) keeps Ctrl-C working.
            continue

    return res

def compute_overlap_area(gdf):
    """Sum, for every row, the area it shares with each other row.

    Side effect: ``gdf['geometry']`` is replaced in place by its
    ``buffer(0)`` result before any measurement, so the caller's frame keeps
    the buffered geometries afterwards.

    Args:
        gdf: A GeoDataFrame with a ``'geometry'`` column.

    Returns:
        A list with one total per row, in row order. Rows with no non-empty
        intersection (or a None/empty geometry) get ``0``.
    """
    # Ensure geometries are valid
    gdf['geometry'] = gdf['geometry'].buffer(0)

    # Build the index after the buffer so it reflects the repaired shapes.
    spatial_index = gdf.sindex
    geometries = list(gdf['geometry'])

    overlap_areas = []
    for position, geom in enumerate(geometries):
        if geom is None or geom.is_empty:
            overlap_areas.append(0)
            continue

        total_overlap_area = 0
        for other_position in spatial_index.intersection(geom.bounds):
            # Skip the row itself by position; this stays correct even when
            # the frame's index labels are not unique.
            if other_position == position:
                continue
            # Compute each pairwise intersection once and reuse it for both
            # the emptiness check and the area.
            shared = geom.intersection(geometries[other_position])
            if not shared.is_empty:
                total_overlap_area += shared.area

        overlap_areas.append(total_overlap_area)

    return overlap_areas
    
# Compute shape and neighbourhood metrics for each polygon row
gdf["area"] = gdf.geometry.area
gdf["perimeter"] = gdf.geometry.length
# 4*pi*area / perimeter**2: equals 1 for a circle and is smaller for any
# other outline.
gdf["compactness"] = (4 * np.pi * gdf["area"]) / (gdf["perimeter"] ** 2)
gdf["aspect_ratio"] = gdf.geometry.apply(compute_aspect_ratio)
gdf["num_vertices"] = gdf.geometry.apply(compute_num_vertices)
gdf["vertex_density"] = gdf["num_vertices"] / gdf["perimeter"]
# NOTE(review): the column name is spelled "comvexity" (sic). It is left
# unchanged here because it is the header written to the exported CSV.
gdf["comvexity"] = gdf.geometry.apply(compute_convexity)
gdf["neighbor_touch_count"] = compute_neighbors_optimized(gdf)
gdf["number_overlapping"] = compute_overlapping_shapes_optimized(gdf)
gdf['shared_boundary'] = gdf.geometry.apply(calculate_shared_boundary)
# NOTE: compute_overlap_area reassigns gdf['geometry'] to its buffer(0)
# result, so the geometry exported below is the buffered one.
gdf['overlapping_area_sum'] = compute_overlap_area(gdf)

# Export the GeoDataFrame to CSV (without the row index)
output_path = '.\\GTAA_POLYGON_with_metrics.csv'
gdf.to_csv(output_path, index=False)

# Notebook display: first 20 rows
gdf.head(20)


Unnamed: 0,level_0,level_1,Entity,Handle,Layer,LyrFrzn,LyrLock,LyrOn,LyrVPFrzn,LyrHandle,...,perimeter,compactness,aspect_ratio,num_vertices,vertex_density,comvexity,neighbor_touch_count,number_overlapping,shared_boundary,overlapping_area_sum
0,0,0,LWPolyline,157E59,SP-E-RDW-PV,0,0,1,0,3FC970,...,3.432929,0.599991,1.25,4,1.165186,1.0,0,1,0.0,0.562684
1,1,0,LWPolyline,157E7F,SP-E-RDW-PV,0,0,1,0,3FC970,...,50.745932,0.02128,20.706421,5,0.09853,0.997524,1,3,0.0,8.721594
2,2,0,LWPolyline,157E82,SP-E-RDW-PV,0,0,1,0,3FC970,...,125.204751,0.009923,12.825395,7,0.055908,0.983607,1,3,0.0,24.758081
3,3,0,LWPolyline,157E8F,SP-E-RDW-PV,0,0,1,0,3FC970,...,198.420365,0.84218,1.894166,17,0.085677,1.0,0,3,0.0,2655.303545
4,4,0,LWPolyline,19C830,SP-E-RDW-PV,0,0,1,0,3FC970,...,14.954825,0.050005,31.142853,4,0.267472,1.0,0,0,0.0,0.0
5,5,0,LWPolyline,19C831,SP-E-RDW-PV,0,0,1,0,3FC970,...,29.422242,0.04983,6.608333,5,0.169939,0.986881,0,0,0.0,0.0
6,6,0,LWPolyline,19C832,SP-E-RDW-PV,0,0,1,0,3FC970,...,39.058476,0.025134,14.470587,5,0.128013,0.996998,0,0,0.0,0.0
7,6,1,LWPolyline,19C832,SP-E-RDW-PV,0,0,1,0,3FC970,...,630.114923,0.001795,169.367669,5,0.007935,0.999746,0,0,0.0,0.0
8,7,0,LWPolyline,19C834,SP-E-RDW-PV,0,0,1,0,3FC970,...,876.365707,0.001315,80.897456,7,0.007988,0.998614,0,1,182.308699,30.530843
9,8,0,LWPolyline,19C835,SP-E-RDW-PV,0,0,1,0,3FC970,...,55.716404,0.018528,4.763809,9,0.161532,0.954857,0,1,0.0,4.576924


## **Experiment for Polyline Data**

Reading Polyline Data

In [4]:
import geopandas as gpd

# Load the polyline shapefile
shapefile_path = 'C:\\Users\\User\\Downloads\\GTAA_SHAPEFILES\\GTAA_POLYLINE.shp'
gdf = gpd.read_file(shapefile_path)

# Disabled exploratory snippets, kept for reference:
# print("Columns:", gdf.columns)
# print("Geometry:", gdf.geometry.head(1000))
# print(gdf.geometry.head(5)[0])

# print(12)
# gdf["area"] = gdf.geometry.area
# gdf["perimeter"] = gdf.geometry.length
# print(gdf[["area", "perimeter"]])

# Dump the whole frame
print(gdf)

# Get unique geometry types and report how many there are
geometry_types = gdf.geometry.geom_type.unique()
print(len(geometry_types))

# Print the rows belonging to each geometry type (nothing is written to disk)
for geom_type in geometry_types:
    subset = gdf[gdf.geometry.geom_type == geom_type]
    print(geom_type)
    print(subset)
    print()

         Entity  Handle                    Layer  LyrFrzn  LyrLock  LyrOn  \
0        Insert     3FA                        0        0        0      1   
1        Insert     4D5              SP-E-RNW-PV        0        0      1   
2        Spline  157DC0              SP-E-RDW-PV        0        0      1   
3        Spline  157DC1              SP-E-RDW-PV        0        0      1   
4        Spline  157DC2              SP-E-RDW-PV        0        0      1   
..          ...     ...                      ...      ...      ...    ...   
185      Insert  5B7DC6                        0        0        0      1   
186      Insert  5B8426                        0        0        0      1   
187      Insert  6195F6                        0        0        0      1   
188  LWPolyline  61D5E5                        0        0        0      1   
189  LWPolyline  61D625  GTA-C2-INTERIM TERMINAL        0        0      1   

     LyrVPFrzn LyrHandle  Color  EntColor  ...  ExtZ     DocName  \
0      

In [5]:
import geopandas as gpd
import math
import numpy as np
from shapely import Point

# Location of the polyline shapefile on the local machine.
shapefile_path = 'C:\\Users\\User\\Downloads\\GTAA_SHAPEFILES\\GTAA_POLYLINE.shp'

# Read the layer, split multi-part geometries into one row per part, flatten
# the index into ordinary columns, and keep only the first 1000 rows.
gdf = (
    gpd.read_file(shapefile_path)
    .explode(index_parts=True)
    .reset_index()
    .head(1000)
)

# Spatial index (R-tree index) over the truncated frame.
sindex = gdf.sindex


def compute_line_properties(geometry):
    """Return ``(length, sinuosity)`` for one line geometry.

    Sinuosity is the path length divided by the straight-line distance
    between the first and last coordinate. It is ``np.nan`` when that
    distance is not positive. A None or empty geometry yields
    ``(np.nan, np.nan)``.
    """
    missing = geometry is None or geometry.is_empty
    if missing:
        return np.nan, np.nan

    path_length = geometry.length

    # Straight-line distance between the two ends of the path.
    vertices = geometry.coords
    chord = Point(vertices[0]).distance(Point(vertices[-1]))

    if chord > 0:
        sinuosity = path_length / chord
    else:
        sinuosity = np.nan

    return path_length, sinuosity


def compute_num_vertices(geometry):
    """Count the coordinates of a LineString or MultiLineString.

    For a MultiLineString the counts of all parts are added together.
    Returns None for a None geometry or any other geometry type.
    """
    if geometry is None:
        return None

    kind = geometry.geom_type
    if kind == 'LineString':
        return len(geometry.coords)
    if kind == 'MultiLineString':
        per_part = [len(part.coords) for part in geometry.geoms]
        return sum(per_part)
    return None


def compute_vertex_density(geometry):
    """Return the number of vertices per unit length of a line geometry.

    Args:
        geometry: A shapely geometry, or None.

    Returns:
        ``num_vertices / length`` as a float, or ``np.nan`` when the geometry
        is None or empty, its length is not positive, or
        ``compute_num_vertices`` cannot count it (returns None).
    """
    if geometry is None or geometry.is_empty:
        return np.nan

    length = geometry.length
    if not length > 0:
        return np.nan

    num_vertices = compute_num_vertices(geometry)
    # An unsupported geometry type yields None; report NaN rather than
    # failing on ``None / length``.
    if num_vertices is None:
        return np.nan

    return num_vertices / length


def compute_connected_lines(gdf, tolerance=1e-8):
    """Count, for every line, how many other lines share an endpoint with it.

    Two lines are connected when either end of one lies closer than
    ``tolerance`` (2-D distance) to either end of the other.

    Args:
        gdf: A GeoDataFrame with a ``'geometry'`` column. Its own ``sindex``
            is used to shortlist candidates by bounding box.
        tolerance: Maximum endpoint separation that still counts as
            connected. Defaults to ``1e-8``.

    Returns:
        A list with one int per row, in row order. Rows whose endpoints
        cannot be read (None, empty, or multi-part geometry) get 0 and are
        never counted as a neighbour.
    """
    geometries = list(gdf['geometry'])
    # Query the index of the frame that was passed in, not a module-level
    # global, so candidate positions always line up with ``geometries``.
    spatial_index = gdf.sindex

    # Extract every line's (start, end) once instead of rebuilding point
    # objects for each candidate pair.
    endpoints = []
    for geom in geometries:
        try:
            coords = geom.coords
            ends = (tuple(coords[0][:2]), tuple(coords[-1][:2]))
        except (AttributeError, IndexError, NotImplementedError):
            ends = None
        endpoints.append(ends)

    connections_count = []
    for position, own_ends in enumerate(endpoints):
        if own_ends is None:
            connections_count.append(0)
            continue

        connections = 0
        for neighbor_idx in spatial_index.intersection(geometries[position].bounds):
            # Skip the line itself by position rather than by index label.
            if neighbor_idx == position:
                continue

            other_ends = endpoints[neighbor_idx]
            if other_ends is None:
                continue

            if any(math.dist(mine, theirs) < tolerance
                   for mine in own_ends for theirs in other_ends):
                connections += 1

        connections_count.append(connections)

    return connections_count


def compute_intersecting_lines(gdf):
    """Count, for every line, how many other lines cross it.

    Args:
        gdf: A GeoDataFrame with a ``'geometry'`` column. Its own ``sindex``
            is used to shortlist candidates by bounding box.

    Returns:
        A list with one int per row, in row order. Rows whose geometry is
        None or empty get 0.
    """
    geometries = list(gdf['geometry'])
    # Query the index of the frame that was passed in, not a module-level
    # global, so candidate positions always line up with ``geometries``.
    spatial_index = gdf.sindex

    intersections_count = []
    for position, geom in enumerate(geometries):
        if geom is None or geom.is_empty:
            intersections_count.append(0)
            continue

        intersections = 0
        for neighbor_idx in spatial_index.intersection(geom.bounds):
            # Skip the line itself by position rather than by index label.
            if neighbor_idx == position:
                continue

            try:
                if geom.crosses(geometries[neighbor_idx]):
                    intersections += 1
            except Exception:
                # Best effort: a pair whose predicate fails is skipped.
                # ``Exception`` (not a bare except) keeps Ctrl-C working
                # during long runs.
                continue

        intersections_count.append(intersections)

    return intersections_count


def compute_angle_changes(geometry):
    """Return the summed turning angle, in degrees, along a line.

    At every interior vertex the angle between the incoming and outgoing
    segment is added to the total. Vertices adjacent to a zero-length
    segment contribute nothing. Returns ``np.nan`` for a None or empty
    geometry and ``0`` when there are fewer than three coordinates.
    """
    if geometry is None or geometry.is_empty:
        return np.nan

    points = [np.array(xy) for xy in geometry.coords]
    if len(points) < 3:
        return 0

    total_angle_change = 0
    # Walk every run of three consecutive vertices.
    for before, corner, after in zip(points, points[1:], points[2:]):
        incoming = corner - before
        outgoing = after - corner

        scale = np.linalg.norm(incoming) * np.linalg.norm(outgoing)
        if not scale > 0:
            continue

        # Clamp to [-1, 1] so rounding error cannot push arccos out of range.
        cosine = np.clip(np.dot(incoming, outgoing) / scale, -1.0, 1.0)
        total_angle_change += np.degrees(np.arccos(cosine))

    return total_angle_change


def is_open_polyline(line):
    """Return True when the line's endpoints do not form a loop.

    Uses ``is_closed`` rather than ``is_ring``: a ring must be closed *and*
    simple, so a closed line that crosses itself would otherwise be reported
    as open.

    Args:
        line: A shapely line geometry.

    Returns:
        bool: False when the line is closed, True otherwise.
    """
    return not line.is_closed


# Compute metrics for each line
# compute_line_properties returns a (length, sinuosity) pair per row; zip(*)
# transposes those pairs into one sequence per output column.
gdf['length'], gdf['sinuosity'] = zip(
    *gdf.geometry.apply(compute_line_properties))
gdf['num_vertices'] = gdf.geometry.apply(compute_num_vertices)
gdf['vertex_density'] = gdf.geometry.apply(compute_vertex_density)
gdf['connected_lines'] = compute_connected_lines(gdf)
gdf['intersecting_lines'] = compute_intersecting_lines(gdf)
gdf['total_angle_change'] = gdf.geometry.apply(compute_angle_changes)

# Compute bounding box of the Polyline as a bounds tuple
gdf['bounding_box'] = gdf.geometry.apply(lambda x: x.bounds)

# Compute centroid of the Polyline
gdf['centroid'] = gdf.geometry.centroid

# Compute curvature of the Polyline (Angle Change / Path Length); NaN when
# the length is exactly zero. Depends on the 'total_angle_change' and
# 'length' columns assigned above.
gdf['curvature'] = gdf.apply(lambda row: row['total_angle_change'] /
                             row['length'] if row['length'] != 0 else np.nan, axis=1)

# Boolean value for whether the endpoints form a loop or not
gdf['is_open'] = gdf['geometry'].apply(is_open_polyline)


# Print summary statistics
# NOTE: only the header is printed; the describe() call below is disabled.
print("\nSummary Statistics:")
# print(gdf[['length', 'sinuosity', 'num_vertices', 'vertex_density',
#            'connected_lines', 'intersecting_lines', 'total_angle_change']].describe())

# Export to CSV. NOTE(review): unlike the polygon export, index=False is not
# passed here, so the row index is written as the first column.
output_path = '.\\GTAA_POLYLINE_with_metrics.csv'
gdf.to_csv(output_path)

# Notebook display: first 20 rows
gdf.head(20)


Summary Statistics:


Unnamed: 0,level_0,level_1,Entity,Handle,Layer,LyrFrzn,LyrLock,LyrOn,LyrVPFrzn,LyrHandle,...,sinuosity,num_vertices,vertex_density,connected_lines,intersecting_lines,total_angle_change,bounding_box,centroid,curvature,is_open
0,0,0,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.213331,0,0,0.0,"(611729.0617915001, 4836687.383470699, 611729....",POINT (611729.062 4836687.384),0.0,True
1,0,1,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.214648,0,0,0.0,"(611693.6627914999, 4836651.984470701, 611693....",POINT (611693.663 4836651.985),0.0,True
2,0,2,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.214648,0,0,0.0,"(612035.2599999998, 4837079.517000001, 612035....",POINT (612035.26 4837079.518),0.0,True
3,0,3,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.213331,0,0,0.0,"(612070.659, 4837044.118000001, 612070.6600000...",POINT (612070.66 4837044.119),0.0,True
4,0,4,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.213331,0,0,0.0,"(612407.1812533, 4837445.75, 612407.1822533002...",POINT (612407.182 4837445.75),0.0,True
5,0,5,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.213331,0,0,0.0,"(612442.5802533003, 4837410.351, 612442.581253...",POINT (612442.581 4837410.352),0.0,True
6,0,6,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.248458,0,0,0.0,"(608814.8200187003, 4836396.531212, 608814.820...",POINT (608814.82 4836396.532),0.0,True
7,0,7,Insert,3FA,0,0,0,1,0,10,...,1.0,2,1414.203475,0,0,0.0,"(608837.2083120001, 4836441.307798799, 608837....",POINT (608837.209 4836441.308),0.0,True
8,1,0,Insert,4D5,SP-E-RNW-PV,0,0,1,0,3FC96E,...,,5,0.140849,0,0,323.318881,"(613097.0119252997, 4837002.2117483, 613098.11...",POINT (613097.509 4837010.966),9.107839,False
9,1,1,Insert,4D5,SP-E-RNW-PV,0,0,1,0,3FC96E,...,,7,0.054708,0,0,450.775968,"(613107.2579252999, 4836997.286748299, 613139....",POINT (613115.935 4837005.927),3.52298,False
