In [1]:
import pandas as pd
from shapely.geometry import Polygon
import shapely.wkt

In [2]:
def coordinates_to_wkt(row):
    """
    Build a closed WKT polygon string from the four "pre" corner coordinates of a row.

    Each corner is stored as a "(lat; lon)" string, e.g. "(-15.824...; 35.002...)".
    The pair is swapped on output to "35.002... -15.824..." (longitude first).

    Args:
        row: A dictionary-like object (e.g., pandas Series) containing pre_top_left,
             pre_top_right, pre_bottom_right, and pre_bottom_left coordinates

    Returns:
        str: A WKT polygon string of the form "POLYGON((lon lat, ...))". The ring runs
             top-left -> top-right -> bottom-right -> bottom-left and is closed by
             repeating the top-left point.

    Raises:
        ValueError: If a corner string does not hold exactly two semicolon-separated
            numeric values.
    """
    def process_coord_pair(coord_str):
        # Split on the bare semicolon and trim each part, so "(a; b)", "(a;b)" and
        # " (a ;  b) " are all accepted rather than only the exact "; " separator.
        parts = coord_str.strip().strip('()').split(';')
        if len(parts) != 2:
            raise ValueError(f"Expected a '(lat; lon)' pair, got {coord_str!r}")
        lat, lon = (part.strip() for part in parts)
        # Fail here with a clear error instead of emitting malformed WKT.
        float(lat)
        float(lon)
        # Emit the original text (not the parsed floats) so no digits are lost.
        return f"{lon} {lat}"

    # Ring order matters: walking the corners in sequence keeps the polygon simple.
    corner_keys = ('pre_top_left', 'pre_top_right', 'pre_bottom_right', 'pre_bottom_left')
    coords = [process_coord_pair(row[key]) for key in corner_keys]

    # Close the polygon by repeating the first point
    coords.append(coords[0])

    # Format as WKT polygon string
    return f"POLYGON(({', '.join(coords)}))"

In [3]:
# Load the per-image corner coordinates (path is relative to the working directory).
test_coords = pd.read_csv('test_image_coords.csv')

In [4]:
# Preview two rows to check the "(lat; lon)" format of the corner columns.
test_coords.head(2)

Unnamed: 0,id,pre_top_left,pre_top_right,pre_bottom_right,pre_bottom_left,post_top_left,post_top_right,post_bottom_right,post_bottom_left
0,malawi-cyclone_00000000,(-15.824129507838975; 35.002070173540666),(-15.824129507838975; 35.003990425148785),(-15.825037409226914; 35.003990425148785),(-15.825037409226914; 35.002070173540666),(-15.824129507838975; 35.002070173540666),(-15.824129507838975; 35.003990425148785),(-15.825037409226914; 35.003990425148785),(-15.825037409226914; 35.002070173540666)
1,malawi-cyclone_00000001,(-15.82512979754548; 35.00206749389429),(-15.82512979754548; 35.0039867934907),(-15.826038510517881; 35.0039867934907),(-15.826038510517881; 35.00206749389429),(-15.82512979754548; 35.00206749389429),(-15.82512979754548; 35.0039867934907),(-15.826038510517881; 35.0039867934907),(-15.826038510517881; 35.00206749389429)


In [5]:
# Load the map-feature table (presumably an Athena query export, judging by the path).
athena_full = pd.read_csv('athena-data/cafa8613-2759-4788-920c-2528e7ee3bb3.csv')

In [6]:
# Preview three rows: each feature has a class, a subclass and a WKT geometry.
athena_full.head(3)

Unnamed: 0,geometry_id,class,subclass,name,area,wkt
0,n12065135579@1,settlement,village,Matope 1,,POINT (35.0469238 -15.7761741)
1,n12064985032@1,settlement,village,Chilobwe Naotcha,,POINT (35.0054562 -15.8387263)
2,n12064666378@1,settlement,village,Manja B,,POINT (35.029234 -15.8207067)


In [7]:
# Inspect the first row whose subclass is 'building' to see what such a record holds.
athena_full[athena_full['subclass']=='building'].iloc[0]

geometry_id                                         w457315413@1
class                                                    general
subclass                                                building
name                                                         NaN
area                                                       114.5
wkt            POLYGON ((35.0467713 -15.8421189, 35.046745 -1...
Name: 65, dtype: object

In [8]:
# Footprint of image malawi-cyclone_00000000 written by hand as WKT in "lon lat" order.
# It uses the same corner values as that row of test_coords: top-left, top-right,
# bottom-right, bottom-left, then top-left again to close the ring.
wkt_polygon = """
POLYGON((
    35.002070173540666 -15.824129507838975,
    35.003990425148785 -15.824129507838975,
    35.003990425148785 -15.825037409226914,
    35.002070173540666 -15.825037409226914,
    35.002070173540666 -15.824129507838975
))
"""

In [9]:
# Sanity-check the hand-written WKT by parsing it with shapely.
poly = shapely.wkt.loads(wkt_polygon)
print(poly.is_valid)   # Should be True if it's a proper polygon
print(poly.bounds)     # Shows (minx, miny, maxx, maxy)

True
(35.002070173540666, -15.825037409226914, 35.003990425148785, -15.824129507838975)


In [10]:
def process_buildings_in_area(df, area_wkt):
    """
    Return the rows of ``df`` whose geometry intersects the area described by ``area_wkt``.

    The input frame is left untouched; every helper column is added to the returned
    copy only.

    Args:
        df: DataFrame with a 'wkt' column holding one WKT geometry string per row.
            Rows whose WKT cannot be parsed are reported on stdout and treated as
            lying outside the area.
        area_wkt: WKT string of the area of interest.

    Returns:
        DataFrame: A copy of the intersecting rows with three extra columns:
            'geometry' (the parsed shapely geometry), 'in_area' (always True) and
            'intersection_area' (area of the overlap with the area of interest, in
            squared coordinate units, so square degrees for lon/lat input).
    """
    # Convert the area WKT to a shapely polygon
    area_polygon = shapely.wkt.loads(area_wkt)

    # Convert building WKT strings to shapely geometries
    def parse_building_polygon(wkt_str):
        try:
            return shapely.wkt.loads(wkt_str)
        except Exception as e:
            # Best effort: report the bad row and keep going.
            print(f"Error parsing WKT: {e}")
            return None

    # Keep the parsed geometries in a standalone Series so the caller's frame is
    # never modified.
    geometries = df['wkt'].apply(parse_building_polygon)

    # Check which geometries intersect the area. astype(bool) guarantees a boolean
    # mask regardless of the dtype pandas infers from apply().
    in_area = geometries.apply(
        lambda geom: geom is not None and bool(geom.intersects(area_polygon))
    ).astype(bool)

    # Filter to only the rows in the area, then attach the helper columns to the copy
    buildings_in_area = df[in_area].copy()
    buildings_in_area['geometry'] = geometries[in_area]
    buildings_in_area['in_area'] = in_area[in_area]

    # Calculate the intersection area; every remaining geometry is non-None
    buildings_in_area['intersection_area'] = buildings_in_area['geometry'].apply(
        lambda geom: geom.intersection(area_polygon).area
    )

    return buildings_in_area

In [11]:
# Find every athena_full feature that intersects the current wkt_polygon footprint.
result = process_buildings_in_area(athena_full, wkt_polygon)

In [12]:
# Break the intersecting features down by subclass.
result['subclass'].value_counts()

building       14
residential     3
Name: subclass, dtype: int64

In [13]:
# Build the footprint for image malawi-cyclone_00000165 from test_coords.
# Note: this overwrites the hand-written wkt_polygon assigned earlier.
wkt_polygon = coordinates_to_wkt(test_coords[test_coords['id']=="malawi-cyclone_00000165"].iloc[0])

In [14]:
# Repeat the intersection and the subclass breakdown for the new footprint.
result = process_buildings_in_area(athena_full, wkt_polygon)
result['subclass'].value_counts()

building       19
residential     1
Name: subclass, dtype: int64

In [15]:
# Number of intersecting features whose subclass is exactly 'building'.
sum(result['subclass']=='building')

19

In [16]:
def count_buildings_in_areas(coords_csv, buildings_csv):
    """
    Count the features with subclass 'building' that intersect each image footprint.

    Despite the parameter names, both arguments are already-loaded DataFrames, not
    CSV paths.

    Args:
        coords_csv: DataFrame with an 'id' column plus the pre_top_left,
            pre_top_right, pre_bottom_right and pre_bottom_left columns read by
            coordinates_to_wkt.
        buildings_csv: DataFrame with 'subclass' and 'wkt' columns. It is not
            modified.

    Returns:
        DataFrame with columns 'id' and 'building_count', one row per row of
        coords_csv. When coords_csv is empty the result is empty but still has
        both columns.
    """
    # Only 'building' rows are ever counted, so drop the rest once, outside the loop.
    # The copy keeps any columns the helper may add off the caller's frame.
    only_buildings = buildings_csv[buildings_csv['subclass'] == 'building'].copy()

    results = []
    for _, row in coords_csv.iterrows():
        # Convert coordinates to WKT polygon
        area_wkt = coordinates_to_wkt(row)

        # Every row returned here is a 'building' that intersects the footprint
        buildings_in_area = process_buildings_in_area(only_buildings, area_wkt)

        # Store result
        results.append({
            'id': row['id'],
            'building_count': len(buildings_in_area)
        })

    # Explicit columns so an empty input still yields the documented schema
    return pd.DataFrame(results, columns=['id', 'building_count'])

In [17]:
# Hand-picked image ids; currently only referenced by the commented-out filter below.
ids = ['malawi-cyclone_00000009','malawi-cyclone_00000017','malawi-cyclone_00000024',
      'malawi-cyclone_00000030','malawi-cyclone_00000039','malawi-cyclone_00000046',
      'malawi-cyclone_00000055','malawi-cyclone_00000063','malawi-cyclone_00000072',
      'malawi-cyclone_00000078','malawi-cyclone_00000084','malawi-cyclone_00000090',
      'malawi-cyclone_00000094','malawi-cyclone_00000101','malawi-cyclone_00000108',
      'malawi-cyclone_00000114','malawi-cyclone_00000119','malawi-cyclone_00000124',
      'malawi-cyclone_00000132','malawi-cyclone_00000139','malawi-cyclone_00000145',
      'malawi-cyclone_00000152','malawi-cyclone_00000158','malawi-cyclone_00000165',
      'malawi-cyclone_00000172']
# Uncomment the next line to restrict the run to the ids above instead of every image:
#known_coords = test_coords[test_coords['id'].isin(ids)]
known_coords = test_coords
known_coords.shape

(348, 9)

In [18]:
# Count 'building' features for every footprint in known_coords, then display the table.
counts_df = count_buildings_in_areas(known_coords, athena_full)
counts_df

Unnamed: 0,id,building_count
0,malawi-cyclone_00000000,14
1,malawi-cyclone_00000001,22
2,malawi-cyclone_00000002,9
3,malawi-cyclone_00000003,11
4,malawi-cyclone_00000004,15
...,...,...
343,malawi-cyclone_00000343,12
344,malawi-cyclone_00000344,32
345,malawi-cyclone_00000345,33
346,malawi-cyclone_00000346,44


In [None]:
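# Manual spot checks for the first four hand-picked ids (009, 017, 024, 030).
# The notation is not documented here; presumably it reads
# "<id suffix>: <reference count> // <subclass initial> <count>, ..." with
# b = building and r = residential. TODO confirm the remaining initials.
# 009: 28 // b 19, r 4, t 1, c 1
# 017: 31 // b 26, r 3, f 2
# 024: 28 // b 47, r 1, p 1
# 030: 25 // b 20, r 4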

In [19]:
# Save the per-image building counts; index=False leaves the row index out of the file.
counts_df.to_csv('athena_buildings.csv',index=False)