In [2]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from geopy.distance import geodesic
import requests

In [9]:
# Load the country-capital dataset
# df_capitals = pd.read_csv('country-capital-lat-long-population.csv')
capital_data = pd.read_csv('country-capital-lat-long-population.csv')

In [4]:
url = "https://storage.googleapis.com/open-buildings-data/v3/polygons_s2_level_4_gzip/179_buildings.csv.gz"

In [5]:
# Load building datasets
kenya_buildings = pd.read_csv(url).sample(n=100000, random_state=42)

In [6]:
kenya_buildings.info()

<class 'pandas.core.frame.DataFrame'>
Index: 100000 entries, 4458432 to 421217
Data columns (total 6 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   latitude        100000 non-null  float64
 1   longitude       100000 non-null  float64
 2   area_in_meters  100000 non-null  float64
 3   confidence      100000 non-null  float64
 4   geometry        100000 non-null  object 
 5   full_plus_code  100000 non-null  object 
dtypes: float64(4), object(2)
memory usage: 5.3+ MB


In [7]:
tanzania_buildings = pd.read_csv("https://storage.googleapis.com/open-buildings-data/v3/polygons_s2_level_4_gzip/185_buildings.csv.gz").sample(n=100000, random_state=42)

In [8]:
uganda_buildings = pd.read_csv("https://storage.googleapis.com/open-buildings-data/v3/polygons_s2_level_4_gzip/177_buildings.csv.gz").sample(n=100000, random_state=42)

In [10]:
# Extract capital coordinates
capital_coords = capital_data[capital_data['Country'].isin(['Kenya', 'Tanzania', 'Uganda'])][['Country', 'Capital City', 'Latitude', 'Longitude']]

In [11]:
# Function to calculate distance
def calculate_distance(row, capital_lat, capital_lon):
    return geodesic((row['latitude'], row['longitude']), (capital_lat, capital_lon)).kilometers

In [13]:
# Process each country's buildings
results = []
capital_coords['Country'] = capital_coords['Country'].str.strip().str.title()
for country in ['Kenya', 'Tanzania', 'Uganda']:
    capital = capital_coords[capital_coords['Country'] == country]

    if capital.empty:
        print(f"Capital data for {country} not found!")
        continue  # Skip this iteration if no match found

    capital_lat = capital['Latitude'].values[0]
    capital_lon = capital['Longitude'].values[0]

    if country == 'Kenya':
        buildings = kenya_buildings
    elif country == 'Tanzania':
        buildings = tanzania_buildings
    else:
        buildings = uganda_buildings

    # Calculate distance
    buildings['distance_to_capital'] = buildings.apply(calculate_distance, axis=1, capital_lat=capital_lat, capital_lon=capital_lon)

    # Filter buildings within 100 km
    buildings_within_100km = buildings[buildings['distance_to_capital'] <= 100]

    # Number of buildings
    num_buildings = buildings_within_100km.shape[0]

    # Average area
    avg_area = buildings_within_100km['area_in_meters'].mean()

    results.append({
        'Country': country,
        'Capital City': capital['Capital City'].values[0],
        'Num Buildings within 100km': num_buildings,
        'Avg Area within 100km': avg_area
    })

Capital data for Tanzania not found!


In [14]:
# Convert results to DataFrame
results_df = pd.DataFrame(results)
print(results_df)

  Country Capital City  Num Buildings within 100km  Avg Area within 100km
0   Kenya      Nairobi                           0                    NaN
1  Uganda      Kampala                       21958              64.181515
