<a href="https://colab.research.google.com/github/maria-palacios-ricaldi/MiBarrio-Web-App/blob/main/OSMv3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install requests folium overpy geopandas pandas


Collecting overpy
  Downloading overpy-0.6.tar.gz (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.9/47.9 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: overpy
  Building wheel for overpy (setup.py) ... [?25l[?25hdone
  Created wheel for overpy: filename=overpy-0.6-py3-none-any.whl size=14091 sha256=a5485c0fdf26a70fbf4d769191d17230979ab936c7b102a17946b81992dc2cca
  Stored in directory: /root/.cache/pip/wheels/6f/05/ef/529f51b5983091d5b53077e4f2f81d052a939573c5dd07acfc
Successfully built overpy
Installing collected packages: overpy
Successfully installed overpy-0.6


In [2]:
# Import necessary libraries
from google.colab import drive
import json
import geopandas as gpd

# Mount Google Drive
drive.mount('/content/drive', force_remount= True)

# Load Google Cloud service account credentials
geojson_file_path = '/content/drive/MyDrive/Colab Notebooks/Official_Planning_Suburbs.geojson'
capetown_suburbs = gpd.read_file(geojson_file_path)

Mounted at /content/drive


In [16]:
import overpy
import geopandas as gpd
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

# Density mapping
density_mapping = {
    1: (0, 0),
    2: (1, 1),
    3: (2, 3),
    4: (4, 5),
    5: (6, float('inf'))
}

# Amenities categorisation
amenities_mapping = {
    'Eating': {'amenity': ['cafe', 'restaurant', 'food_court']},
    'Nightlife': {'amenity': ['bar', 'pub', 'nightclub', 'casino']},
    'Culture': {'amenity': ['social_centre', 'theatre', 'fountain', 'events_venue',
                            'community_centre', 'cinema', 'arts_centre', 'conference_centre']},
    'Community': {'office': ['association', 'charity', 'coworking', 'educational_institution',
                             'employment_agency', 'foundation', 'ngo', 'political_party']},
    'Retail Shopping Centres': {'building': ['retail'], 'landuse': ['commercial'], 'shop': ['supermarket']},
    'Clinic': {'amenity': ['clinic']},
    'Dentist': {'amenity': ['dentist']},
    'Doctors': {'amenity': ['doctors']},
    'Hospital': {'amenity': ['hospital']},
    'Pharmacy': {'amenity': ['pharmacy']},
    'Veterinary': {'amenity': ['veterinary']},
    'Leisure spots': {'leisure': ['bird_hide', 'dog_park', 'firepit', 'swimming_pool',
                                  'stadium', 'sports_centre', 'picnic_table', 'fitness_centre']},
    'Sports Centres': {'leisure': ['stadium', 'sports_centre', 'pitch']},
    'Leisure Areas': {'leisure': ['park', 'garden', 'swimming_area', 'playground', 'nature_reserve', 'marina'],
                      'landuse': ['forest']},
    'Economic': {'amenity': ['atm', 'bank', 'bureau_de_change']},
    'Education': {'amenity': ['kindergarten', 'language_school',
                              'library', 'music_school', 'school']},
    'Public Transportation': {'amenity': ['bus_station'],
                              'bus_bay': '',
                              'highway': ['bus_stop', 'platform']},
    'Active Transportation': {'amenity': ['bicycle_parking', 'cycleway', 'footway', 'bicycle_rental']},
}


#Instead of just using the bounding box, you can use the actual coordinates
#of the polygon to refine the search. For MultiPolygon geometries,
#I'll use the coordinates from the largest polygon (by area) which is typically the main polygon.
def geometry_to_overpass_coords(geom):
    if geom.geom_type == "Polygon":
        coords = geom.exterior.coords
    elif geom.geom_type == "MultiPolygon":
    # Use the largest polygon by area from the MultiPolygon
      largest_polygon = max(geom.geoms, key=lambda x: x.area)
      coords = largest_polygon.exterior.coords
    else:
        raise ValueError(f"Unsupported geometry type: {geom.geom_type}")

    return ",".join([f"({y},{x})" for x, y in coords])



def process_suburb(suburb):
    suburb_name = suburb['OFC_SBRB_NAME']
    suburb_boundary = suburb['geometry']
    suburb_coords = geometry_to_overpass_coords(suburb_boundary)

    scores = {
        "Suburb_name": suburb_name,
        "Coordinates": suburb_coords
    }

    # Extract the bounding box for the suburb
    minx, miny, maxx, maxy = suburb_boundary.bounds

    # Process each amenity category
    for category, queries in amenities_mapping.items():
        total_count = 0
        for key, values in queries.items():
            if values:
                query_part = f'{key}~"{ "|".join(values) }"'
            else:
                query_part = key

            amenities_query = f"""node[{query_part}]({miny},{minx},{maxy},{maxx});"""
            query = f"""
            [out:json];
            (
                {amenities_query}
            );
            out body;
            """
            response = api.query(query)
            total_count += len(response.nodes)

        if category in ['Public Transportation', 'Private Transportation', 'Active Transportation']:
            scores[category] = 1 if total_count > 0 else 0
        else:
            for density, (lower, upper) in density_mapping.items():
                if lower <= total_count <= upper:
                    scores[category] = density
                    break

    return scores


# Overpass API initialization
api = overpy.Overpass()

results = []

# Using multithreading to process the suburbs in batches
with ThreadPoolExecutor() as executor:
    suburbs_as_dicts = capetown_suburbs.to_dict('records')
    results = list(executor.map(process_suburb, suburbs_as_dicts))


# Convert to DataFrame and save as CSV
df = pd.DataFrame(results)
df.to_csv("suburbs_amenities_scores.csv", index=False)


In [17]:
df.head(25)

Unnamed: 0,Suburb_name,Coordinates,Eating,Nightlife,Culture,Community,Retail Shopping Centres,Clinic,Dentist,Doctors,Hospital,Pharmacy,Veterinary,Leisure spots,Sports Centres,Leisure Areas,Economic,Education,Public Transportation,Active Transportation
0,HYDE PARK,"(-34.030985348999934,18.585262972000066),(-34....",1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,0,0
1,SPRINGFIELD,"(-33.83932709399994,18.605365945000074),(-33.8...",3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0
2,NIEUW MAASTRECHT-2,"(-33.85189198799998,18.598448094000048),(-33.8...",1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0
3,CHARLESVILLE,"(-33.96376606499996,18.568782201000033),(-33.9...",1,1,1,1,1,1,1,1,1,1,1,5,1,3,1,1,0,0
4,WILDWOOD,"(-34.03246041099993,18.580968721000033),(-34.0...",1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,0,0
5,MALIBU VILLAGE,"(-34.00755935799998,18.691724392000026),(-34.0...",1,1,1,1,1,1,1,1,1,1,1,5,1,2,1,1,0,0
6,TUSCANY GLEN,"(-34.00212028599998,18.702139862000024),(-34.0...",1,1,1,1,1,1,1,1,1,1,1,5,1,2,1,1,0,0
7,VICTORIA MXENGE,"(-34.02811200299993,18.66356131400005),(-34.02...",1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,1,0,0
8,ELECTRIC CITY,"(-34.014812269999936,18.710470472000054),(-34....",1,1,1,1,1,1,1,1,1,1,1,1,1,4,1,1,1,0
9,KHAYELITSHA,"(-34.06117821699996,18.653402818000075),(-34.0...",1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,0,0


In [18]:
df.to_csv('/content/drive/My Drive/suburbs_amenity_scores_full.csv', index=False)