# **Southeast Asia GeoGuessr Project**
## GeoJSON to .csv File Converter

This notebook contains the pipeline code that converts the .geojson files exported from Overpass Turbo into .csv files.

For each country, we collect coordinates for the following scene types:

- **Philippines:** Beaches, Cities/towns, Countryside (farmland), Mountains, Forest/jungle, Roads (urban and rural)
- **Thailand:** Beaches, temples, cities/towns, mountains, jungle/forest, farmlands (countryside), marketplaces
- **Malaysia:** Beaches, jungles, cities/town, countryside (farmland), mosques, mountains
- **Indonesia:** Beaches, volcanoes/mountains, cities/towns, jungles, countryside, temples




In [None]:
import os
import time

import geopandas as gpd
import pandas as pd
import requests

In [None]:
# Your API key here.
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so that it never has to be
# typed into (and accidentally committed with) the notebook. When the variable is not set, the
# original placeholder string is used.
API_KEY = os.environ.get("GOOGLE_MAPS_API_KEY", "API_key")  # API key removed for privacy purposes

In [None]:
# Define Street View availability checker -- because we need to make sure the coordinates listed are actually on the system
def has_street_view(lat, lon, api_key, timeout=10):
    """Return True when the Street View metadata endpoint reports status "OK" for a point.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        api_key: API key sent with the request.
        timeout: Seconds to wait for the HTTP request (default 10), so a single
            stalled connection cannot hang a whole scan.

    Returns:
        True if the response JSON contains ``"status": "OK"``; False for any
        other status, or when the request or the JSON decoding fails.
    """
    try:
        response = requests.get(
            "https://maps.googleapis.com/maps/api/streetview/metadata",
            # Let requests build and URL-encode the query string.
            params={"location": f"{lat},{lon}", "key": api_key},
            timeout=timeout,
        )
        return response.json().get("status") == "OK"
    except Exception as e:
        # Deliberately broad: this is a best-effort probe and one bad point must not stop a scan.
        # The error text may contain the request URL, so the key is removed before printing.
        message = str(e)
        if api_key:
            message = message.replace(str(api_key), "<redacted>")
        print(f"Error checking ({lat}, {lon}): {message}")
        return False

## **Philippines**

**Beaches**

In [None]:
# Query code to find beaches in the Philippines - copy and paste the text between the triple
# quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_BEACH_PH = """
[out:json][timeout:25];
area["name"="Philippines"]->.searchArea;
(
  node["natural"="beach"](area.searchArea);
  way["natural"="beach"](area.searchArea);
  relation["natural"="beach"](area.searchArea);
);
out center;
"""

In [None]:
# Read the beach features exported from Overpass Turbo.
input_path = "beach_ph.geojson"
gdf_beach_ph = gpd.read_file(input_path)

# Take the coordinates straight from each geometry (y = latitude, x = longitude).
# NOTE(review): .x/.y assume every geometry in the export is a point - confirm.
points = gdf_beach_ph.geometry
gdf_beach_ph["lat"], gdf_beach_ph["lon"] = points.y, points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "beach", "country": "Philippines"}
for label_column, label_value in labels.items():
    gdf_beach_ph[label_column] = label_value

# Keep only the columns needed downstream.
output_columns = ["country", "scene_type", "lat", "lon"]
df_beach_ph = gdf_beach_ph[output_columns]

# Write the complete, unsampled coordinate list and report its size.
output_path = "beach_ph_full.csv"
df_beach_ph.to_csv(output_path, index=False)
row_count = len(df_beach_ph)
print(f"Saved full coordinate list: {row_count} rows")

✅ Saved full coordinate list: 3574 rows


In [None]:
# Check each point for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("🔍 Checking for Street View imagery...")

for idx, row in df_beach_ph.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:  # Stop once we have enough samples
            print(f"Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Create filtered DataFrame and save.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_beach_ph_available = pd.DataFrame(filtered_rows, columns=df_beach_ph.columns)
df_beach_ph_available.to_csv("beach_ph_with_streetview.csv", index=False)
print(f"Filtered: {len(df_beach_ph_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_beach_ph_sampled = df_beach_ph_available.copy()
df_beach_ph_sampled.to_csv("beach_ph_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_beach_ph_sampled)} rows to beach_ph_final_sample.csv")

🔍 Checking for Street View imagery...
✅ Filtered: 529 coordinates with Street View
📦 Final sample saved: 150 rows to beach_ph_final_sample.csv


**Cities**

In [None]:
# Query code to find cities and towns in the Philippines - copy and paste the text between the
# triple quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_CITY_TOWN_PH = """
[out:json][timeout:25];
area["name"="Philippines"]->.searchArea;
(
  node["place"="city"](area.searchArea);
  node["place"="town"](area.searchArea);
);
out;
"""

In [None]:
# Read the city/town features exported from Overpass Turbo.
input_path = "city_town_ph.geojson"
gdf_city_town_ph = gpd.read_file(input_path)

# Take the coordinates straight from each geometry (y = latitude, x = longitude).
# NOTE(review): .x/.y assume every geometry in the export is a point - confirm.
points = gdf_city_town_ph.geometry
gdf_city_town_ph["lat"], gdf_city_town_ph["lon"] = points.y, points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "city_town", "country": "Philippines"}
for label_column, label_value in labels.items():
    gdf_city_town_ph[label_column] = label_value

# Keep only the columns needed downstream.
output_columns = ["country", "scene_type", "lat", "lon"]
df_city_town_ph = gdf_city_town_ph[output_columns]

# Write the complete, unsampled coordinate list and report its size.
output_path = "city_town_ph_full.csv"
df_city_town_ph.to_csv(output_path, index=False)
row_count = len(df_city_town_ph)
print(f"Saved full coordinate list: {row_count} rows")

✅ Saved full coordinate list: 1651 rows


In [None]:
# Check each city/town point for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("🔍 Checking for Street View imagery...")

for idx, row in df_city_town_ph.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:  # Stop once we have enough samples
            print(f"🎯 Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Create filtered DataFrame and save.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_city_town_ph_available = pd.DataFrame(filtered_rows, columns=df_city_town_ph.columns)
df_city_town_ph_available.to_csv("city_town_ph_with_streetview.csv", index=False)
print(f"Filtered: {len(df_city_town_ph_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_city_town_ph_sampled = df_city_town_ph_available.copy()
df_city_town_ph_sampled.to_csv("city_town_ph_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_city_town_ph_sampled)} rows to city_town_ph_final_sample.csv")

🔍 Checking for Street View imagery...
🎯 Reached 150 available samples, stopping early.
✅ Filtered: 150 coordinates with Street View
📦 Final sample saved: 150 rows to city_town_ph_final_sample.csv


**Countryside/Farmland**

In [None]:
# Query code to find countryside and farmland in the Philippines - copy and paste the text between
# the triple quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_COUNTRYSIDE_PH = """
[out:json][timeout:25];
area["name"="Philippines"]->.searchArea;
(
  node["landuse"="farmland"](area.searchArea);
  way["landuse"="farmland"](area.searchArea);
  relation["landuse"="farmland"](area.searchArea);
);
out center;
"""

In [None]:
# Read the farmland features exported from Overpass Turbo.
input_path = "countryside_ph.geojson"
gdf_countryside_ph = gpd.read_file(input_path)

# Centroids are computed in a projected CRS rather than in degrees, then converted
# back to WGS84 so they can be read as lat/lon.
projected_epsg = 32651
wgs84_epsg = 4326
gdf_countryside_ph_proj = gdf_countryside_ph.to_crs(epsg=projected_epsg)
gdf_countryside_ph_proj["centroid"] = gdf_countryside_ph_proj.geometry.centroid
gdf_countryside_ph_centroids = gdf_countryside_ph_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_countryside_ph_centroids.geometry
gdf_countryside_ph["lat"], gdf_countryside_ph["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "countryside", "country": "Philippines"}
for label_column, label_value in labels.items():
    gdf_countryside_ph[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_countryside_ph = gdf_countryside_ph[output_columns]
output_path = "countryside_ph_full.csv"
df_countryside_ph.to_csv(output_path, index=False)
row_count = len(df_countryside_ph)
print(f"Saved full coordinate list: {row_count} rows")

✅ Saved full coordinate list: 69617 rows


In [None]:
# Check each farmland centroid for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("🔍 Checking for Street View imagery...")

for idx, row in df_countryside_ph.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"🎯 Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_countryside_ph_available = pd.DataFrame(filtered_rows, columns=df_countryside_ph.columns)
df_countryside_ph_available.to_csv("countryside_ph_with_streetview.csv", index=False)
print(f"Filtered: {len(df_countryside_ph_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_countryside_ph_sampled = df_countryside_ph_available.copy()
df_countryside_ph_sampled.to_csv("countryside_ph_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_countryside_ph_sampled)} rows to countryside_ph_final_sample.csv")

🔍 Checking for Street View imagery...
🎯 Reached 150 available samples, stopping early.
✅ Filtered: 150 coordinates with Street View
📦 Final sample saved: 150 rows to countryside_ph_final_sample.csv


**Mountains**

In [None]:
# Query code to find mountains in the Philippines - copy and paste the text between the triple
# quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_MOUNTAINS_PH = """
[out:json][timeout:25];
area["name"="Philippines"]->.searchArea;
(
  node["natural"="peak"](area.searchArea);
  way["natural"="peak"](area.searchArea);
  relation["natural"="peak"](area.searchArea);
);
out center;
"""

In [None]:
# Read the peak features exported from Overpass Turbo.
input_path = "mountains_ph.geojson"
gdf_mountains_ph = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (UTM zone 51N) rather than in degrees,
# then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32651
wgs84_epsg = 4326
gdf_mountains_ph_proj = gdf_mountains_ph.to_crs(epsg=projected_epsg)
gdf_mountains_ph_proj["centroid"] = gdf_mountains_ph_proj.geometry.centroid
gdf_mountains_ph_centroids = gdf_mountains_ph_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_mountains_ph_centroids.geometry
gdf_mountains_ph["lat"], gdf_mountains_ph["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "mountain", "country": "Philippines"}
for label_column, label_value in labels.items():
    gdf_mountains_ph[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_mountains_ph = gdf_mountains_ph[output_columns]
output_path = "mountains_ph_full.csv"
df_mountains_ph.to_csv(output_path, index=False)
row_count = len(df_mountains_ph)
print(f"Saved full coordinate list: {row_count} rows")

✅ Saved full coordinate list: 13152 rows


In [None]:
# Check each peak coordinate for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("🔍 Checking for Street View imagery...")

for idx, row in df_mountains_ph.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"🎯 Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_mountains_ph_available = pd.DataFrame(filtered_rows, columns=df_mountains_ph.columns)
df_mountains_ph_available.to_csv("mountains_ph_with_streetview.csv", index=False)
print(f"Filtered: {len(df_mountains_ph_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_mountains_ph_sampled = df_mountains_ph_available.copy()
df_mountains_ph_sampled.to_csv("mountains_ph_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_mountains_ph_sampled)} rows to mountains_ph_final_sample.csv")

🔍 Checking for Street View imagery...
🎯 Reached 150 available samples, stopping early.
✅ Filtered: 150 coordinates with Street View
📦 Final sample saved: 150 rows to mountains_ph_final_sample.csv


**Forests/Jungles**

In [None]:
# Query code to find forests/jungles in the Philippines - copy and paste the text between the
# triple quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_FOREST_PH = """
[out:json][timeout:25];
area["name"="Philippines"]->.searchArea;
(
  node["landuse"="forest"](area.searchArea);
  way["landuse"="forest"](area.searchArea);
  relation["landuse"="forest"](area.searchArea);
);
out center;
"""

In [None]:
# Read the forest features exported from Overpass Turbo.
input_path = "jungle_forest_ph.geojson"
gdf_forest_ph = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (UTM zone 51N) rather than in degrees,
# then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32651
wgs84_epsg = 4326
gdf_forest_ph_proj = gdf_forest_ph.to_crs(epsg=projected_epsg)
gdf_forest_ph_proj["centroid"] = gdf_forest_ph_proj.geometry.centroid
gdf_forest_ph_centroids = gdf_forest_ph_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_forest_ph_centroids.geometry
gdf_forest_ph["lat"], gdf_forest_ph["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "forest", "country": "Philippines"}
for label_column, label_value in labels.items():
    gdf_forest_ph[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_forest_ph = gdf_forest_ph[output_columns]
output_path = "forest_ph_full.csv"
df_forest_ph.to_csv(output_path, index=False)
row_count = len(df_forest_ph)
print(f"Saved full coordinate list: {row_count} rows")

✅ Saved full coordinate list: 3677 rows


In [None]:
# Check each forest centroid for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("🔍 Checking for Street View imagery...")

for idx, row in df_forest_ph.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"🎯 Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_forest_ph_available = pd.DataFrame(filtered_rows, columns=df_forest_ph.columns)
df_forest_ph_available.to_csv("forest_ph_with_streetview.csv", index=False)
print(f"Filtered: {len(df_forest_ph_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_forest_ph_sampled = df_forest_ph_available.copy()
df_forest_ph_sampled.to_csv("forest_ph_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_forest_ph_sampled)} rows to forest_ph_final_sample.csv")

🔍 Checking for Street View imagery...
🎯 Reached 150 available samples, stopping early.
✅ Filtered: 150 coordinates with Street View
📦 Final sample saved: 150 rows to forest_ph_final_sample.csv


**Major Roads (Urban and Rural)**

In [None]:
# Query code to find major roads in the Philippines - copy and paste the text between the triple
# quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_ROADS_PH = """
[out:json][timeout:25];
area["name"="Philippines"]->.searchArea;
(
  way["highway"="primary"](area.searchArea);
);
out center;
"""

In [None]:
# Read the primary-road features exported from Overpass Turbo.
input_path = "roads_ph.geojson"
gdf_roads_ph = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (UTM zone 51N) rather than in degrees,
# then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32651
wgs84_epsg = 4326
gdf_roads_ph_proj = gdf_roads_ph.to_crs(epsg=projected_epsg)
gdf_roads_ph_proj["centroid"] = gdf_roads_ph_proj.geometry.centroid
gdf_roads_ph_centroids = gdf_roads_ph_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_roads_ph_centroids.geometry
gdf_roads_ph["lat"], gdf_roads_ph["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "road", "country": "Philippines"}
for label_column, label_value in labels.items():
    gdf_roads_ph[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_roads_ph = gdf_roads_ph[output_columns]
output_path = "roads_ph_full.csv"
df_roads_ph.to_csv(output_path, index=False)
row_count = len(df_roads_ph)
print(f"Saved full coordinate list: {row_count} rows")

✅ Saved full coordinate list: 22717 rows


In [None]:
# Check each road centroid for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("🔍 Checking for Street View imagery...")

for idx, row in df_roads_ph.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"🎯 Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_roads_ph_available = pd.DataFrame(filtered_rows, columns=df_roads_ph.columns)
df_roads_ph_available.to_csv("roads_ph_with_streetview.csv", index=False)
print(f"Filtered: {len(df_roads_ph_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_roads_ph_sampled = df_roads_ph_available.copy()
df_roads_ph_sampled.to_csv("roads_ph_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_roads_ph_sampled)} rows to roads_ph_final_sample.csv")

🔍 Checking for Street View imagery...
🎯 Reached 150 available samples, stopping early.
✅ Filtered: 150 coordinates with Street View
📦 Final sample saved: 150 rows to roads_ph_final_sample.csv


## **Thailand**

**Note:** The area query does not work with the English name "Thailand"; it only works if the local name ราชอาณาจักรไทย is used. To avoid that problem, we use `{{geocodeArea:Thailand}}` instead.

**Beaches**

In [None]:
# Query code to find beaches in Thailand - copy and paste the text between the triple quotes
# into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_BEACH_TH = """
[out:json][timeout:25];
{{geocodeArea:Thailand}}->.searchArea;
(
  node["natural"="beach"](area.searchArea);
  way["natural"="beach"](area.searchArea);
  relation["natural"="beach"](area.searchArea);
);
out center;
"""


In [None]:
# Read the beach features exported from Overpass Turbo.
input_path = "beach_th.geojson"
gdf_beach_th = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (the UTM zone used for Thailand) rather than
# in degrees, then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32647
wgs84_epsg = 4326
gdf_beach_th_proj = gdf_beach_th.to_crs(epsg=projected_epsg)
gdf_beach_th_proj["centroid"] = gdf_beach_th_proj.geometry.centroid
gdf_beach_th_centroids = gdf_beach_th_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_beach_th_centroids.geometry
gdf_beach_th["lat"], gdf_beach_th["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "beach", "country": "Thailand"}
for label_column, label_value in labels.items():
    gdf_beach_th[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_beach_th = gdf_beach_th[output_columns]
output_path = "beach_th_full.csv"
df_beach_th.to_csv(output_path, index=False)
row_count = len(df_beach_th)
print(f"Saved full coordinate list: {row_count} rows")

Saved full coordinate list: 1245 rows


In [None]:
# Check each beach centroid for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_beach_th.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_beach_th_available = pd.DataFrame(filtered_rows, columns=df_beach_th.columns)
df_beach_th_available.to_csv("beach_th_with_streetview.csv", index=False)
print(f"Filtered: {len(df_beach_th_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_beach_th_sampled = df_beach_th_available.copy()
df_beach_th_sampled.to_csv("beach_th_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_beach_th_sampled)} rows to beach_th_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to beach_th_final_sample.csv


**Temples**

In [None]:
# Query code to find temples in Thailand - copy and paste the text between the triple quotes
# into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_TEMPLE_TH = """
[out:json][timeout:25];
{{geocodeArea:Thailand}}->.searchArea;
(
  node["amenity"="place_of_worship"]["religion"="buddhist"](area.searchArea);
  way["amenity"="place_of_worship"]["religion"="buddhist"](area.searchArea);
  relation["amenity"="place_of_worship"]["religion"="buddhist"](area.searchArea);
);
out center;
"""

In [None]:
# Read the Buddhist place-of-worship features exported from Overpass Turbo.
input_path = "temple_th.geojson"
gdf_temple_th = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (the UTM zone used for Thailand) rather than
# in degrees, then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32647
wgs84_epsg = 4326
gdf_temple_th_proj = gdf_temple_th.to_crs(epsg=projected_epsg)
gdf_temple_th_proj["centroid"] = gdf_temple_th_proj.geometry.centroid
gdf_temple_th_centroids = gdf_temple_th_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_temple_th_centroids.geometry
gdf_temple_th["lat"], gdf_temple_th["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "temple", "country": "Thailand"}
for label_column, label_value in labels.items():
    gdf_temple_th[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_temple_th = gdf_temple_th[output_columns]
output_path = "temple_th_full.csv"
df_temple_th.to_csv(output_path, index=False)
row_count = len(df_temple_th)
print(f"Saved full coordinate list: {row_count} rows")

Saved full coordinate list: 17778 rows


In [None]:
# Check each temple centroid for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_temple_th.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_temple_th_available = pd.DataFrame(filtered_rows, columns=df_temple_th.columns)
df_temple_th_available.to_csv("temple_th_with_streetview.csv", index=False)
print(f"Filtered: {len(df_temple_th_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_temple_th_sampled = df_temple_th_available.copy()
df_temple_th_sampled.to_csv("temple_th_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_temple_th_sampled)} rows to temple_th_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to temple_th_final_sample.csv


**Cities/Towns**

In [None]:
# Query code to find cities/towns in Thailand - copy and paste the text between the triple
# quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_CITY_TH = """
[out:json][timeout:25];
{{geocodeArea:Thailand}}->.searchArea;
(
  node["place"="city"](area.searchArea);
  node["place"="town"](area.searchArea);
);
out;
"""

In [None]:
# Read the city/town features exported from Overpass Turbo.
input_path = "city_th.geojson"
gdf_city_th = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (the UTM zone used for Thailand) rather than
# in degrees, then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32647
wgs84_epsg = 4326
gdf_city_th_proj = gdf_city_th.to_crs(epsg=projected_epsg)
gdf_city_th_proj["centroid"] = gdf_city_th_proj.geometry.centroid
gdf_city_th_centroids = gdf_city_th_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_city_th_centroids.geometry
gdf_city_th["lat"], gdf_city_th["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "city", "country": "Thailand"}
for label_column, label_value in labels.items():
    gdf_city_th[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_city_th = gdf_city_th[output_columns]
output_path = "city_th_full.csv"
df_city_th.to_csv(output_path, index=False)
row_count = len(df_city_th)
print(f"Saved full coordinate list: {row_count} rows")

Saved full coordinate list: 1054 rows


In [None]:
# Check each city/town coordinate for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_city_th.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_city_th_available = pd.DataFrame(filtered_rows, columns=df_city_th.columns)
df_city_th_available.to_csv("city_th_with_streetview.csv", index=False)
print(f"Filtered: {len(df_city_th_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_city_th_sampled = df_city_th_available.copy()
df_city_th_sampled.to_csv("city_th_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_city_th_sampled)} rows to city_th_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to city_th_final_sample.csv


**Mountains**

In [None]:
# Query code to find mountains in Thailand - copy and paste the text between the triple quotes
# into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_MOUNTAIN_TH = """
[out:json][timeout:25];
{{geocodeArea:Thailand}}->.searchArea;
(
  node["natural"="peak"](area.searchArea);
  way["natural"="peak"](area.searchArea);
  relation["natural"="peak"](area.searchArea);
);
out center;
"""

SyntaxError: invalid syntax (ipython-input-11-1760585589.py, line 3)

In [None]:
# Read the peak features exported from Overpass Turbo.
input_path = "mountain_th.geojson"
gdf_mountain_th = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (the UTM zone used for Thailand) rather than
# in degrees, then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32647
wgs84_epsg = 4326
gdf_mountain_th_proj = gdf_mountain_th.to_crs(epsg=projected_epsg)
gdf_mountain_th_proj["centroid"] = gdf_mountain_th_proj.geometry.centroid
gdf_mountain_th_centroids = gdf_mountain_th_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_mountain_th_centroids.geometry
gdf_mountain_th["lat"], gdf_mountain_th["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "mountain", "country": "Thailand"}
for label_column, label_value in labels.items():
    gdf_mountain_th[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_mountain_th = gdf_mountain_th[output_columns]
output_path = "mountain_th_full.csv"
df_mountain_th.to_csv(output_path, index=False)
row_count = len(df_mountain_th)
print(f"Saved full coordinate list: {row_count} rows")

Saved full coordinate list: 2122 rows


In [None]:
# Check each peak coordinate for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits;
# if the list runs out first, every hit found is kept.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_mountain_th.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_mountain_th_available = pd.DataFrame(filtered_rows, columns=df_mountain_th.columns)
df_mountain_th_available.to_csv("mountain_th_with_streetview.csv", index=False)
print(f"Filtered: {len(df_mountain_th_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_mountain_th_sampled = df_mountain_th_available.copy()
df_mountain_th_sampled.to_csv("mountain_th_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_mountain_th_sampled)} rows to mountain_th_final_sample.csv")

Checking for Street View imagery...
Filtered: 111 coordinates with Street View
Final sample saved: 111 rows to mountain_th_final_sample.csv


**Jungle/forest**

In [None]:
# Query code to find jungles/forests in Thailand - copy and paste the text between the triple
# quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_FOREST_TH = """
[out:json][timeout:25];
{{geocodeArea:Thailand}}->.searchArea;
(
  node["landuse"="forest"](area.searchArea);
  way["landuse"="forest"](area.searchArea);
  relation["landuse"="forest"](area.searchArea);
);
out center;
"""

In [None]:
# Read the forest features exported from Overpass Turbo.
input_path = "forest_th.geojson"
gdf_forest_th = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (UTM zone 47N for Thailand) rather than
# in degrees, then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32647
wgs84_epsg = 4326
gdf_forest_th_proj = gdf_forest_th.to_crs(epsg=projected_epsg)
gdf_forest_th_proj["centroid"] = gdf_forest_th_proj.geometry.centroid
gdf_forest_th_centroids = gdf_forest_th_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_forest_th_centroids.geometry
gdf_forest_th["lat"], gdf_forest_th["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "forest", "country": "Thailand"}
for label_column, label_value in labels.items():
    gdf_forest_th[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_forest_th = gdf_forest_th[output_columns]
output_path = "forest_th_full.csv"
df_forest_th.to_csv(output_path, index=False)
row_count = len(df_forest_th)
print(f"Saved full coordinate list: {row_count} rows")

Saved full coordinate list: 7337 rows


In [None]:
# Check each forest centroid for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_forest_th.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):  # has_street_view is defined in an earlier cell
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_forest_th_available = pd.DataFrame(filtered_rows, columns=df_forest_th.columns)
df_forest_th_available.to_csv("forest_th_with_streetview.csv", index=False)
print(f"Filtered: {len(df_forest_th_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_forest_th_sampled = df_forest_th_available.copy()
df_forest_th_sampled.to_csv("forest_th_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_forest_th_sampled)} rows to forest_th_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to forest_th_final_sample.csv


**Farmlands (Countryside)**

In [None]:
# Query code to find farmlands (countryside) in Thailand - copy and paste the text between the
# triple quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_COUNTRYSIDE_TH = """
[out:json][timeout:25];
{{geocodeArea:Thailand}}->.searchArea;
(
  node["landuse"="farmland"](area.searchArea);
  way["landuse"="farmland"](area.searchArea);
  relation["landuse"="farmland"](area.searchArea);
);
out center;
"""



In [None]:
# Read the farmland features exported from Overpass Turbo.
input_path = "countryside_th.geojson"
gdf_countryside_th = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (UTM zone 47N for Thailand) rather than
# in degrees, then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32647
wgs84_epsg = 4326
gdf_countryside_th_proj = gdf_countryside_th.to_crs(epsg=projected_epsg)
gdf_countryside_th_proj["centroid"] = gdf_countryside_th_proj.geometry.centroid
gdf_countryside_th_centroids = gdf_countryside_th_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_countryside_th_centroids.geometry
gdf_countryside_th["lat"], gdf_countryside_th["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "countryside", "country": "Thailand"}
for label_column, label_value in labels.items():
    gdf_countryside_th[label_column] = label_value

# Keep only the columns needed downstream, save them and report the row count.
output_columns = ["country", "scene_type", "lat", "lon"]
df_countryside_th = gdf_countryside_th[output_columns]
output_path = "countryside_th_full.csv"
df_countryside_th.to_csv(output_path, index=False)
row_count = len(df_countryside_th)
print(f"Saved full coordinate list: {row_count} rows")

Saved full coordinate list: 21601 rows


In [None]:
# Check each farmland centroid for Street View availability.
# Rows are visited in file order and the scan stops at the first TARGET_SAMPLES hits.
TARGET_SAMPLES = 150  # number of usable coordinates to collect before stopping
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_countryside_th.iterrows():
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):  # has_street_view is defined in an earlier cell
        filtered_rows.append(row)
        if len(filtered_rows) >= TARGET_SAMPLES:
            print(f"Reached {TARGET_SAMPLES} available samples, stopping early.")
            break
    time.sleep(0.1)  # Respect API rate limits

# Save filtered and final sampled CSVs.
# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_countryside_th_available = pd.DataFrame(filtered_rows, columns=df_countryside_th.columns)
df_countryside_th_available.to_csv("countryside_th_with_streetview.csv", index=False)
print(f"Filtered: {len(df_countryside_th_available)} coordinates with Street View")

# The loop already capped the list at TARGET_SAMPLES, so the final sample is a copy of the filtered rows.
df_countryside_th_sampled = df_countryside_th_available.copy()
df_countryside_th_sampled.to_csv("countryside_th_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_countryside_th_sampled)} rows to countryside_th_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to countryside_th_final_sample.csv


**Markets**

In [None]:
# Query code to find marketplaces in Thailand - copy and paste the text between the triple
# quotes into Overpass Turbo to generate .geojson file.
# The query is stored in a string because Overpass QL is not Python: left bare, this cell
# raises a SyntaxError when the notebook is run.
OVERPASS_QUERY_MARKETPLACE_TH = """
[out:json][timeout:25];
{{geocodeArea:Thailand}}->.searchArea;
(
  node["amenity"="marketplace"](area.searchArea);
  way["amenity"="marketplace"](area.searchArea);
  relation["amenity"="marketplace"](area.searchArea);
);
out center;
"""

In [None]:
# Read the marketplace features exported from Overpass Turbo.
input_path = "marketplace_th.geojson"
gdf_marketplace_th = gpd.read_file(input_path)

# Centroids are computed in a projected CRS (EPSG:32647) rather than in degrees,
# then converted back to WGS84 so they can be read as lat/lon.
projected_epsg = 32647
wgs84_epsg = 4326
gdf_marketplace_th_proj = gdf_marketplace_th.to_crs(epsg=projected_epsg)
gdf_marketplace_th_proj["centroid"] = gdf_marketplace_th_proj.geometry.centroid
gdf_marketplace_th_centroids = gdf_marketplace_th_proj.set_geometry("centroid").to_crs(epsg=wgs84_epsg)

# Copy the centroid coordinates onto the original GeoDataFrame (y = latitude, x = longitude).
centroid_points = gdf_marketplace_th_centroids.geometry
gdf_marketplace_th["lat"], gdf_marketplace_th["lon"] = centroid_points.y, centroid_points.x

# Tag every row with its scene type and country.
labels = {"scene_type": "marketplace", "country": "Thailand"}
for label_column, label_value in labels.items():
    gdf_marketplace_th[label_column] = label_value

# Keep only the columns needed downstream (this cell does not write a full CSV).
output_columns = ["country", "scene_type", "lat", "lon"]
df_marketplace_th = gdf_marketplace_th[output_columns]

In [None]:
# Top up the Thailand image set: only as many marketplace coordinates are collected as are
# still missing from the overall target.
TARGET_TOTAL = 900  # total number of images wanted
already_have = 861  # You have 861 images, so you only need 39 more
needed = max(0, TARGET_TOTAL - already_have)  # = 39; never negative if the target is already met

filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_marketplace_th.iterrows():
    # The target is checked before each request, so no API call is made once it is reached.
    if len(filtered_rows) >= needed:
        print(f"Reached target of {needed} images, stopping.")
        break
    lat, lon = row["lat"], row["lon"]
    if has_street_view(lat, lon, API_KEY):
        filtered_rows.append(row)
    time.sleep(0.1)  # Rate limit

# The column list is passed explicitly so the CSVs keep their header even if no point qualified.
df_marketplace_th_available = pd.DataFrame(filtered_rows, columns=df_marketplace_th.columns)
df_marketplace_th_available.to_csv("marketplace_th_with_streetview.csv", index=False)
print(f"Filtered: {len(df_marketplace_th_available)} coordinates with Street View")

# The loop already capped the list at `needed`, so the final sample is a copy of the filtered rows.
df_marketplace_th_sampled = df_marketplace_th_available.copy()
df_marketplace_th_sampled.to_csv("marketplace_th_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_marketplace_th_sampled)} rows to marketplace_th_final_sample.csv")

Checking for Street View imagery...
Reached target of 39 images, stopping.
Filtered: 39 coordinates with Street View
Final sample saved: 39 rows to marketplace_th_final_sample.csv


## **Malaysia**

**Note:** Malaysia has different EPSG codes, one for the mainland and one for the peninsulas. We will use EPSG:3857, which encompasses the whole country. The caveat is that it is off by just a few meters, which is fine.

**Beaches**

In [None]:
# Query code to find beaches in Malaysia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:25];
{{geocodeArea:Malaysia}}->.searchArea;
(
  node["natural"="beach"](area.searchArea);
  way["natural"="beach"](area.searchArea);
  relation["natural"="beach"](area.searchArea);
);
out center;

In [None]:
# Read the Malaysian beach features exported from Overpass Turbo.
gdf_beach_my = gpd.read_file("beach_my.geojson")

# Work in EPSG:3857 while taking centroids, then express the centroids in
# WGS84 (EPSG:4326) again so they can be read as latitude/longitude.
gdf_beach_my_proj = gdf_beach_my.to_crs("EPSG:3857")
gdf_beach_my_proj["centroid"] = gdf_beach_my_proj.geometry.centroid
gdf_beach_my_centroids = (
    gdf_beach_my_proj
    .set_geometry("centroid")
    .to_crs("EPSG:4326")
)

# Copy the centroid coordinates onto the original frame (y = latitude, x = longitude).
gdf_beach_my["lat"] = gdf_beach_my_centroids["centroid"].y
gdf_beach_my["lon"] = gdf_beach_my_centroids["centroid"].x

# Label every row with its scene type and country.
gdf_beach_my["scene_type"] = "beach"
gdf_beach_my["country"] = "Malaysia"

# Keep only the output columns and write the complete coordinate list.
df_beach_my = gdf_beach_my[
    ["country", "scene_type", "lat", "lon"]
]
df_beach_my.to_csv("beach_my_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_beach_my)} rows")

Saved full coordinate list: 620 rows


In [None]:
# Collect, in file order, the beach coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_beach_my.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_beach_my_available = pd.DataFrame(data=filtered_rows)
df_beach_my_available.to_csv("beach_my_with_streetview.csv", index=False)
print(f"Filtered: {len(df_beach_my_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_beach_my_sampled = df_beach_my_available.copy(deep=True)
df_beach_my_sampled.to_csv("beach_my_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_beach_my_sampled)} rows to beach_my_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to beach_my_final_sample.csv


**Jungles/forests**

In [None]:
# Query code to find jungles/forests in Malaysia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:60];
{{geocodeArea:Malaysia}}->.searchArea;
(
  // Forest landuse
  node["landuse"="forest"](area.searchArea);
  way["landuse"="forest"](area.searchArea);
  relation["landuse"="forest"](area.searchArea);

  // Natural wooded areas
  node["natural"="wood"](area.searchArea);
  way["natural"="wood"](area.searchArea);
  relation["natural"="wood"](area.searchArea);

  // Optional: forest areas tagged as landcover
  node["landcover"="trees"](area.searchArea);
  way["landcover"="trees"](area.searchArea);
  relation["landcover"="trees"](area.searchArea);
);
out center;

In [None]:
# Read the Malaysian forest/wood features exported from Overpass Turbo.
gdf_forest_my = gpd.read_file("forest_my.geojson")

# Take centroids in EPSG:3857, the CRS the Malaysia section note prescribes for the
# whole country (and the one the Malaysian beach cell uses). A single UTM zone such
# as 47N (EPSG:32647) is only defined for 96°E to 102°E, so it does not fit the
# parts of Malaysia that lie east of that band.
gdf_forest_my_proj = gdf_forest_my.to_crs(epsg=3857)

gdf_forest_my_proj['centroid'] = gdf_forest_my_proj.geometry.centroid

# Convert the centroids back to WGS84 so they can be read as latitude/longitude.
gdf_forest_my_centroids = gdf_forest_my_proj.set_geometry('centroid').to_crs(epsg=4326)

# y = latitude, x = longitude.
gdf_forest_my["lat"] = gdf_forest_my_centroids.geometry.y
gdf_forest_my["lon"] = gdf_forest_my_centroids.geometry.x

# Label every row with its scene type and country.
gdf_forest_my["scene_type"] = "forest"
gdf_forest_my["country"] = "Malaysia"

# Keep only the output columns and write the complete coordinate list.
df_forest_my = gdf_forest_my[["country", "scene_type", "lat", "lon"]]

df_forest_my.to_csv("forest_my_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_forest_my)} rows")

Saved full coordinate list: 5988 rows


In [None]:
# Collect, in file order, the forest coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
# `has_street_view` must already be defined by an earlier cell.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_forest_my.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_forest_my_available = pd.DataFrame(data=filtered_rows)
df_forest_my_available.to_csv("forest_my_with_streetview.csv", index=False)
print(f"Filtered: {len(df_forest_my_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_forest_my_sampled = df_forest_my_available.copy(deep=True)
df_forest_my_sampled.to_csv("forest_my_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_forest_my_sampled)} rows to forest_my_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to forest_my_final_sample.csv


**Cities/towns**

In [None]:
# Query code to find cities/town in Malaysia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:25];
{{geocodeArea:Malaysia}}->.searchArea;
(
  node["place"="city"](area.searchArea);
  node["place"="town"](area.searchArea);
);
out;

In [None]:
# Read the Malaysian city/town features exported from Overpass Turbo.
gdf_city_my = gpd.read_file("city_my.geojson")

# Take centroids in EPSG:3857, the CRS the Malaysia section note prescribes for the
# whole country (and the one the Malaysian beach cell uses). A single UTM zone such
# as 47N (EPSG:32647) is only defined for 96°E to 102°E, so it does not fit the
# parts of Malaysia that lie east of that band.
gdf_city_my_proj = gdf_city_my.to_crs(epsg=3857)

gdf_city_my_proj['centroid'] = gdf_city_my_proj.geometry.centroid

# Convert the centroids back to WGS84 so they can be read as latitude/longitude.
gdf_city_my_centroids = gdf_city_my_proj.set_geometry('centroid').to_crs(epsg=4326)

# y = latitude, x = longitude.
gdf_city_my["lat"] = gdf_city_my_centroids.geometry.y
gdf_city_my["lon"] = gdf_city_my_centroids.geometry.x

# Label every row with its scene type and country.
gdf_city_my["scene_type"] = "city"
gdf_city_my["country"] = "Malaysia"

# Keep only the output columns and write the complete coordinate list.
df_city_my = gdf_city_my[["country", "scene_type", "lat", "lon"]]

df_city_my.to_csv("city_my_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_city_my)} rows")

Saved full coordinate list: 451 rows


In [None]:
# Collect, in file order, the city/town coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_city_my.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_city_my_available = pd.DataFrame(data=filtered_rows)
df_city_my_available.to_csv("city_my_with_streetview.csv", index=False)
print(f"Filtered: {len(df_city_my_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_city_my_sampled = df_city_my_available.copy(deep=True)
df_city_my_sampled.to_csv("city_my_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_city_my_sampled)} rows to city_my_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to city_my_final_sample.csv


**Countryside/farmland**

In [None]:
# Query code to find countryside/farmland in Malaysia - copy and paste this into Overpass Turbo to generate .geojson file
[out:json][timeout:25];
{{geocodeArea:Malaysia}}->.searchArea;
(
  node["landuse"="farmland"](area.searchArea);
  way["landuse"="farmland"](area.searchArea);
  relation["landuse"="farmland"](area.searchArea);
);
out center;

In [None]:
# Read the Malaysian farmland features exported from Overpass Turbo.
gdf_countryside_my = gpd.read_file("countryside_my.geojson")

# Take centroids in EPSG:3857, the CRS the Malaysia section note prescribes for the
# whole country (and the one the Malaysian beach cell uses). A single UTM zone such
# as 47N (EPSG:32647) is only defined for 96°E to 102°E, so it does not fit the
# parts of Malaysia that lie east of that band.
gdf_countryside_my_proj = gdf_countryside_my.to_crs(epsg=3857)

gdf_countryside_my_proj['centroid'] = gdf_countryside_my_proj.geometry.centroid

# Convert the centroids back to WGS84 so they can be read as latitude/longitude.
gdf_countryside_my_centroids = gdf_countryside_my_proj.set_geometry('centroid').to_crs(epsg=4326)

# y = latitude, x = longitude.
gdf_countryside_my["lat"] = gdf_countryside_my_centroids.geometry.y
gdf_countryside_my["lon"] = gdf_countryside_my_centroids.geometry.x

# Label every row with its scene type and country.
gdf_countryside_my["scene_type"] = "countryside"
gdf_countryside_my["country"] = "Malaysia"

# Keep only the output columns and write the complete coordinate list.
df_countryside_my = gdf_countryside_my[["country", "scene_type", "lat", "lon"]]

df_countryside_my.to_csv("countryside_my_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_countryside_my)} rows")

Saved full coordinate list: 5347 rows


In [None]:
# Collect, in file order, the farmland coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_countryside_my.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_countryside_my_available = pd.DataFrame(data=filtered_rows)
df_countryside_my_available.to_csv("countryside_my_with_streetview.csv", index=False)
print(f"Filtered: {len(df_countryside_my_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_countryside_my_sampled = df_countryside_my_available.copy(deep=True)
df_countryside_my_sampled.to_csv("countryside_my_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_countryside_my_sampled)} rows to countryside_my_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to countryside_my_final_sample.csv


**Mosques**

In [None]:
# Query code to find mosques in Malaysia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:25];
{{geocodeArea:Malaysia}}->.searchArea;
(
  node["amenity"="place_of_worship"]["religion"="muslim"](area.searchArea);
  way["amenity"="place_of_worship"]["religion"="muslim"](area.searchArea);
  relation["amenity"="place_of_worship"]["religion"="muslim"](area.searchArea);
);
out center;

In [None]:
# Read the Malaysian mosque features exported from Overpass Turbo.
gdf_mosque_my = gpd.read_file("mosque_my.geojson")

# Coordinates are read straight off the geometry, with no centroid step.
# NOTE(review): `.x` / `.y` are only available for Point geometries, so this
# assumes the export contains points only -- confirm against the GeoJSON file.
mosque_lat = gdf_mosque_my.geometry.y
gdf_mosque_my["lat"] = mosque_lat
mosque_lon = gdf_mosque_my.geometry.x
gdf_mosque_my["lon"] = mosque_lon

# Label every row with its scene type and country.
gdf_mosque_my["scene_type"] = "mosque"
gdf_mosque_my["country"] = "Malaysia"

# Keep only the output columns and write the complete coordinate list.
df_mosque_my = gdf_mosque_my[
    ["country", "scene_type", "lat", "lon"]
]
df_mosque_my.to_csv("mosque_my_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_mosque_my)} rows")

Saved full coordinate list: 6072 rows


In [None]:
# Collect, in file order, the mosque coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_mosque_my.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_mosque_my_available = pd.DataFrame(data=filtered_rows)
df_mosque_my_available.to_csv("mosque_my_with_streetview.csv", index=False)
print(f"Filtered: {len(df_mosque_my_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_mosque_my_sampled = df_mosque_my_available.copy(deep=True)
df_mosque_my_sampled.to_csv("mosque_my_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_mosque_my_sampled)} rows to mosque_my_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to mosque_my_final_sample.csv


**Mountains**

In [None]:
# Query code to find mountains in Malaysia - copy and paste this into Overpass Turbo to generate .geojson file
[out:json][timeout:25];
{{geocodeArea:Malaysia}}->.searchArea;
(
  node["natural"="peak"](area.searchArea);
  way["natural"="peak"](area.searchArea);
  relation["natural"="peak"](area.searchArea);
);
out center;

In [None]:
# Read the Malaysian peak features exported from Overpass Turbo.
gdf_mountain_my = gpd.read_file("mountain_my.geojson")

# Take centroids in EPSG:3857, the CRS the Malaysia section note prescribes for the
# whole country (and the one the Malaysian beach cell uses). A single UTM zone such
# as 47N (EPSG:32647) is only defined for 96°E to 102°E, so it does not fit the
# parts of Malaysia that lie east of that band.
gdf_mountain_my_proj = gdf_mountain_my.to_crs(epsg=3857)

gdf_mountain_my_proj['centroid'] = gdf_mountain_my_proj.geometry.centroid

# Convert the centroids back to WGS84 so they can be read as latitude/longitude.
gdf_mountain_my_centroids = gdf_mountain_my_proj.set_geometry('centroid').to_crs(epsg=4326)

# y = latitude, x = longitude.
gdf_mountain_my["lat"] = gdf_mountain_my_centroids.geometry.y
gdf_mountain_my["lon"] = gdf_mountain_my_centroids.geometry.x

# Label every row with its scene type and country.
gdf_mountain_my["scene_type"] = "mountain"
gdf_mountain_my["country"] = "Malaysia"

# Keep only the output columns and write the complete coordinate list.
df_mountain_my = gdf_mountain_my[["country", "scene_type", "lat", "lon"]]

df_mountain_my.to_csv("mountain_my_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_mountain_my)} rows")

Saved full coordinate list: 2853 rows


In [None]:
# Collect, in file order, the peak coordinates that have Street View coverage,
# stopping as soon as 150 have been found (fewer are kept if the list runs out first).
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_mountain_my.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_mountain_my_available = pd.DataFrame(data=filtered_rows)
df_mountain_my_available.to_csv("mountain_my_with_streetview.csv", index=False)
print(f"Filtered: {len(df_mountain_my_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_mountain_my_sampled = df_mountain_my_available.copy(deep=True)
df_mountain_my_sampled.to_csv("mountain_my_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_mountain_my_sampled)} rows to mountain_my_final_sample.csv")

Checking for Street View imagery...
Filtered: 146 coordinates with Street View
Final sample saved: 146 rows to mountain_my_final_sample.csv


## **Indonesia**

**Note:** For the jungle/forest query, we had to focus on the Kalimantan Borneo jungle region. Otherwise, Indonesia is too forest-dense and the query outputs a dataset that is too large. For more information, this is how Borneo is divided:

- 🇮🇩 Kalimantan — Indonesia (covers about 73% of Borneo)
- 🇲🇾 Sabah & Sarawak — Malaysia
- 🇧🇳 Brunei — small, independent nation on the north coast

**Note:** Similar to Malaysia, because Indonesia contains islands and spans multiple UTM zones, we will just stick to EPSG:3857 for Indonesia. Since our project focuses on Street View and not spatial accuracy, we don't really need to re-project. EPSG:3857 works for anywhere on Earth because it's a global projection used widely in web mapping.

**Beaches**

In [None]:
# Query code to find beaches in Indonesia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:60];
{{geocodeArea:Indonesia}}->.searchArea;
(
  node["natural"="beach"](area.searchArea);
  way["natural"="beach"](area.searchArea);
  relation["natural"="beach"](area.searchArea);
);
out center;


In [None]:
# Read the Indonesian beach features exported from Overpass Turbo.
gdf_beach_id = gpd.read_file("beach_id.geojson")

# Work in EPSG:3857 while taking centroids, then express the centroids in
# WGS84 (EPSG:4326) again so they can be read as latitude/longitude.
gdf_beach_id_proj = gdf_beach_id.to_crs("EPSG:3857")
gdf_beach_id_proj["centroid"] = gdf_beach_id_proj.geometry.centroid
gdf_beach_id_centroids = (
    gdf_beach_id_proj
    .set_geometry("centroid")
    .to_crs("EPSG:4326")
)

# Copy the centroid coordinates onto the original frame (y = latitude, x = longitude).
gdf_beach_id["lat"] = gdf_beach_id_centroids["centroid"].y
gdf_beach_id["lon"] = gdf_beach_id_centroids["centroid"].x

# Label every row with its scene type and country.
gdf_beach_id["scene_type"] = "beach"
gdf_beach_id["country"] = "Indonesia"

# Keep only the output columns and write the complete coordinate list.
df_beach_id = gdf_beach_id[
    ["country", "scene_type", "lat", "lon"]
]
df_beach_id.to_csv("beach_id_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_beach_id)} rows")

Saved full coordinate list: 2911 rows


In [None]:
# Collect, in file order, the beach coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_beach_id.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause to stay under the API limits and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_beach_id_available = pd.DataFrame(data=filtered_rows)
df_beach_id_available.to_csv("beach_id_with_streetview.csv", index=False)
print(f"Filtered: {len(df_beach_id_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_beach_id_sampled = df_beach_id_available.copy(deep=True)
df_beach_id_sampled.to_csv("beach_id_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_beach_id_sampled)} rows to beach_id_final_sample.csv")


Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to beach_id_final_sample.csv


**Volcanoes/Mountains**

In [None]:
# Query code to find volcanoes/mountains in Indonesia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:60];
{{geocodeArea:Indonesia}}->.searchArea;
(
  node["natural"="peak"](area.searchArea);
  way["natural"="peak"](area.searchArea);
  relation["natural"="peak"](area.searchArea);
  node["volcano:type"](area.searchArea);
  way["volcano:type"](area.searchArea);
  relation["volcano:type"](area.searchArea);
);
out center;

In [None]:
# Read the Indonesian peak/volcano features exported from Overpass Turbo.
gdf_mountain_id = gpd.read_file("mountain_id.geojson")

# Work in EPSG:3857 while taking centroids, then express the centroids in
# WGS84 (EPSG:4326) again so they can be read as latitude/longitude.
gdf_mountain_id_proj = gdf_mountain_id.to_crs("EPSG:3857")
gdf_mountain_id_proj["centroid"] = gdf_mountain_id_proj.geometry.centroid
gdf_mountain_id_centroids = (
    gdf_mountain_id_proj
    .set_geometry("centroid")
    .to_crs("EPSG:4326")
)

# Copy the centroid coordinates onto the original frame (y = latitude, x = longitude).
gdf_mountain_id["lat"] = gdf_mountain_id_centroids["centroid"].y
gdf_mountain_id["lon"] = gdf_mountain_id_centroids["centroid"].x

# Label every row with its scene type and country.
gdf_mountain_id["scene_type"] = "mountain"
gdf_mountain_id["country"] = "Indonesia"

# Keep only the output columns and write the complete coordinate list.
df_mountain_id = gdf_mountain_id[
    ["country", "scene_type", "lat", "lon"]
]
df_mountain_id.to_csv("mountain_id_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_mountain_id)} rows")

Saved full coordinate list: 21828 rows


In [None]:
# Collect, in file order, the peak/volcano coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_mountain_id.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_mountain_id_available = pd.DataFrame(data=filtered_rows)
df_mountain_id_available.to_csv("mountain_id_with_streetview.csv", index=False)
print(f"Filtered: {len(df_mountain_id_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_mountain_id_sampled = df_mountain_id_available.copy(deep=True)
df_mountain_id_sampled.to_csv("mountain_id_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_mountain_id_sampled)} rows to mountain_id_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to mountain_id_final_sample.csv


**Cities/towns**

In [None]:
# Query code to find cities and towns in Indonesia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:60];
{{geocodeArea:Indonesia}}->.searchArea;
(
  node["place"="city"](area.searchArea);
  node["place"="town"](area.searchArea);
);
out;

In [None]:
# Read the Indonesian city/town features exported from Overpass Turbo.
gdf_city_id = gpd.read_file("city_id.geojson")

# Work in EPSG:3857 while taking centroids, then express the centroids in
# WGS84 (EPSG:4326) again so they can be read as latitude/longitude.
gdf_city_id_proj = gdf_city_id.to_crs("EPSG:3857")
gdf_city_id_proj["centroid"] = gdf_city_id_proj.geometry.centroid
gdf_city_id_centroids = (
    gdf_city_id_proj
    .set_geometry("centroid")
    .to_crs("EPSG:4326")
)

# Copy the centroid coordinates onto the original frame (y = latitude, x = longitude).
gdf_city_id["lat"] = gdf_city_id_centroids["centroid"].y
gdf_city_id["lon"] = gdf_city_id_centroids["centroid"].x

# Label every row with its scene type and country.
gdf_city_id["scene_type"] = "city"
gdf_city_id["country"] = "Indonesia"

# Keep only the output columns and write the complete coordinate list.
df_city_id = gdf_city_id[
    ["country", "scene_type", "lat", "lon"]
]
df_city_id.to_csv("city_id_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_city_id)} rows")

Saved full coordinate list: 2012 rows


In [None]:
# Collect, in file order, the city/town coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_city_id.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_city_id_available = pd.DataFrame(data=filtered_rows)
df_city_id_available.to_csv("city_id_with_streetview.csv", index=False)
print(f"Filtered: {len(df_city_id_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_city_id_sampled = df_city_id_available.copy(deep=True)
df_city_id_sampled.to_csv("city_id_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_city_id_sampled)} rows to city_id_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to city_id_final_sample.csv


**Jungle/forest**

In [None]:
# Query code to find jungle and forests in Indonesia - copy and paste this into Overpass Turbo to generate .geojson file
# Note that for this query, we limit it to just the Borneo jungle region (Kalimantan) because otherwise the data would be too large - this introduces a bit of bias

[out:json][timeout:60];
{{geocodeArea:Kalimantan}}->.searchArea;
(
  way["landuse"="forest"](area.searchArea);
  relation["landuse"="forest"](area.searchArea);
  way["natural"="wood"](area.searchArea);
  relation["natural"="wood"](area.searchArea);
);
out center;

In [None]:
# Read the Kalimantan forest/wood features exported from Overpass Turbo.
gdf_jungle_id = gpd.read_file("jungle_id.geojson")

# Work in EPSG:3857 while taking centroids, then express the centroids in
# WGS84 (EPSG:4326) again so they can be read as latitude/longitude.
gdf_jungle_id_proj = gdf_jungle_id.to_crs("EPSG:3857")
gdf_jungle_id_proj["centroid"] = gdf_jungle_id_proj.geometry.centroid
gdf_jungle_id_centroids = (
    gdf_jungle_id_proj
    .set_geometry("centroid")
    .to_crs("EPSG:4326")
)

# Copy the centroid coordinates onto the original frame (y = latitude, x = longitude).
gdf_jungle_id["lat"] = gdf_jungle_id_centroids["centroid"].y
gdf_jungle_id["lon"] = gdf_jungle_id_centroids["centroid"].x

# Label every row with its scene type and country.
gdf_jungle_id["scene_type"] = "jungle"
gdf_jungle_id["country"] = "Indonesia"

# Keep only the output columns and write the complete coordinate list.
df_jungle_id = gdf_jungle_id[
    ["country", "scene_type", "lat", "lon"]
]
df_jungle_id.to_csv("jungle_id_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_jungle_id)} rows")

Saved full coordinate list: 3442 rows


In [None]:
# Collect, in file order, the jungle coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_jungle_id.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_jungle_id_available = pd.DataFrame(data=filtered_rows)
df_jungle_id_available.to_csv("jungle_id_with_streetview.csv", index=False)
print(f"Filtered: {len(df_jungle_id_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_jungle_id_sampled = df_jungle_id_available.copy(deep=True)
df_jungle_id_sampled.to_csv("jungle_id_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_jungle_id_sampled)} rows to jungle_id_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to jungle_id_final_sample.csv


**Countryside and rice fields**

In [None]:
# Query code to find countryside and rice fields in Indonesia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:60];
{{geocodeArea:Indonesia}}->.searchArea;
(
  way["landuse"="paddy"](area.searchArea);
  relation["landuse"="paddy"](area.searchArea);
  way["crop"="rice"](area.searchArea);
  relation["crop"="rice"](area.searchArea);
);
out center;

In [None]:
# Read the Indonesian paddy/rice-field features exported from Overpass Turbo.
gdf_countryside_id = gpd.read_file("countryside_id.geojson")

# Work in EPSG:3857 while taking centroids, then express the centroids in
# WGS84 (EPSG:4326) again so they can be read as latitude/longitude.
gdf_countryside_id_proj = gdf_countryside_id.to_crs("EPSG:3857")
gdf_countryside_id_proj["centroid"] = gdf_countryside_id_proj.geometry.centroid
gdf_countryside_id_centroids = (
    gdf_countryside_id_proj
    .set_geometry("centroid")
    .to_crs("EPSG:4326")
)

# Copy the centroid coordinates onto the original frame (y = latitude, x = longitude).
gdf_countryside_id["lat"] = gdf_countryside_id_centroids["centroid"].y
gdf_countryside_id["lon"] = gdf_countryside_id_centroids["centroid"].x

# Label every row with its scene type and country.
gdf_countryside_id["scene_type"] = "countryside"
gdf_countryside_id["country"] = "Indonesia"

# Keep only the output columns and write the complete coordinate list.
df_countryside_id = gdf_countryside_id[
    ["country", "scene_type", "lat", "lon"]
]
df_countryside_id.to_csv("countryside_id_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_countryside_id)} rows")

Saved full coordinate list: 5635 rows


In [None]:
# Collect, in file order, the countryside coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_countryside_id.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_countryside_id_available = pd.DataFrame(data=filtered_rows)
df_countryside_id_available.to_csv("countryside_id_with_streetview.csv", index=False)
print(f"Filtered: {len(df_countryside_id_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_countryside_id_sampled = df_countryside_id_available.copy(deep=True)
df_countryside_id_sampled.to_csv("countryside_id_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_countryside_id_sampled)} rows to countryside_id_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to countryside_id_final_sample.csv


**Temples (Buddhist and Hindu)**

In [None]:
# Query code to find Buddhist and Hindu temples in Indonesia - copy and paste this into Overpass Turbo to generate .geojson file

[out:json][timeout:60];
{{geocodeArea:Indonesia}}->.searchArea;
(
  node["historic"="temple"](area.searchArea);
  way["historic"="temple"](area.searchArea);
  relation["historic"="temple"](area.searchArea);
  node["amenity"="place_of_worship"]["religion"~"hindu|buddhist"](area.searchArea);
  way["amenity"="place_of_worship"]["religion"~"hindu|buddhist"](area.searchArea);
  relation["amenity"="place_of_worship"]["religion"~"hindu|buddhist"](area.searchArea);
);
out center;

In [None]:
# Read the Indonesian temple features exported from Overpass Turbo.
gdf_temple_id = gpd.read_file("temple_id.geojson")

# Work in EPSG:3857 while taking centroids, then express the centroids in
# WGS84 (EPSG:4326) again so they can be read as latitude/longitude.
gdf_temple_id_proj = gdf_temple_id.to_crs("EPSG:3857")
gdf_temple_id_proj["centroid"] = gdf_temple_id_proj.geometry.centroid
gdf_temple_id_centroids = (
    gdf_temple_id_proj
    .set_geometry("centroid")
    .to_crs("EPSG:4326")
)

# Copy the centroid coordinates onto the original frame (y = latitude, x = longitude).
gdf_temple_id["lat"] = gdf_temple_id_centroids["centroid"].y
gdf_temple_id["lon"] = gdf_temple_id_centroids["centroid"].x

# Label every row with its scene type and country.
gdf_temple_id["scene_type"] = "temple"
gdf_temple_id["country"] = "Indonesia"

# Keep only the output columns and write the complete coordinate list.
df_temple_id = gdf_temple_id[
    ["country", "scene_type", "lat", "lon"]
]
df_temple_id.to_csv("temple_id_full.csv", index=False)
print(f"Saved full coordinate list: {len(df_temple_id)} rows")

Saved full coordinate list: 2681 rows


In [None]:
# Collect, in file order, the temple coordinates that have Street View coverage,
# stopping as soon as 150 have been found.
filtered_rows = []
print("Checking for Street View imagery...")

for idx, row in df_temple_id.iterrows():
    lat = row["lat"]
    lon = row["lon"]
    if not has_street_view(lat, lon, API_KEY):
        # No imagery at this point: pause for the rate limit and move on.
        time.sleep(0.1)
        continue
    filtered_rows.append(row)
    if len(filtered_rows) >= 150:
        print("Reached 150 available samples, stopping early.")
        break
    time.sleep(0.1)

# Write the coordinates that have Street View coverage.
df_temple_id_available = pd.DataFrame(data=filtered_rows)
df_temple_id_available.to_csv("temple_id_with_streetview.csv", index=False)
print(f"Filtered: {len(df_temple_id_available)} coordinates with Street View")

# The "final sample" is an unmodified copy of the filtered rows, saved under its own name.
df_temple_id_sampled = df_temple_id_available.copy(deep=True)
df_temple_id_sampled.to_csv("temple_id_final_sample.csv", index=False)
print(f"Final sample saved: {len(df_temple_id_sampled)} rows to temple_id_final_sample.csv")

Checking for Street View imagery...
Reached 150 available samples, stopping early.
Filtered: 150 coordinates with Street View
Final sample saved: 150 rows to temple_id_final_sample.csv
