In [18]:
import os
import json
import time
import pickle
import requests
import pandas as pd


In [19]:
API_KEY = os.getenv("GOOGLE_MAPS_API_KEY")
if not API_KEY:
    raise ValueError("API_KEY not found in environment variables.")

In [20]:
TEXT_SEARCH_URL = "https://places.googleapis.com/v1/places:searchText"
DETAILS_BASE_URL = "https://places.googleapis.com/v1/places/"

# Field masks for the two types of calls:
# For text search, we only request the place IDs (and nextPageToken if present)
TEXT_FIELD_MASK = "places.id,nextPageToken"
# For place details, use the long list of fields as in your original code.
fields_list = [
    "attributions", "id", "name", "photos",
    "addressComponents", "adrFormatAddress", "formattedAddress", "location", "plusCode", "shortFormattedAddress", "types", "viewport",
    "accessibilityOptions", "businessStatus", "containingPlaces", "displayName", "googleMapsLinks", "googleMapsUri", "iconBackgroundColor", "iconMaskBaseUri", "primaryType", "primaryTypeDisplayName", "pureServiceAreaBusiness", "subDestinations", "utcOffsetMinutes",
    "currentOpeningHours", "currentSecondaryOpeningHours", "internationalPhoneNumber", "nationalPhoneNumber", "priceLevel", "priceRange", "rating", "regularOpeningHours", "regularSecondaryOpeningHours", "userRatingCount", "websiteUri"
]
DETAILS_FIELD_MASK = ",".join(fields_list)

# Common headers for both requests
common_headers = {
    "Content-Type": "application/json",
    "X-Goog-Api-Key": API_KEY,
}


In [21]:

# List of Egyptian governorates (you can adjust the list as needed)
# governorates = [
#     "Cairo", "Giza", "Alexandria", "Dakahlia", "Red Sea", "Beheira", 
#     "Fayoum", "Gharbia", "Ismailia", "Menoufia", "Minya", "Qaliubiya",
#     "New Valley", "Suez", "Aswan", "Assiut", "Beni Suef", "Port Said",
#     "Damietta", "Sharkia", "South Sinai", "Kafr El Sheikh", "Matrouh",
#     "Luxor", "Qena", "Sohag"
# ]

# governorates = [
#     "Alexandria", "Red Sea", "Ras Ghareb", "Hurghada", "Marsa Alam",
#     "Ismailia", "El dabaa",
#     "Suez", "Port Said", "El Alamein", 
#     "dahab", "Nuweiba", "Taba", "Sharm Al Shiekh", "El Tor", "Abo Redis", "Ras Sidr", "Abu Zenima",
#     "Damietta", "North Sinai", "Marsa Matrouh"
# ]
governorates = [ 
                "Ras Ghareb", "Hurghada", "Marsa Alam",
                "El dabaa", "El Alamein", 
                "dahab", "Nuweiba", "Taba", "Sharm Al Shiekh", "El Tor", "Abo Redis", "Ras Sidr", "Abu Zenima",
                "North Sinai", "Marsa Matrouh"
                "Cairo", "Giza", "Alexandria", "Dakahlia", "Red Sea", "Beheira", 
                "Fayoum", "Gharbia", "Ismailia", "Menoufia", "Minya", "Qaliubiya",
                "New Valley", "Suez", "Aswan", "Assiut", "Beni Suef", "Port Said",
                "Damietta", "Sharkia", "South Sinai", "Kafr El Sheikh",
                "Luxor", "Qena", "Sohag"
]


# Place queries you want to search for
# place_queries = [
#     "restaurant", "cafe", "museum", "art gallery", "tourist attraction", "monument"
# ]
# place_queries = ["beach"]
place_queries = ["Nature preserve", "Park", "Ski resort"]


In [22]:
# Files to save checkpoints
PLACE_IDS_CHECKPOINT = "place_ids_checkpoint.pkl"
DETAILS_CHECKPOINT = "place_details_checkpoint.pkl"

# Load checkpoint for place IDs if available
if os.path.exists(PLACE_IDS_CHECKPOINT):
    with open(PLACE_IDS_CHECKPOINT, "rb") as f:
        collected_place_ids = pickle.load(f)
    print(f"Loaded {len(collected_place_ids)} place IDs from checkpoint.")
else:
    collected_place_ids = set()



Loaded 442 place IDs from checkpoint.


In [23]:
# -------------------------------
# STEP 1: Text Search to collect Place IDs
# -------------------------------
for governorate in governorates:
    for query in place_queries:
        # Build the text query: e.g., "restaurant in Cairo, Egypt"
        text_query = f"{query} in {governorate}, Egypt"
        print(f"Searching for: {text_query}")
        
        # Initial request body and parameters
        request_body = {
            "textQuery": text_query,
            "pageSize": 20  # maximum allowed per page
        }
        next_page_token = None

        while True:
            if next_page_token:
                request_body["pageToken"] = next_page_token

            try:
                response = requests.post(
                    TEXT_SEARCH_URL,
                    headers={**common_headers, "X-Goog-FieldMask": TEXT_FIELD_MASK},
                    json=request_body
                )
                response.raise_for_status()
            except Exception as e:
                print(f"Error during text search for '{text_query}': {e}")
                break

            data = response.json()
            places = data.get("places", [])
            for place in places:
                # The id is under the "id" key in each place (or within places.id field)
                place_id = place.get("id")
                if place_id:
                    collected_place_ids.add(place_id)

            # Save checkpoint after each page
            with open(PLACE_IDS_CHECKPOINT, "wb") as f:
                pickle.dump(collected_place_ids, f)
            print(f"Collected {len(collected_place_ids)} unique place IDs so far.")

            # Check if there is a nextPageToken for pagination
            next_page_token = data.get("nextPageToken")
            if not next_page_token:
                break

            # Sleep a bit before requesting next page (rate limit caution)
            time.sleep(2)
        # Pause between different queries to be safe
        time.sleep(2)

# Convert set to list for iteration
place_ids_list = list(collected_place_ids)
print(f"Total unique place IDs collected: {len(place_ids_list)}")

Searching for: Nature preserve in Ras Ghareb, Egypt
Collected 448 unique place IDs so far.
Searching for: Park in Ras Ghareb, Egypt
Collected 459 unique place IDs so far.
Searching for: Ski resort in Ras Ghareb, Egypt
Collected 469 unique place IDs so far.
Searching for: Nature preserve in Hurghada, Egypt
Collected 474 unique place IDs so far.
Searching for: Park in Hurghada, Egypt
Collected 494 unique place IDs so far.
Collected 502 unique place IDs so far.
Searching for: Ski resort in Hurghada, Egypt
Collected 521 unique place IDs so far.
Searching for: Nature preserve in Marsa Alam, Egypt
Collected 539 unique place IDs so far.
Searching for: Park in Marsa Alam, Egypt
Collected 546 unique place IDs so far.
Collected 559 unique place IDs so far.
Searching for: Ski resort in Marsa Alam, Egypt
Collected 576 unique place IDs so far.
Searching for: Nature preserve in El dabaa, Egypt
Collected 583 unique place IDs so far.
Searching for: Park in El dabaa, Egypt
Collected 603 unique place ID

In [24]:

# -------------------------------
# STEP 2: Get Place Details for each ID
# -------------------------------
# Load checkpoint for details if available
if os.path.exists(DETAILS_CHECKPOINT):
    with open(DETAILS_CHECKPOINT, "rb") as f:
        place_details_list = pickle.load(f)
    print(f"Loaded {len(place_details_list)} place details from checkpoint.")
else:
    place_details_list = []

# Create a set of already processed place IDs to skip duplicates
processed_ids = {detail.get("id") for detail in place_details_list if "id" in detail}

for idx, place_id in enumerate(place_ids_list):
    if place_id in processed_ids:
        continue  # skip already processed details

    details_url = f"{DETAILS_BASE_URL}{place_id}"
    # We can pass the fields as header instead of URL param
    try:
        details_response = requests.get(
            details_url,
            headers={**common_headers, "X-Goog-FieldMask": DETAILS_FIELD_MASK}
        )
        details_response.raise_for_status()
    except Exception as e:
        print(f"Error fetching details for {place_id}: {e}")
        continue

    details_data = details_response.json()
    place_details_list.append(details_data)
    processed_ids.add(place_id)
    print(f"Fetched details for {place_id} ({idx+1}/{len(place_ids_list)})")

    # Save checkpoint every few iterations
    if (idx + 1) % 10 == 0:
        with open(DETAILS_CHECKPOINT, "wb") as f:
            pickle.dump(place_details_list, f)
        print(f"Checkpoint saved for {idx+1} places.")

    time.sleep(0.5)  # pause to avoid rate limits

# Save final checkpoint for details
with open(DETAILS_CHECKPOINT, "wb") as f:
    pickle.dump(place_details_list, f)
print("Final details checkpoint saved.")



Loaded 442 place details from checkpoint.
Fetched details for ChIJ90NDu81NUxQR3ZzGHyFccX4 (1/2317)
Fetched details for ChIJde_7zJVLqxURxrJIGM57pfw (2/2317)
Fetched details for ChIJhzTWtTs3VBQRN_lVTepBjxU (3/2317)
Fetched details for ChIJPX9gAJY5UxQRwqFdk4-NzF8 (4/2317)
Fetched details for ChIJz2_6c057NhQRZFNYrbx7hBE (5/2317)
Fetched details for ChIJWxahaYXY9T4RHBzXk3WPVa4 (6/2317)
Fetched details for ChIJiVMTx9QVWBQRiUHaOCuqotI (7/2317)
Fetched details for ChIJxz4Xbe1pVhQRMcMd6vwuQko (8/2317)
Fetched details for ChIJLf2-r2Vp9xQRrCFIsrA6mF0 (9/2317)
Fetched details for ChIJnU94kEjuXxQRqJ9m8ndBEfE (10/2317)
Checkpoint saved for 10 places.
Fetched details for ChIJUQTS-y33UxQR71F7A851fj8 (12/2317)
Fetched details for ChIJGWZeRDx5TRQRLbx_ZRyJlg4 (13/2317)
Fetched details for ChIJ_Uyl8Vec-RQRJmPmAE5FpmQ (14/2317)
Fetched details for ChIJ1Sv01iolVBQRk814zhyAL6c (15/2317)
Fetched details for ChIJeV-DPOjtXxQRlkLVPMjDjik (16/2317)
Fetched details for ChIJIeLXBABrWBQRSOGK9u0Ykyk (17/2317)
Fetched

In [25]:
# -------------------------------
# STEP 3: Parse the detailed responses to a DataFrame
# -------------------------------
# Convert list of dictionaries to a pandas DataFrame.
# Depending on the structure of the JSON, you may need to adjust the parsing.
df_places = pd.json_normalize(place_details_list)
print("Final DataFrame head:")
print(df_places.head())

# Save the DataFrame to CSV as a backup
df_places.to_csv("places_details_raw2.csv", index=False)

Final DataFrame head:
                                 name                           id  \
0  places/ChIJ2ThejIT4VhQRGjEFWkYkNX4  ChIJ2ThejIT4VhQRGjEFWkYkNX4   
1  places/ChIJ4xOxlkT8XxQRGAM5B2w-iVE  ChIJ4xOxlkT8XxQRGAM5B2w-iVE   
2  places/ChIJXb2g8O3tXxQROIa2cMoLFes  ChIJXb2g8O3tXxQROIa2cMoLFes   
3  places/ChIJjfdEPzaV9RQRBDK0HXOHYC0  ChIJjfdEPzaV9RQRBDK0HXOHYC0   
4  places/ChIJAULdcQ1nVhQRj0hCN_oHyGo  ChIJAULdcQ1nVhQRj0hCN_oHyGo   

                                               types  \
0            [beach, natural_feature, establishment]   
1  [resort_hotel, bed_and_breakfast, hotel, lodgi...   
2  [tourist_attraction, point_of_interest, establ...   
3            [beach, natural_feature, establishment]   
4  [resort_hotel, hotel, lodging, point_of_intere...   

                                    formattedAddress  \
0       Porto Beach, Attaka, Suez Governorate, Egypt   
1  51, El Alamein, Marsa Matrouh Governorate 5173...   
2  العلمين، مدينة العلمين،، RXC3+XPM, Al Alameen ...