<a href="https://colab.research.google.com/github/solosolve-ai/solosolve-ai-demo/blob/main/Apify_Scraping_Attempt_WOLT_GOOGLEMAPS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
pip install apify-client polars tqdm



In [18]:
import json
import os
import time
from getpass import getpass

import polars as pl
from apify_client import ApifyClient
from tqdm import tqdm

# Read the Apify API token from the environment rather than committing it to the
# notebook; fall back to an interactive prompt so the value never lands in source.
# SECURITY: a token literal used to be hard-coded on this line. Treat it as
# leaked and revoke it in the Apify console.
APIFY_TOKEN = os.environ.get("APIFY_TOKEN") or getpass("Apify API token: ")

# List of city names as they appear in Wolt Israel.
# NOTE(review): the output recorded in this notebook shows the Wolt actor
# rejecting these values -- its `city` input must equal one of a fixed list of
# "latitude, longitude" strings. Confirm the accepted values against the
# actor's input schema before re-running.
wolt_cities = [
    "Tel Aviv-Yafo",
    "Jerusalem",
    "Haifa",
    "Rishon LeZion",
    "Petah Tikva",
    "Netanya",
    "Afula & Emek Yizrael area",
    # Add more as needed
]

# Shared Apify client and the accumulator for every Wolt record scraped.
client = ApifyClient(APIFY_TOKEN)
all_wolt_items = []

# --- Stage 1: pull restaurant listings from the Wolt actor, one run per city ---
start_total = time.time()
print("=== Scraping Wolt restaurants from Israeli cities ===")
for city_name in tqdm(wolt_cities, desc="Cities"):
    city_started = time.time()
    try:
        run_info = client.actor('lucen_data/wolt-restaurants-scraper').call(
            run_input={"city": city_name}
        )
        city_records = []
        for record in client.dataset(run_info['defaultDatasetId']).iterate_items():
            # Tag each record with the city it was requested for.
            record['city'] = city_name
            city_records.append(record)
        all_wolt_items += city_records
        print(f"  {city_name}: {len(city_records)} restaurants scraped in {time.time()-city_started:.1f}s")
    except Exception as err:
        # Best effort: report the failure and move on to the next city.
        print(f"  [ERROR] Failed to scrape {city_name}: {err}")
    time.sleep(2)  # Respectful delay

# Collect everything into a single frame and report the total row count.
wolt_df = pl.DataFrame(all_wolt_items)
print(f"Total Wolt restaurants scraped: {wolt_df.height}")

# Build one "<name>, <city>" Google Maps query per Wolt row that has a name;
# a missing or empty city is rendered as an empty string.
search_queries = [
    f"{record.get('name')}, {record.get('city') or ''}"
    for record in wolt_df.iter_rows(named=True)
    if record.get('name')
]

# --- Stage 2: look each Wolt restaurant up on Google Maps ---
# Fills three accumulators:
#   reviews_data - one row per matched place (name, address, rating, reviews)
#   gmaps_nodes  - vis.js nodes (one Wolt node and one GMaps node per match)
#   gmaps_edges  - vis.js edges linking each Wolt node to its GMaps match
print("\n=== Scraping Google Maps for reviews ===")
reviews_data = []
gmaps_nodes = []
gmaps_edges = []
start_gmaps = time.time()
for idx, query in enumerate(tqdm(search_queries, desc="Restaurants")):
    # One actor run per query: only the top place, with at most 10 reviews.
    gmaps_input = {
        "searchStringsArray": [query],
        "maxCrawledPlacesPerSearch": 1,
        "includeReviews": True,
        "maxReviews": 10,
    }
    try:
        gmaps_run = client.actor('compass/crawler-google-places').call(run_input=gmaps_input)
        gmaps_dataset_id = gmaps_run['defaultDatasetId']
        time.sleep(2)  # Short pause before reading the run's dataset
        for place in client.dataset(gmaps_dataset_id).iterate_items():
            title = place.get('title')
            reviews_data.append({
                "wolt_name": query,
                "gmaps_name": title,
                "gmaps_address": place.get('address'),
                "gmaps_rating": place.get('rating'),
                "gmaps_reviews": place.get('reviews'),
            })
            # For vis.js graph: node for Wolt, node for GMaps, edge
            wolt_node_id = f"wolt_{idx}"
            gmaps_node_id = f"gmaps_{idx}"
            gmaps_nodes.extend([
                {"id": wolt_node_id, "label": query, "group": "Wolt"},
                # `or` (not a .get default) so a title that is present but
                # null/empty still gets a readable label in the graph.
                {"id": gmaps_node_id, "label": title or 'Unknown', "group": "GMaps"},
            ])
            gmaps_edges.append({"from": wolt_node_id, "to": gmaps_node_id, "label": "match"})
    except Exception as e:
        # Best effort: a failed lookup is reported and the loop continues.
        print(f"  [ERROR] Failed to scrape Google Maps for '{query}': {e}")

print(f"Google Maps scraping finished in {time.time()-start_gmaps:.1f}s")

# Save combined results.
# Polars' CSV writer rejects nested columns, and `gmaps_reviews` carries the
# nested review payload returned by the actor, so that field is stored as a
# JSON string (missing reviews stay null). `infer_schema_length=None` makes
# Polars look at every row when inferring column types.
results_df = pl.DataFrame(
    [
        {
            **row,
            "gmaps_reviews": (
                None
                if row.get("gmaps_reviews") is None
                else json.dumps(row["gmaps_reviews"], ensure_ascii=False)
            ),
        }
        for row in reviews_data
    ],
    infer_schema_length=None,
)
results_df.write_csv('wolt_gmaps_israel_combined.csv')
print("Combined data saved to wolt_gmaps_israel_combined.csv")

# Scrape company-level complaints: a single actor run over several Wolt
# company search strings, keeping up to 3 places per search and 30 reviews each.
print("\n=== Scraping Google Maps for Wolt company complaints ===")
company_search = {
    "searchStringsArray": ["Wolt תל אביב", "Wolt ישראל", "וולט ישראל", "Wolt HQ"],
    "maxCrawledPlacesPerSearch": 3,
    "includeReviews": True,
    "maxReviews": 30,
}
try:
    company_run = client.actor('compass/crawler-google-places').call(run_input=company_search)
    company_dataset_id = company_run['defaultDatasetId']
    company_reviews = []
    for place in tqdm(client.dataset(company_dataset_id).iterate_items(), desc="Company places"):
        company_reviews.append({
            "company_name": place.get('title'),
            "address": place.get('address'),
            "rating": place.get('rating'),
            "reviews": place.get('reviews'),
        })
    # Polars' CSV writer rejects nested columns, so the nested `reviews`
    # payload is written as a JSON string (missing reviews stay null).
    # `company_reviews` itself keeps the original nested values.
    company_csv_rows = [
        {
            **row,
            "reviews": (
                None
                if row["reviews"] is None
                else json.dumps(row["reviews"], ensure_ascii=False)
            ),
        }
        for row in company_reviews
    ]
    pl.DataFrame(company_csv_rows, infer_schema_length=None).write_csv(
        'wolt_company_israel_complaints.csv'
    )
    print("Company complaints saved to wolt_company_israel_complaints.csv")
except Exception as e:
    # Best effort: any failure in this stage is reported, not raised.
    print(f"  [ERROR] Failed to scrape company complaints: {e}")

print(f"\nTotal elapsed time: {time.time()-start_total:.1f}s")

# === Export vis.js graph JSON ===
print("Exporting vis.js network graph data...")
# Collapse nodes sharing an id: the first occurrence fixes the position,
# the last occurrence supplies the node that is kept.
nodes_by_id = {}
for node in gmaps_nodes:
    nodes_by_id[node['id']] = node
unique_nodes = nodes_by_id.values()
visjs_graph = {"nodes": list(unique_nodes), "edges": gmaps_edges}
# Serialise first, then write the text in one go (UTF-8, non-ASCII kept as-is).
graph_text = json.dumps(visjs_graph, ensure_ascii=False, indent=2)
with open("wolt_gmaps_network_graph.json", "w", encoding="utf-8") as f:
    f.write(graph_text)
print("vis.js network graph exported to wolt_gmaps_network_graph.json")

=== Scraping Wolt restaurants from Israeli cities ===


Cities:   0%|          | 0/7 [00:00<?, ?it/s]

  [ERROR] Failed to scrape Tel Aviv-Yafo: Input is not valid: Field input.city must be equal to one of the allowed values: "41.328142965671304, 19.818446319183693", "47.05756359468393, 15.434042699040106", "48.30510780946534, 14.289608991444084", "47.807726024059065, 13.032928781987948", "48.20865808554825, 16.372342303963308", "40.373141313556964, 49.84575754727883", "40.68003738748368, 46.35883397298534", "40.453126440554456, 49.74780999283473", "38.75474435497512, 48.85318563877422", "40.5892563100799, 49.6757848367605", "35.039149731761995, 33.98291086074673", "34.68349817248807, 32.60735437038002", "34.918621964444995, 33.633561157247186", "34.67246098511423, 33.04275499381072", "35.160289212970035, 33.36415838333676", "34.77483987349535, 32.422758701986", "49.955238281854264, 14.048249076696663", "50.1898919763608, 14.667603176998796", "49.1951525622682, 16.6078519821167", "50.68660679249152, 14.53698662133337", "50.07948676687204, 12.370244958982084", "50.46055354116305, 13.4183

Cities:  14%|█▍        | 1/7 [00:02<00:13,  2.20s/it]

  [ERROR] Failed to scrape Jerusalem: Input is not valid: Field input.city must be equal to one of the allowed values: "41.328142965671304, 19.818446319183693", "47.05756359468393, 15.434042699040106", "48.30510780946534, 14.289608991444084", "47.807726024059065, 13.032928781987948", "48.20865808554825, 16.372342303963308", "40.373141313556964, 49.84575754727883", "40.68003738748368, 46.35883397298534", "40.453126440554456, 49.74780999283473", "38.75474435497512, 48.85318563877422", "40.5892563100799, 49.6757848367605", "35.039149731761995, 33.98291086074673", "34.68349817248807, 32.60735437038002", "34.918621964444995, 33.633561157247186", "34.67246098511423, 33.04275499381072", "35.160289212970035, 33.36415838333676", "34.77483987349535, 32.422758701986", "49.955238281854264, 14.048249076696663", "50.1898919763608, 14.667603176998796", "49.1951525622682, 16.6078519821167", "50.68660679249152, 14.53698662133337", "50.07948676687204, 12.370244958982084", "50.46055354116305, 13.41836070

Cities:  29%|██▊       | 2/7 [00:04<00:10,  2.14s/it]

  [ERROR] Failed to scrape Haifa: Input is not valid: Field input.city must be equal to one of the allowed values: "41.328142965671304, 19.818446319183693", "47.05756359468393, 15.434042699040106", "48.30510780946534, 14.289608991444084", "47.807726024059065, 13.032928781987948", "48.20865808554825, 16.372342303963308", "40.373141313556964, 49.84575754727883", "40.68003738748368, 46.35883397298534", "40.453126440554456, 49.74780999283473", "38.75474435497512, 48.85318563877422", "40.5892563100799, 49.6757848367605", "35.039149731761995, 33.98291086074673", "34.68349817248807, 32.60735437038002", "34.918621964444995, 33.633561157247186", "34.67246098511423, 33.04275499381072", "35.160289212970035, 33.36415838333676", "34.77483987349535, 32.422758701986", "49.955238281854264, 14.048249076696663", "50.1898919763608, 14.667603176998796", "49.1951525622682, 16.6078519821167", "50.68660679249152, 14.53698662133337", "50.07948676687204, 12.370244958982084", "50.46055354116305, 13.418360707770

Cities:  43%|████▎     | 3/7 [00:06<00:08,  2.11s/it]

  [ERROR] Failed to scrape Rishon LeZion: Input is not valid: Field input.city must be equal to one of the allowed values: "41.328142965671304, 19.818446319183693", "47.05756359468393, 15.434042699040106", "48.30510780946534, 14.289608991444084", "47.807726024059065, 13.032928781987948", "48.20865808554825, 16.372342303963308", "40.373141313556964, 49.84575754727883", "40.68003738748368, 46.35883397298534", "40.453126440554456, 49.74780999283473", "38.75474435497512, 48.85318563877422", "40.5892563100799, 49.6757848367605", "35.039149731761995, 33.98291086074673", "34.68349817248807, 32.60735437038002", "34.918621964444995, 33.633561157247186", "34.67246098511423, 33.04275499381072", "35.160289212970035, 33.36415838333676", "34.77483987349535, 32.422758701986", "49.955238281854264, 14.048249076696663", "50.1898919763608, 14.667603176998796", "49.1951525622682, 16.6078519821167", "50.68660679249152, 14.53698662133337", "50.07948676687204, 12.370244958982084", "50.46055354116305, 13.4183

Cities:  43%|████▎     | 3/7 [00:08<00:10,  2.74s/it]


KeyboardInterrupt: 