In [16]:
pip install beautifulsoup4 requests pandas

Collecting selenium
  Downloading selenium-4.37.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio<1.0,>=0.31.0 (from selenium)
  Downloading trio-0.31.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting outcome (from trio<1.0,>=0.31.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket<1.0,>=0.12.2->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.37.0-py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m60.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio-0.31.0-py3-none-any.whl (512 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m512.7/512.7 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trio_websocket-0.12.2-py3-none-any.whl (21 kB)
Downloadin

In [1]:
import time
from datetime import datetime, timezone

import pandas as pd
import requests
from bs4 import BeautifulSoup

def _parse_event_card(card):
    """Turn one Eventbrite result card (a BeautifulSoup tag) into a record dict.

    Any field whose element is missing from the card is returned as ``None``.
    """
    # Link & title
    anchor = card.select_one("a.event-card-link")
    # .get() instead of ["href"] so an anchor without href yields None, not KeyError.
    link = anchor.get("href") if anchor is not None else None
    heading = card.select_one("h3")
    title = heading.get_text(strip=True) if heading is not None else None

    # Date & time: the bold paragraph, or the horizontal-card variant.
    date_el = (
        card.select_one("p.Typography_body-md-bold__487rx")
        or card.select_one("p.event-card__clamp-line--one")
    )
    date_time = date_el.get_text(strip=True) if date_el is not None else None

    # Location: "online" in the anchor's data attribute wins outright.
    # Otherwise the visible body paragraph takes precedence, and the raw
    # attribute value is kept only when that paragraph is absent.
    location = anchor.get("data-event-location") if anchor is not None else None
    if location == "online":
        location = "Online"
    else:
        venue_el = card.select_one("p.Typography_body-md__487rx")
        if venue_el is not None:
            location = venue_el.get_text(strip=True)

    return {
        "Title": title,
        "Link": link,
        "Date & Time": date_time,
        "Location": location,
    }


def scrape_eventbrite_ai_berlin(pages=5, pause=1.0, timeout=10.0):
    """
    Scrape event listings for Berlin from Eventbrite by parsing result cards.

    Note: despite the function name, the listing URL used here is the Berlin
    *tech* category, so results are not restricted to AI events.

    Parameters
    ----------
    pages : int
        Number of result pages to request, starting at page 1.
    pause : float
        Seconds to sleep between consecutive page requests.
    timeout : float
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per card with columns "Title", "Link", "Date & Time" and
        "Location". The columns are present even when no events were found.

    Raises
    ------
    requests.RequestException
        If a request fails before any event has been collected. A failure on a
        later page is reported and scraping stops, keeping the events already
        gathered.
    """
    base_url = "https://www.eventbrite.com/d/germany--berlin/tech/"
    headers = {"User-Agent": "Mozilla/5.0"}
    columns = ["Title", "Link", "Date & Time", "Location"]
    events = []

    for page in range(1, pages + 1):
        url = f"{base_url}?page={page}"
        print(f"Fetching page {page}: {url}")
        try:
            # Without a timeout a stalled connection would block forever.
            resp = requests.get(url, headers=headers, timeout=timeout)
            resp.raise_for_status()
        except requests.RequestException as exc:
            if not events:
                raise
            # Do not throw away pages that were already scraped successfully.
            print(f"Request for page {page} failed ({exc}); "
                  f"keeping {len(events)} events collected so far.")
            break
        soup = BeautifulSoup(resp.text, "html.parser")

        # Select each card container
        cards = soup.select("li div.SearchResultPanelContentEventCard-module__card___Xno0V")
        if not cards:
            print("No cards found; check selector.")
            break

        events.extend(_parse_event_card(card) for card in cards)

        # No need to wait after the final page.
        if page < pages:
            time.sleep(pause)

    # Explicit columns keep the schema stable when `events` is empty, so callers
    # can reference "Title"/"Link" without hitting a KeyError.
    return pd.DataFrame(events, columns=columns)

def main():
    """Scrape up to 10 listing pages, clean the result and save it as a CSV.

    Rows without a title are dropped and rows sharing a link are de-duplicated.
    When nothing usable remains, a message is printed and no file is written.
    The output file name carries a UTC timestamp.
    """
    events = scrape_eventbrite_ai_berlin(pages=10, pause=1.0)

    # Check for emptiness first: an empty frame may have no columns at all,
    # in which case dropna(subset=["Title"]) would raise KeyError.
    if not events.empty:
        events = (
            events
            .dropna(subset=["Title"])
            .drop_duplicates(subset=["Link"])
        )
    if events.empty:
        print("No events found.")
        return

    print("Sample results:")
    print(events.head())

    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    filename = f"berlin_tech_events_{timestamp}.csv"
    events.to_csv(filename, index=False)
    print(f"💾 Saved {len(events)} events to {filename}")

# Entry point: run the scrape when this code executes as the main module.
if __name__ == "__main__":
    main()


Fetching page 1: https://www.eventbrite.com/d/germany--berlin/tech/?page=1
Fetching page 2: https://www.eventbrite.com/d/germany--berlin/tech/?page=2
Fetching page 3: https://www.eventbrite.com/d/germany--berlin/tech/?page=3
Fetching page 4: https://www.eventbrite.com/d/germany--berlin/tech/?page=4
Fetching page 5: https://www.eventbrite.com/d/germany--berlin/tech/?page=5
Fetching page 6: https://www.eventbrite.com/d/germany--berlin/tech/?page=6
Fetching page 7: https://www.eventbrite.com/d/germany--berlin/tech/?page=7
Fetching page 8: https://www.eventbrite.com/d/germany--berlin/tech/?page=8
Fetching page 9: https://www.eventbrite.com/d/germany--berlin/tech/?page=9
Fetching page 10: https://www.eventbrite.com/d/germany--berlin/tech/?page=10
Sample results:
                                               Title  \
0        I Missed Out on AI and NVIDIA, What’s Next?   
1  AI & Teens: The Conversation You Can't Afford ...   
2                      DM To Dollars 3-Day Challenge   
3  Berli

  timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
