Method 1: Use WEB SCRAPER from Chrome Store

Method 2: General Request

In [8]:
# Get first page

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Event list URL and headers
url = "https://www.ticketmaster.com/search?q=&sort=date&startDate=2024-10-24&endDate=2024-11-30"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
}

# Request page content and parse with BeautifulSoup
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')

# Lists for storing data
events = []
start_dates = []
end_dates = []
locations = []

# Find event elements in the page
event_elements = soup.find_all('li', class_='sc-1nyzlro-1')  # Update this based on actual event container class

# Extract details from each event element
for event in event_elements:
    # Event name
    event_name = event.find('span', class_='sc-fyofxi-5').text if event.find('span', class_='sc-fyofxi-5') else "N/A"
    
    # Event start date
    start_date = event.find('div', class_='sc-1evs0j0-0').text if event.find('div', class_='sc-1evs0j0-0') else "N/A"
    
    # Event end date
    end_date_element = event.find('span', class_='sc-1idcr5x-0 bihyed')
    if end_date_element:
        # Find the inner span that contains the detailed date
        detailed_span = end_date_element.find('span', class_='VisuallyHidden-sc-8buqks-0')
        if detailed_span:
            # Extract the date without the "Until" text
            end_date = detailed_span.find('span').text.strip() if detailed_span else "N/A"
        else:
            # Fallback to extracting the visible date part, if any
            visible_end_date = end_date_element.find('span', aria_hidden=True)
            end_date = visible_end_date.text.strip() if visible_end_date else "N/A"
    else:
        end_date = "N/A"

    
    # Location details
    location_elements = event.find_all('span', class_='sc-fyofxi-5')
    location = ", ".join([loc.text for loc in location_elements if loc.text]) if location_elements else "N/A"
    
    # Append to lists
    events.append(event_name)
    start_dates.append(start_date)
    end_dates.append(end_date)
    locations.append(location)

    # Pause to avoid rapid requests
    time.sleep(1)

# Create DataFrame and drop rows where all columns are N/A
df1 = pd.DataFrame({
    'Event Name': events,
    'Start Date': start_dates,
    'End Date': end_dates,
    'Location': locations
})

# Drop rows where all columns are NA
df1.dropna(how='all', inplace=True)

df1


Unnamed: 0,Event Name,Start Date,End Date,Location
0,BARNATO 2024 Sponsor Pass,Jan01,12/31/24,"BARNATO 2024 Sponsor Pass, Omaha, NE, Barnato"
1,BARNATO 2024 Sponsor Pass,Jan01,12/31/24,"BARNATO 2024 Sponsor Pass, Omaha, NE, Barnato"
2,,,,
3,Ticket for you and a +1 to ALL 2024 shows at t...,Jan03,12/31/24,Ticket for you and a +1 to ALL 2024 shows at t...
4,CPS GOLDEN TICKETS,Mar09,10/1/25,"CPS GOLDEN TICKETS, Eugene, OR, The Big Dirty"
5,VIP Passes to The Liberty,Jun12,6/30/25,"VIP Passes to The Liberty, Roswell, NM, The Li..."
6,2024-2025 First Bank Broadway Season - Tuesday...,Sep17,6/24/25,2024-2025 First Bank Broadway Season - Tuesday...
7,2024-2025 First Bank Broadway Season - Wednesd...,Sep18,6/25/25,2024-2025 First Bank Broadway Season - Wednesd...
8,Warren Civic Music Series 2024-2025,Sep18,5/1/25,"Warren Civic Music Series 2024-2025, Warren, O..."
9,2024-2025 First Bank Broadway Season - Thursda...,Sep19,6/26/25,2024-2025 First Bank Broadway Season - Thursda...


In [None]:
# requrie all pages

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# Event list URL and headers
base_url = "https://www.ticketmaster.com/search"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
}

# Lists for storing data
events = []
start_dates = []
end_dates = []
locations = []

# Pagination variables
page = 1
while True:
    # Construct URL with page parameter
    url = f"{base_url}?q=&sort=date&startDate=2024-10-24&endDate=2024-10-30&page={page}"
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find event elements on the current page
    event_elements = soup.find_all('li', class_='sc-1nyzlro-1')
    
    # If no events are found, break the loop
    if not event_elements:
        break
    
    # Extract details from each event element
    for event in event_elements:
        # Event name
        event_name = event.find('span', class_='sc-fyofxi-5').text if event.find('span', class_='sc-fyofxi-5') else "N/A"
        
        # Event start and end dates
        start_date = event.find('div', class_='sc-1evs0j0-0').text if event.find('div', class_='sc-1evs0j0-0') else "N/A"
   
        end_date_element = event.find('span', class_='sc-1idcr5x-0 bihyed')
        if end_date_element:
            # Find the inner span that contains the detailed date
            detailed_span = end_date_element.find('span', class_='VisuallyHidden-sc-8buqks-0')
            if detailed_span:
                # Extract the date without the "Until" text
                end_date = detailed_span.find('span').text.strip() if detailed_span else "N/A"
            else:
                # Fallback to extracting the visible date part, if any
                visible_end_date = end_date_element.find('span', aria_hidden=True)
                end_date = visible_end_date.text.strip() if visible_end_date else "N/A"
        else:
            end_date = "N/A"
        
        # Location details
        location_elements = event.find_all('span', class_='sc-fyofxi-5')
        location = ", ".join([loc.text for loc in location_elements]) if location_elements else "N/A"
        
        # Append to lists
        events.append(event_name)
        start_dates.append(start_date)
        end_dates.append(end_date)
        locations.append(location)

    # Pause to avoid rapid requests
    time.sleep(1)
    
    # Go to the next page
    page += 1

# Create DataFrame and save to CSV
df2 = pd.DataFrame({
    'Event Name': events,
    'Start Date': start_dates,
    'End Date': end_dates,
    'Location': locations
})
print(df2)


Method 3: Use SCRPAFLY

In [None]:
# Get first page

import json
import math
import os
import re
from typing import Dict, List
import urllib
import asyncio
from datetime import datetime
import pandas as pd
from dotenv import load_dotenv
from loguru import logger as log
from scrapfly import ScrapeApiResponse, ScrapeConfig, ScrapflyClient, ScrapflyScrapeError
from bs4 import BeautifulSoup

# Load the environment variables from .env file
load_dotenv()

# Accessing the API key from environment variables
scrapfly_api_key = os.getenv('SCRAPFLY_API_KEY') or "scp-live-27c7deed8b60458a95244a22f389179d"

# Using the API key to configure ScrapflyClient
SCRAPFLY = ScrapflyClient(key=scrapfly_api_key, max_concurrency=5)
BASE_CONFIG = {
    "asp": True,
    "country": "us",
}

def parse_search_page(result):
    """Extract event data from the Ticketmaster page HTML."""
    soup = BeautifulSoup(result.content, 'html.parser')
    event_elements = soup.find_all('li', class_='sc-1nyzlro-1')
    events = []

    for event in event_elements:
        event_name = event.find('span', class_='sc-fyofxi-5').text if event.find('span', class_='sc-fyofxi-5') else "N/A"
        start_date = event.find('div', class_='sc-1evs0j0-0').text if event.find('div', class_='sc-1evs0j0-0') else "N/A"
        end_date_element = event.find('span', class_='sc-1idcr5x-0 bihyed')
        if end_date_element:
            # Find the inner span that contains the detailed date
            detailed_span = end_date_element.find('span', class_='VisuallyHidden-sc-8buqks-0')
            if detailed_span:
                # Extract the date without the "Until" text
                end_date = detailed_span.find('span').text.strip() if detailed_span else "N/A"
            else:
                # Fallback to extracting the visible date part, if any
                visible_end_date = end_date_element.find('span', aria_hidden=True)
                end_date = visible_end_date.text.strip() if visible_end_date else "N/A"
        else:
            end_date = "N/A"
        location_elements = event.find_all('span', class_='sc-fyofxi-5')
        location = ', '.join([loc.text for loc in location_elements])

        events.append({
            "Event Name": event_name,
            "Start Date": start_date,
            "End Date": end_date,
            "Location": location
        })

    if not events:
        log.warning("No event data found on page.")

    return events

# Add page number to URL
def _add_url_parameter(url, **kwargs):
    """Add or replace GET parameters in a URL."""
    url_parts = list(urllib.parse.urlparse(url))
    query = dict(urllib.parse.parse_qsl(url_parts[4]))
    query.update(kwargs)
    url_parts[4] = urllib.parse.urlencode(query)
    return urllib.parse.urlunparse(url_parts)

async def scrape_search(url: str, max_results: int = 100) -> List[Dict]:
    log.info(f"Scraping Ticketmaster events: {url}")
    result_first_page = await SCRAPFLY.async_scrape(ScrapeConfig(url, **BASE_CONFIG))

    # Parse the first page
    events = parse_search_page(result_first_page)
    
    return events

async def run():
    BASE_CONFIG["cache"] = False
    url = "https://www.ticketmaster.com/search?sort=date&startDate=2024-11-06&endDate=2024-11-06"
    result_search = await scrape_search(url)
    
    # Convert results to DataFrame
    df = pd.DataFrame(result_search)
    print(df)

if __name__ == "__main__":
    asyncio.run(run())


In [None]:
# Load all pages and then scrape


# Initialize logging
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

def parse_event_page(result):
    """Parse event data from Ticketmaster page HTML."""
    # Assuming event data can be found by a similar structure as provided.
    events = re.findall(r'<div class="sc-fyofxi-0 MDVIb">(.*?)<a href="(.*?)" data-testid="event-list-link"', result, re.DOTALL)

    parsed_events = []
    for event in events:
        title_match = re.search(r'jXhNIe.*?>(.*?)</span>', event[0])
        location_match = re.search(r'jWLmQR.*?>(.*?)</span>', event[0])
        date_match = re.search(r'jifFsK.*?>(.*?)</span>', event[0])
        
        title = title_match.group(1) if title_match else "N/A"
        location = location_match.group(1) if location_match else "N/A"
        date = date_match.group(1) if date_match else "N/A"
        url = event[1]

        parsed_events.append({
            "title": title,
            "location": location,
            "date": date,
            "url": f"https://www.ticketmaster.com{url}"
        })

    return parsed_events

async def load_all_events(url: str, max_results: int = 1000) -> List[Dict]:
    log.info(f"scraping events: {url}")
    
    # Scrape the first page
    result_first_page = await SCRAPFLY.async_scrape(ScrapeConfig(url, **BASE_CONFIG))
    
    # Access content and parse the event data
    results = parse_event_page(result_first_page.content)
    
    # Keep clicking the "More Events" button to load all events
    loaded_events = len(results)
    page_num = 1

    while loaded_events < max_results:
        # Add an offset for each "More Events" page load
        next_url = _add_url_parameter(url, page=str(page_num))
        next_page = await SCRAPFLY.async_scrape(ScrapeConfig(next_url, **BASE_CONFIG))
        
        if isinstance(next_page, ScrapflyScrapeError):
            log.error(f"Failed to load page {page_num}: {next_page.message}")
            break

        # Access content and parse the event data
        new_events = parse_event_page(next_page.content)
        results.extend(new_events)

        # Update loaded count and page number
        loaded_events += len(new_events)
        page_num += 1

        log.info(f"Loaded {loaded_events} events so far")

        # Break if "More Events" button is gone or max results reached
        if not new_events or loaded_events >= max_results:
            break

        # Pause briefly to mimic human-like interaction
        time.sleep(1)

    return results


async def run_scraping():
    BASE_CONFIG["cache"] = False
    print("Starting Ticketmaster scrape...")

    # URL for events on November 8, 2024
    base_url = "https://www.ticketmaster.com/search?sort=date&startDate=2024-11-08&endDate=2024-11-08"

    # Load all events
    scraped_events = await load_all_events(base_url)

    # Save to a DataFrame and CSV
    df = pd.DataFrame(scraped_events)
    print(df)

    print("Scraping complete.")

# Run the scraping function
await run_scraping()


In [None]:
# Save to CSV if needed

df.to_csv('ticketmaster_events.csv', index=False)
print("Data saved to 'ticketmaster_events.csv'")