In [2]:
## Importing Dependencies
 
import pandas as pd
import requests
import json
import sys
from pymongo import MongoClient
from pprint import pprint
from sqlalchemy import create_engine
sys.path.append('/Users/tokar/UNC_DA/MY_WORK/Personal_Code')
from my_api_config import ticket_master_key
import time

In [2]:
# Function to remove unnecessary keys from the JSON responses
def remove_keys(response):
    if '_embedded' in response:  # Check if '_embedded' key exists
        # Defining keys to remove in each of their various sections
        keys_to_remove_events = ["test", "locale", "images", "sales", "pleaseNote","info","outlets", "promoter", "promoters", "products", "seatmap", "accessibility", "ticketLimit", "ageRestrictions", "ticketing", "_links"]
        keys_to_remove_dates = ["timezone", "spanMultipleDays", "status"]
        keys_to_remove_dates_start = ["dateTBD", "dateTBA", "timeTBA", "noSpecificTime"]
        keys_to_remove_embedded_venues = ["test", "url", "locale", "aliases", "images", "dmas", "generalInfo", "upcomingEvents", "_links", "social", "boxOfficeInfo", "parkingDetail", "accessibleSeatingDetail"]
        keys_to_remove_embedded_attractions = ["test", "locale", "externalLinks", "aliases", "images", "upcomingEvents", "_links"]
        keys_to_remove_top = ["_links", "page"]

        # Remove top-level keys
        for key in keys_to_remove_top:
            if key in response:
                del response[key]

        # Remove keys from each event
        for event in response.get("_embedded", {}).get("events", []):
            # Remove keys from "events" section
            for key in keys_to_remove_events:
                if key in event:
                    del event[key]
            # Remove keys from "dates" section
            if 'dates' in event:
                for key in keys_to_remove_dates:
                    if key in event['dates']:
                        del event['dates'][key]
                # Remove keys from "start" section within "dates"
                if 'start' in event['dates']:
                    for key in keys_to_remove_dates_start:
                        if key in event['dates']['start']:
                            del event['dates']['start'][key]
            # Remove keys from "venues" section
            if '_embedded' in event:
                for venue in event['_embedded'].get('venues', []):
                    for key in keys_to_remove_embedded_venues:
                        if key in venue:
                            del venue[key]
            # Remove keys from "attractions" section
            if '_embedded' in event and 'attractions' in event['_embedded']:
                for attraction in event['_embedded']['attractions']:
                    for key in keys_to_remove_embedded_attractions:
                        if key in attraction:
                            del attraction[key]

In [3]:

# Loop for making 1000 API Calls

# Establishing MongoDB Connection
mongo = MongoClient("mongodb://localhost:27017/")
db = mongo["ticket_master_db"]
events_collection = db["events"]

# Iterating through 1000 API calls
for page_number in range(0, 1000):  # Adjust the range as needed
    
    # Establishing the URL of the API call
    url_base = "https://app.ticketmaster.com/discovery/v2/events.json?"
    url_country = "&countryCode=US"
    url_source = "&source=ticketmaster"
    url_segment = "&segmentName=Music"
    url_size = "&size=200"
    url_date_start = "&startDateTime=2024-04-05T00:00:00Z"
    url_date_end = "&endDateTime=2024-10-01T00:00:00Z"
    url_sort = "&sort=name,date,desc"
    url_page = "&page=" + str(page_number)
    query_url = url_base + "apikey=" + ticket_master_key + url_source + url_date_start + url_date_end + url_country + url_segment + url_size + url_page + url_sort
    
    # Storing API call response into JSON
    response = requests.get(query_url).json()
    
    # Check if '_embedded' key exists
    if '_embedded' in response:
        # Running function to remove unnecessary fields
        remove_keys(response)
        
        # Extract the events array if present
        events = response["_embedded"].get("events", [])
        
        # Insert each event into the MongoDB collection
        for event in events:
            events_collection.insert_one(event)
    
    time.sleep(1)  # Add a delay if needed

In [4]:
client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
events_collection = db["events"]

# Query the data you want to export
data = list(events_collection.find({}))

# Convert ObjectId to string
for item in data:
    item["_id"] = str(item["_id"])

# Serialize the data into JSON format
ticketmaster_data = json.dumps(data, indent=4)

# Write the JSON data to a file
with open("ticket_master.json", "w") as file:
    file.write(ticketmaster_data)

print("Data exported to ticket_master.json")

Data exported to ticket_master.json


In [3]:
# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
events_collection = db["events"]

# Create a new collection for attractions
attractions_collection = db["attractions"]

if "attractions" in db.list_collection_names():
    db.drop_collection("attractions")
    print("Existing 'attractions' collection dropped.")

# Iterate over each document in the events collection
for event in events_collection.find({}):
    # Check if "_embedded" and "attractions" keys exist
    if "_embedded" in event and "attractions" in event["_embedded"]:
        
        for attraction in event["_embedded"]["attractions"]:
            # Extract relevant information
            all_attractions = [attraction["name"] for attraction in event["_embedded"]["attractions"] if attraction["name"] != event["name"]]
            venue_name = event["_embedded"]["venues"][0]["name"] if event["_embedded"]["venues"] else None
            venue_id = event["_embedded"]["venues"][0]["id"] if event["_embedded"]["venues"] else None
            venue_zipcode = event["_embedded"]["venues"][0]["postalCode"] if event["_embedded"]["venues"] else None
            venue_city = event["_embedded"]["venues"][0]["city"]["name"] if event["_embedded"]["venues"] else None
            venue_state = event["_embedded"]["venues"][0]["state"]["name"] if event["_embedded"]["venues"] else None
            venue_address = event["_embedded"]["venues"][0]["address"]["line1"] if event["_embedded"]["venues"] else None
            venue_zipcode = event["_embedded"]["venues"][0]["postalCode"] if event["_embedded"]["venues"] else None
            venue_country = event["_embedded"]["venues"][0]["country"]["name"] if event["_embedded"]["venues"] else None
            venue_location = event["_embedded"]["venues"][0]["location"] if event["_embedded"]["venues"] else None
            # venue_longitude = event["_embedded"]["venues"][0]["location"]['longitude'] if event["_embedded"]["venues"] else None
            # venue_latitude = event["_embedded"]["venues"][0]["location"]['latitude'] if event["_embedded"]["venues"] else None
            
            venue_longitude = float(venue_location['longitude']) if venue_location and 'longitude' in venue_location else None
            venue_latitude = float(venue_location['latitude']) if venue_location and 'latitude' in venue_location else None
            
            price_ranges = event["priceRanges"] if "priceRanges" in event else []
            max_price = event["priceRanges"][0]["max"] if "priceRanges" in event and "max" in event["priceRanges"][0] else None
            min_price = event["priceRanges"][0]["min"] if "priceRanges" in event and "min" in event["priceRanges"][0] else None
            
            start_date = event["dates"]["start"]["localDate"] if "dates" in event and "start" in event["dates"] else None
            start_time = event["dates"]["start"].get("localTime", None) if "dates" in event and "start" in event["dates"] else None
            
            attraction_segment = event["_embedded"]["attractions"][0]["classifications"][0]["segment"]["name"] if event["_embedded"]["attractions"] else None
            attraction_segment_id = event["_embedded"]["attractions"][0]["classifications"][0]["segment"]["id"] if event["_embedded"]["attractions"] else None
            attraction_genre = event["_embedded"]["attractions"][0]["classifications"][0]["genre"]["name"] if event["_embedded"]["attractions"] else None
            attraction_subGenre = event["_embedded"]["attractions"][0]["classifications"][0]["subGenre"]["name"] if event["_embedded"]["attractions"] else None

            # Create attraction document
            attraction_info = {
                "attraction_name": attraction["name"],
                "full_lineup": all_attractions,
                "segment": attraction_segment,
                "segment_id": attraction_segment_id,
                "genre": attraction_genre,
                "sub_genre": attraction_subGenre,
                "attraction_url": attraction["url"],
                "attraction_id": attraction["id"],
                "event_name": event["name"],
                "event_url": event["url"],
                "event_id": event["id"],
                "venue_name": venue_name,
                "venue_id": venue_id,
                "venue_city": venue_city,
                "venue_state": venue_state,
                "venue_country": venue_country,
                "venue_longitude": venue_longitude,
                "venue_latitude": venue_latitude,
                #"price_ranges": price_ranges,
                "max_price": max_price,
                "min_price": min_price,
                "start_date": start_date,
                "start_time": start_time
                
            }

            # Insert the attraction document into the new collection
            attractions_collection.insert_one(attraction_info)

print("Attractions data copied to the new collection.")


Existing 'attractions' collection dropped.
Attractions data copied to the new collection.


In [5]:
## This code creates a JSON file of the attractions collection

client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
attractions_collection = db["attractions"]

# Query the data you want to export
data = list(attractions_collection.find({}))

# Convert ObjectId to string
for item in data:
    item["_id"] = str(item["_id"])

# Serialize the data into JSON format
ticketmaster_attractions_data = json.dumps(data, indent=4)

# Write the JSON data to a file
with open("ticket_master_attractions_data.json", "w") as file:
    file.write(ticketmaster_attractions_data)

print("ticket_master_attractions_data.json")

ticket_master_attractions_data.json


In [8]:
## This code creates a MongoDB Collection for the attractions database

client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
attractions_collection_copy = db["attractions_copy"]

# Insert JSON data into the collection
with open("ticket_master_attractions_data.json", "r") as file:
    json_data = json.load(file)

# Insert JSON data into the collection
attractions_collection_copy.insert_many(json_data)


InsertManyResult(['660f39f0700d7cacc3f8b5d3', '660f39f0700d7cacc3f8b5d4', '660f39f0700d7cacc3f8b5d5', '660f39f0700d7cacc3f8b5d6', '660f39f0700d7cacc3f8b5d7', '660f39f0700d7cacc3f8b5d8', '660f39f0700d7cacc3f8b5d9', '660f39f0700d7cacc3f8b5da', '660f39f0700d7cacc3f8b5db', '660f39f0700d7cacc3f8b5dc', '660f39f0700d7cacc3f8b5dd', '660f39f0700d7cacc3f8b5de', '660f39f0700d7cacc3f8b5df', '660f39f0700d7cacc3f8b5e0', '660f39f0700d7cacc3f8b5e1', '660f39f0700d7cacc3f8b5e2', '660f39f0700d7cacc3f8b5e3', '660f39f0700d7cacc3f8b5e4', '660f39f0700d7cacc3f8b5e5', '660f39f0700d7cacc3f8b5e6', '660f39f0700d7cacc3f8b5e7', '660f39f0700d7cacc3f8b5e8', '660f39f0700d7cacc3f8b5e9', '660f39f0700d7cacc3f8b5ea', '660f39f0700d7cacc3f8b5eb', '660f39f0700d7cacc3f8b5ec', '660f39f0700d7cacc3f8b5ed', '660f39f0700d7cacc3f8b5ee', '660f39f0700d7cacc3f8b5ef', '660f39f0700d7cacc3f8b5f0', '660f39f0700d7cacc3f8b5f1', '660f39f0700d7cacc3f8b5f2', '660f39f0700d7cacc3f8b5f3', '660f39f0700d7cacc3f8b5f4', '660f39f0700d7cacc3f8b5f5', '6

In [28]:

client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
tour = db['attractions_copy']

if "tour_run" in db.list_collection_names():
    db.drop_collection("tour_run")
    print("Existing 'attractions' collection dropped.")

pipeline = [
    {
        '$group': {
            '_id': {'attraction_name': '$attraction_name', 'start_date': '$start_date'},
            'venues': {
                '$addToSet': {
                    'name': '$venue_name',
                    'city': '$venue_city',
                    'state': '$venue_state',
                    'country': '$venue_country',
                    'longitude': '$venue_longitude',
                    'latitude': '$venue_latitude'
                }
            }
        }
    },
    {
        '$group': {
            '_id': '$_id.attraction_name',
            'dates': {
                '$push': {
                    'start_date': '$_id.start_date',
                    'venues': '$venues'
                }
            }
        }
    },
    {
        '$project': {
            '_id': 0,
            'attraction_name': '$_id',
            'dates': 1
        }
    },
    {
        '$out': 'tour_run'  # Specify the new collection name
    }
]


# Execute the aggregation pipeline
tour.aggregate(pipeline)


db["tour_run"].find().sort("attraction_name", 1)

Existing 'attractions' collection dropped.


<pymongo.cursor.Cursor at 0x199b6c77040>