In [2]:
# 1 Importing Dependencies
 
import json
import os
import sys
import time
from pprint import pprint

import pandas as pd
import requests
from pymongo import MongoClient
from sqlalchemy import create_engine

sys.path.append('/Users/tokar/UNC_DA/MY_WORK/Personal_Code')
from my_api_config import ticket_master_key

In [None]:
# 2 Function to remove unnecessary keys from the JSON responses
def remove_keys(response):
    """Strip fields that are not needed from an events search response, in place.

    Nothing is touched when ``response`` has no ``'_embedded'`` key.  Otherwise
    the keys listed below are deleted from the top level, from every event,
    from each event's ``dates`` and ``dates['start']`` sections, and from every
    venue and attraction embedded in an event.

    Args:
        response: Decoded JSON body of the API call (a dict).

    Returns:
        None.  ``response`` itself is modified.
    """
    if '_embedded' not in response:
        return

    # Unwanted keys, grouped by the part of the payload they belong to.
    top_level_drop = ("_links", "page")
    event_drop = (
        "test", "locale", "images", "sales", "pleaseNote", "info", "outlets",
        "promoter", "promoters", "products", "seatmap", "accessibility",
        "ticketLimit", "ageRestrictions", "ticketing", "_links",
    )
    dates_drop = ("timezone", "spanMultipleDays", "status")
    dates_start_drop = ("dateTBD", "dateTBA", "timeTBA", "noSpecificTime")
    venue_drop = (
        "test", "url", "locale", "aliases", "images", "dmas", "generalInfo",
        "upcomingEvents", "_links", "social", "boxOfficeInfo", "parkingDetail",
        "accessibleSeatingDetail",
    )
    attraction_drop = (
        "test", "locale", "externalLinks", "aliases", "images",
        "upcomingEvents", "_links",
    )

    def discard(section, unwanted):
        # Delete every unwanted key that is actually present in this section.
        for name in unwanted:
            if name in section:
                del section[name]

    discard(response, top_level_drop)

    for event in response.get("_embedded", {}).get("events", []):
        discard(event, event_drop)

        # "dates" and its nested "start" section
        if 'dates' in event:
            dates = event['dates']
            discard(dates, dates_drop)
            if 'start' in dates:
                discard(dates['start'], dates_start_drop)

        # Venues first, then attractions, both under the event's own "_embedded"
        if '_embedded' in event:
            nested = event['_embedded']
            for venue in nested.get('venues', []):
                discard(venue, venue_drop)
            if 'attractions' in nested:
                for act in nested['attractions']:
                    discard(act, attraction_drop)

In [None]:
# 3 Loop for making API Calls
#
# Requests up to five pages of music events for every market ID in
# range(0, 126), trims each response with remove_keys() and stores the events
# in the "events" collection of the local "ticket_master_db" database.

# Establishing MongoDB Connection
mongo = MongoClient("mongodb://localhost:27017/")
db = mongo["ticket_master_db"]
events_collection = db["events"]

# Uncomment to start from an empty collection (each run otherwise appends):
# if "events" in db.list_collection_names():
#     db.drop_collection("events")
#     print("Existing 'events' collection dropped.")

# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT_SECONDS = 30
# Pause before each call so the loop does not hammer the API.
# NOTE(review): 0.25 s assumes a limit of a few requests per second --
# confirm against the quota of the API key in use.
REQUEST_PAUSE_SECONDS = 0.25

# Query-string pieces that are the same for every request
url_base = "https://app.ticketmaster.com/discovery/v2/events.json?"
url_source = "&source=ticketmaster"
url_date_start = "&startDateTime=2024-06-22T00:00:00Z"
url_date_end = "&endDateTime=2024-10-01T00:00:00Z"
url_size = "&size=200"
url_sort = "&sort=date,asc"
url_country = "&countryCode=US"
url_segment = "&segmentName=Music"

# Iterating through API calls
for marketID in range(0, 126):
    url_market = "&marketId=" + str(marketID)

    for page_number in range(0, 5):
        url_page = "&page=" + str(page_number)

        # Establishing the URL of the API call
        query_url = (
            url_base + "apikey=" + ticket_master_key + url_source + "&locale=*"
            + url_market + url_date_start + url_date_end + url_size + url_page
            + url_sort + url_country + url_segment
        )

        time.sleep(REQUEST_PAUSE_SECONDS)

        try:
            http_response = requests.get(query_url, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as error:
            # Only the exception type is shown: the message can contain the
            # request URL, which would put the API key into the cell output.
            print(f"Request failed for market {marketID}, page {page_number}: {type(error).__name__}")
            continue

        # A non-200 reply (for example a throttled call) carries no events;
        # report it instead of silently treating it as an empty page.
        if http_response.status_code != 200:
            print(f"HTTP {http_response.status_code} for market {marketID}, page {page_number}; page skipped.")
            continue

        # Storing API call response into JSON
        try:
            response = http_response.json()
        except ValueError:
            print(f"Non-JSON response for market {marketID}, page {page_number}; page skipped.")
            continue

        # A successful reply without '_embedded' has no events, so the result
        # set of this market is exhausted and later pages need not be fetched.
        if '_embedded' not in response:
            break

        # Running function to remove unnecessary fields
        remove_keys(response)

        # Extract the events array if present
        events = response["_embedded"].get("events", [])

        # One bulk write per page; insert_many rejects an empty list, hence the guard
        if events:
            events_collection.insert_many(events)

In [None]:
# 4 This segment of code removes any duplicates in the events collection
#
# The first document seen for each event "id" is kept; every later document
# with the same "id" is deleted.

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
collection = db["events"]

# Event IDs seen so far.  A set keeps each membership test constant-time,
# whereas a list would be rescanned for every document.
unique_ids = set()

# Only "id" (plus MongoDB's own "_id") is needed to detect duplicates, so the
# rest of each document is not transferred.
for doc in collection.find({}, {"id": 1}):
    # Get the ID of the current document
    current_id = doc["id"]
    if current_id in unique_ids:
        # If the ID is a duplicate, delete the document
        collection.delete_one({"_id": doc["_id"]})
        print(f"Deleted duplicate document with ID: {current_id}")
    else:
        # First occurrence: remember the ID and keep the document
        unique_ids.add(current_id)

print("Duplicate removal process completed.")

In [None]:
# 5 Creates the Attractions collection
#
# Writes one flat document per (event, attraction) pair into "attractions",
# combining the attraction's own fields with venue, price, date and
# classification details of the event.  Events or attractions that lack a
# required field are reported and skipped.

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
events_collection = db["events"]

# Create a new collection for attractions
attractions_collection = db["attractions"]

if "attractions" in db.list_collection_names():
    db.drop_collection("attractions")
    print("Existing 'attractions' collection dropped.")

# Errors that indicate a required field is missing or malformed.
EXTRACTION_ERRORS = (KeyError, IndexError, TypeError, ValueError, AttributeError)


def _event_fields(event):
    """Collect the values that are identical for every attraction of ``event``.

    Only fields that end up in the stored document are read, so an event is
    not rejected because of a field that is never saved.

    Args:
        event: An event document from the "events" collection with a non-empty
            ``_embedded.attractions`` list.

    Returns:
        dict with the shared lineup, classification, event, venue, price and
        date values.

    Raises:
        One of EXTRACTION_ERRORS when a required field is absent or malformed.
    """
    attractions = event["_embedded"]["attractions"]
    venue = event["_embedded"]["venues"][0]
    venue_location = venue["location"]

    # Price and start information are optional as a whole section
    first_price = event["priceRanges"][0] if "priceRanges" in event else {}
    has_start = "dates" in event and "start" in event["dates"]
    start = event["dates"]["start"] if has_start else None

    # NOTE(review): classification is always read from the FIRST attraction
    # and applied to every attraction of the event -- confirm this is intended.
    classification = attractions[0]["classifications"][0]

    return {
        # Every act on the bill whose name differs from the event name
        "full_lineup": [act["name"] for act in attractions if act["name"] != event["name"]],
        "segment": classification["segment"]["name"],
        "segment_id": classification["segment"]["id"],
        "genre": classification["genre"]["name"],
        "sub_genre": classification["subGenre"]["name"],
        "event_name": event["name"],
        "event_url": event["url"],
        "event_id": event["id"],
        "venue_name": venue["name"],
        "venue_id": venue["id"],
        "venue_city": venue["city"]["name"],
        "venue_state": venue["state"]["name"],
        "venue_country": venue["country"]["name"],
        "venue_longitude": float(venue_location["longitude"]) if venue_location and "longitude" in venue_location else None,
        "venue_latitude": float(venue_location["latitude"]) if venue_location and "latitude" in venue_location else None,
        "max_price": first_price["max"] if "max" in first_price else None,
        "min_price": first_price["min"] if "min" in first_price else None,
        "start_date": start["localDate"] if has_start else None,
        "start_time": start.get("localTime", None) if has_start else None,
    }


def _attraction_document(attraction, shared):
    """Build the document stored for one attraction of an event.

    Args:
        attraction: One entry of the event's ``_embedded.attractions`` list.
        shared: The dict returned by ``_event_fields`` for the same event.

    Returns:
        dict ready to be inserted into the "attractions" collection.

    Raises:
        One of EXTRACTION_ERRORS when the attraction lacks name, url or id.
    """
    return {
        "attraction_name": attraction["name"],
        # Copied so that documents of the same event do not share one list
        "full_lineup": list(shared["full_lineup"]),
        "segment": shared["segment"],
        "segment_id": shared["segment_id"],
        "genre": shared["genre"],
        "sub_genre": shared["sub_genre"],
        "attraction_url": attraction["url"],
        "attraction_id": attraction["id"],
        "event_name": shared["event_name"],
        "event_url": shared["event_url"],
        "event_id": shared["event_id"],
        "venue_name": shared["venue_name"],
        "venue_id": shared["venue_id"],
        "venue_city": shared["venue_city"],
        "venue_state": shared["venue_state"],
        "venue_country": shared["venue_country"],
        "venue_longitude": shared["venue_longitude"],
        "venue_latitude": shared["venue_latitude"],
        "max_price": shared["max_price"],
        "min_price": shared["min_price"],
        "start_date": shared["start_date"],
        "start_time": shared["start_time"],
    }


# Iterate over each document in the events collection
for event in events_collection.find({}):
    # Only events that list at least one attraction produce documents
    if "_embedded" not in event or "attractions" not in event["_embedded"]:
        continue
    if not event["_embedded"]["attractions"]:
        continue

    try:
        shared_fields = _event_fields(event)
    except EXTRACTION_ERRORS as e:
        print(f"Skipping event {event.get('id')}: {e!r}")
        continue

    for attraction in event["_embedded"]["attractions"]:
        try:
            attraction_info = _attraction_document(attraction, shared_fields)
        except EXTRACTION_ERRORS as e:
            # Nothing was inserted for this attraction, so there is nothing to
            # clean up; deleting here would remove an unrelated document.
            print(f"Skipping attraction of event {event.get('id')}: {e!r}")
            continue

        # Insert the attraction document into the new collection
        attractions_collection.insert_one(attraction_info)

print("Attractions data copied to the new collection.")

In [None]:
# 6 This segment of code removes any duplicates in the attractions collection

# Connect to MongoDB
client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
collection = db["attractions"]

# (event_id, attraction_id) pairs that have already been kept once
unique_combinations = set()

# Walk the collection; the first document of each pairing stays, later ones go
for document in collection.find():
    pairing = (document["event_id"], document["attraction_id"])

    if pairing in unique_combinations:
        # Repeated pairing: remove this copy from the collection
        collection.delete_one({"_id": document["_id"]})
        continue

    # New pairing: remember it and leave the document in place
    unique_combinations.add(pairing)

In [7]:
# 7 This code creates the Tour_Run collection
#
# Aggregates the "attractions" collection into one document per attraction
# name, each holding a "dates" array of {start_date, venues} entries, and
# writes the result to the "tour_run" collection.

client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
tour = db['attractions']

# Drop any earlier result before the pipeline writes a new one
if "tour_run" in db.list_collection_names():
    db.drop_collection("tour_run")
    print("Existing 'tour_run' collection dropped.")

pipeline = [
    # Stage 1: one group per (attraction_name, start_date) pair, collecting
    # the distinct venue sub-documents seen for that pair
    {
        '$group': {
            '_id': {'attraction_name': '$attraction_name', 'start_date': '$start_date'},
            'venues': {
                '$addToSet': {
                    'name': '$venue_name',
                    'city': '$venue_city',
                    'state': '$venue_state',
                    'country': '$venue_country',
                    'longitude': '$venue_longitude',
                    'latitude': '$venue_latitude'
                }
            }
        }
    },
    # Stage 2: regroup by attraction name, pushing one {start_date, venues}
    # entry per date into "dates"
    {
        '$group': {
            '_id': '$_id.attraction_name',
            'dates': {
                '$push': {
                    'start_date': '$_id.start_date',
                    'venues': '$venues'
                }
            }
        }
    },
    # Stages 3-5: flatten "dates", sort the entries, then rebuild the array
    {
        '$unwind': '$dates'  # One document per entry of the dates array
    },
    {
        '$sort': {'dates.start_date': 1}  # 1 = ascending order by start_date
    },
    {
        '$group': {
            '_id': '$_id',
            'attraction_name': {'$first': '$_id'},  # Preserve attraction_name
            'dates': {'$push': '$dates'}  # Reconstruct the dates array
        }
    },
    # Stage 6: keep only attraction_name and dates in the output documents
    {
        '$project': {
            '_id': 0,
            'attraction_name': 1,
            'dates': 1
        }
    },
    # Stage 7: write the pipeline result into the "tour_run" collection
    {
        '$out': 'tour_run'
    }
]

# Execute the aggregation pipeline
tour.aggregate(pipeline)

# Build a query over tour_run sorted by attraction_name (ascending).
# Nothing in this cell iterates `result`.
result = db["tour_run"].find().sort("attraction_name", 1)

Existing 'tour_run' collection dropped.


In [2]:
# 8 This creates a json file of the events collection https://stackoverflow.com/questions/49153020/how-to-dump-a-collection-to-json-file-using-pymongo

# Establishing MongoDB Connection
client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
events_collection = db["events"]

# Load every event document, replacing its ObjectId "_id" with the string form
data = [
    dict(item, _id=str(item["_id"]))
    for item in events_collection.find({})
]

# Serialize the whole list as indented JSON text
ticketmaster_data = json.dumps(data, indent=4)

# Save the JSON text to disk
with open("ticketmaster_events.json", "w") as file:
    file.write(ticketmaster_data)


print("Data exported to ticketmaster_events.json")

Data exported to ticket_master.json


In [8]:
# 9 This code creates a JSON file of the attractions collection

client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
attractions_collection = db["attractions"]

# Query the data you want to export
data = list(attractions_collection.find({}))

# Convert ObjectId to string
for item in data:
    item["_id"] = str(item["_id"])

# Serialize the data into JSON format
ticketmaster_attractions_data = json.dumps(data, indent=4)

# open() does not create missing folders, so make sure the target exists
os.makedirs("json_files", exist_ok=True)

# Write the JSON data to a file
with open("json_files/ticketmaster_attractions.json", "w") as file:
    file.write(ticketmaster_attractions_data)

print("Data exported to ticketmaster_attractions.json")

Data exported to ticketmaster_attractions.json


In [9]:
# 10 This code creates a JSON file of the tour_run collection

client = MongoClient("mongodb://localhost:27017/")
db = client["ticket_master_db"]
tour_run_collection = db["tour_run"]

# Query the data you want to export
data = list(tour_run_collection.find({}))

# Convert ObjectId to string
for item in data:
    item["_id"] = str(item["_id"])

# Serialize the data into JSON format
ticketmaster_tour_run_data = json.dumps(data, indent=4)

# open() does not create missing folders, so make sure the target exists
os.makedirs("json_files", exist_ok=True)

# Write the JSON data to a file
with open("json_files/ticketmaster_tour_run.json", "w") as file:
    file.write(ticketmaster_tour_run_data)

print("Data exported to ticketmaster_tour_run.json")

Data exported to ticketmaster_tour_run.json


In [17]:
# As Needed

# Use this code to split large json file into smaller parts so they can be uploaded

from os.path import splitext

# Parameters to pass into function

# JSON file to split; the events export cell writes a file with this name
input_file = 'ticketmaster_events.json'
# Text placed between the input file's root name and the part number
output_prefix = 'part'
# Number of output files to produce
num_parts = 2  


# defining function to read in JSON and then split into specific parts
def split_large_json(input_file, output_prefix, num_parts):
    """Split a JSON array file into ``num_parts`` smaller JSON files.

    The loaded items are cut into consecutive slices of
    ``len(data) // num_parts`` items each; the last slice additionally
    receives any remainder.  Slice ``i`` (counting from 1) is written to
    ``json_files/<input_file without extension>_<output_prefix>_<i>.json``.
    The output folder is created when it does not exist yet.

    Args:
        input_file: Path of a JSON file whose top-level value supports
            ``len()`` and slicing (e.g. a list).
        output_prefix: Text inserted between the root name and the part number.
        num_parts: Number of files to write.

    Returns:
        None.
    """
    with open(input_file, 'r') as f:
        data = json.load(f)

    total_items = len(data)
    items_per_part = total_items // num_parts

    rootname = splitext(input_file)[0]

    for i in range(num_parts):
        start_index = i * items_per_part
        # The final part runs to the end so that no remainder items are lost
        end_index = (i + 1) * items_per_part if i < num_parts - 1 else total_items

        output_file = f"json_files/{rootname}_{output_prefix}_{i+1}.json"
        # open() does not create missing folders; make sure the target exists
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        with open(output_file, 'w') as f:
            json.dump(data[start_index:end_index], f, indent=2)


# Split the events export using the parameters defined earlier in this cell
split_large_json(input_file, output_prefix, num_parts)

In [None]:
from pymongo import MongoClient

# Open the tour_run collection of the local ticket_master_db database
client = MongoClient("mongodb://localhost:27017/")
db = client['ticket_master_db']
tour_run_collection = db['tour_run']

# Show each distinct attraction_name stored in tour_run, one per line
for attraction_name in tour_run_collection.distinct("attraction_name"):
    print(attraction_name)