##### This notebook loads the JSON file and parses it to extract the required information.
##### The data contains sports-related events; the parsed result is saved to TicketMaster_sports.csv

In [1]:
import pandas as pd
import json

In [2]:
# Load the raw events JSON into `data`.
# The encoding is given explicitly: JSON text is UTF-8, while open() without
# an `encoding` argument falls back to the platform's locale encoding, which
# can mis-decode or fail on non-ASCII characters.
with open('Events_all_sports.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


## Preprocessing
#### Get only relevant columns from the JSON file

In [22]:
# Fields to keep from each event (listed in the note below), followed by a
# quick look at the names of the first two events.
'''
name
sales-end date to purchase
dates-start date,start time,time zone
pleaseNote-info
ticketLimit
ageRestrictions-legal age enforced
_embedded- name,address,city,state,(location-latitude,longitude),address,parking details,accessibility information
'''
preview_events = data[:2]
for preview_event in preview_events:
    print(preview_event['name'])


Calgary Flames vs. Anaheim Ducks
Toronto Raptors vs. Los Angeles Lakers


In [39]:
# One empty accumulator list per output column; the extraction loop appends
# one value to each of these for every event.

# Event details
Events, Classifications = [], []
Event_dates, Event_start_times, Event_time_zone = [], [], []
Purchase_end_dates = []

# Venue details
Venues, Cities, States = [], [], []
Address_, Latitude_, Longitude_ = [], [], []

# Additional / optional information
Please_note_, Ticket_limits, Age_restrictions = [], [], []
Parking_, Accesibility_ = [], []

In [40]:
# Retrieve the required details for every event in the JSON data and append
# them to the column lists created in an earlier cell.

# The column lists live at notebook scope, so re-running this cell would
# append every event a second time. Empty them first so the cell can be
# re-run safely without duplicating rows.
for _column in (Events, Classifications, Event_dates, Event_start_times,
                Event_time_zone, Purchase_end_dates, Venues, Cities, States,
                Address_, Latitude_, Longitude_, Please_note_, Ticket_limits,
                Age_restrictions, Parking_, Accesibility_):
    _column.clear()

# Iterate over every event in `data` (the whole file, not a prototype slice).
for event_name in data:

    # The first venue entry supplies all location-related fields; look it up
    # once instead of re-indexing the event for every field.
    venue_info = event_name['_embedded']['venues'][0]
    start_info = event_name['dates']['start']

    # Required fields: these are indexed directly, so a missing key raises
    # KeyError rather than being silently defaulted.
    Event = event_name['name']
    Classification = event_name['classifications'][0]['segment']['name']
    Event_date = start_info['localDate']
    Venue = venue_info['name']
    City = venue_info['city']['name']
    State = venue_info['state']['name']
    Address = venue_info['address']['line1']
    Latitude = venue_info['location']['latitude']
    Longitude = venue_info['location']['longitude']

    # Optional fields: fall back to a placeholder when the key is absent.
    Event_start_time = start_info.get('localTime', "N/A")
    # `dates` is already indexed above, so only `timezone` needs a fallback.
    Event_timezone = event_name['dates'].get('timezone', 'Unknown')
    Purchase_end_date = event_name['sales']['public'].get('endDateTime', 'N/A')
    Please_note = event_name.get('pleaseNote', 'N/A')
    # An absent parent object is treated like an absent child key.
    Ticket_limit = event_name.get('ticketLimit', {}).get('info', 'N/A')
    Age_restriction = event_name.get('ageRestrictions', {}).get('legalAgeEnforced', 'N/A')
    Parking = venue_info.get('parkingDetail', "N/A")
    Accesibility = venue_info.get('accessibleSeatingDetail', 'N/A')

    # Append all the details to the column lists. All appends happen together
    # after every lookup has succeeded, so the lists always stay the same length.
    Events.append(Event)
    Classifications.append(Classification)
    Event_dates.append(Event_date)
    Event_start_times.append(Event_start_time)
    Event_time_zone.append(Event_timezone)
    Purchase_end_dates.append(Purchase_end_date)
    Venues.append(Venue)
    Cities.append(City)
    States.append(State)
    Address_.append(Address)
    Latitude_.append(Latitude)
    Longitude_.append(Longitude)
    Please_note_.append(Please_note)
    Ticket_limits.append(Ticket_limit)
    Age_restrictions.append(Age_restriction)
    Parking_.append(Parking)
    Accesibility_.append(Accesibility)

print(len(Address_)) #to verify the loop 


806


In [41]:
# Map each output column name to the list holding that column's values.
# The pair order below determines the column order of the resulting table.
data_dict = dict([
    ('Event_name', Events),
    ('Event_type', Classifications),
    ('Event_dates', Event_dates),
    ('Event_start_times', Event_start_times),
    ('Event_time_zone', Event_time_zone),
    ('Purchase_end_dates', Purchase_end_dates),
    ('Venues', Venues),
    ('City', Cities),
    ('States', States),
    ('Address', Address_),
    ('Latitude', Latitude_),
    ('Longitude', Longitude_),
    ('Additional_info', Please_note_),
    ('Ticket_limits', Ticket_limits),
    ('Age_restrictions', Age_restrictions),
    ('Parking', Parking_),
    ('Accesibility', Accesibility_),
])

In [42]:
# Sanity check: report how many values each column in data_dict holds.
lengths = dict(zip(data_dict.keys(), map(len, data_dict.values())))
print(lengths)


{'Event_name': 806, 'Event_type': 806, 'Event_dates': 806, 'Event_start_times': 806, 'Event_time_zone': 806, 'Purchase_end_dates': 806, 'Venues': 806, 'City': 806, 'States': 806, 'Address': 806, 'Latitude': 806, 'Longitude': 806, 'Additional_info': 806, 'Ticket_limits': 806, 'Age_restrictions': 806, 'Parking': 806, 'Accesibility': 806}


In [43]:
# Build the DataFrame from the column dictionary and preview its first rows.

sports_df = pd.DataFrame(data_dict)
sports_df.head(5)


Unnamed: 0,Event_name,Event_type,Event_dates,Event_start_times,Event_time_zone,Purchase_end_dates,Venues,City,States,Address,Latitude,Longitude,Additional_info,Ticket_limits,Age_restrictions,Parking,Accesibility
0,Calgary Flames vs. Anaheim Ducks,Sports,2024-04-02,19:00:00,America/Denver,2024-04-03T02:00:00Z,Scotiabank Saddledome,Calgary,Alberta,555 Saddledome Rise SE,51.0385626,-114.0524879,Please note: Restaurant level seating does not...,There is a ticket limit of 4 per household.,False,There is limited pay parking on the grounds. I...,Accessible Seating: Wheelchair areas are: Sect...
1,Toronto Raptors vs. Los Angeles Lakers,Sports,2024-04-02,19:00:00,America/New_York,2024-04-03T00:00:00Z,Scotiabank Arena,Toronto,Ontario,"50 Bay Street, Suite 500",43.6439254,-79.3783234,Fans in attendance agree to abide by the healt...,There is an overall 9 ticket limit for this ev...,False,"Public parking is available on Queens Quay, Ba...",Scotiabank Arena is fully accessible through a...
2,Toronto Raptors vs. Washington Wizards,Sports,2024-04-07,18:00:00,America/New_York,2024-04-07T23:00:00Z,Scotiabank Arena,Toronto,Ontario,"50 Bay Street, Suite 500",43.6439254,-79.3783234,Fans in attendance agree to abide by the healt...,There is an overall 9 ticket limit for this ev...,False,"Public parking is available on Queens Quay, Ba...",Scotiabank Arena is fully accessible through a...
3,Toronto Raptors vs. Indiana Pacers,Sports,2024-04-09,19:00:00,America/New_York,2024-04-10T00:00:00Z,Scotiabank Arena,Toronto,Ontario,"50 Bay Street, Suite 500",43.6439254,-79.3783234,Fans in attendance agree to abide by the healt...,There is an overall 9 ticket limit for this ev...,False,"Public parking is available on Queens Quay, Ba...",Scotiabank Arena is fully accessible through a...
4,Vancouver Canucks vs. Arizona Coyotes,Sports,2024-04-10,19:30:00,America/Vancouver,2024-04-11T04:30:00Z,Rogers Arena,Vancouver,British Columbia,800 Griffiths Way,49.2776612,-123.1081074,,There is an 8 ticket limit for this event,False,Underground parking (800 Griffiths Way) is ava...,Accessible platform seating is available on va...


In [47]:
# Shape of the table once fully duplicated rows are removed.
deduplicated_df = sports_df.drop_duplicates()
deduplicated_df.shape

(806, 17)

In [48]:
# Write the DataFrame to a CSV file, leaving out the index column.

output_csv = 'TicketMaster_sports.csv'
sports_df.to_csv(output_csv, index=False)
