##### This notebook loads the event JSON files and parses them to extract the required fields for each event.
##### The extracted music-event data is saved to `TicketMaster_music.csv`.

In [1]:
import pandas as pd
import json


In [2]:
# Load the raw event dumps into memory, one variable per source file.
def _read_json(path):
    """Return the decoded contents of the JSON file at `path`.

    The file is opened with an explicit UTF-8 encoding: without it, open()
    falls back to the platform's locale encoding, which can fail or
    mis-decode non-ASCII text on systems whose default is not UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return json.load(handle)


data1 = _read_json('Events_music.json')
data2 = _read_json('Events_999.json')
data3 = _read_json('Events_99.json')
data4 = _read_json('Events_49.json')




In [68]:
# Fields to extract from each event record in the JSON files.
# The bare string below is only a note for the reader; it is not assigned or used.
'''
name
sales-end date to purchase
dates-start date,start time,time zone
pleaseNote-info
ticketLimit
ageRestrictions-legal age enforced
_embedded- name,address,city,state,(location-latitude,longitude)address,parking details,accessibility information
'''
# Display the top-level keys of one sample record (index 4 of data4) to see
# which fields are available before writing the extraction loop.
data4[4].keys()  # to drill deeper, index further, e.g. ['_embedded']['venues'][0]['address']['line1']


dict_keys(['name', 'type', 'id', 'test', 'url', 'locale', 'images', 'sales', 'dates', 'classifications', 'promoter', 'promoters', 'info', 'pleaseNote', 'priceRanges', 'seatmap', 'accessibility', 'ticketLimit', 'ageRestrictions', 'ticketing', '_links', '_embedded'])

In [41]:
# One empty accumulator list per output column; the extraction loop appends
# exactly one value to each of them for every event it processes.

# Event identity and schedule
Events, Classifications = [], []
Event_dates, Event_start_times, Event_time_zone = [], [], []
Purchase_end_dates = []

# Venue and location
Venues, Cities, States, Address_ = [], [], [], []
Latitude_, Longitude_ = [], []

# Notes, policies and venue services
Please_note_, Ticket_limits, Age_restrictions = [], [], []
Parking_, Accesibility_ = [], []

In [53]:
# Walk every event record in `data4` and append one value per output column.
#
# Every optional part of a record is looked up safely and replaced by a
# placeholder string when it is absent, so a single incomplete record cannot
# abort the whole run with a KeyError / IndexError.
#
# NOTE(review): only `data4` is processed here; `data1`-`data3` are not
# referenced in this cell - confirm whether they should be processed as well.
# NOTE(review): values are appended to lists created in an earlier cell, so
# re-running this cell without re-creating those lists adds the same events again.


def _dig(record, *keys, default='N/A'):
    """Follow `keys` through nested dicts and return the value found.

    Returns `default` as soon as a level is not a dict or lacks the key.
    A key that is present is returned as-is, even when its value is None.
    """
    current = record
    for key in keys:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current


def extract_event_row(event):
    """Build the 17 output values for one event record.

    Parameters
    ----------
    event : dict
        One decoded event record from the JSON files.

    Returns
    -------
    dict
        Column name -> value. Missing optional data is replaced by 'N/A',
        except the time zone ('Unknown'), the ticket limit
        ('No ticket limit information available') and parking when the event
        has no venue at all ('Parking information not available').

    Raises
    ------
    KeyError
        If the record has no 'name'; a nameless record is treated as malformed.
    """
    # Genre name of the first classification, when there is one.
    classifications = event.get('classifications')
    if classifications:
        classification = _dig(classifications[0], 'genre', 'name')
    else:
        classification = 'N/A'

    # Only the first listed venue is used; `venue` stays None when the event
    # has no '_embedded' section or an empty/missing 'venues' list.
    venues = _dig(event, '_embedded', 'venues', default=None)
    venue = venues[0] if venues else None

    if venue is None:
        parking = 'Parking information not available'
    else:
        parking = _dig(venue, 'parkingDetail')

    return {
        'Event_name': event['name'],
        'Event_type': classification,
        'Event_dates': _dig(event, 'dates', 'start', 'localDate'),
        'Event_start_times': _dig(event, 'dates', 'start', 'localTime'),
        'Event_time_zone': _dig(event, 'dates', 'timezone', default='Unknown'),
        'Purchase_end_dates': _dig(event, 'sales', 'public', 'endDateTime'),
        'Venues': _dig(venue, 'name'),
        'City': _dig(venue, 'city', 'name'),
        'Address': _dig(venue, 'address', 'line1'),
        'States': _dig(venue, 'state', 'name'),
        'Latitude': _dig(venue, 'location', 'latitude'),
        'Longitude': _dig(venue, 'location', 'longitude'),
        'Additional_info': event.get('pleaseNote', 'N/A'),
        'Ticket_limits': _dig(
            event, 'ticketLimit', 'info',
            default='No ticket limit information available',
        ),
        'Age_restrictions': _dig(event, 'ageRestrictions', 'legalAgeEnforced'),
        'Parking': parking,
        'Accesibility': _dig(venue, 'accessibleSeatingDetail'),
    }


for event in data4:
    row = extract_event_row(event)

    # Append one value to every column list so all lists stay the same length.
    Events.append(row['Event_name'])
    Classifications.append(row['Event_type'])
    Event_dates.append(row['Event_dates'])
    Event_start_times.append(row['Event_start_times'])
    Event_time_zone.append(row['Event_time_zone'])
    Purchase_end_dates.append(row['Purchase_end_dates'])
    Venues.append(row['Venues'])
    Cities.append(row['City'])
    Address_.append(row['Address'])
    States.append(row['States'])
    Latitude_.append(row['Latitude'])
    Longitude_.append(row['Longitude'])
    Please_note_.append(row['Additional_info'])
    Ticket_limits.append(row['Ticket_limits'])
    Age_restrictions.append(row['Age_restrictions'])
    Parking_.append(row['Parking'])
    Accesibility_.append(row['Accesibility'])



In [54]:
# Map each output column name to the list holding that column's values.
# Keyword order is the column order of the resulting table.
data_dict = dict(
    Event_name=Events,
    Event_type=Classifications,
    Event_dates=Event_dates,
    Event_start_times=Event_start_times,
    Event_time_zone=Event_time_zone,
    Purchase_end_dates=Purchase_end_dates,
    Venues=Venues,
    City=Cities,
    States=States,
    Address=Address_,
    Latitude=Latitude_,
    Longitude=Longitude_,
    Additional_info=Please_note_,
    Ticket_limits=Ticket_limits,
    Age_restrictions=Age_restrictions,
    Parking=Parking_,
    Accesibility=Accesibility_,
)

In [55]:
# Sanity check: report how many entries each column holds. pandas needs all
# columns to be the same length to build a DataFrame from this dict.
lengths = dict(zip(data_dict.keys(), map(len, data_dict.values())))
print(lengths)

{'Event_name': 2900, 'Event_type': 2900, 'Event_dates': 2900, 'Event_start_times': 2900, 'Event_time_zone': 2900, 'Purchase_end_dates': 2900, 'Venues': 2900, 'City': 2900, 'States': 2900, 'Address': 2900, 'Latitude': 2900, 'Longitude': 2900, 'Additional_info': 2900, 'Ticket_limits': 2900, 'Age_restrictions': 2900, 'Parking': 2900, 'Accesibility': 2900}


In [62]:
# Build the table: each key of data_dict becomes a column, each list its values.
music_df = pd.DataFrame(data_dict)

# Show (rows, columns) to confirm the expected size.
music_df.shape

(2900, 17)

In [61]:
# Size of the table once exact repeats are removed: keep every row that is not
# a duplicate of an earlier row (first occurrence wins). music_df is unchanged.
music_df.loc[~music_df.duplicated()].shape

(1000, 17)

In [63]:
# Write the final table to CSV without the index column.
# Exact duplicate rows are dropped at write time: the notebook's own duplicate
# check shows the frame can contain many repeated rows, and they should not
# end up in the exported file. music_df itself is left unchanged.
music_df.drop_duplicates().to_csv('TicketMaster_music.csv', index=False)