In [1]:
import pandas as pd
import numpy as np
import json
import requests
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
from bs4 import BeautifulSoup as bs
import base64
import datetime

%matplotlib inline

# Ticketmaster Data

In [8]:
# API credentials are read from a local CSV. Reminder: keep that CSV (and
# .ipynb_checkpoints) out of version control via .gitignore before pushing.
api_keys = pd.read_csv('api_keys.csv')

# Ticketmaster Discovery API: key lookup, endpoint, and base query parameters
# shared by every request.
is_tm_row = api_keys['endpoint'] == 'ticketmaster_endpoint'
tm_key = api_keys.loc[is_tm_row, 'key'].values[0]
tm_endpoint = 'https://app.ticketmaster.com/discovery/v2/events.json?'
tm_params = {
    'ver': 2.0,
    'feedtype': 'json',
    'apikey': tm_key,
    'size': 199,
    'classificationName': 'Music',
}

In [13]:
# Two-letter state/territory codes to query, in alphabetical order.
states = [
    'AK', 'AL', 'AR', 'AS', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE',
    'FL', 'GA', 'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA',
    'MA', 'MD', 'ME', 'MI', 'MN', 'MO', 'MP', 'MS', 'MT', 'NC',
    'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR',
    'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA',
    'WI', 'WV', 'WY',
]

In [None]:
# Collect Ticketmaster events for every state, up to 6 result pages per state.
all_events = []
for state in states:
    tm_params['stateCode'] = state
    for page in range(6):
        tm_params['page'] = str(page)
        # A timeout keeps one stalled connection from hanging the whole notebook.
        response = requests.get(tm_endpoint, params=tm_params, timeout=30)
        res = response.json()
        # A payload without '_embedded' / 'events' simply contributes nothing.
        all_events.extend(res.get('_embedded', {}).get('events', []))
        # Stop early once the response itself reports that this was the last page.
        # If the 'page' block is missing we keep going, exactly as before.
        total_pages = res.get('page', {}).get('totalPages')
        if isinstance(total_pages, int) and page + 1 >= total_pages:
            break

In [None]:
# Pull the fields of interest out of every raw Ticketmaster event into a list
# of flat records ("event_info"), then turn that list into a DataFrame.
#
# NOTE(review): for 'TM_id', 'event_name' and 'date' a missing value is stored
# as the literal string '{}' (the empty-dict default passed through str()), and
# a missing venue/artist attribute as the string 'None'. Kept as-is here.

event_info = []
for event in all_events:
    # '_embedded', 'attractions' and 'venues' may be absent; fall back to empty
    # containers instead of raising KeyError / IndexError.
    embedded = event.get('_embedded') or {}
    attractions = embedded.get('attractions') or []
    venues = embedded.get('venues') or []
    # Venue-level fields are taken from the first listed venue only.
    first_venue = venues[0] if venues else {}

    event_info.append({
        'TM_id': str(event.get('id', {})),
        'event_name': str(event.get('name', {})),
        'artist': [str(attraction.get('name')) for attraction in attractions],
        'venue': str(first_venue.get('name')),
        'city': str(first_venue.get('city', {}).get('name')),
        'state': str(first_venue.get('state', {}).get('stateCode')),
        'date': str(event.get('dates', {}).get('start', {}).get('dateTime', {})),
        # Left nested here (None when absent).
        'prices': event.get('priceRanges'),
        'genre': event.get('classifications'),
    })
tmDF = pd.DataFrame(event_info)

In [None]:
# Display the Ticketmaster DataFrame as the cell output.
tmDF

In [None]:
# Replace each list-valued cell with its first element (any further elements
# are discarded), so the columns no longer hold lists.
for tm_col in ('artist', 'prices', 'genre'):
    tmDF[tm_col] = tmDF[tm_col].str[0]

In [None]:
# "flattening" out "prices" and "genre", using "flatten" function to draw out individual elements from nested
# list/dictionary within column

def flatten(x):
    """Flatten one level of nesting in a dict-like cell into a pandas Series.

    Parameters
    ----------
    x : mapping, None, or any non-mapping placeholder (e.g. float NaN)
        The cell value to expand.

    Returns
    -------
    pandas.Series
        One entry per key of ``x``. When a value is a non-empty list whose
        first element is itself a mapping, that first element is expanded into
        ``"<key>_<subkey>"`` entries (remaining list elements are ignored).
        Every other value is stored unchanged under its own key. An empty
        Series is returned when ``x`` is not a mapping.
    """
    d = {}
    # Missing cells may show up as None or as NaN; neither has .items(), so
    # treat anything that is not mapping-like as "nothing to flatten".
    if x is None or not hasattr(x, 'items'):
        return pd.Series(d)

    for k, v in x.items():
        # Only expand lists that are non-empty and start with a mapping;
        # empty lists and lists of scalars are kept as plain values.
        if isinstance(v, list) and v and hasattr(v[0], 'items'):
            for k_s, v_s in v[0].items():
                d["{}_{}".format(k, k_s)] = v_s
        else:
            d[k] = v
    return pd.Series(d)

In [None]:
# Expand each row's 'genre' cell into its own columns and attach them to tmDF
# by index; column names present on both sides get '_left' / '_right' suffixes.
genre_expanded = tmDF['genre'].apply(flatten)
tmDF = tmDF.join(
    genre_expanded,
    how='left',
    lsuffix='_left',
    rsuffix='_right',
)

In [None]:
# Expand the 'segment' cells into columns, attach them by index, and expose the
# resulting 'name' column as 'category'.
segment_expanded = tmDF['segment'].apply(flatten)
tmDF = (
    tmDF
    .join(segment_expanded, how='left', lsuffix='_left', rsuffix='_right')
    .rename(columns={'name': 'category'})
)

In [None]:
# Expand the 'genre_right' cells into columns, attach them by index, and expose
# the resulting 'name' column as 'genre'.
genre_right_expanded = tmDF['genre_right'].apply(flatten)
tmDF = (
    tmDF
    .join(genre_right_expanded, how='left', lsuffix='_left', rsuffix='_right')
    .rename(columns={'name': 'genre'})
)

In [None]:
# Expand each row's 'prices' cell into its own columns and attach them by index;
# column names present on both sides get '_left' / '_right' suffixes.
prices_expanded = tmDF['prices'].apply(flatten)
tmDF = tmDF.join(
    prices_expanded,
    how='left',
    lsuffix='_left',
    rsuffix='_right',
)

In [None]:
# Remove the raw nested columns and the by-products of flattening that are no
# longer needed, then give the price bounds clearer names.
flattened_leftovers = [
    'prices', 'primary', 'genre_left', 'genre_right', 'type_left', 'subType',
    'segment', 'subGenre', 'family', 'type_right', 'id_left', 'id_right',
    'currency',
]
tmDF = (
    tmDF
    .drop(columns=flattened_leftovers)
    .rename(columns={'min': 'min_price', 'max': 'max_price'})
)
tmDF

# SeatGeek Data

In [10]:
# SeatGeek credentials and endpoint.
# Take the first matching key as a scalar (the same way tm_key is read) so that
# client_id is a single value rather than a NumPy array of matches.
sg_key = api_keys.loc[api_keys['endpoint'] == 'seatgeek_endpoint', 'key'].values[0]
sg_endpoint = 'https://api.seatgeek.com/2/events?'
sg_params = {'client_id': sg_key}

In [14]:
# Collect SeatGeek events for every state, requesting 6 result pages per state.
# NOTE(review): pages are requested as 0..5; confirm against the SeatGeek docs
# whether numbering starts at 1 (page 0 may duplicate page 1 or be rejected).
all_sg_events = []
for state in states:
    sg_params['venue.state'] = state
    for page in range(6):
        sg_params['page'] = str(page)
        # A timeout keeps one stalled connection from hanging the whole notebook.
        response = requests.get(sg_endpoint, params=sg_params, timeout=30)
        res = response.json()
        # A payload without an 'events' key simply contributes nothing.
        all_sg_events.extend(res.get('events', []))

In [15]:
# Number of SeatGeek event records collected by the fetch loop.
len(all_sg_events)

2550

In [16]:
def _sg_record(sg_event):
    """Pick the fields of interest out of one SeatGeek event payload.

    Scalar fields are converted with str(); per-performer fields become lists
    of strings, one entry per performer.
    """
    stats = sg_event.get('stats', {})
    venue = sg_event.get('venue', {})
    performers = sg_event.get('performers', {})
    return {
        'SG_event_id': str(sg_event.get('id', {})),
        'SG_listing_count': str(stats.get('listing_count', {})),
        'SG_average_price': str(stats.get('average_price', {})),
        'SG_min_price': str(stats.get('lowest_price', {})),
        'SG_max_price': str(stats.get('highest_price', {})),
        'SG_event_name': str(sg_event.get('title', {})),
        'SG_date': str(sg_event.get('datetime_local', {})),
        'SG_artists': [str(p.get('name', {})) for p in performers],
        'SG_artists_score': [str(p.get('score', {})) for p in performers],
        'SG_artists_id': [str(p.get('id', {})) for p in performers],
        'SG_venue': str(venue.get('name', {})),
        'SG_venue_city': str(venue.get('city', {})),
        'SG_venue_state': str(venue.get('state', {})),
        'SG_venue_score': str(venue.get('score', {})),
    }


# One flat record per SeatGeek event, then a DataFrame built from all of them.
info_list = [_sg_record(sg_event) for sg_event in all_sg_events]
sgDF = pd.DataFrame(info_list)

In [17]:
# Display the SeatGeek DataFrame as the cell output.
sgDF

Unnamed: 0,SG_event_id,SG_listing_count,SG_average_price,SG_min_price,SG_max_price,SG_event_name,SG_date,SG_artists,SG_artists_score,SG_artists_id,SG_venue,SG_venue_city,SG_venue_state,SG_venue_score
0,5298213,6,289,186,426,The King and I - Anchorage,2021-02-16T19:30:00,[The King and I],[0.4],[8283],Atwood Concert Hall,Anchorage,AK,0.461184
1,5298216,6,291,186,426,The King and I - Anchorage,2021-02-17T19:30:00,[The King and I],[0.4],[8283],Atwood Concert Hall,Anchorage,AK,0.461184
2,5298215,6,291,186,426,The King and I - Anchorage,2021-02-18T19:30:00,[The King and I],[0.4],[8283],Atwood Concert Hall,Anchorage,AK,0.461184
3,5298217,6,289,186,426,The King and I - Anchorage,2021-02-19T20:00:00,[The King and I],[0.4],[8283],Atwood Concert Hall,Anchorage,AK,0.461184
4,5298212,6,289,186,426,The King and I - Anchorage,2021-02-20T14:00:00,[The King and I],[0.4],[8283],Atwood Concert Hall,Anchorage,AK,0.461184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2545,5348541,1,132,132,132,Tech N9ne with Krizz Kaliko and Rittz,2021-05-17T19:00:00,"[Tech N9ne, Krizz Kaliko, Rittz]","[0.4, 0.38, 0.38]","[1748, 19807, 19990]",The Lincoln,Cheyenne,WY,0
2546,5255362,,,,,Jon Wolfe,2021-05-24T03:30:00,[Jon Wolfe],[0.41],[33376],Terry Bison Ranch,Cheyenne,WY,0
2547,5212195,,,,,Jackson Hole Rodeo,2021-05-24T03:30:00,[Jackson Hole Rodeo],[0.38],[787354],Jackson Hole Rodeo,Jackson,WY,0
2548,5142808,,,,,Citizen Cope,2021-05-25T03:30:00,[Citizen Cope],[0.47],[470],Pink Garter Theatre,Jackson Hole,WY,0.382202


In [18]:
# Keep only the first entry of each per-performer list column (any further
# performers are discarded).
for sg_col in ('SG_artists', 'SG_artists_score', 'SG_artists_id'):
    sgDF[sg_col] = sgDF[sg_col].str[0]

In [19]:
# Display the SeatGeek DataFrame again, after the list columns were reduced to
# their first element.
sgDF

Unnamed: 0,SG_event_id,SG_listing_count,SG_average_price,SG_min_price,SG_max_price,SG_event_name,SG_date,SG_artists,SG_artists_score,SG_artists_id,SG_venue,SG_venue_city,SG_venue_state,SG_venue_score
0,5298213,6,289,186,426,The King and I - Anchorage,2021-02-16T19:30:00,The King and I,0.4,8283,Atwood Concert Hall,Anchorage,AK,0.461184
1,5298216,6,291,186,426,The King and I - Anchorage,2021-02-17T19:30:00,The King and I,0.4,8283,Atwood Concert Hall,Anchorage,AK,0.461184
2,5298215,6,291,186,426,The King and I - Anchorage,2021-02-18T19:30:00,The King and I,0.4,8283,Atwood Concert Hall,Anchorage,AK,0.461184
3,5298217,6,289,186,426,The King and I - Anchorage,2021-02-19T20:00:00,The King and I,0.4,8283,Atwood Concert Hall,Anchorage,AK,0.461184
4,5298212,6,289,186,426,The King and I - Anchorage,2021-02-20T14:00:00,The King and I,0.4,8283,Atwood Concert Hall,Anchorage,AK,0.461184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2545,5348541,1,132,132,132,Tech N9ne with Krizz Kaliko and Rittz,2021-05-17T19:00:00,Tech N9ne,0.4,1748,The Lincoln,Cheyenne,WY,0
2546,5255362,,,,,Jon Wolfe,2021-05-24T03:30:00,Jon Wolfe,0.41,33376,Terry Bison Ranch,Cheyenne,WY,0
2547,5212195,,,,,Jackson Hole Rodeo,2021-05-24T03:30:00,Jackson Hole Rodeo,0.38,787354,Jackson Hole Rodeo,Jackson,WY,0
2548,5142808,,,,,Citizen Cope,2021-05-25T03:30:00,Citizen Cope,0.47,470,Pink Garter Theatre,Jackson Hole,WY,0.382202
