In [40]:
import requests
import pandas as pd
import numpy as np
import sqlalchemy as db
import json
import datetime
import functions as f
from tqdm import tqdm
import time

In [41]:
state = pd.read_csv('states.csv')

In [42]:
states = list(state.Abbreviation)

In [43]:
size = '200' #maxsize
engine = db.create_engine('sqlite:///tickets.db')
table = 'events'
frame_list = []
fails = []
success= []


In [44]:
frame_list = []
for state in tqdm(states):
    for page in range(5):
        frame_list.append(f.scrape_ticketmaster(state, size, page))
        time.sleep(1)

100%|██████████| 51/51 [07:38<00:00,  9.00s/it]


In [45]:
len(frame_list)

255

In [46]:
temp = []
fail = []
for x in frame_list:
    try:
        df = pd.DataFrame(x['_embedded'] ['events'])
        temp.append(df)
    except:
        fail.append(x)

In [47]:
len(fail)

115

In [48]:
df = pd.concat(temp, sort=False)

In [49]:
df.reset_index(inplace=True)

In [50]:
df.columns

Index(['index', 'name', 'type', 'id', 'test', 'url', 'locale', 'images',
       'sales', 'dates', 'classifications', 'promoter', 'promoters', 'info',
       'priceRanges', '_links', '_embedded', 'pleaseNote', 'ticketLimit',
       'products', 'seatmap', 'accessibility', 'outlets', 'description'],
      dtype='object')

In [51]:
df.drop(columns = ['index'],inplace=True)

In [52]:
#drop nulls because wont have important model feature
df.dropna(subset=['priceRanges'],inplace=True)

In [53]:
df = f.unpack_price(df)

In [54]:
df = f.unpack_limit(df)

In [55]:
df.max_tickets.fillna(10,inplace=True)

In [56]:
df = f.unpack_venue(df)

In [57]:
df = f.unpack_market(df)

In [58]:
df = f.unpack_classifications(df)

In [59]:
df = f.unpack_presales(df)

In [60]:
df.columns

Index(['name_x', 'type', 'id', 'test', 'url', 'locale', 'images', 'dates',
       'promoter', 'promoters', 'info', '_links', '_embedded', 'pleaseNote',
       'products', 'seatmap', 'accessibility', 'outlets', 'description',
       'price_min', 'price_max', 'max_tickets', 'name_y', 'postalCode',
       'city.name', 'state.name', 'state.stateCode', 'country.countryCode',
       'address.line1', 'location.longitude', 'location.latitude',
       'num_markets', 'genre', 'subgenre', 'public.startDateTime',
       'public.endDateTime', 'presales'],
      dtype='object')

In [61]:
#fix public.endtime off by one day
df['public.endDateTime'] = pd.to_datetime(df['public.endDateTime'],utc=False)

In [62]:
df['public.endDateTime'] = df['public.endDateTime'] - datetime.timedelta(days=1)

In [63]:
df['public.endDateTime'][1]

Timestamp('2019-10-25 01:00:00+0000', tz='UTC')

In [64]:
df['days_event'] = df['public.endDateTime'] - pd.to_datetime(df['public.startDateTime'],utc=True)

In [65]:
df.days_event = df.days_event.apply(lambda x: x.days)

In [66]:
df['day_of_week'] = df['public.endDateTime'].apply(lambda x: x.weekday())

In [67]:
df = f.final_cleanup(df)

In [68]:
df.columns

Index(['event_name', 'type', 'id', 'test', 'url', 'images', 'promoter',
       'promoters', 'info', '_links', 'pleaseNote', 'products', 'seatmap',
       'accessibility', 'outlets', 'description', 'price_min', 'price_max',
       'max_tickets', 'venue_name', 'postalCode', 'city', 'state', 'country',
       'address', 'longitude', 'latitude', 'num_markets', 'genre', 'subgenre',
       'onsale_date', 'event_date', 'is_presale', 'days_event', 'day_of_week'],
      dtype='object')

In [69]:
df.drop(columns = ['_links', 'accessibility', 'description', 'id', 'images', 'info','outlets', 'pleaseNote', 'products', 'promoter',
       'promoters', 'seatmap', 'test', 'type','url','onsale_date'],inplace=True)

In [70]:
df.dropna(subset=['price_min'],inplace=True)

In [71]:
df.to_csv('tm_events.csv')

In [72]:
df = pd.read_csv('tm_events.csv')

In [73]:
df.venue_name.nunique()

1103

In [74]:
#check if multiple nights
df[df.duplicated(subset=['event_name','venue_name'])].head()

Unnamed: 0.1,Unnamed: 0,event_name,price_min,price_max,max_tickets,venue_name,postalCode,city,state,country,address,longitude,latitude,num_markets,genre,subgenre,event_date,is_presale,days_event,day_of_week
17,17,Black Jacket Symphony Presents The Eagles' 'Ho...,32.0,37.0,10,Von Braun Center Concert Hall,35801,Huntsville,AL,US,700 Monroe Street,-86.589925,34.726557,1.0,Rock,Pop,2019-10-05 03:00:00+00:00,1,154.0,5.0
18,18,Black Jacket Symphony Presents The Eagles' 'Ho...,32.0,37.0,10,Von Braun Center Concert Hall,35801,Huntsville,AL,US,700 Monroe Street,-86.589925,34.726557,1.0,Rock,Pop,2019-10-05 23:00:00+00:00,0,155.0,5.0
67,72,Jim Brickman A Christmas Celebration,35.0,55.0,10,The Lyric Theatre,35203,Birmingham,AL,US,1800 3rd Ave N,-86.809601,33.515281,1.0,Pop,Adult Contemporary,2019-12-21 01:30:00+00:00,1,238.0,5.0
96,109,Jim Parkers Songwriters Series,25.0,53.75,10,Von Braun Center Playhouse,35801,Huntsville,AL,US,700 Monroe St SW,-86.589925,34.726557,2.0,Other,Other,2019-11-08 01:30:00+00:00,0,307.0,4.0
112,135,Jonas Brothers: Happiness Begins Tour,35.2,495.2,8,Talking Stick Resort Arena,85004,Phoenix,AZ,US,201 East Jefferson Street,-112.071313,33.445899,1.0,Rock,Pop,2019-12-10 02:30:00+00:00,1,205.0,1.0


In [75]:
df.event_name

0                         The Rocky Horror Masquerade Ball
1          AEG & Tom Joyner Present: Magic City Music Fest
2                    Jonas Brothers: Happiness Begins Tour
3                            Eric Church: Double Down Tour
4        Trans-Siberian Orchestra 2019 Presented By Hal...
                               ...                        
10892                                              Brand X
10893    I Wrote That One Too...A Life in Songwriting S...
10894                          Matt Heckler & Casper Allen
10895                                         Marlon Craft
10896                                   The Palmer Squares
Name: event_name, Length: 10897, dtype: object