In [1]:
import apikey
import requests
from bs4 import BeautifulSoup
# Rebind the name `apikey` from the imported module to the key value it holds;
# the definitions below use this name as the default for their `apikey` argument.
apikey = apikey.apikey

In [2]:
class SetList:
    """Holder for one show fetched from the Phish.net v3 ``setlists/get`` endpoint.

    Typical use: call :meth:`get_setlist` to download the JSON payload for a
    date, then :meth:`read_response` to copy the show-level fields of the
    first returned show onto the instance.
    """

    def __init__(self):
        self.response = None   # decoded JSON payload from the last get_setlist call
        self.showid   = None
        self.date     = None
        self.location = None
        self.venue    = None
        self.rating   = None
        self.songs    = None   # not populated by any method of this class

    def __str__(self):
        """Return the string form of the raw payload (``'None'`` before any fetch)."""
        return str(self.response)

    def get_setlist(self, showdate, apikey=apikey, timeout=30):
        """Download the setlist payload for ``showdate`` and store it on the instance.

        Parameters
        ----------
        showdate : str
            Show date sent as the ``showdate`` query parameter, e.g. ``'1988-02-10'``.
        apikey : str
            API key sent as the ``apikey`` query parameter.
        timeout : float
            Seconds to wait for the server before ``requests`` raises a timeout
            error. Without it a stalled connection would block indefinitely.
        """
        url = 'https://api.phish.net/v3/setlists/get'
        key_param = {'apikey': apikey, "showdate": showdate}
        s = requests.post(url, params=key_param, timeout=timeout)
        self.response = s.json()

    def read_response(self):
        """Copy the first show's fields from ``self.response`` onto the instance.

        When the payload's ``data`` list is empty (no show on that date) every
        show field is reset to ``None`` instead of raising ``IndexError``, so a
        reused instance never keeps values from a previous show.
        """
        shows = self.response['response']['data']
        if not shows:
            self.showid = self.date = self.location = None
            self.venue = self.rating = None
            return

        show = shows[0]
        self.showid   = show['showid']
        self.date     = show['showdate']
        self.location = show['location']
        self.venue    = show['venue']
        self.rating   = show['rating']

In [3]:
def get_setlist(showdate, apikey=apikey, timeout=30):
    """Fetch the setlist payload for one show date from the Phish.net v3 API.

    Parameters
    ----------
    showdate : str
        Show date sent as the ``showdate`` query parameter, e.g. ``'1988-02-10'``.
    apikey : str
        API key sent as the ``apikey`` query parameter.
    timeout : float
        Seconds to wait for the server before ``requests`` raises a timeout
        error. Without it a stalled connection would block indefinitely.

    Returns
    -------
    The decoded JSON body of the response.
    """
    url = 'https://api.phish.net/v3/setlists/get'
    key_param = {'apikey': apikey, "showdate": showdate}
    reply = requests.post(url, params=key_param, timeout=timeout)
    return reply.json()

In [16]:
def parse_setlist(setlist):
    """Flatten one setlist payload into a list of per-song records.

    Parameters
    ----------
    setlist : dict
        Decoded JSON payload; the show is read from
        ``setlist['response']['data'][0]``.

    Returns
    -------
    list of dict or None
        One dict per ``<a>`` element found in the show's ``setlistdata`` HTML,
        with keys ``set``, ``title``, ``url``, ``comment``, ``show_rating``,
        ``location``, ``venue`` and ``show_date``. ``None`` when the payload
        contains no show.
    """
    shows = setlist['response']['data']

    # No show for this date: signal it with None so the caller can skip it
    if not shows:
        return None
    response_data = shows[0]

    location = response_data['location']
    showdate = response_data['showdate']
    rating   = response_data['rating']
    # The venue field is run through the HTML parser so only its text is kept
    venue    = BeautifulSoup(response_data['venue'], "lxml").text
    soup     = BeautifulSoup(response_data['setlistdata'], "lxml")

    output = []

    # Text of the most recent <span>; stays None for anchors that appear
    # before any <span>, instead of raising on an unbound name.
    which_set = None

    for tag in soup.find_all(['span', 'a']):
        if tag.name == 'span':
            which_set = tag.text
        elif tag.name == 'a':
            output.append({
                'set': which_set,
                'title': tag.text,
                # .get() yields None when the attribute is missing
                'url': tag.get('href'),
                'comment': tag.get('title'),
                'show_rating': rating,
                'location': location,
                'venue': venue,
                'show_date': showdate,
            })
    return output

In [21]:
# Probe a single date; the payload displayed in the next cell has an empty 'data' list.
showdate = '1988-02-10'
setlist  = get_setlist(showdate)

In [22]:
# Display the raw payload fetched for the probe date.
setlist

{'error_code': 0, 'error_message': None, 'response': {'count': 0, 'data': []}}

In [23]:
# Run the parser on the empty payload.
# NOTE(review): parse_setlist returns None when 'data' is empty, so the recorded
# `[]` output looks stale (presumably from an earlier definition) - re-run to confirm.
parse_setlist(setlist)

[]

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the show-date table and rewrite its 'Date' column as 'YYYY-MM-DD' strings.
dfl = pd.read_csv("1987_2017_show_dates.csv").assign(
    Date=lambda frame: pd.to_datetime(frame['Date']).dt.strftime('%Y-%m-%d')
)

# Array of the formatted date strings, iterated by the fetch loop.
set_dates = dfl['Date'].values

In [27]:
from time import sleep
# Cap on how many dates are fetched in this run; set to None to walk every date.
MAX_SHOWS = 3

print(len(set_dates))
output = []

# Slicing applies the cap up front, so no sleep is spent on an iteration
# whose only job would be to break out of the loop.
for idx, set_date in enumerate(set_dates[:MAX_SHOWS]):
    # Pause one second before each request.
    sleep(1)

    print((idx, set_date))
    parsed = parse_setlist(get_setlist(set_date))
    # parse_setlist returns None for a date with no show data; skip those.
    if parsed is not None:
        output.extend(parsed)

1777
(0, '1987-01-19')
(1, '1987-01-21')
(2, '1987-02-01')


In [28]:
# Display the accumulated per-song records (one dict per song).
output

[{'comment': None,
  'location': 'Burlington, VT, USA',
  'set': 'Set 1',
  'show_date': '1987-01-21',
  'show_rating': '4.0000',
  'title': 'Wilson',
  'url': 'http://phish.net/song/wilson',
  'venue': "Hunt's"},
 {'comment': None,
  'location': 'Burlington, VT, USA',
  'set': 'Set 2',
  'show_date': '1987-01-21',
  'show_rating': '4.0000',
  'title': 'I Am Hydrogen',
  'url': 'http://phish.net/song/i-am-hydrogen',
  'venue': "Hunt's"},
 {'comment': None,
  'location': 'Burlington, VT, USA',
  'set': 'Set 2',
  'show_date': '1987-01-21',
  'show_rating': '4.0000',
  'title': 'Peaches en Regalia',
  'url': 'http://phish.net/song/peaches-en-regalia',
  'venue': "Hunt's"},
 {'comment': None,
  'location': 'Burlington, VT, USA',
  'set': 'Set 2',
  'show_date': '1987-01-21',
  'show_rating': '4.0000',
  'title': 'Swing Low, Sweet Chariot',
  'url': 'http://phish.net/song/swing-low-sweet-chariot',
  'venue': "Hunt's"},
 {'comment': None,
  'location': 'Burlington, VT, USA',
  'set': 'Set 2

In [29]:
# Build one DataFrame from the list of per-song dicts, then summarise its columns.
df = pd.DataFrame(output)
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 8 columns):
comment        0 non-null object
location       9 non-null object
set            9 non-null object
show_date      9 non-null object
show_rating    9 non-null object
title          9 non-null object
url            9 non-null object
venue          9 non-null object
dtypes: object(8)
memory usage: 656.0+ bytes


In [30]:
# Display the song table.
df

Unnamed: 0,comment,location,set,show_date,show_rating,title,url,venue
0,,"Burlington, VT, USA",Set 1,1987-01-21,4.0,Wilson,http://phish.net/song/wilson,Hunt's
1,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,I Am Hydrogen,http://phish.net/song/i-am-hydrogen,Hunt's
2,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,Peaches en Regalia,http://phish.net/song/peaches-en-regalia,Hunt's
3,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,"Swing Low, Sweet Chariot",http://phish.net/song/swing-low-sweet-chariot,Hunt's
4,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,Sneakin' Sally Through the Alley,http://phish.net/song/sneakin-sally-through-th...,Hunt's
5,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,Makisupa Policeman,http://phish.net/song/makisupa-policeman,Hunt's
6,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,Skin It Back,http://phish.net/song/skin-it-back,Hunt's
7,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,Cities,http://phish.net/song/cities,Hunt's
8,,"Burlington, VT, USA",Set 2,1987-01-21,4.0,Fluffhead,http://phish.net/song/fluffhead,Hunt's


In [26]:
# Convert the show_date strings to datetimes, then preview the first rows.
# NOTE(review): the recorded KeyError carries execution count 26, i.e. it ran before
# df was built in In [29] - presumably df was a different frame then. Re-run in order to confirm.
df['show_date'] = pd.to_datetime(df['show_date'])
df.head()

KeyError: 'show_date'

In [115]:
# Reorder the columns: show-level fields first, then the per-song fields.
df = df.loc[:, ['show_date', 'venue', 'location', 'show_rating', 'set', 'title', 'url', 'comment']]


In [116]:
# Write the song table to CSV without the index column.
# NOTE(review): the file name says 2009-2017, while the dates file is named 1987_2017
# and the rows displayed above are from 1987 - confirm the intended name.
df.to_csv("phish_data_2009-2017.csv", index=False)
