In [1]:
# Uncomment to install required packages
# !pip install requests pandas matplotlib


# Phish Venue Capacity Analysis (1983–Present)
Using the Phish.net API

In [18]:
import requests
import pandas as pd
import matplotlib.pyplot as plt

# Replace this with your actual API key
API_KEY = 'BC742244D05058B72A91'
BASE_URL = 'https://api.phish.net/v5/'


In [19]:
def fetch_shows(api_key):
    endpoint = f'{BASE_URL}shows.json?apikey={api_key}'
    response = requests.get(endpoint)
    if response.status_code == 200:
        return response.json()['data']
    else:
        raise Exception(f'Error fetching shows: {response.status_code}')

shows = fetch_shows(API_KEY)
df_shows = pd.DataFrame(shows)
df_shows.head()


Unnamed: 0,showid,showyear,showmonth,showday,showdate,permalink,exclude_from_stats,venueid,setlist_notes,venue,city,state,country,artistid,artist_name,tourid,tour_name,created_at,updated_at
0,1249948108,2000,9,17,2000-09-17,https://phish.net/setlists/phish-september-17-...,0,9,<p>Mike teased Sundown in Bathtub Gin. Trey te...,Merriweather Post Pavilion,Columbia,MD,USA,1,Phish,50.0,2000 Fall Tour,,2022-09-17 10:07:16
1,1249948445,1985,3,4,1985-03-04,https://phish.net/setlists/phish-march-04-1985...,0,10,This show was an African Relief benefit for OX...,Hunt's,Burlington,VT,USA,1,Phish,3.0,1985 Tour,,2024-11-05 21:13:58
2,1250019273,1998,7,2,1998-07-02,https://phish.net/setlists/phish-july-02-1998-...,0,11,<p>This show marked the debuts of Meat and Fik...,The Grey Hall,Copenhagen,,Denmark,1,Phish,40.0,1998 Summer European Tour,,2025-02-06 09:34:31
3,1250024745,1998,7,10,1998-07-10,https://phish.net/setlists/phish-july-10-1998-...,0,12,Divided Sky and Mike&rsquo;s Song were aborted...,Zeleste,Barcelona,,Spain,1,Phish,40.0,1998 Summer European Tour,,2025-02-28 09:28:50
4,1250387629,1990,1,20,1990-01-20,https://phish.net/setlists/phish-january-20-19...,0,15,"This show included the debut of Bouncing, the ...","Webster Hall, Dartmouth College",Hanover,NH,USA,1,Phish,60.0,1990 Tour,,2023-01-17 15:05:28


In [20]:
def fetch_venues(api_key):
    endpoint = f'{BASE_URL}venues.json?apikey={api_key}'
    response = requests.get(endpoint)
    if response.status_code == 200:
        return response.json()['data']
    else:
        raise Exception(f'Error fetching venues: {response.status_code}')

venues = fetch_venues(API_KEY)
df_venues = pd.DataFrame(venues)
df_venues.head()

print("Show columns:", df_shows.columns.tolist())
print("Venue columns:", df_venues.columns.tolist())



Show columns: ['showid', 'showyear', 'showmonth', 'showday', 'showdate', 'permalink', 'exclude_from_stats', 'venueid', 'setlist_notes', 'venue', 'city', 'state', 'country', 'artistid', 'artist_name', 'tourid', 'tour_name', 'created_at', 'updated_at']
Venue columns: ['venueid', 'venuename', 'city', 'state', 'country', 'venuenotes', 'alias', 'short_name']


In [25]:
df_combined = pd.merge(df_shows, df_venues, on='venueid', suffixes=('_show', '_venue'))
print(df_combined.columns.tolist())
df_combined.sample(5)  # See a few actual records

# Convert dates and extract year
df_combined['showdate'] = pd.to_datetime(df_combined['showdate'], errors='coerce')
df_combined['year'] = df_combined['showdate'].dt.year

# Drop missing or non-numeric capacities
df_combined = df_combined[pd.to_numeric(df_combined['attendance'], errors='coerce').notnull()]
df_combined['attendance'] = df_combined['attendance'].astype(float)

df_combined[['showdate', 'venue', 'location', 'attendance', 'year']].head()


['showid', 'showyear', 'showmonth', 'showday', 'showdate', 'permalink', 'exclude_from_stats', 'venueid', 'setlist_notes', 'venue', 'city_show', 'state_show', 'country_show', 'artistid', 'artist_name', 'tourid', 'tour_name', 'created_at', 'updated_at', 'venuename', 'city_venue', 'state_venue', 'country_venue', 'venuenotes', 'alias', 'short_name']


KeyError: 'attendance'

In [None]:
df_avg_capacity = df_combined.groupby('year')['capacity'].mean().reset_index()
df_avg_capacity.head()


In [None]:
plt.figure(figsize=(12, 6))
plt.plot(df_avg_capacity['year'], df_avg_capacity['capacity'], marker='o')
plt.title('Average Venue Capacity for Phish Performances (1983–Present)')
plt.xlabel('Year')
plt.ylabel('Average Venue Capacity')
plt.grid(True)
plt.tight_layout()
plt.show()


In [17]:
import requests

API_KEY = 'BC742244D05058B72A91'
BASE_URL = 'https://api.phish.net/v5/'

# Try show endpoint
url = f"{BASE_URL}shows.json?apikey={API_KEY}"
resp = requests.get(url)
data = resp.json()
print("Number of shows returned:", len(data.get('data', [])))
print("First show preview:", data.get('data', [])[0])

Number of shows returned: 4353
First show preview: {'showid': 1249948108, 'showyear': '2000', 'showmonth': 9, 'showday': 17, 'showdate': '2000-09-17', 'permalink': 'https://phish.net/setlists/phish-september-17-2000-merriweather-post-pavilion-columbia-md-usa.html', 'exclude_from_stats': 0, 'venueid': 9, 'setlist_notes': '<p>Mike teased Sundown in Bathtub Gin. Trey teased San-Ho-Zay in Chalk Dust Torture.</p>\r\n', 'venue': 'Merriweather Post Pavilion', 'city': 'Columbia', 'state': 'MD', 'country': 'USA', 'artistid': 1, 'artist_name': 'Phish', 'tourid': 50, 'tour_name': '2000 Fall Tour', 'created_at': None, 'updated_at': '2022-09-17 10:07:16'}
