In [1]:
%load_ext autoreload
%autoreload 2

In [13]:
import os
import time
import requests
from dotenv import load_dotenv

import numpy as np
import pandas as pd
import janitor
import tidybear as tb

from tqdm import tqdm

import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [3]:
# Populate the process environment from a local .env file (if any), then read
# the API credentials from it. A missing variable yields None rather than an error.
load_dotenv()
SETLIST_FM_API_KEY = os.environ.get("SETLIST_FM_API_KEY")
SPOTIFY_API_KEY = os.environ.get("SPOTIFY_API_KEY")

# Artist identifier interpolated into the setlist.fm artist URL further down.
FOO_FIGHTERS_MBID = "67f66c07-6e61-4026-ade5-7e782fad3a5d"

In [46]:
# Crawl every page of the artist's setlists on setlist.fm and convert each raw
# setlist dict into a concert object via the project converter.
from lib.setlist_fm.converter import ResponseToConcertConverter

payload = {
    "url": f"https://api.setlist.fm/rest/1.0/artist/{FOO_FIGHTERS_MBID}/setlists",
    "headers": {
        "Accept": "application/json",
        "x-api-key": SETLIST_FM_API_KEY
    },
    "params": {"p": 1},
    # Without a timeout a stalled connection would hang the cell indefinitely.
    "timeout": 30,
}

concerts = []
converter = ResponseToConcertConverter()

page_number = 1
while True:

    # Light progress logging: first page, then every tenth page.
    if page_number == 1 or page_number % 10 == 0:
        print(f"Fetching page {page_number}...")

    payload["params"]["p"] = page_number
    response = requests.get(**payload)

    # Fail loudly on real errors (bad key, rate limiting, server faults) instead
    # of mistaking them for "no more pages" and silently truncating the data.
    # NOTE(review): a 404 is let through on the assumption that it is how the
    # API reports a page past the last one - confirm against the API docs.
    if response.status_code != 404:
        response.raise_for_status()

    data = response.json()

    # A body without a "setlist" key means there is nothing left to fetch.
    if "setlist" not in data:
        break

    concerts.extend(converter.convert(setlist) for setlist in data["setlist"])

    # Pause between requests to stay gentle on the API.
    time.sleep(5)
    page_number += 1

Fetching page 1...
Fetching page 10...
Fetching page 20...
Fetching page 30...
Fetching page 40...
Fetching page 50...
Fetching page 60...
Fetching page 70...


In [47]:
# Sanity check: the page the crawl stopped on, then the number of concerts collected.
print(page_number, len(concerts), sep="\n")

76
1483


In [48]:
import json
import pydantic
from lib.setlist_fm.converter import Concert

class ConcertList(pydantic.BaseModel):
    """Wrapper model holding all concerts so they serialise as one JSON object."""

    concerts: list[Concert]

# Let pydantic produce the JSON text, then re-parse it so that json.dump is
# handed plain dicts/lists rather than model instances.
json_data = json.loads(ConcertList(concerts=concerts).json())

# The context manager closes the file on exit; no explicit close() is needed.
with open("foo_fighter_setlists.json", "w") as outfile:
    json.dump(json_data, outfile)

___

In [None]:
import json
import pandas as pd

from lib.setlist_fm.converter import ConcertList

In [None]:
# Reload the cached setlists from disk. The context manager guarantees the file
# handle is closed instead of being left to the garbage collector.
with open("foo_fighter_setlists.json") as infile:
    concerts = json.load(infile)

# Validate the raw dict back into the pydantic container model.
concert_list = ConcertList(**concerts)

In [None]:
# Flatten the nested concert models into three row lists: one row per concert,
# one per venue occurrence, and one per song played.
concert_items, venue_items, song_items = [], [], []

for concert in concert_list.concerts:
    venue = concert.venue

    concert_items.append({
        **concert.dict(exclude={"venue", "setlist"}),
        # Guard the dereference: a concert without a venue gets a null venue_id
        # instead of raising AttributeError (the venue check below already
        # allows for a missing venue).
        "venue_id": venue.id if venue else None,
    })

    if venue:
        venue_items.append(venue.dict())

    if concert.setlist:
        # song_number is the 1-based position of the song within the concert.
        song_items.extend(
            {
                "concert_id": concert.id,
                "song_number": position,
                **song.dict(),
            }
            for position, song in enumerate(concert.setlist, 1)
        )

concert_df = pd.DataFrame(concert_items)
# The same venue appears once per concert held there, so collapse repeats.
venue_df = pd.DataFrame(venue_items).drop_duplicates().reset_index(drop=True)
songs_df = pd.DataFrame(song_items)

In [None]:
# Write each table to its CSV file under data/, omitting the index column.
for frame, csv_path in (
    (concert_df, "data/concerts.csv"),
    (venue_df, "data/venues.csv"),
    (songs_df, "data/songs.csv"),
):
    frame.to_csv(csv_path, index=False)

___

In [28]:
import re
import requests
from bs4 import BeautifulSoup

import pandas as pd

In [3]:
# Download the songfacts listing page for the band.
# The timeout keeps a stalled connection from hanging the cell, and
# raise_for_status stops an error page from being parsed as if it were data.
response = requests.get("https://www.songfacts.com/songs/foo-fighters", timeout=30)
response.raise_for_status()
content = response.content

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning).
soup = BeautifulSoup(content, "html.parser")

In [39]:
# Extract "<song name> (<year>)" entries from the song list on the page and
# save them as a two-column table.
song_list = soup.find("ul", attrs={"class": "browse-list-orange"})
if song_list is None:
    # Clearer than the AttributeError a None result would otherwise trigger.
    raise ValueError("Could not find the 'browse-list-orange' song list in the page")

list_items = song_list.find_all("li")
html_text = [item.text for item in list_items]

# Group 1: everything before the final " (YYYY)"; group 2: the four-digit year.
regex = r"(.*)\s\(([0-9]{4})\)"
matches = [re.search(regex, text) for text in html_text]

# Entries without a "(YYYY)" suffix cannot be parsed; skip them rather than
# crash on the first one, but report how many were dropped.
unparsed = [text for text, match in zip(html_text, matches) if match is None]
if unparsed:
    print(f"Skipped {len(unparsed)} list item(s) without a release year")

releases = pd.DataFrame(
    [match.groups() for match in matches if match is not None],
    columns=["name", "release_year"]
)

releases["name"] = releases["name"].str.strip()
releases["release_year"] = releases["release_year"].astype(int)
releases.to_csv("data/releases.csv", index=False)

In [37]:
# Bare expression: renders the releases table as this cell's output.
releases

Unnamed: 0,name,release_year
0,Ain't It the Life,1999
1,All My Life,2002
2,Arlandria,2011
3,Ballad Of The Beaconsfield Miners,2007
4,Best Of You,2005
...,...,...
77,What Did I Do?/God As My Witness,2014
78,Wheels,2009
79,White Limo,2011
80,Wind Up,1997
