In [3]:
from urllib import request
import bs4 as bs
import json

In [4]:
WIKIPEDIA_PAGE = "https://en.wikipedia.org/wiki/List_of_Warped_Tour_lineups_by_year"
# 0-based position of the lineup table among the page's <table> elements.
# NOTE(review): this depends on the page layout at scrape time -- confirm it
# still points at the lineup table if the article has been restructured.
LINEUP_TABLE_INDEX = 2

# Use the response as a context manager so the HTTP connection is closed as
# soon as the body has been read, instead of being left open.
with request.urlopen(WIKIPEDIA_PAGE) as response:
    page = response.read()

soup = bs.BeautifulSoup(page, 'lxml')
tables = soup.find_all('table')
if len(tables) <= LINEUP_TABLE_INDEX:
    # Same exception type as the bare index would raise, but with context.
    raise IndexError(
        "expected at least {} tables on {}, found {}".format(
            LINEUP_TABLE_INDEX + 1, WIKIPEDIA_PAGE, len(tables)
        )
    )
table = tables[LINEUP_TABLE_INDEX]
rows = table.find_all('tr')

In [5]:
# Year represented by the first year column (cells[1]); each following column
# is one year later.
FIRST_TOUR_YEAR = 1995

# One dict per table row: {"name": str, "url": str, "years": [int, ...]}.
bands = []

# rows[0] is skipped -- presumably the header row of the lineup table.
for row in rows[1:]:
    cells = row.find_all('td')
    if not cells:
        # Rows without any <td> cells carry no band data; indexing cells[0]
        # on them would raise IndexError.
        continue

    name_cell = cells[0]
    link = name_cell.a
    band = {
        # Strip whitespace/newlines that surround the name in the markup.
        "name": name_cell.text.strip(),
        # Empty string when the band has no link (or the anchor has no href).
        "url": link.get("href", "") if link else "",
        # A year is recorded when that year's cell contains an <img>.
        "years": [
            FIRST_TOUR_YEAR + offset
            for offset, cell in enumerate(cells[1:])
            if cell.img
        ],
    }
    bands.append(band)
print(json.dumps(bands, indent=4))

with open("src/data/performers.json", "w") as out:
    json.dump(bands, out, indent=4)

[
    {
        "name": "3OH!3",
        "years": [
            2016
        ],
        "url": "/wiki/3OH!3"
    },
    {
        "name": "'68",
        "years": [
            2015
        ],
        "url": "/wiki/%2768_(band)"
    },
    {
        "name": "5606",
        "years": [
            2010
        ],
        "url": ""
    },
    {
        "name": "A+ Dropouts",
        "years": [
            2015
        ],
        "url": ""
    },
    {
        "name": "Aaron West and the Roaring Twenties",
        "years": [
            2014,
            2015
        ],
        "url": "/wiki/Aaron_West_and_the_Roaring_Twenties"
    },
    {
        "name": "Abriel",
        "years": [
            2009
        ],
        "url": "/wiki/Abriel"
    },
    {
        "name": "The Acacia Strain",
        "years": [
            2011,
            2017
        ],
        "url": "/wiki/The_Acacia_Strain"
    },
    {
        "name": "The Academy Is...",
        "years": [
            2006,
          

## Ideas
*  Focuses on recycling (reusing) a single circle for each band
    *  Hovering over a circle at any point in time will give you the band's name, their Wikipedia description and a picture of them
        *  Use d3-annotation for annotating on hover
*  Intro is an outline of the US with a circle for each band along the path of the SVG
*  On scroll, they move to the center of the page to fill SVGs for "23" and "1014", the number of years Warped Tour has run (1995–2017) and the number of bands that have performed on Warped Tour, respectively
*  Upon more scrolling, they shrink to 1px-diameter circles to form the line of a line plot of the number of artists that have performed every year for the past 23 years
*  Next is a swarm plot/Nadieh's Royal Family Datasketch/Horizontal Bar Chart made from a force graph where the X axis is how many years these bands have performed on Warped Tour
*  The final presentation is a large dropdown menu of years from 1995–2017, centered at the top of the page, with the band circles sitting at the top of the page in a uniform row
    *  On selection of a year, the circles move below the dropdown menu, transition in size relative to their # of Spotify followers, and circles with more followers get less gravity and move to the top
        *  Play around with using either # of Spotify followers or Spotify Popularity Score, both available from the Spotify Web API
    *  Tooltip: 1/3 of the screen is available to show information, with an SVG mask/crop of the band's name over their picture


In [6]:
# Map each tour year to the names of the bands that performed that year.
# 1995-2017 is always covered; the range is widened if the scraped data holds
# years outside it, so a newer (or older) column cannot raise KeyError below.
observed_years = [year for band in bands for year in band["years"]]
first_year = min([1995] + observed_years)
last_year = max([2017] + observed_years)
years = {year: [] for year in range(first_year, last_year + 1)}

# Plain loops instead of list(map(lambda ...)) used only for its side effect.
for band in bands:
    for year in band["years"]:
        years[year].append(band["name"])

for year, performers in years.items():
    print("{}: {} performers".format(year, len(performers)))

with open("src/data/performers_per_year.json", "w") as out:
    json.dump(years, out, indent=4)

1995: 16 performers
1996: 19 performers
1997: 23 performers
1998: 41 performers
1999: 24 performers
2000: 35 performers
2001: 37 performers
2002: 63 performers
2003: 54 performers
2004: 73 performers
2005: 110 performers
2006: 93 performers
2007: 127 performers
2008: 166 performers
2009: 70 performers
2010: 73 performers
2011: 115 performers
2012: 112 performers
2013: 116 performers
2014: 107 performers
2015: 108 performers
2016: 90 performers
2017: 91 performers


In [8]:
# Group band names by the length of each band's "years" list:
# {number of years performed: [band name, ...]}.
years_performed = {}

for band in bands:
    # setdefault creates the bucket the first time a given count is seen.
    years_performed.setdefault(len(band["years"]), []).append(band["name"])

for year_count, names in years_performed.items():
    print("{} bands performed for {} years".format(len(names), year_count))

# json.dump writes the integer keys as strings in the output file.
with open("src/data/years_performed.json", "w") as out:
    json.dump(years_performed, out, indent=4)
    

651 bands performed for 1 years
184 bands performed for 2 years
82 bands performed for 3 years
48 bands performed for 4 years
20 bands performed for 5 years
12 bands performed for 6 years
10 bands performed for 7 years
3 bands performed for 8 years
1 bands performed for 9 years
2 bands performed for 10 years
1 bands performed for 11 years
