# DSE 241 - Final Project
# Rock & Fucking Roll

## Requirements

In [141]:
import requests
import pandas as pd
import numpy as np

## Functions

In [142]:
def read_api_key(file_name):
    f = open(file_name, 'r')
    key = f.read()
    f.close()
    return key

def getgenre_artist(string):
    ## lower case
    ## replace " " with "_"
    artist = string.replace(' ','_').lower()
    return artist

def bands_artist(string):
    ## url-ify artist names
    ## replaces " " with "%20"
    artist = string.replace(' ',r'%20')
    return artist
    
    
def parse_events(event_json):
    ### defined to help parse the JSON of individual events returned by Bandintown API ###
    ### this will pull out the keys and values for the event object ###
    ### the end result is a list that can be entered as a row to a Pandas dataframe ###
    
    ### TODO: determine if the output is better in Pandas or Numpy ###
    ### TODO: define event_json object ###
    
    events_data = []
    num_of_events = len(event_json) # event json has an entry for each event ## each event has lots of other nested data

    keys_of_interest = [
            'datetime' # date-time of the event
            , 'title'  # name of the event  ## may need to create a condition to check if exists and set to null if missing
            , 'lineup' # list of strings containing names of artists  ## may leave as a nested list in data frame to avoid sparseness  ### list may be ordered in terms of headliners (??)
            , 'festival_start_date' # date festival starts  ### may be a useful indicator that the artist is performing at a festival-event, need to test this for reliability
            , 'festival_end_date'   # date festival ends, will differ from start date on multi-day events (may not exist on single day events)
            , 'venue'  # this is a nested dictionary, will need to tease this one out to flatten the data
    ]

    venue_keys_of_interest = [
             'city'        # city name, string
            , 'region'     # state-level, string
            , 'country'    # country name, string
            , 'latitude'   # coordinate data, float
            , 'longitude'  # coordinate data, float
            , 'location'   # arbitrary string describing geolocation  ## consider not including
            , 'name'       # arbitrary string describing venue name, could be misleading since some venues are at locations, but given festival name  ## consider not including
    ]

    for i in range(num_of_events):
        event = event_json[i]
        event_list = []

        for key in keys_of_interest:
            value = event.get(key)
            if key == 'venue':
                for venue_key in venue_keys_of_interest:
                    venue_value = value.get(venue_key)
                    event_list.append(venue_value)
            else:
                event_list.append(value)

        events_data.append(event_list)


    cols = keys_of_interest[:-1] + venue_keys_of_interest
    events_df = pd.DataFrame(events_data, columns=cols)
    events_df['artist'] = artist
    return events_df

def get_locations(df, latitude_column='latitude', longitude_column='longitude'):
    new_df = df.copy()
    geolocator = Nominatim(user_agent="rg_agent")
    reverse_geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)
    for index, row in new_df.iterrows():
        lookup = reverse_geocode((row[latitude_column],row[longitude_column]), language='en')
        try:
            new_df.loc[index,'city'] = lookup.raw['address']['city']
        except:
            continue
        try:
            new_df.loc[index,'region'] = lookup.raw['address']['state']
        except:
            continue
        try:
            new_df.loc[index,'country'] = lookup.raw['address']['country']
        except:
            continue

    return new_df

## Key Inputs

In [143]:
artist = 'Tame Impala'

bands_key = read_api_key('bands_api_key.txt') ## shhhh... its a secret

analysis_level = 1
gg_artist = getgenre_artist(artist)
bands_artist = bands_artist(artist)
getgenre_api_url = r'https://api.getgenre.com/search?artist_name={}&analysis={}'.format(gg_artist, analysis_level)
bands_api_url = r'https://rest.bandsintown.com/artists/{}/events/?app_id={}'.format(bands_artist, bands_key)
bands_api_url_past = r'https://rest.bandsintown.com/artists/{}/events/?app_id={}&date=past'.format(bands_artist, bands_key)

In [144]:
gg_artist

'tame_impala'

In [145]:
bands_artist

'Tame%20Impala'

## Working Code
### Let's Go

In [146]:
r = requests.get(bands_api_url_past)

In [147]:
event_json = r.json()

In [148]:
test_df = parse_events(event_json)
print(test_df.shape)
test_df.head()

(317, 13)


Unnamed: 0,datetime,title,lineup,festival_start_date,festival_end_date,city,region,country,latitude,longitude,location,name,artist
0,2013-04-14T10:08:00,,"[Tame Impala, The Airborne Toxic Event, Eric P...",,,Indio,CA,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala
1,2013-04-19T19:00:00,,"[Tame Impala, Mona, Alt-J, Social Distortion, ...",,,Indio,CA,United States,33.6784492,-116.237155,,Empire Polo Club,Tame Impala
2,2013-04-21T10:26:00,,"[Tame Impala, Vampire Weekend, Ladies Night, M...",,,Indio,CA,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala
3,2013-04-26T20:00:00,,"[Tame Impala, Midnight Juggernauts]",,,West Melbourne,,Australia,-37.81126,144.946442,,Festival Hall Melbourne,Tame Impala
4,2013-05-02T19:30:00,,[Tame Impala],,,Sydney,,Australia,-33.889832,151.223348,,Hordern Pavilion,Tame Impala


In [149]:
r_gg = requests.get(getgenre_api_url)

In [150]:
genre_json = r_gg.json()
top_gg = genre_json['analysis']['top_genres']

In [151]:
test_df['artist'] = artist
test_df['artist_topgenres'] = [top_gg for _ in range(len(test_df))]
test_df.head()

Unnamed: 0,datetime,title,lineup,festival_start_date,festival_end_date,city,region,country,latitude,longitude,location,name,artist,artist_topgenres
0,2013-04-14T10:08:00,,"[Tame Impala, The Airborne Toxic Event, Eric P...",,,Indio,CA,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche..."
1,2013-04-19T19:00:00,,"[Tame Impala, Mona, Alt-J, Social Distortion, ...",,,Indio,CA,United States,33.6784492,-116.237155,,Empire Polo Club,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche..."
2,2013-04-21T10:26:00,,"[Tame Impala, Vampire Weekend, Ladies Night, M...",,,Indio,CA,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche..."
3,2013-04-26T20:00:00,,"[Tame Impala, Midnight Juggernauts]",,,West Melbourne,,Australia,-37.81126,144.946442,,Festival Hall Melbourne,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche..."
4,2013-05-02T19:30:00,,[Tame Impala],,,Sydney,,Australia,-33.889832,151.223348,,Hordern Pavilion,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche..."


In [152]:
test_df[['country','region','city','location']].value_counts()

country                   region  city         location
United States             CA      Los Angeles              10
                          NY      New York                 10
Mexico                            Monterrey                 8
United Kingdom                    London                    8
United States             CO      Denver                    7
                                                           ..
United Kingdom                    Edinburgh                 1
                                  Downend                   1
                                  Blackpool                 1
                                  Birmingham                1
United States of America  IL      Chicago                   1
Length: 153, dtype: int64

In [153]:
test_df['lineup'].str.len().value_counts()

1      153
2       80
3       18
7        6
0        4
6        3
56       3
23       3
17       3
4        3
19       2
22       2
58       2
25       2
21       2
88       2
36       2
26       2
5        1
18       1
41       1
52       1
24       1
62       1
86       1
8        1
59       1
38       1
12       1
39       1
53       1
51       1
95       1
14       1
44       1
77       1
30       1
48       1
65       1
32       1
123      1
42       1
49       1
Name: lineup, dtype: int64

In [154]:
test_df['lineup_size'] = test_df['lineup'].str.len()
test_df['festival_flag'] = np.where(test_df['lineup'].str.len() > 5, 1, 0)
test_df.head()

Unnamed: 0,datetime,title,lineup,festival_start_date,festival_end_date,city,region,country,latitude,longitude,location,name,artist,artist_topgenres,lineup_size,festival_flag
0,2013-04-14T10:08:00,,"[Tame Impala, The Airborne Toxic Event, Eric P...",,,Indio,CA,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",38,1
1,2013-04-19T19:00:00,,"[Tame Impala, Mona, Alt-J, Social Distortion, ...",,,Indio,CA,United States,33.6784492,-116.237155,,Empire Polo Club,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",95,1
2,2013-04-21T10:26:00,,"[Tame Impala, Vampire Weekend, Ladies Night, M...",,,Indio,CA,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",42,1
3,2013-04-26T20:00:00,,"[Tame Impala, Midnight Juggernauts]",,,West Melbourne,,Australia,-37.81126,144.946442,,Festival Hall Melbourne,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0
4,2013-05-02T19:30:00,,[Tame Impala],,,Sydney,,Australia,-33.889832,151.223348,,Hordern Pavilion,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",1,0


In [155]:
test_df[['country','region','city','location']].value_counts()

country                   region  city         location
United States             CA      Los Angeles              10
                          NY      New York                 10
Mexico                            Monterrey                 8
United Kingdom                    London                    8
United States             CO      Denver                    7
                                                           ..
United Kingdom                    Edinburgh                 1
                                  Downend                   1
                                  Blackpool                 1
                                  Birmingham                1
United States of America  IL      Chicago                   1
Length: 153, dtype: int64

In [156]:
test_df = get_locations(test_df)

In [157]:
test_df[['country','region','city','location']].value_counts()

country         region      city          location
United States   New York    New York                  10
United Kingdom  England     London                     9
United States   California  Los Angeles                8
Australia       Victoria    Melbourne                  8
United States   Colorado    Denver                     7
                                                      ..
Japan                       Higashiosaka               1
Italy           Veneto      Verona                     1
                Lombardy    Milan                      1
                Lazio       Rome                       1
United States   Wisconsin   Milwaukee                  1
Length: 153, dtype: int64

## Messy Workspace -- May not Run

In [158]:
import reverse_geocode as rg
coordinates = (-37.81, 144.96), (31.76, 35.21)
loc = rg.search(coordinates)

In [159]:
from functools import partial
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="rg_agent")

geocode = partial(geolocator.geocode, language="es")
print(geocode("london"))

print(geocode("paris"))

print(geocode("paris", language="en"))


reverse = partial(geolocator.reverse, language="es")
print(reverse("52.509669, 13.376294"))


Londres, Greater London, Inglaterra, SW1A 2DX, Reino Unido
París, Isla de Francia, Francia metropolitana, Francia
Paris, Ile-de-France, Metropolitan France, France
Potsdamer Platz, Tiergarten, Mitte, Berlín, 10785, Alemania


In [160]:
from geopy.extra.rate_limiter import RateLimiter
gr = RateLimiter(geolocator.reverse, min_delay_seconds=1)

x = gr((-37.81, 144.96),)
dir(x)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_address',
 '_point',
 '_raw',
 '_tuple',
 'address',
 'altitude',
 'latitude',
 'longitude',
 'point',
 'raw']

In [161]:
x

Location(Argus Centre, 300, La Trobe Street, Melbourne, City of Melbourne, Victoria, 3000, Australia, (-37.8101642, 144.96041012256524, 0.0))

In [162]:
x.raw['address']['city']

'Melbourne'

In [163]:
x_df = test_df[:10].copy()
tt = get_location(x_df, 'latitude', 'longitude')
tt.head(10)

Unnamed: 0,datetime,title,lineup,festival_start_date,festival_end_date,city,region,country,latitude,longitude,location,name,artist,artist_topgenres,lineup_size,festival_flag
0,2013-04-14T10:08:00,,"[Tame Impala, The Airborne Toxic Event, Eric P...",,,Indio,California,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",38,1
1,2013-04-19T19:00:00,,"[Tame Impala, Mona, Alt-J, Social Distortion, ...",,,Indio,California,United States,33.6784492,-116.237155,,Empire Polo Club,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",95,1
2,2013-04-21T10:26:00,,"[Tame Impala, Vampire Weekend, Ladies Night, M...",,,Indio,California,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",42,1
3,2013-04-26T20:00:00,,"[Tame Impala, Midnight Juggernauts]",,,Melbourne,Victoria,Australia,-37.81126,144.946442,,Festival Hall Melbourne,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0
4,2013-05-02T19:30:00,,[Tame Impala],,,Sydney,New South Wales,Australia,-33.889832,151.223348,,Hordern Pavilion,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",1,0
5,2013-05-08T20:00:00,,"[Tame Impala, Rebel FM Gig Pig]",,,Brisbane City,Queensland,Australia,-27.476351,153.017471,,Brisbane Convention & Exhibition Centre,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0
6,2013-05-18T19:00:00,,"[Tame Impala, Midnight Juggernauts]",,,Upper Swan,Western Australia,Australia,-31.7900054,116.0225478,,Belvoir Amphitheatre,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0
7,2013-05-22T17:00:00,,"[Tame Impala, Fred i Son, Extraperlo, Dead Ske...",,,Barcelona,Catalonia,Spain,41.413836,2.217205,,Parc del Fòrum,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",123,1
8,2013-05-24T15:00:00,,"[Tame Impala, Bloc Party, Grimes, Shad, Danny ...",,,George,Washington,United States,47.0791667,-119.8547222,,Gorge Amphitheatre,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",32,1
9,2013-05-26T21:00:00,,"[Tame Impala, Jonathan Wilson]",,,Portland,Oregon,United States,45.522928,-122.684512,,Crystal Ballroom,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0


In [164]:
test_df.head(10)

Unnamed: 0,datetime,title,lineup,festival_start_date,festival_end_date,city,region,country,latitude,longitude,location,name,artist,artist_topgenres,lineup_size,festival_flag
0,2013-04-14T10:08:00,,"[Tame Impala, The Airborne Toxic Event, Eric P...",,,Indio,California,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",38,1
1,2013-04-19T19:00:00,,"[Tame Impala, Mona, Alt-J, Social Distortion, ...",,,Indio,California,United States,33.6784492,-116.237155,,Empire Polo Club,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",95,1
2,2013-04-21T10:26:00,,"[Tame Impala, Vampire Weekend, Ladies Night, M...",,,Indio,California,United States,33.7205556,-116.2147222,,Empire Polo Field,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",42,1
3,2013-04-26T20:00:00,,"[Tame Impala, Midnight Juggernauts]",,,Melbourne,Victoria,Australia,-37.81126,144.946442,,Festival Hall Melbourne,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0
4,2013-05-02T19:30:00,,[Tame Impala],,,Sydney,New South Wales,Australia,-33.889832,151.223348,,Hordern Pavilion,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",1,0
5,2013-05-08T20:00:00,,"[Tame Impala, Rebel FM Gig Pig]",,,Brisbane City,Queensland,Australia,-27.476351,153.017471,,Brisbane Convention & Exhibition Centre,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0
6,2013-05-18T19:00:00,,"[Tame Impala, Midnight Juggernauts]",,,Upper Swan,,Australia,-31.7900054,116.0225478,,Belvoir Amphitheatre,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0
7,2013-05-22T17:00:00,,"[Tame Impala, Fred i Son, Extraperlo, Dead Ske...",,,Barcelona,Catalonia,Spain,41.413836,2.217205,,Parc del Fòrum,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",123,1
8,2013-05-24T15:00:00,,"[Tame Impala, Bloc Party, Grimes, Shad, Danny ...",,,George,WA,United States,47.0791667,-119.8547222,,Gorge Amphitheatre,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",32,1
9,2013-05-26T21:00:00,,"[Tame Impala, Jonathan Wilson]",,,Portland,Oregon,United States,45.522928,-122.684512,,Crystal Ballroom,Tame Impala,"[indie rock, synthpop, psychedelic pop, psyche...",2,0


In [165]:
test_df['city'] = test_df[['latitude', 'longitude']]

ValueError: Columns must be same length as key

In [None]:
from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)
df['location'] = df['name'].apply(geocode)

df['point'] = df['location'].apply(lambda loc: tuple(loc.point) if loc else None)

In [None]:
geolocator.reverse(-37.81, 144.96)

In [None]:
import plotly.express as px
#df = px.data.gapminder().query("year == 2007")
fig = px.scatter_geo(test_df,
                     lat='latitude',
                     lon='longitude',
                     color="festival_flag", # which column to use to set the color of markers
                     hover_name="city", # column added to hover information
                     #size="lineup_size", # size of markers
                     projection="natural earth")
fig.update_geos(
    visible=False,
    resolution=50,
    #scope="north america",
    showcountries=True, countrycolor="Black"
    #,showsubunits=True, subunitcolor="Blue"
)
fig.show()

In [None]:
import plotly.graph_objects as go

fig = go.Figure(go.Scattergeo(
    test_df,
    lat='latitude',
    lon='longitude',
    color="festival_flag", # which column to use to set the color of markers
    hover_name="city", # column added to hover information
    #size="lineup_size", # size of markers
    projection="natural earth")
)
fig.update_geos(
    visible=False, resolution=110, scope="usa",
    showcountries=True, countrycolor="Black",
    showsubunits=True, subunitcolor="lightGrey"
)
fig.update_layout(height=300, margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
import pandas as pd
us_cities = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/us-cities-top-1k.csv")
us_cities.head()

import plotly.express as px

fig = px.scatter_mapbox(us_cities, lat="lat", lon="lon", hover_name="City", hover_data=["State", "Population"],
                        color_discrete_sequence=["fuchsia"], zoom=3, height=300)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
import plotly.express as px

fig = px.scatter_mapbox(test_df, lat="latitude", lon="longitude", hover_name="location",
                        color_discrete_sequence=["fuchsia"], zoom=3, height=300)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
ej = r.json()
ej[2]

In [None]:
len(event_json)

In [None]:
#r.text

In [None]:
len(ej[0])

In [None]:
ej[0].keys()

In [None]:
list(ej[0].keys())

In [None]:
type(ej[0])

In [None]:
ej[0].values()

In [None]:
v = ej[0].get('venue')
print(v)
type(v)

In [None]:
v.get('country')

In [None]:
for key in list(ej[0].keys()):
    print(key, " : \n", ej[0].get(key), "\n")