# DSE 241 - Final Project
# Rock & Fucking Roll

## Requirements

In [1]:
import requests
import pandas as pd
import numpy as np

## Functions

In [2]:
def read_api_key(file_name):
    f = open(file_name, 'r')
    key = f.read()
    f.close()
    return key

def getgenre_artist(string):
    ## lower case
    ## replace " " with "_"
    artist = string.replace(' ','_').lower()
    return artist

def bands_artist(string):
    ## url-ify artist names
    ## replaces " " with "%20"
    artist = string.replace(' ',r'%20')
    return artist
    
    
def parse_events(event_json):
    ### defined to help parse the JSON of individual events returned by Bandintown API ###
    ### this will pull out the keys and values for the event object ###
    ### the end result is a list that can be entered as a row to a Pandas dataframe ###
    
    ### TODO: determine if the output is better in Pandas or Numpy ###
    ### TODO: define event_json object ###
    
    events_data = []
    num_of_events = len(event_json) # event json has an entry for each event ## each event has lots of other nested data

    keys_of_interest = [
            'datetime' # date-time of the event
            , 'title'  # name of the event  ## may need to create a condition to check if exists and set to null if missing
            , 'lineup' # list of strings containing names of artists  ## may leave as a nested list in data frame to avoid sparseness  ### list may be ordered in terms of headliners (??)
            , 'festival_start_date' # date festival starts  ### may be a useful indicator that the artist is performing at a festival-event, need to test this for reliability
            , 'festival_end_date'   # date festival ends, will differ from start date on multi-day events (may not exist on single day events)
            , 'venue'  # this is a nested dictionary, will need to tease this one out to flatten the data
    ]

    venue_keys_of_interest = [
             'city'        # city name, string
            , 'region'     # state-level, string
            , 'country'    # country name, string
            , 'latitude'   # coordinate data, float
            , 'longitude'  # coordinate data, float
            , 'location'   # arbitrary string describing geolocation  ## consider not including
            , 'name'       # arbitrary string describing venue name, could be misleading since some venues are at locations, but given festival name  ## consider not including
    ]

    for i in range(num_of_events):
        event = event_json[i]
        event_list = []

        for key in keys_of_interest:
            value = event.get(key)
            if key == 'venue':
                for venue_key in venue_keys_of_interest:
                    venue_value = value.get(venue_key)
                    event_list.append(venue_value)
            else:
                event_list.append(value)

        events_data.append(event_list)


    cols = keys_of_interest[:-1] + venue_keys_of_interest
    events_df = pd.DataFrame(events_data, columns=cols)
    events_df['artist'] = artist
    return events_df

def get_locations(df, latitude_column='latitude', longitude_column='longitude'):
    new_df = df.copy()
    geolocator = Nominatim(user_agent="rg_agent")
    reverse_geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)
    for index, row in new_df.iterrows():
        lookup = reverse_geocode((row[latitude_column],row[longitude_column]), language='en')
        try:
            new_df.loc[index,'city'] = lookup.raw['address']['city']
        except:
            continue
        try:
            new_df.loc[index,'region'] = lookup.raw['address']['state']
        except:
            continue
        try:
            new_df.loc[index,'country'] = lookup.raw['address']['country']
        except:
            continue

    return new_df

## Key Inputs

In [3]:
artist = 'Tame Impala'
artist = 'Kerala Dust'

bands_key = read_api_key('bands_api_key.txt') ## shhhh... its a secret

analysis_level = 1
gg_artist = getgenre_artist(artist)
bands_artist = bands_artist(artist)
getgenre_api_url = r'https://api.getgenre.com/search?artist_name={}&analysis={}'.format(gg_artist, analysis_level)
bands_api_url = r'https://rest.bandsintown.com/artists/{}/events/?app_id={}'.format(bands_artist, bands_key)
bands_api_url_past = r'https://rest.bandsintown.com/artists/{}/events/?app_id={}&date=past'.format(bands_artist, bands_key)

In [4]:
gg_artist

'kerala_dust'

In [5]:
bands_artist

'Kerala%20Dust'

## Working Code
### Let's Go

In [6]:
# data extract
r = requests.get(bands_api_url_past)
event_json = r.json()

test_df = parse_events(event_json)
print(test_df.shape)
test_df.head()

(69, 13)


Unnamed: 0,datetime,title,lineup,festival_start_date,festival_end_date,city,region,country,latitude,longitude,location,name,artist
0,2016-12-03T21:00:00,,"[Kerala Dust, Sentiment, Dwig, Kleintierschauk...",,,Zurich,,Switzerland,47.38151,8.53688,,Kauz,Kerala Dust
1,2017-01-20T23:59:00,,"[Kerala Dust, Harri, Kiosk, Constantijn Lange,...",,,Berlin,,Germany,52.511612,13.42687,,Kater Blau,Kerala Dust
2,2017-04-22T23:00:00,,"[Kerala Dust, Monolink, Jacob Groening, Just E...",,,Hamburg,,Germany,53.551802,9.958209,,Gruenspan,Kerala Dust
3,2017-04-23T12:00:00,,"[Kerala Dust, Florian Rietze, Kiosk, Luca Must...",,,Berlin,,Germany,52.511612,13.42687,,Kater Blau,Kerala Dust
4,2017-07-22T18:00:00,,"[Kerala Dust, Polo, Sainte Vie, Hyenah, Pan, J...",,,Paris,,France,48.8741,2.26298,,Les Pavillons des Etangs,Kerala Dust


In [7]:
# data extract
r_gg = requests.get(getgenre_api_url)

genre_json = r_gg.json()
top_gg = genre_json['analysis']['top_genres']

In [8]:
# feature extraction
test_df['artist'] = artist
test_df['artist_topgenres'] = [top_gg for _ in range(len(test_df))]


In [9]:
# feature extraction
test_df['lineup_size'] = test_df['lineup'].str.len()
test_df['festival_flag'] = np.where(test_df['lineup'].str.len() > 5, 'Festival', 'Concert')
test_df['festival_flag'] = test_df['festival_flag'].astype(str)

In [10]:
# data cleanup
print(test_df.shape)
test_df = test_df[(test_df['latitude'].notna()) & (test_df['longitude'].notna())]
print(test_df.shape)
test_df['latitude'] = pd.to_numeric(test_df['latitude'])
test_df['longitude'] = pd.to_numeric((test_df['longitude']))

test_df['datetime'] = test_df['datetime'].str[:10]

(69, 16)
(65, 16)


In [11]:
# data cleanup
#test_df = get_locations(test_df)  ## disabling for now due to run time

In [12]:
# test_df[['country','region','city','location']].value_counts()

In [13]:
test_df.head()

Unnamed: 0,datetime,title,lineup,festival_start_date,festival_end_date,city,region,country,latitude,longitude,location,name,artist,artist_topgenres,lineup_size,festival_flag
0,2016-12-03,,"[Kerala Dust, Sentiment, Dwig, Kleintierschauk...",,,Zurich,,Switzerland,47.38151,8.53688,,Kauz,Kerala Dust,[house],6,Festival
1,2017-01-20,,"[Kerala Dust, Harri, Kiosk, Constantijn Lange,...",,,Berlin,,Germany,52.511612,13.42687,,Kater Blau,Kerala Dust,[house],8,Festival
2,2017-04-22,,"[Kerala Dust, Monolink, Jacob Groening, Just E...",,,Hamburg,,Germany,53.551802,9.958209,,Gruenspan,Kerala Dust,[house],4,Concert
3,2017-04-23,,"[Kerala Dust, Florian Rietze, Kiosk, Luca Must...",,,Berlin,,Germany,52.511612,13.42687,,Kater Blau,Kerala Dust,[house],10,Festival
4,2017-07-22,,"[Kerala Dust, Polo, Sainte Vie, Hyenah, Pan, J...",,,Paris,,France,48.8741,2.26298,,Les Pavillons des Etangs,Kerala Dust,[house],13,Festival


In [17]:
from bokeh.plotting import figure, output_file, show
from bokeh.tile_providers import get_provider, OSM #CARTODBPOSITRON_RETINA, WIKIMEDIA, ESRI_IMAGERY, STAMEN_TONER,
from bokeh.models import ColumnDataSource
from bokeh.layouts import layout
from bokeh.models import DateSlider
from pyproj import Transformer
#import xyzservices.providers as xyz

# data transformation
in_crs = 'EPSG:3857'   # coordinates provided in EPSG:4326 format
out_crs = 'EPSG:4326'   # coordinates output in EPSG 3857 (Web Mercator) format

transformer = Transformer.from_crs(in_crs, out_crs, always_xy=True)

lons, lats = [], []
for lon, lat in list(zip(test_df['longitude'], test_df['latitude'])):
    x, y = transformer.transform(lon,lat)
    lons.append(x)
    lats.append(y)

test_df['MercatorX'] = lons
test_df['MercatorY'] = lats


# visualization
output_file("tile.html")

tile_provider = get_provider(OSM)
tools = ['pan', 'zoom_in', 'zoom_out', 'wheel_zoom', 'box_zoom', 'lasso_select', 'tap', 'hover', 'reset', 'save']

tooltips = [
    ("Date", '@datetime'),
    ('Location: ', '@city, @region'),
    ('Country: ', '@country'),
    ('Event Type: ', '@festival_flag'),
    ('Lineup: ', '@lineup'),
]
source=ColumnDataSource(test_df)
# range bounds supplied in web mercator coordinates
p = figure(x_range=(-18000000, 20000000), y_range=(-7500000, 11500000),
           #x_axis_type='mercator', y_axis_type='mercator',
           height=700, width=1500,
           tools=tools, tooltips=tooltips, active_scroll='wheel_zoom')
p.add_tile(tile_provider)
p.circle(x='MercatorX', y='MercatorY', size=6, fill_color='dodgerblue', line_color='dodgerblue', fill_alpha=.3, source=source)



date_slider = DateSlider(
    title=" Adjust Date range",
    start=min(test_df['datetime']),
    end=max(test_df['datetime']),
    step=100,
    value=(min(test_df['datetime'])),
)

date_slider.js_link("value", p.x_range, "start", attr_selector=0)
date_slider.js_link("value", p.x_range, "end", attr_selector=1)

def update_plot(attr, old, new):
    datesel = datetime.fromtimestamp(new / 1000).strftime('%Y-%m-%d')
    new_data = test_df[test_df['date'] == datesel]
    source.data.update(ColumnDataSource(new_data).data)
date_slider.on_change('value', update_plot)
layout = layout([date_slider], [p])
show(layout)
#show(p)

You are generating standalone HTML/JS output, but trying to use real Python
callbacks (i.e. with on_change or on_event). This combination cannot work.

Only JavaScript callbacks may be used with standalone output. For more
information on JavaScript callbacks with Bokeh, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/interaction/callbacks.html

Alternatively, to use real Python callbacks, a Bokeh server application may
be used. For more information on building and running Bokeh applications, see:

    https://docs.bokeh.org/en/latest/docs/user_guide/server.html



In [None]:
import pyproj
pyproj.datadir.get_data_dir()

In [None]:
p.x_range.start

## Messy Workspace -- May not Run

In [15]:
from pyproj import Transformer

in_crs = "EPSG:4326"   # coordinates provided in EPSG:4326 format
out_crs = "EPSG:3857"   # coordinates output in EPSG 3857 (Web Mercator) format

transformer = Transformer.from_crs(in_crs, out_crs, always_xy=True)

world_lon1, world_lat1 = transformer.transform(-180,-85)
world_lon2, world_lat2 = transformer.transform(180,85)

starbucks = pd.read_csv("directory.csv")
starbucks.head()
starbucks_us = starbucks[starbucks.Country == "US"].copy()

lons, lats = [], []
for lon, lat in list(zip(starbucks_us["Longitude"], starbucks_us["Latitude"])):
    x, y = transformer.transform(lon,lat)
    lons.append(x)
    lats.append(y)

starbucks_us["MercatorX"] = lons
starbucks_us["MercatorY"] = lats

starbucks_us = starbucks_us.rename(columns={"Store Name":"Name", "State/Province":"State"})
starbucks_us.head()

FileNotFoundError: [Errno 2] No such file or directory: 'directory.csv'

In [None]:
## TODO add interactive buttons to switch between artists / other slices
## https://towardsdatascience.com/visualization-with-plotly-express-comprehensive-guide-eb5ee4b50b57#f38e
## https://plotly.com/python/dropdowns/

In [None]:
# visualize data
import plotly.express as px
px.set_mapbox_access_token(open("mapbox_token.txt").read())
fig = px.scatter_mapbox(test_df,
                     lat='latitude',
                     lon='longitude',
                     color="festival_flag", # which column to use to set the color of markers
                     color_discrete_sequence=px.colors.qualitative.Vivid,

                     hover_name="city", # column added to hover information
                     hover_data={'datetime': True,
                                 'festival_flag': True,
                                 'latitude': False,
                                 'longitude': False},
                     labels={'festival_flag': 'Event Type',
                             'datetime': 'Date'},


                     #size="lineup_size", # size of markers
                     #projection="natural earth"
                     mapbox_style="streets",
                     zoom=1
                    )

# fig.update_layout(
#     mapbox_style="streets",
#     # mapbox_layers=[
#     #     {
#     #         "below": 'traces',
#     #         "sourcetype": "raster",
#     #         "sourceattribution": "United States Geological Survey",
#     #         "source": [
#     #             "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
#     #         ]
#     #     }
#     #   ]
# )
#fig.update_traces(hovertemplate= "%{label}: <br>Popularity: %{value} </br> %{text}")

# fig.update_layout(
#     margin={"r":0,"t":0,"l":0,"b":0},
#     updatemenus=[
#         dict(
#             buttons=list([
#                 dict(
#                     args=["marker.color",["red"]],
#                     label='Red',
#                     method='restyle',
#                     visible=True
#                 )
#             ]),
#             direction="down",
#             pad={'r': 10, 't': 10},
#             #showactive=True
#         )
#     ]
# )

fig.update_layout(
    margin={"r":0,"t":0,"l":0,"b":0},
    updatemenus=[
        dict(
            buttons=list([
                dict(
                    args=[{'customdata':[['2024-04-19', 'Festival', 33.6784492, -116.237155]]}],
                    label='Custom',
                    method='restyle',
                    visible=True
                )
            ]),
            direction="down",
            pad={'r': 10, 't': 10},
            #showactive=True
        )
    ]
)

fig.show()

In [None]:
fig.to_dict()

In [None]:
fig.data

In [None]:
import reverse_geocode as rg
coordinates = (-37.81, 144.96), (31.76, 35.21)
loc = rg.search(coordinates)

In [None]:
from functools import partial
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="rg_agent")

geocode = partial(geolocator.geocode, language="es")
print(geocode("london"))

print(geocode("paris"))

print(geocode("paris", language="en"))


reverse = partial(geolocator.reverse, language="es")
print(reverse("52.509669, 13.376294"))


In [None]:
from geopy.extra.rate_limiter import RateLimiter
gr = RateLimiter(geolocator.reverse, min_delay_seconds=1)

x = gr((-37.81, 144.96),)
dir(x)

In [None]:
x

In [None]:
x.raw['address']['city']

In [None]:
import plotly.express as px
#df = px.data.gapminder().query("year == 2007")
fig = px.scatter_geo(test_df,
                     lat='latitude',
                     lon='longitude',
                     color="festival_flag", # which column to use to set the color of markers
                     hover_name="city", # column added to hover information
                     #size="lineup_size", # size of markers
                     projection="natural earth")
fig.update_geos(
    visible=False,
    resolution=50,
    #scope="north america",
    showcountries=True, countrycolor="Black"
    #,showsubunits=True, subunitcolor="Blue"
)
fig.show()

In [None]:
x_df = test_df[:10].copy()
tt = get_location(x_df, 'latitude', 'longitude')
tt.head(10)

In [None]:
test_df.head(10)

In [None]:
test_df['city'] = test_df[['latitude', 'longitude']]

In [None]:
from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1)
df['location'] = df['name'].apply(geocode)

df['point'] = df['location'].apply(lambda loc: tuple(loc.point) if loc else None)

In [None]:
geolocator.reverse(-37.81, 144.96)

In [None]:
import plotly.graph_objects as go

fig = go.Figure(go.Scattergeo(
    test_df,
    lat='latitude',
    lon='longitude',
    color="festival_flag", # which column to use to set the color of markers
    hover_name="city", # column added to hover information
    #size="lineup_size", # size of markers
    projection="natural earth")
)
fig.update_geos(
    visible=False, resolution=110, scope="usa",
    showcountries=True, countrycolor="Black",
    showsubunits=True, subunitcolor="lightGrey"
)
fig.update_layout(height=300, margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
import pandas as pd
us_cities = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/us-cities-top-1k.csv")
us_cities.head()

import plotly.express as px

fig = px.scatter_mapbox(us_cities, lat="lat", lon="lon", hover_name="City", hover_data=["State", "Population"],
                        color_discrete_sequence=["fuchsia"], zoom=3, height=300)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
import plotly.express as px

fig = px.scatter_mapbox(test_df, lat="latitude", lon="longitude", hover_name="location",
                        color_discrete_sequence=["fuchsia"], zoom=3, height=300)
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
ej = r.json()
ej[2]

In [None]:
len(event_json)

In [None]:
#r.text

In [None]:
len(ej[0])

In [None]:
ej[0].keys()

In [None]:
list(ej[0].keys())

In [None]:
type(ej[0])

In [None]:
ej[0].values()

In [None]:
v = ej[0].get('venue')
print(v)
type(v)

In [None]:
v.get('country')

In [None]:
for key in list(ej[0].keys()):
    print(key, " : \n", ej[0].get(key), "\n")