In [1]:
from apiKey import lastFMKey
from apiKey import lastFMSecret

#import needed libraries
import pandas as pd
import json
import numpy
import requests as req
import pylast
import time
import iso3166 as iso
import csv

# print(lastFMKey)
# print(lastFMSecret)

#### Declare variables

In [2]:
country = iso.Country
country

iso3166.Country

In [3]:
inputPath = "../Input/"
artistCSV = f"{inputPath}artist.csv"
countriesArtistCSV = f"{inputPath}countriesTopArtist.csv"
countryCsv = f"{inputPath}countries.csv"
countriesArtistNotFound = f"{inputPath}countriesArtistNotFound.txt"


#### Def methods for calling and displaying

In [4]:
def lastfm_get(payload):
    # define headers and URL
    headers = {'user-agent': 'bootcamp'}
    url = 'https://ws.audioscrobbler.com/2.0/'

    # Add API key and format to the payload
    payload['api_key'] = lastFMKey
    payload['format'] = 'json'

    response = req.get(url, headers=headers, params=payload)
    return response

In [5]:
def jprint(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    print(text)

#### Country collection and export to csv

In [6]:
#parameters for writing countries to csv
params = open(countryCsv, "w")
countries = iso.countries_by_name
countries.pop('UNITED STATES OF AMERICA', None)

#after data checking the following are not in lastfm in the iso3166 standard as the documentation states
countries["UNITED STATES"] = ("UNITED STATES")
countries["UNITED KINGDOM"] = ("UNITED KINGDOM")
#countries #sanity check

#write countries to csv
csvwriter = csv.writer(params)

for country in countries.items():
    csvwriter.writerow(country)

#### Loop through countries and collect top 50 per country

#### Get list of top artist per country and add to dataframe

In [None]:
iterator = 0
header = True
mode = 'w'

for country in countries:
    
    iterator += 1
    if (iterator > 1):
        header = False
        mode = 'a'
        
    #lastFm call to get top artists for each country
    responseTopArt = lastfm_get({
        'method':'geo.getTopArtists',
        'country':country,
        'limit':'50'
    })
    r = responseTopArt.json()
    
    try:
        #print(f"Looking to top artists for {country}.  Mode: {mode}. Iterator:{iterator}")
        artistDF = pd.json_normalize(r,[["topartists","artist"]])
        artistDF['country'] = country
              
        with open(counrtyTopArtistCSV, mode) as f:
            artistDF.to_csv(f, header=header)
            
    except:
        print(f"No artists found for {country}")
    #throttle api calls by 1 sec.  
    time.sleep(1)
    
# jprint(r.json())

#### Get Artist information and save to CSV.

In [13]:
iterator = 0
header = True
mode = 'w'

artistDF = pd.read_csv(counrtyTopArtistCSV)
artistDedupeDF = artistDF['name'].drop_duplicates()  #cleaning duplicate artists

for artist in artistDedupeDF:
    
    iterator += 1
    if (iterator > 1):
        header = False
        mode = 'a'
    
    #call lastfm to get data
    responseArtist = lastfm_get({
        'method':'artist.getInfo',
        'artist': artist
    })

    a = responseArtist.json()
    
    #audit logging
    print(f"Looking to top artists for {artist}.  Mode: {mode}. Iterator:{iterator}")

    #takes json and flattens into the needed columns and converts to dataframe
    artistDF = pd.json_normalize(a['artist'])

    #explodes tags.tag - takes the json value in the column and breaks them into multiple records that represent each value in the json
    explodeTags = artistDF[["name","stats.listeners","stats.playcount","streamable","tags.tag"]].explode("tags.tag")

    finalArtDF = (pd.concat({i: pd.json_normalize(x) for i, x in explodeTags.pop('tags.tag').items()})
               .reset_index(level=1, drop=True)
               .join(explodeTags,lsuffix="_genre")
               .reset_index(drop=True)
          )
    
    #cleaning duplicate artists and removing unneeded columns
    dedupedDF = finalArtDF.drop_duplicates()  
    del dedupedDF['url']
    
    #write dataset to csv
    with open(artistCSV, mode) as f:
        dedupedDF.to_csv(f, header=header)

# # data = json.loads()
# jprint(r.json())

Looking to top artists for The xx.  Mode: w. Iterator:1
Looking to top artists for The Beatles.  Mode: a. Iterator:2
Looking to top artists for Radiohead.  Mode: a. Iterator:3
Looking to top artists for Lana Del Rey.  Mode: a. Iterator:4
Looking to top artists for Pink Floyd.  Mode: a. Iterator:5
Looking to top artists for Metallica.  Mode: a. Iterator:6
Looking to top artists for Drake.  Mode: a. Iterator:7
Looking to top artists for Nirvana.  Mode: a. Iterator:8
Looking to top artists for Katy Perry.  Mode: a. Iterator:9
Looking to top artists for Kendrick Lamar.  Mode: a. Iterator:10
Looking to top artists for Led Zeppelin.  Mode: a. Iterator:11
Looking to top artists for blink-182.  Mode: a. Iterator:12
Looking to top artists for Depeche Mode.  Mode: a. Iterator:13
Looking to top artists for The Rolling Stones.  Mode: a. Iterator:14
Looking to top artists for Oasis.  Mode: a. Iterator:15
Looking to top artists for Michael Jackson.  Mode: a. Iterator:16
Looking to top artists for Ma

In [243]:
#sample csv read
artistCSVDF = pd.read_csv(artistCSV)
artistCSVDF

Unnamed: 0.1,Unnamed: 0,name_genre,url,name,stats.listeners,stats.playcount,streamable
0,0,seen live,https://www.last.fm/tag/seen+live,The xx,2090127,150713935,0
1,1,seen live,https://www.last.fm/tag/seen+live,The xx,2090127,150713935,0
2,2,seen live,https://www.last.fm/tag/seen+live,The xx,2090127,150713935,0
3,3,seen live,https://www.last.fm/tag/seen+live,The xx,2090127,150713935,0
4,4,seen live,https://www.last.fm/tag/seen+live,The xx,2090127,150713935,0
...,...,...,...,...,...,...,...
4205,0,synth pop,https://www.last.fm/tag/synth+pop,Empire of the Sun,1481105,33091660,0
4206,1,synth pop,https://www.last.fm/tag/synth+pop,Empire of the Sun,1481105,33091660,0
4207,2,synth pop,https://www.last.fm/tag/synth+pop,Empire of the Sun,1481105,33091660,0
4208,3,synth pop,https://www.last.fm/tag/synth+pop,Empire of the Sun,1481105,33091660,0


In [None]:
countryArtistDF = pd.read_csv(countriesArtistCSV)
countryFilterDF = pd.merge(countryArtistDF, artistDF, on="name")

groupedDF = countryFilterDF.groupby('name').agg(**{"Country Counts":("country", "count")})
groupedDF.reset_index(inplace=True)
groupedDF = groupedDF.rename(columns = {'name':'Artist Name'})

countryCountSorted = groupedDF.sort_values('Country Counts', ascending=False).head(20)
countryCountSorted

In [None]:
header = True
mode = 'w'

countryCoordDF = pd.read_csv(coordsCSV)

topGenresCountries = countryFilterDF.groupby(['country','name_genre']).agg(**{"total listeners":("listeners", "sum")}).reset_index()
topGenresSorted = topGenresCountries.set_index(['name_genre']).groupby('country')['total listeners'].nlargest(5).reset_index()

topGenreCoords = pd.merge(topGenresSorted, countryCoordDF, on="country")

with open(countriesTopGenres, mode) as f:
    topGenreCoords.to_csv(f, header=header)