In [1]:
import os

In [2]:
import pandas as pd

In [3]:
import folium

In [4]:
import geopandas as gpd

In [5]:
import webbrowser

In [6]:
import geojson

In [7]:
import json

In [8]:
from IPython.display import display

In [9]:
class Visualise:
    '''
    Class representing processed data from previous parts with attributes and methods to make geo-visualisations.
    '''
    def __init__(self, allowLog = True):
        '''
        Initilization of Downloader object. Storing objects from the webpage with self.qwe, self.rty and self.uio
        '''
        self.allowLog = allowLog
        if self.allowLog:
            print('Object of data initialized.')    
            
    def countriesPolygon(self, geoDf):
        '''
        Pandas GeoDataframe containing the (multi-)polygons of countries.
        '''
        self.geoDf = geoDf
    
    def foliumMap(self, m = folium.Map(location=[51.05, 5.86],zoom_start=3)):
        '''
        Map from library Folium.
        '''
        self.m = m         
    
    def countriesDict(self, dct):
        '''
        Dictionary for different naming conventions of countries in the World
        '''
        self.dct = {}
        for name, name2 in zip(dct['NAME'], dct['GEONAME']):
            self.dct[name] = name2
        #return self.dct
        # now the special care for a country with symbol ' in its name
        self.dct['Cote D\'Ivoire'] = 'Ivory Coast'
        self.data = self.data.replace(dct)
        
    def loadTheData(self, data, whatData = 'GeoTopArtist'):
        '''
        Loader of the data. First argument is the data provided, second is specification about what type of data it is.
        '''
        self.data = data
        self.whatData = whatData
        if whatData == 'GeoTopArtist':
            pass
        elif whatData == 'GeoTopTracks':
            pass
        elif whatData == 'ArtistInfo':
            pass
        else:
            return(print('I cannot recognize the type of data you want to pass. Select please from one of the following: \n\"GeoTopArtist\" \n\"GeoTopTracks\" \n\"ArtistInfo\" \nThank you very much!'))
        
    def showMeTopN(self, N = 10, whichOnes = 'Artists' ):
        '''
        Method which computes the top N artists for all countries contained in the scraped data.
        '''
        boundary = self.data['Rank'] <= N
        self.data = self.data.replace(self.dct)
        self.cols = ['Top ' + str(i+1) for i in range(N)]
        self.whichOnes = whichOnes
        if whichOnes == 'Artists':
            for i in range(N):
                self.geoDf['Top '+ str(i+1)] = str(None)
            self.topNArtists = self.data[boundary]
            for cntry in self.topNArtists['Country']:
                for i in range(N):
                    try:
                        self.geoDf['Top ' + str(i+1)][self.geoDf['ADMIN']==cntry] = self.topNArtists[self.topNArtists['Country']==cntry][self.topNArtists['Rank']==i+1]['Artist'].item()
                    except:
                        pass
            return self.geoDf                               
        elif whichOnes == 'Tracks':
            for i in range(N):
                self.geoDf['Top '+ str(i+1)] = str(None)
            self.topNTracks = self.data[boundary]    
            for cntry in self.topNTracks['Country']:
                for i in range(N):
                    try:
                        self.geoDf['Top ' + str(i+1)][self.geoDf['ADMIN']==cntry] = self.topNTracks[self.topNTracks['Country']==cntry][self.topNTracks['Rank']==i+1]['ArtistTrack'].item() 
                    except:
                        pass
            return self.geoDf
        else:
            return(print('I cannot show you what you want..probably. Select please from one of the following: \n"Artists" \n"Tracks" \nThank you very much!'))
    
    def saveAndShowMap(self, m, df):
        '''
        Method to save and show the map of top artists or tracks. The map is saved as 'plotTheMap.html' file which is shown in a new window.
        '''
        folium.GeoJson(
                df[['ADMIN', 'geometry'] + self.cols].to_json(),
                show=True,
                tooltip=folium.features.GeoJsonTooltip(
                fields=['ADMIN'] + self.cols,
                aliases=['Country'] + self.cols)
            ).add_to(m)
        if self.whichOnes == 'Tracks':
            self.m.save('plotTheTrackMap.html')
            webbrowser.open_new('plotTheTrackMap.html')
        else:
            self.m.save('plotTheArtistMap.html')
            webbrowser.open_new('plotTheArtistMap.html')

In [10]:
with open('countries.geojson', 'r') as f:
    data = geojson.load(f)

In [11]:
with open('countries_cut_two.csv', 'r') as c:
    cntrsDict = pd.read_csv(c, sep = ';')

In [26]:
topArtists = pd.read_csv('geo_top_artists_101.csv', sep = ',')
topArtists = topArtists.drop(columns = ['Unnamed: 0'])
topArtists.head()

Unnamed: 0,CountryId,Country,Rank,Artist,ArtistId,Listeners
0,1,Taiwan,1.0,Coldplay,cc197bad-dc9c-440d-a5b5-d52ba2e14234,5487165.0
1,1,Taiwan,2.0,Adele,cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493,2939450.0
2,1,Taiwan,3.0,Ed Sheeran,b8a7c51f-362c-4dcb-a259-bc6e0095f0a6,1771082.0
3,1,Taiwan,4.0,Sia,2f548675-008d-4332-876c-108b0c7ab9c5,2214653.0
4,1,Taiwan,5.0,Maroon 5,0ab49580-c84f-44d4-875f-d83760ea2cfe,3609760.0


In [13]:
artistViz = Visualise()
artistViz.foliumMap()
artistViz.countriesPolygon(gpd.GeoDataFrame.from_features(data))
artistViz.loadTheData(topArtists) 
artistViz.countriesDict(cntrsDict)

Object of data initialized.


Unfortunately the map cannot be displayed in Jupyter notebook, but with help of webbrowser library, we can save the output as html file and open it in a new window.

In [14]:
artistViz.showMeTopN(N = 5, whichOnes = 'Artists')
artistViz.saveAndShowMap(artistViz.m,artistViz.geoDf)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [28]:
topTracks = pd.read_csv('geo_top_tracks_101.csv', sep = ',')
topTracks = topTracks.drop(columns = ['Unnamed: 0'])
topTracks['ArtistTrack'] = topTracks['Artist'] + ' - ' + topTracks['Track']
topTracks.head()

Unnamed: 0,CountryId,Country,Rank,Track,Duration,Artist,ArtistId,ArtistTrack
0,1,Taiwan,1.0,Hello,0.0,Adele,cc2c9c3c-b7bc-4b8b-84d8-4fbd8779e493,Adele - Hello
1,1,Taiwan,2.0,Photograph,258.0,Ed Sheeran,b8a7c51f-362c-4dcb-a259-bc6e0095f0a6,Ed Sheeran - Photograph
2,1,Taiwan,3.0,Thinking Out Loud,280.0,Ed Sheeran,b8a7c51f-362c-4dcb-a259-bc6e0095f0a6,Ed Sheeran - Thinking Out Loud
3,1,Taiwan,4.0,Shape of You,0.0,Ed Sheeran,b8a7c51f-362c-4dcb-a259-bc6e0095f0a6,Ed Sheeran - Shape of You
4,1,Taiwan,5.0,The Scientist,309.0,Coldplay,cc197bad-dc9c-440d-a5b5-d52ba2e14234,Coldplay - The Scientist


In [16]:
tracksViz = Visualise()
tracksViz.foliumMap()
tracksViz.countriesPolygon(gpd.GeoDataFrame.from_features(data))
tracksViz.loadTheData(topTracks) 
tracksViz.countriesDict(cntrsDict)

Object of data initialized.


Similarily as above: the map cannot be displayed in Jupyter notebook, but with help of webbrowser library, we can save the output as html file and open it in a new window.

In [17]:
tracksViz.showMeTopN(N = 5, whichOnes = 'Tracks')
tracksViz.saveAndShowMap(tracksViz.m,tracksViz.geoDf)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


---