In [1]:
import pandas as pd

In [68]:
class Analyzer:
    '''
    Small analysis helper working on a DataFrame of artist statistics.

    The DataFrame is supplied through dataLoader(). showMeTheTopArtists() reads
    its 'Artist', 'Listeners' and 'Scrobbles' columns.
    '''
    def __init__(self, allowLog = True):
        '''
        Initialization of the Analyzer object.

        Parameters
        ----------
        allowLog : bool, default True
            If truthy, a confirmation message is printed on construction.
        '''
        self.allowLog = allowLog
        # Declared here so the attribute always exists; dataLoader() fills it in.
        self.df = None
        if self.allowLog:
            print('Analyzer initialized.')

    def dataLoader(self, df):
        '''
        Stores the DataFrame that the analysis methods operate on.

        Parameters
        ----------
        df : pandas.DataFrame
            Data to analyze. It is stored by reference, no copy is made.
        '''
        self.df = df

    def showMeTheTopArtists(self, N, sortBy = 'Scrobbles'):
        '''
        Returns the N top-ranked artists according to the chosen criterion.

        Parameters
        ----------
        N : int
            Number of rows to return (passed to DataFrame.head).
        sortBy : str, default 'Scrobbles'
            'Scrobbles' - rank by the 'Scrobbles' column,
            'Listeners' - rank by the 'Listeners' column,
            'SpL'       - rank by the ratio Scrobbles / Listeners.

        Returns
        -------
        pandas.DataFrame or None
            The first N rows, sorted in descending order, with the columns
            'Artist', 'Listeners', 'Scrobbles' (plus 'Scrobbles per Listener'
            for 'SpL'). For any other sortBy value a help message is printed
            and None is returned.
        '''
        shownColumns = ['Artist', 'Listeners', 'Scrobbles']

        if sortBy in ('Listeners', 'Scrobbles'):
            # Both options differ only in the column used for ordering.
            return self.df.sort_values(by = [sortBy], ascending = False)[shownColumns].head(N)

        if sortBy == 'SpL':
            ratioColumn = 'Scrobbles per Listener'
            # assign() returns a new frame, so the DataFrame handed to
            # dataLoader() is left untouched by this query.
            withRatio = self.df.assign(**{ratioColumn: self.df['Scrobbles'] / self.df['Listeners']})
            return withRatio.sort_values(by = [ratioColumn], ascending = False)[shownColumns + [ratioColumn]].head(N)

        print('Unfortunately couldn\'t sorted the dataframe as you wish. Please choose one of the following:\n"Scrobbles"\n"Listeners"\n"SpL"\nThank you!')
        return None
        
            
        

In [69]:
# Read the artist table, discard the 'Unnamed: 0' column and hand the frame to a fresh Analyzer.
df = pd.read_csv('artist_info.csv').drop(columns = 'Unnamed: 0')
newA = Analyzer()
newA.dataLoader(df)

Analyzer initialized.


## Some of the basic statistics and interesting results

We first show a table of the top 20 artists by scrobbles (i.e. the number of times songs by the artist were played by users of Last.fm).

In [70]:
# Same call with the ranking criterion spelled out ('Scrobbles' is the default).
newA.showMeTheTopArtists(N = 20, sortBy = 'Scrobbles')

Unnamed: 0,Artist,Listeners,Scrobbles
897,The Beatles,3783733,537207098
759,Radiohead,4821534,515452255
220,Coldplay,5487151,370302801
650,Muse,4153641,353109565
66,Arctic Monkeys,3594730,345820358
736,Pink Floyd,3167698,324401009
555,Linkin Park,4054402,303649011
768,Red Hot Chili Peppers,4704723,302251011
534,Lady Gaga,3925363,297780074
619,Metallica,2960691,288974324


We follow with the top 20 artists sorted by number of listeners (i.e. the number of Last.fm users who have listened to a song by the artist at least once).

In [71]:
# Rank by the 'Listeners' column instead of the default 'Scrobbles'.
newA.showMeTheTopArtists(N = 20, sortBy = 'Listeners')

Unnamed: 0,Artist,Listeners,Scrobbles
220,Coldplay,5487151,370302801
759,Radiohead,4821534,515452255
768,Red Hot Chili Peppers,4704723,302251011
775,Rihanna,4675544,208837032
306,Eminem,4624847,209443855
496,Kanye West,4524008,273346082
922,The Killers,4510073,216418725
682,Nirvana,4362266,229721764
650,Muse,4153641,353109565
752,Queen,4139663,203080438


Motivated by the differences between the two rankings above, we look for the artists with the most "devoted" fans, i.e. we show the top 20 artists with the highest scrobbles-per-listener ratio.

In [72]:
# 'SpL' ranks by Scrobbles / Listeners, shown as the 'Scrobbles per Listener' column.
newA.showMeTheTopArtists(N = 20, sortBy = 'SpL')

Unnamed: 0,Artist,Listeners,Scrobbles,Scrobbles per Listener
85,BTS,313537,203482762,648.991226
324,Exo,156865,36331408,231.609397
1061,サカナクション,50651,8063977,159.206669
1071,星野源,14820,2254793,152.145277
897,The Beatles,3783733,537207098,141.978067
730,Perfume,184314,25256200,137.028115
411,Horkýže Slíže,30794,3965752,128.783269
1078,相対性理論,43445,5531864,127.33028
538,Lana Del Rey,2017109,249125270,123.506102
691,ONE OK ROCK,163473,19898992,121.726475


---

## Debugging and trying

In [12]:
# Reload the CSV without the 'Unnamed: 0' column and preview the first rows.
df = pd.read_csv('artist_info.csv').drop(columns = 'Unnamed: 0')
df.head()

Unnamed: 0,ArtistId,Artist,Listeners,Scrobbles,Tag1,Tag2,Tag3,Tag4,Tag5
0,603ba565-3967-4be1-931e-9cb945394e86,*NSYNC,1000244,13392103,pop,boybands,90s,dance,seen live
1,f37c537b-3557-4031-bfd6-ab63ced32854,10cc,757032,6539113,classic rock,rock,70s,pop,british
2,dff0d392-4cd5-4052-9fbb-f485df3891e5,2 Chainz,689853,10755122,Hip-Hop,rap,Dirty South,trap,southern rap
3,02628cd8-ca14-4545-a0b4-96020b54692b,20syl,77619,892733,Hip-Hop,french,electronic,turntablism,hip hop
4,382f1005-e9ab-4684-afd4-0bdae4ee37f2,2Pac,2245429,58602220,rap,Hip-Hop,Gangsta Rap,West Coast Rap,hip hop


In [26]:
# Check what kind of object sort_values hands back.
type(df.sort_values(by = 'Listeners', ascending = False))

pandas.core.frame.DataFrame

In [28]:
# Artist names ordered from most to fewest listeners.
byListeners = df.sort_values(by = 'Listeners', ascending = False)
byListeners['Artist']

220                 Coldplay
759                Radiohead
768    Red Hot Chili Peppers
775                  Rihanna
306                   Eminem
               ...          
70                     Arnob
705              Os Tubarões
224            Cordas do Sol
168                Bulimundo
750         Páll Finnur Páll
Name: Artist, Length: 1081, dtype: object

In [48]:
# Element-wise scrobbles-per-listener ratio for every row.
df['Scrobbles'].div(df['Listeners'])

0        13.388836
1         8.637829
2        15.590455
3        11.501475
4        26.098452
           ...    
1076     90.463885
1077     41.862724
1078    127.330280
1079     78.868340
1080     54.995952
Length: 1081, dtype: float64