In [1]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import datetime

# enable LaTeX rendering in seaborn plot labels
sns.set(font_scale=1.5, rc={'text.usetex' : True})

In [2]:
# read scrobbles imported using scrobbles_importer
dataset_path = './datasets/'
scrobbles_csv_file = 'lucas_scrobbles.csv'
scrobbles = pd.read_csv(dataset_path + scrobbles_csv_file,
                        delimiter=';',
                        names=['artist','song','date','time'])

# merge date and time into timestamp and delete them
timestamp = pd.to_datetime(scrobbles['date'] + ', ' + scrobbles['time'])
scrobbles.insert(scrobbles.shape[1], 'timestamp', timestamp)
del scrobbles['date'], scrobbles['time']

In [3]:
def plot_rank(scrobbles, top=10, relative=False, column='artist'):
    rank = scrobbles[column].value_counts()
    
    labels = rank[0:top].keys()
    values = rank[0:top]
    
    if relative:
        total = scrobbles.shape[0]
        values = (values/total)*100.0
        
    plt.figure()
    plt.bar(np.arange(0,top), values, align='center')
    plt.xticks(np.arange(0,top), labels,rotation=90)
    plt.xlabel('Top {} {}'.format(top, column + 's'))
    if relative:
        plt.ylabel('Total scrobbles percentage [%]')
    else:
        plt.ylabel('Scrobles count')
    

def number_of_unique_artists(scrobbles):
    return scrobbles['artist'].nunique()

print('Number of unique artists: {}'.format(number_of_unique_artists(scrobbles)))
plot_rank(scrobbles, relative=True, top=20, column='artist')

Number of unique artists: 1878
Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x7fa491b296a8> (for post_execute):


UnicodeEncodeError: 'ascii' codec can't encode character '\xe3' in position 282: ordinal not in range(128)

UnicodeEncodeError: 'ascii' codec can't encode character '\xe3' in position 282: ordinal not in range(128)

<matplotlib.figure.Figure at 0x7fa4a67411d0>

In [None]:
def plot_unique_artists_evolution(scrobbles, start=None, end=None, freq='1M', ticks_angle=None):
    if start is None:
        # get first scrobble
        start = scrobbles['timestamp'].iloc[-1]

    if end is None:
        # get last scrobble
        end = scrobbles['timestamp'].iloc[0]

    date_range = pd.date_range(start, end, freq=freq)

    nunique_artists = []
    for date in date_range:
        query = scrobbles[scrobbles['timestamp'] < date]
        nunique_artists.append(number_of_unique_artists(query))
    
    plt.figure()
    plt.plot(date_range, nunique_artists)
    plt.xlabel('Period')
    plt.ylabel('Unique artists')
    if ticks_angle is not None:
        plt.xticks(rotation=ticks_angle)

    plt.figure()
    plt.plot(date_range, np.gradient(nunique_artists), color='green')
    plt.xlabel('Period')
    plt.ylabel('Unique artists gradient')
    if ticks_angle is not None:
        plt.xticks(rotation=ticks_angle)

plot_unique_artists_evolution(scrobbles)
plot_unique_artists_evolution(scrobbles,end='01/01/2010',freq='1D', ticks_angle=90)

In [None]:
def plot_artist_evolution(scrobbles, artist, start=None, end=None, freq='1M', ticks_angle=None):
    
    if start is None:
        # get first scrobble
        start = scrobbles['timestamp'].iloc[-1]

    if end is None:
        # get last scrobble
        end = scrobbles['timestamp'].iloc[0]

    date_range = pd.date_range(start, end, freq=freq)

    scrobbles_number = []
    for date in date_range:
        query_artist = scrobbles[scrobbles['artist'] == artist]
        query_scrobbles = query_artist[query_artist['timestamp'] < date]
        scrobbles_number.append(query_scrobbles.shape[0])
    
    plt.figure()
    plt.plot(date_range, scrobbles_number)
    plt.xlabel('Period')
    plt.ylabel('Scrobbles')
    plt.title(artist +' evolution')
    if ticks_angle is not None:
        plt.xticks(rotation=ticks_angle)
        
    plt.figure()
    plt.plot(date_range, np.gradient(scrobbles_number))
    plt.xlabel('Period')
    plt.ylabel('Scrobbles gradient')
    plt.title(artist +' evolution rate')
    if ticks_angle is not None:
        plt.xticks(rotation=ticks_angle)        
        
plot_artist_evolution(scrobbles, 'Hillsong United')
plot_artist_evolution(scrobbles, 'Louise Attaque')
plot_artist_evolution(scrobbles, 'Kraftwerk')