In [None]:
import pandas as pd
from sqlalchemy import create_engine

# SQLAlchemy database URL: a SQLite file named top2020.db, given as a relative path.
db="sqlite:///top2020.db"
# Engine shared by every read_sql_table call in this notebook; echo=False is passed explicitly.
engine = create_engine(db,echo=False)

In [66]:
# Load the four source tables through the shared engine.
# 'entries' keeps the default index (its columns stay regular columns);
# 'albums', 'users' and 'genres' are indexed by their 'id' column.
df_entries = pd.read_sql_table('entries',engine)
df_albums = pd.read_sql_table('albums',engine,index_col='id')
df_users = pd.read_sql_table('users',engine,index_col='id')
df_genres = pd.read_sql_table('genres',engine,index_col='id')

In [67]:
def build_entries(entries_df=None, albums_df=None, genres_df=None):
    """Join every entry with its album and with that album's genre.

    Parameters
    ----------
    entries_df : pandas.DataFrame, optional
        Entries table; must expose an ``album_id`` column.
    albums_df : pandas.DataFrame, optional
        Albums table keyed by ``id`` (index level or column) and exposing a
        ``genre_id`` column.
    genres_df : pandas.DataFrame, optional
        Genres table keyed by ``id`` (index level or column).

    Any frame left as ``None`` falls back to the notebook-level
    ``df_entries`` / ``df_albums`` / ``df_genres``, so the historical
    zero-argument call ``build_entries()`` keeps working.

    Returns
    -------
    pandas.DataFrame
        Result of two default (inner) merges: entries -> albums on
        ``album_id``/``id``, then -> genres on ``genre_id``/``id``.
        Non-key columns sharing a name on both sides of a merge receive
        pandas' default ``_x`` / ``_y`` suffixes.
    """
    # Resolve defaults at call time so the function sees the current globals.
    if entries_df is None:
        entries_df = df_entries
    if albums_df is None:
        albums_df = df_albums
    if genres_df is None:
        genres_df = df_genres

    entries = pd.merge(entries_df, albums_df, left_on='album_id', right_on='id')
    entries = pd.merge(entries, genres_df, left_on='genre_id', right_on='id')

    return entries

# Build the joined entries frame and preview its first rows.
entries = build_entries()
entries.head()

Unnamed: 0,id,name_x,user_id,album_id,position,score,name_y,genre_id,name
0,1,déhà - how to despise humanity in 7 lessons an...,1,1,1,30.0,déhà - how to despise humanity in 7 lessons an...,24,Experimental
1,1082,déhà - how to despise humanity in 7 lessons an...,64,1,10,19.772727,déhà - how to despise humanity in 7 lessons an...,24,Experimental
2,26,melted bodies - enjoy yourself,6,26,4,27.413793,melted bodies - enjoy yourself,24,Experimental
3,541,melted bodies-enjoy yourself,33,26,2,29.137931,melted bodies - enjoy yourself,24,Experimental
4,1087,melted bodies - enjoy yourself,64,26,15,14.090909,melted bodies - enjoy yourself,24,Experimental


In [68]:
def format_entries(entries):
    """Keep the useful columns of the joined entries and give them clear names.

    Parameters
    ----------
    entries : pandas.DataFrame
        Joined entries frame, expected to carry the columns listed as keys of
        ``column_names`` below. It is left untouched: the function works on
        new frames instead of mutating its argument, so the cell that built
        ``entries`` does not need to be re-run before calling this again.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the mapped columns (original column order
        preserved), renamed, and indexed by ``entry_id``. ``entry_id`` is
        also kept as a regular column.

    Raises
    ------
    KeyError
        If no ``id`` column is present, since ``entry_id`` cannot then be
        used as the index.
    """
    # source column -> public column name (named to avoid shadowing builtin `map`)
    column_names = {
        'id': 'entry_id',
        'user_id': 'user_id',
        'album_id': 'album_id',
        'name_y': 'album',
        'genre_id': 'genre_id',
        'name': 'genre',
        'position': 'position',
        'score': 'score',
    }

    unwanted = [col for col in entries.columns if col not in column_names]

    formatted = entries.drop(columns=unwanted)
    formatted = formatted.rename(columns=column_names)
    # drop=False: entry_id stays available as a column as well as the index.
    formatted = formatted.set_index('entry_id', drop=False)

    return formatted

# Rebind `entries` to the formatted frame and preview it.
entries = format_entries(entries)
entries.head()


Unnamed: 0_level_0,entry_id,user_id,album_id,position,score,album,genre_id,genre
entry_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1,1,1,1,30.0,déhà - how to despise humanity in 7 lessons an...,24,Experimental
1082,1082,64,1,10,19.772727,déhà - how to despise humanity in 7 lessons an...,24,Experimental
26,26,6,26,4,27.413793,melted bodies - enjoy yourself,24,Experimental
541,541,33,26,2,29.137931,melted bodies - enjoy yourself,24,Experimental
1087,1087,64,26,15,14.090909,melted bodies - enjoy yourself,24,Experimental


In [108]:
def build_album_results(entries):
    """Aggregate entries per album and attach overall and per-genre rankings.

    `entries` must provide the columns genre_id, genre, album_id, album,
    score and position (they are used as pivot index / values below).

    Returns a frame with one row per (genre_id, genre, album_id, album)
    combination, the pivot index moved back to columns, and two-level
    column labels.

    NOTE(review): in this notebook's rendered output the original score
    statistics end up under a 'score_x' top-level label, the overall rank
    under ('score_y', 'sum') and the per-genre rank under ('score', 'sum').
    That naming appears to come from pandas' merge suffix handling and
    format_album_results relies on it -- confirm it still holds whenever the
    pandas version changes.
    """

    # Statistics computed for each album: position mean/min/max and
    # score count/sum/mean/max/min.
    aggfunc = {
        'position': ['mean','min','max'],
        'score': ['count','sum','mean','max','min']
    }

    album_stats = pd.pivot_table(entries, index = ['genre_id','genre','album_id','album'], values = ['score','position'], aggfunc=aggfunc)

    # Dense descending rank of every statistic; only the rank of the summed
    # score is kept (highest total score -> rank 1).
    album_ranking = album_stats.rank(method='dense',ascending=False)[('score','sum')]
    # Same ranking, computed separately inside each genre_id group.
    album_genre_ranking = album_stats.groupby('genre_id').rank(method='dense',ascending=False)[('score','sum')]

    # Both rankings share album_stats' index, so they are joined index-to-index.
    album_results = pd.merge(album_stats, album_ranking, left_index=True, right_index=True)
    album_results = pd.merge(album_results, album_genre_ranking, left_index=True, right_index=True)

    # Turn genre_id / genre / album_id / album back into regular columns.
    album_results.reset_index(inplace=True)

    return album_results

# Aggregate the formatted entries per album and preview the raw (two-level column) result.
album_results = build_album_results(entries)
album_results.head()

Unnamed: 0_level_0,genre_id,genre,album_id,album,position,position,position,score_x,score_x,score_x,score_x,score_x,score_y,score
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,max,mean,min,count,max,mean,min,sum,sum,sum
0,1,Heavy Metal,19,cirith ungol - forever in black,26.0,10.0,3.0,4,27.619048,16.278736,5.0,65.114943,64.0,2.0
1,1,Heavy Metal,180,dynazty - the dark delight,20.0,20.0,20.0,1,13.62069,13.62069,13.62069,13.62069,267.0,7.0
2,1,Heavy Metal,205,lonewolf - division hades,30.0,15.5,1.0,2,30.0,17.5,5.0,35.0,148.0,3.0
3,1,Heavy Metal,213,avatar - hunter gatherer,13.0,9.833333,1.0,6,30.0,18.032581,13.928571,108.195489,31.0,1.0
4,1,Heavy Metal,337,spirit adrift enlightened in eternity,30.0,30.0,30.0,1,5.0,5.0,5.0,5.0,306.0,11.0


In [109]:
def format_album_results(album_results):
    """Flatten, select, rename and order the per-album statistics.

    Parameters
    ----------
    album_results : pandas.DataFrame
        Frame with two-level column labels whose levels are strings, such as
        ``('score_x', 'sum')`` or ``('album_id', '')``. It is left untouched:
        flattening happens on a copy, so calling this function twice on the
        same frame gives the same result. Previously the flattened labels
        were written back onto the caller's frame, and a second call then
        joined the characters of each label with ``'|'``.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by ``album_id``, with the columns listed as values
        of ``column_names`` (minus ``album_id``) in that order, sorted by
        ascending ``rank``. A mapped source column missing from the input
        comes out as an all-NaN column, because the selection uses
        ``reindex``.
    """
    # Work on a copy so the caller's column labels are never modified.
    flat = album_results.copy()
    # ('score_x', 'sum') -> 'score_x|sum' ; ('album_id', '') -> 'album_id'
    flat.columns = flat.columns.map('|'.join).str.strip('|')

    # flattened source label -> public column name; dict order is output order
    # (named to avoid shadowing the builtin `map`)
    column_names = {
        'score_y|sum': 'rank',
        'album_id': 'album_id',
        'album': 'album',
        'score|sum': 'genre_rank',
        'genre_id': 'genre_id',
        'genre': 'genre',
        'score_x|count': 'nb_votes',
        'score_x|sum': 'total_score',
        'score_x|mean': 'mean_score',
        'score_x|max': 'highest_score',
        'score_x|min': 'lowest_score',
        'position|mean': 'mean_position',
        'position|min': 'highest_position',
        'position|max': 'lowest_position',
    }

    # reindex both selects and orders the columns, so no extra drop is needed.
    formatted = flat.reindex(columns=list(column_names))
    formatted = formatted.rename(columns=column_names)
    formatted = formatted.set_index('album_id', drop=True)
    formatted = formatted.sort_values('rank')

    return formatted

# Rebind `album_results` to the flattened, renamed and rank-sorted frame, then preview it.
album_results = format_album_results(album_results)

album_results.head()

Unnamed: 0_level_0,rank,album,genre_rank,genre_id,genre,nb_votes,total_score,mean_score,highest_score,lowest_score,mean_position,highest_position,lowest_position
album_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
35,1.0,the ocean - phanerozoic ii : mesozoic / cenozoic,1.0,3,Prog Metal,15,324.158629,21.610575,30.0,5.862069,8.933333,1.0,29.0
31,2.0,napalm death - throes of joy in the jaws of de...,1.0,21,Crust - Grind,15,283.355725,18.890382,29.137931,7.380952,10.666667,2.0,26.0
245,3.0,kvelertak - splid,1.0,39,Metal'n'Roll,12,257.413283,21.451107,29.137931,5.0,6.5,2.0,20.0
81,4.0,loudblast - manifesto,1.0,4,Death Metal,13,228.271757,17.559366,30.0,5.0,10.076923,1.0,24.0
57,5.0,oranssi pazuzu - mestarin kynsi,1.0,8,Post-Black Metal,11,222.261967,20.205633,30.0,5.0,10.545455,1.0,30.0


In [77]:

def extend_entries(entries, album_results, users=None):
    """Attach each album's total score and each user's details to the entries.

    Parameters
    ----------
    entries : pandas.DataFrame
        Formatted entries with at least ``entry_id``, ``user_id`` and
        ``album_id`` columns.
    album_results : pandas.DataFrame
        Per-album statistics exposing ``total_score`` and keyed by
        ``album_id`` (index level or column).
    users : pandas.DataFrame, optional
        Users table keyed by ``id`` (index level or column). Defaults to the
        notebook-level ``df_users`` so existing two-argument calls keep
        working.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``entry_id`` with exactly the columns listed in
        ``cols`` below, in that order. Because the selection uses
        ``reindex``, a listed column missing from the merged data comes out
        as all-NaN.
    """
    if users is None:
        # Resolved at call time so the function sees the current global.
        users = df_users

    # Only the album key and its total score are needed from album_results.
    album_keepcols = [
        'album_id',
        'total_score'
    ]
    album_dropcols = [col for col in album_results.columns if col not in album_keepcols]
    album_scores = album_results.drop(columns=album_dropcols)

    full_entries = pd.merge(entries, album_scores, on='album_id')
    full_entries = pd.merge(full_entries, users, left_on='user_id', right_on='id')

    full_entries = full_entries.set_index('entry_id')

    cols = [
        'user_id',
        'name',
        'album_id',
        'album',
        'genre_id',
        'genre',
        'top_size',
        'position',
        'score',
        'total_score'
    ]

    return full_entries.reindex(columns=cols)

# Enrich the entries with album totals and user details, then preview.
full_entries = extend_entries(entries, album_results)
full_entries.head()

Unnamed: 0_level_0,user_id,name,album_id,album,genre_id,genre,top_size,position,score,total_score
entry_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1,Denis Porcin,1,déhà - how to despise humanity in 7 lessons an...,24,Experimental,4,1,30.0,49.772727
2,1,Denis Porcin,2,sepultura - quadra,2,Thrash Metal,4,2,21.666667,220.968801
3,1,Denis Porcin,3,jonathan hulten - chants from another place,29,Neofolk - DarkFolk,4,3,13.333333,43.333333
4,1,Denis Porcin,4,tomorrow's rain - hollow,11,Doom Metal,4,4,5.0,32.57837
1082,64,Ugo Petropoulos,1,déhà - how to despise humanity in 7 lessons an...,24,Experimental,23,10,19.772727,49.772727


In [83]:
def compute_entry_stats(df):
    """Add the per-entry ``pop_score`` and ``edgyness`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Entries with ``score``, ``total_score`` and ``top_size`` columns, and
        ``entry_id`` available as an index level or column (used for the
        final sort). The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` sorted by ``entry_id`` with two extra columns:

        * ``pop_score`` = ``score * total_score / 1000``
        * ``edgyness``  = ``top_size / pop_score``

    Notes
    -----
    The previous version had no ``return`` statement, so
    ``full_entries = compute_entry_stats(full_entries)`` bound ``None``.
    """
    stats = df.copy()

    stats['pop_score'] = stats['score'] * stats['total_score'] / 1000
    # idea for a future implementation
    # stats['pop_score'] = stats['score'] * ( stats['total_score'] - stats['total_score'].quantile(q=0.666) ) / 1000
    stats['edgyness'] = stats['top_size'] / stats['pop_score']

    return stats.sort_values('entry_id')

# Rebind `full_entries` to the value returned by compute_entry_stats, then preview it.
full_entries = compute_entry_stats(full_entries)
full_entries.head()

Unnamed: 0_level_0,user_id,name,album_id,album,genre_id,genre,top_size,position,score,total_score,pop_score,edgyness
entry_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,1,Denis Porcin,1,déhà - how to despise humanity in 7 lessons an...,24,Experimental,4,1,30.0,49.772727,1.493182,2.678843
2,1,Denis Porcin,2,sepultura - quadra,2,Thrash Metal,4,2,21.666667,220.968801,4.787657,0.835482
3,1,Denis Porcin,3,jonathan hulten - chants from another place,29,Neofolk - DarkFolk,4,3,13.333333,43.333333,0.577778,6.923077
4,1,Denis Porcin,4,tomorrow's rain - hollow,11,Doom Metal,4,4,5.0,32.57837,0.162892,24.55617
1082,64,Ugo Petropoulos,1,déhà - how to despise humanity in 7 lessons an...,24,Experimental,23,10,19.772727,49.772727,0.984143,23.370598


In [111]:
def compute_user_stats(full_entries):
    """Summarise the entries per user.

    Returns a ``(user_genres, user_edgyness)`` pair:

    * ``user_genres``: summed ``score`` for every user ``name`` (rows) and
      ``genre`` (columns).
    * ``user_edgyness``: per user ``name``, the summed ``pop_score`` and the
      mean ``edgyness``, ordered from highest to lowest ``edgyness``.
    """
    # Genres fan out into columns; a user/genre pair with no entry stays NaN.
    user_genres = full_entries.pivot_table(
        index=['name'],
        columns=['genre'],
        values=['score'],
        aggfunc=['sum'],
    )

    per_user_agg = {'pop_score': 'sum', 'edgyness': 'mean'}
    user_edgyness = full_entries.pivot_table(
        index=['name'],
        values=['pop_score', 'edgyness'],
        aggfunc=per_user_agg,
    ).sort_values('edgyness', ascending=False)

    return user_genres, user_edgyness

# Compute both per-user tables; show the 20 users with the highest mean edgyness.
user_genres, user_edgyness = compute_user_stats(full_entries)
user_edgyness.head(20)

Unnamed: 0_level_0,edgyness,pop_score
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alexis Hérissant,196.44504,22.179822
David Vermalen,171.16605,14.068766
Pagus Ström,165.329756,29.986856
Francky Rockois,154.728884,42.634397
David Chardiet,142.191275,72.554854
"Ronan ""youpi crêpier"" Lesclache",141.319131,47.902311
David Decaudin,141.24302,14.856602
Moland Fengkov,140.492984,47.05477
Arnaud Dehon,139.625061,14.059001
Corinne Royer béril,139.533215,41.617442


In [82]:
# Preview the per-user / per-genre summed scores (first 20 users).
user_genres.head(20)

Unnamed: 0_level_0,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum
Unnamed: 0_level_1,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score,score
genre,Alternative Rock / Metal,Avant-Garde Metal,Black Metal,Blues - Blues Rock,Crust - Grind,Dark Synth / Dungeon synth,Death Metal,Deathcore,Doom Metal,Experimental,...,Post-Rock,Power Metal,Prog - Psychedelic Rock,Prog Metal,Punk,Rap Metal,Shoegaze,Sludge,Stoner - Psych,Thrash Metal
name,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
Alex KM,,41.034483,104.137931,25.689655,,,30.0,,26.37931,,...,,,10.862069,24.827586,,,,60.344828,122.241379,
Alexandre Ardisson,,,95.625,,,,,,,,...,,,,,,,,,,
Alexis Hérissant,17.068966,41.896552,102.586207,,43.62069,,156.896552,,,18.793103,...,,,41.034483,,,,8.448276,,7.586207,
Amduscias Baal,,,18.888889,,,,,,134.444444,,...,,,,,,,,,21.666667,
Anthony Pwl,,,,,,,,,,,...,,,,11.25,,,,,,76.25
Arnaud Dehon,,,167.857143,,,,61.904762,,25.238095,,...,,,,,,,,,,42.142857
Arno STROBL,,,,,27.727273,,5.0,,25.454545,7.272727,...,,,,,20.909091,,,,16.363636,32.727273
Arsonist Tohliam,,,45.0,,,,10.0,,,20.0,...,,,,,,,,,,
Arthur Bourson,,,47.5,,,,,,,,...,,,,,,,,,,
Brett Caldas-Lima,,,,,,,,,,,...,,,,52.5,,,,,,


In [90]:
def compute_genre_stats(df):
    """Total votes and score per genre, plus each genre's share of the score.

    ``df`` needs ``genre``, ``nb_votes`` and ``total_score`` columns. The
    result is indexed by ``genre``, carries the summed ``nb_votes`` and
    ``total_score`` under a ``'sum'`` top-level column label, is ordered by
    descending vote count, and has a ``weight`` column holding the genre's
    percentage of the overall summed ``total_score``.
    """
    votes_key = ('sum', 'nb_votes')
    score_key = ('sum', 'total_score')

    genre_stats = df.pivot_table(
        index=['genre'],
        values=['nb_votes', 'total_score'],
        aggfunc=['sum'],
    ).sort_values(votes_key, ascending=False)

    genre_scores = genre_stats[score_key]
    genre_stats['weight'] = genre_scores * 100.0 / genre_scores.sum()

    return genre_stats

# Aggregate the per-album results by genre and show the 20 genres with the most votes.
genre_stats = compute_genre_stats(album_results)
genre_stats.head(20)

Unnamed: 0_level_0,sum,sum,weight
Unnamed: 0_level_1,nb_votes,total_score,Unnamed: 3_level_1
genre,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Black Metal,202,3616.887197,17.940909
Death Metal,111,1781.570175,8.837154
Doom Metal,96,1588.970247,7.881797
Thrash Metal,72,1265.888615,6.279209
Stoner - Psych,72,1231.781665,6.110028
Prog Metal,61,1130.650069,5.608383
Post-Black Metal,52,993.615057,4.928646
Sludge,41,676.207837,3.354206
Experimental,36,714.073309,3.54203
Prog - Psychedelic Rock,35,619.52419,3.073037
