In [1]:
import pandas as pd
import sqlite3
import seaborn as sb
import matplotlib.pyplot as plt
import csv

In [2]:
from plotly.offline import plot, init_notebook_mode, iplot
import plotly.figure_factory as ff
init_notebook_mode(connected=True)

In [3]:
# Names of the mood-group columns inspected throughout this notebook.
groups = [
    'g1', 'g2', 'g5', 'g6', 'g7', 'g8', 'g9',
    'g11', 'g12', 'g15', 'g16', 'g17',
    'g25', 'g28', 'g29', 'g31', 'g32',
]

In [7]:
def mood_count(row, groups):
    """Return how many mood-group flags are set in ``row``.

    Each ``row[group]`` value is coerced with ``int()`` and the results
    are summed over every name in ``groups``.
    """
    return sum(int(row[name]) for name in groups)

## MSD with Mood Tags

In [8]:
# Load the whole mood_labels table into a DataFrame, then release the
# database handle: nothing below reads from the connection again, and
# sqlite3 connections are not closed automatically.
conn = sqlite3.connect("../db/tracks_mood.db")
try:
    data = pd.read_sql_query("SELECT * from mood_labels;", conn)
finally:
    conn.close()

In [9]:
# Per-row number of mood flags set, computed by mood_count over `groups`.
data["mood_count"] = data.apply(mood_count, axis=1, args=(groups,))

In [10]:
count_table = [["Mood Group", "Count (non unique)", "Count (unique)"]]

# Keep only the rows that carry exactly one mood label.
unique_set = data[data["mood_count"] == 1]

for group in groups:
    # Count the set flags directly instead of value_counts()[1], which raises
    # KeyError as soon as a group has no tagged row in the frame.
    # The int cast mirrors the int() coercion done in mood_count.
    count_table.append([
        group,
        int((data[group].astype(int) == 1).sum()),
        int((unique_set[group].astype(int) == 1).sum()),
    ])

table = ff.create_table(count_table)
iplot(table)

## MSD with Mood Tags with Query Results on Amazon

In [12]:
# MSD tracks with mood tags plus a boolean flag for Amazon availability.
ama_data = pd.read_csv("../csv/tracks_mood_amazon.csv")

In [13]:
# Per-row number of mood flags set, computed by mood_count over `groups`.
ama_data["mood_count"] = ama_data.apply(mood_count, axis=1, args=(groups,))

In [14]:
ama_data.shape  # (rows, columns) of the mood-tagged dataset, before the Amazon filter

(60637, 23)

In [15]:
# Keep only the rows whose on_amazon_jp flag is True, i.e. the data points
# that have results on Amazon.
ama_data = ama_data.loc[ama_data["on_amazon_jp"].eq(True)]

In [16]:
ama_data.shape  # (rows, columns) after keeping only rows with results on Amazon

(44929, 23)

In [17]:
count_table = [["Mood Group", "Count (non unique)", "Count (unique)"]]

# Keep only the rows that carry exactly one mood label.
unique_set = ama_data[ama_data["mood_count"] == 1]

for group in groups:
    # Count the set flags directly instead of value_counts()[1], which raises
    # KeyError as soon as a group has no tagged row in the frame.
    # The int cast mirrors the int() coercion done in mood_count.
    count_table.append([
        group,
        int((ama_data[group].astype(int) == 1).sum()),
        int((unique_set[group].astype(int) == 1).sum()),
    ])

table = ff.create_table(count_table)
iplot(table)

## MSD with Mood Tags and Duplicates Removed with Query Results on Amazon

In [20]:
# Amazon-annotated mood table; going by the file name and the section
# heading, this is the variant with duplicate tracks removed.
ama_nodup_data = pd.read_csv("../csv/tracks_mood_amazon_nodupli.csv")

In [21]:
# Per-row number of mood flags set, computed by mood_count over `groups`.
ama_nodup_data["mood_count"] = ama_nodup_data.apply(mood_count, axis=1, args=(groups,))

In [22]:
ama_nodup_data.shape  # (rows, columns) of the duplicate-free dataset, before the Amazon filter

(46830, 23)

In [23]:
# Keep only the rows whose on_amazon_jp flag is True, i.e. the data points
# that have results on Amazon.
ama_nodup_data = ama_nodup_data.loc[ama_nodup_data["on_amazon_jp"].eq(True)]

In [24]:
ama_nodup_data.shape  # (rows, columns) of the duplicate-free dataset after keeping only rows with results on Amazon

(33014, 23)

In [25]:
count_table = [["Mood Group", "Count (non unique)", "Count (unique)"]]

# Keep only the rows that carry exactly one mood label.
unique_set = ama_nodup_data[ama_nodup_data["mood_count"] == 1]

for group in groups:
    # Count the set flags directly instead of value_counts()[1], which raises
    # KeyError as soon as a group has no tagged row in the frame.
    # The int cast mirrors the int() coercion done in mood_count.
    count_table.append([
        group,
        int((ama_nodup_data[group].astype(int) == 1).sum()),
        int((unique_set[group].astype(int) == 1).sum()),
    ])

table = ff.create_table(count_table)
iplot(table)

## Shopping List

We are buying only the files with a single mood label, based on the statistics obtained in the previous section, "MSD with Mood Tags and Duplicates Removed with Query Results on Amazon".

In [31]:
# Rows with exactly one mood label. The mask is built from the same frame it
# filters; the previous mask referenced `ama_v2_nodup_data`, a name that is
# never defined in this notebook and fails on a fresh kernel.
single_tag_songs = ama_nodup_data[ama_nodup_data["mood_count"] == 1]

In [32]:
# Allow up to 100 rows to be rendered when a DataFrame is displayed.
pd.set_option("display.max_rows", 100)

In [111]:
# Single-label rows flagged g1.
g1 = single_tag_songs[single_tag_songs["g1"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g1.sample(n=min(100, len(g1)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
36704,TRWFXCL128F428AD01,The Buzz Kill,Sage Francis
18026,TRMZCPQ12903CE3D7D,Back Home,Pepper
23424,TRAUEAA128F9342707,Growing Down,No Use For A Name
32039,TREAIES128F4266517,Crash Burn,Blues Traveler
26620,TRNTFKC12903CDA0B8,Never Gonna Stop The Show,General Midi
41178,TRTWRDB128EF356B6B,Numb,Ill Niño
40666,TRTYKPI128F428A4EE,Every Single Day,Pennywise
24259,TRBBLXU128F42623A1,Eyes Open Wide,Pulley
15902,TROFQTG128E0791F3F,Light Burns Clear,Sparta
16666,TROVXWL128F4267A3C,Fire,Mother's Finest


In [112]:
# Single-label rows flagged g2.
g2 = single_tag_songs[single_tag_songs["g2"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g2.sample(n=min(100, len(g2)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
44025,TRQTFRI128F146E0E5,Hotel Expressionism,The Streets
38519,TRZGWUP128F4261A6F,Double Pump,Girl Talk
24389,TRBSBCX128F92DEF11,I Am Not a Robot,Marina & The Diamonds
35740,TRPKBQW128F426FFFC,Robot,The Futureheads
26107,TRNFLRP128F427F119,Time for Heroes,The Libertines
9934,TRIRYEF12903D061CD,Two Tribes,Frankie Goes To Hollywood
26236,TRNENHL128F424EF18,Movie Star,Róisín Murphy
35952,TRWCXQI128F421873E,Free and Easy (Down the Road I Go),Dierks Bentley
25830,TRNYVPW128F92FD8B0,Carry On,"Crosby, Stills, Nash & Young"
2847,TRSGGIX128E0789D57,Robinson Crusoe,The Redwalls


In [113]:
# Single-label rows flagged g5.
g5 = single_tag_songs[single_tag_songs["g5"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g5.sample(n=min(100, len(g5)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
5814,TRCOZBQ12903CA3669,Peggy Sue,Buddy Holly
39708,TRGPOFO128F4264F8F,Walking In Rhythm,The Blackbyrds
1107,TRXSTAC12903CEAF79,Black and Blue,Tilly and the Wall
23708,TRBIFFV128F930DE98,Ride A White Swan,T. Rex
4116,TRZEIXS128F425CE00,Lou Reed,The Little Willies
10540,TRIEVXY128F1492442,Where'd You Go?,The Mighty Mighty Bosstones
32934,TREUWNS128F9341EDB,Magic's Wand,Whodini
10476,TRIEJCM128F92DF37A,Never Ending Summer,311
7367,TRCUUMG128F4298705,Bad Town,Operation Ivy
8506,TRJDIHY12903CB2600,Rags To Riches,Tony Bennett


In [114]:
# Single-label rows flagged g6.
g6 = single_tag_songs[single_tag_songs["g6"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g6.sample(n=min(100, len(g6)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
27927,TRSWZYM128F422F9DE,I Know A Place,Petula Clark
2890,TRSVKUB128F14858D2,Chanson De Rien Du Tout,Mickey 3D
20697,TRYYVVU12903CB6448,Final Say,Sambassadeur
32306,TREFSSV128F426734C,Geht Auseinander,Wir sind Helden
28734,TRFMJIG128E0781679,Warm Night,The Concretes
21425,TRYGXVS12903C9E15C,Stairway To The Stars,Ella Fitzgerald
40228,TRTCMBE128F9335CAE,Ice Dogs,Man Man
23401,TRAUHSS128F14587C3,Life Begins At The Hop,XTC
26512,TRNZUJT128F427076E,Back Door Man,Willie Dixon
22825,TRAEFRR128F421A07D,To Be in Love,Masters At Work


In [115]:
# Single-label rows flagged g7. n=58 (rather than the 100 used for most
# groups) presumably reflects the number of rows available -- TODO confirm.
g7 = single_tag_songs.loc[single_tag_songs["g7"] == 1]
g7[['tid','title','artist']].sample(n=58, random_state=1)

Unnamed: 0,tid,title,artist
36798,TRWECMU128F1459198,Dala,Ayub Ogada
1636,TRMUJKH128F4267E7E,Stand!,Sly & The Family Stone
24074,TRBYJMH128F9300CD3,People Get Ready,The Impressions
32101,TREBCJG128F933F25C,Fort Hood,Mike Doughty
26271,TRNEUEL128F931097F,"Don't Be Afraid, You're Already Dead",Akron/Family
23369,TRAQLFP128F429BED6,Blister,Jimmy Eat World
25017,TRBTVZG12903CD9983,This Flight Tonight,Nazareth
13275,TRXCWGM128E078E0D1,All I Really Want (Acoustic Album Version),Alanis Morissette
15004,TROCJIQ128E0792423,After The Rain Has Fallen,Sting
38143,TRZDHUA128E078F8B2,Nobody Move,Eazy-E


In [116]:
# Single-label rows flagged g8.
g8 = single_tag_songs[single_tag_songs["g8"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g8.sample(n=min(100, len(g8)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
11438,TRHIYNA128F4252A93,Comfort Zone,Steven Halpern
35306,TRPDKZV128F4277E2B,Somebody's Arms,Sarah Slean
31704,TREHHMW128F4246149,Silences,Vishwa Mohan Bhatt
25952,TRNNONJ128F428CC5F,Pointless Nostalgic,Jamie Cullum
1916,TRYOXDF128F92F2AC2,Chove En Santiago,Luar na Lubre
3083,TRDIIPQ128F9316E61,From Heaven to Dust,Azam Ali
19130,TRRAFNC128F428FA63,Birds of a Feather,Kind of Like Spitting
46222,TRVFXXF128E0781AB3,It Is Obvious,Syd Barrett
31122,TRDZSKV128F931B4ED,Wise Blood,Soulsavers
37211,TRWKFTX12903C95540,Europa,A Hawk and a Hacksaw


In [118]:
# Single-label rows flagged g9. n=97 (rather than the 100 used for most
# groups) presumably reflects the number of rows available -- TODO confirm.
g9 = single_tag_songs.loc[single_tag_songs["g9"] == 1]
g9[['tid','title','artist']].sample(n=97, random_state=1)

Unnamed: 0,tid,title,artist
17772,TRMEMVX12903CA13EC,There Is A Light,A Silver Mt. Zion
27883,TRSWCUU128F92E1019,Starrsha,Ringo Deathstarr
16587,TROUHHH128F93093CD,Demoliendo Hoteles,Charly García
42711,TRKTZNP128E07900B3,Music,Cat Stevens
23624,TRBJBMC12903CBB3ED,Dreamer,Atmosphere
26648,TRNTKRX12903CE1983,Flawless,The Ones
31619,TREJGBP128F9339054,Yearnin',The Black Keys
40106,TRGUOGZ128F427C312,Ugly Day,Five Iron Frenzy
42347,TRKDAHJ128F930172E,Medicine,Sons and Daughters
27971,TRSZEEM128F14AD296,Resurrection,Brian May


In [120]:
# Single-label rows flagged g11. n=51 (rather than the 100 used for most
# groups) presumably reflects the number of rows available -- TODO confirm.
g11 = single_tag_songs.loc[single_tag_songs["g11"] == 1]
g11[['tid','title','artist']].sample(n=51, random_state=1)

Unnamed: 0,tid,title,artist
21319,TRYWWQC128F933761F,Learning How To Bend,Gary Allan
726,TRIKBLH12903CFB8AE,Angel Fire,Dolores O'Riordan
44776,TRUBNVT128F42744BB,Melt Away,Mariah Carey
22819,TRAEATL128F42690A7,Lord of the Thighs,Aerosmith
31536,TRECBIU128F4257D62,Enchanted Thoughtfist,Jello Biafra with the Melvins
41420,TRTKWMP128F428149A,La Calandria,Pedro Infante
29942,TRFVVFA128F14AE800,Pictures Of Shorelines,Further Seems Forever
44817,TRUNNHV128F9347737,Hacer El Amor con Otro,Alejandra Guzmán
31139,TRDZZRB128EF340D4D,Karaoke Plays,Maxïmo Park
23629,TRBJNAA12903CB422E,Flight of the Navigator,Set Your Goals


In [121]:
# Single-label rows flagged g12.
g12 = single_tag_songs[single_tag_songs["g12"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g12.sample(n=min(100, len(g12)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
21427,TRYGMIE128F4228EDC,Witchi-Tai-To,Jan Garbarek
23861,TRBXPAP128F93253D9,Aftermath #02,Aes Dana
24647,TRBLOUO128E0786428,Landing,Moby
29088,TRFNUPI128F42222BA,Smooth Vegas,Soul Ballet
23844,TRBXASJ128F9349C7C,Come Together,Marcus Miller
197,TRCWCOR12903CA4258,The Man From Nazareth,Narnia
21036,TRYDHQA128F931C28D,Beyond,Mr. Scruff
15973,TROECBP128F4276F7C,Harlequin,Weather Report
26748,TRNQEYR128F147E93D,Dammi Fuoco (Light My Fire),Montefiori Cocktail
2167,TRABHPA12903CEC222,Caravan,Duke Ellington


In [122]:
# Single-label rows flagged g15.
g15 = single_tag_songs[single_tag_songs["g15"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g15.sample(n=min(100, len(g15)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
1264,TROXTZO128F4285A25,Silence,Sarah McLachlan
16107,TROLQNK128F92E74CC,Wicked Twisted Road,Reckless Kelly
29878,TRFVJKZ128F9307C93,Be Free,Belinda
38026,TRZNKYC128EF35FA2E,No Conclusion,of Montreal
32923,TREUEFP128F42BA475,You Can't Always Get What You Want,The Rolling Stones
46119,TRVBVGF128F4226628,Citizen,iLiKETRAiNS
35568,TRPZAYN128F145908E,The Birds,Peter Hammill
46443,TRVPZCR128F9330262,Close the World,Violet Indiana
32640,TREZRYL128F426700B,"The Downeaster ""Alexa""",Billy Joel
46247,TRVFWRM128F425C1BB,December,The Last Dance


In [123]:
# Single-label rows flagged g16.
g16 = single_tag_songs[single_tag_songs["g16"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g16.sample(n=min(100, len(g16)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
45122,TRUPDNU128F147CBE9,The Thing That Should Not Be,Metallica
42341,TRKDMDO128F422AC8A,Tried,Assemblage 23
777,TRHJXVA128F92D2DA3,Gods Take Dust,Suspiria
7057,TRCTXIG128F424CCD5,Crimson Tail,Penumbra
13629,TRXOZRH128F4275375,Plug 'N' Die,Dope Stars Inc.
28023,TRSGBWD128F421409E,Poison Pit,Themselves
793,TRHIUIV12903CC5A38,Dead Stars (Club Version),Covenant
44921,TRUFNKV128EF35F577,Popstar Kill,Bis
6965,TRCZVBJ128F92F913B,Bloodheat,Archie Bronson Outfit
32525,TREPRJQ128F14768EB,Red Flags And Long Nights,She Wants Revenge


In [124]:
# Single-label rows flagged g17.
g17 = single_tag_songs[single_tag_songs["g17"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g17.sample(n=min(100, len(g17)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
6786,TRCPWMF128F93519F1,F*@k You Lucy,Atmosphere
36691,TRWSQAD12903CA6C69,Read 'em And Weep,Barry Manilow
17836,TRMLYMW128F9304D8F,Broken Wing,Grammatics
10135,TRIBHET128F4248021,Changes,The Zombies
40892,TRTFJZC128F930BEED,The Soft Attack,The Daysleepers
39526,TRGFVFV12903D0165F,Your Call,Ulver
6479,TRCFQKQ128F42BC245,I Belong To Me,Jessica Simpson
23680,TRBIJES12903CF5B12,Harboring An Apparition,Mouth Of The Architect
21264,TRYPPVA128F149F14F,True Reflections,Jah Cure
16582,TROUIBB12903D036F1,Black Car,Black Tambourine


In [125]:
# Single-label rows flagged g25.
g25 = single_tag_songs[single_tag_songs["g25"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g25.sample(n=min(100, len(g25)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
14296,TRXEZXQ128F42865B8,Trouble,Skindred
26764,TRNQTGL128E07937A5,Loud Love,Soundgarden
46213,TRVSVUN128F1485FB0,X-Amount Of Words,Blue October
1650,TRMVTHX128F425DA9C,Self Healing Lie,Mesh
15247,TROHPMM128F930AE86,Circle,Corpus Delicti
26041,TRNSFXQ128E0788926,God,The Smashing Pumpkins
37563,TRZHIOU128F149AC08,Salvador,Jamie T
40159,TRGVINL128F422B4ED,This Lamb Sells Condos,Final Fantasy
35591,TRPZPXC128F92F9D6A,Gypsy Magic,Govi
7372,TRCVCPC128F92EF353,Lay Me Down,Alice in Videoland


In [126]:
# Single-label rows flagged g28.
g28 = single_tag_songs[single_tag_songs["g28"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g28.sample(n=min(100, len(g28)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
3697,TRPXFYR128F148D327,Anthem,Rush
20382,TRYHSWU128F428B255,Which Side Are You On,Billy Bragg
23725,TRBIGJK128E0783FCB,Leave (Get Out),JoJo
29794,TRFQEQG128F92E636E,Dead Men Tell No Tales,Motörhead
42266,TRKSDZM128F9346B87,The Runaways,Anberlin
18875,TRRODAN128F4279B42,I'm So Bored With The U.S.A.,The Clash
20257,TRYJZYD128E0788E85,Only,Nine Inch Nails
26559,TRNGFOR12903CA6809,Fuel My Fire,L7
46319,TRVEICB128F14825EA,Bullet Theory,Funeral for a Friend
18078,TRMZWUG128F933E621,Reject All American,Bikini Kill


In [127]:
# Single-label rows flagged g29.
g29 = single_tag_songs[single_tag_songs["g29"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g29.sample(n=min(100, len(g29)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
33386,TRLMPIE128F42BA126,The Shortest Straw,Metallica
4536,TRTGLMR128F429B029,Cosmonaut,At The Drive-In
22842,TRAETAX128F14910FF,Oualalaradime,Zebda
34042,TRLWWSL128EF3473DB,Dragon Lady,Eric Burdon
31786,TREXKZX128F148B429,Unfulfilled,Quicksand
39498,TRGFNLX128F147CBEF,Battery,Metallica
31233,TRDTRDU128F93257E5,Not Ready To Die,Demon Hunter
34592,TRPIWEE128F934B036,Perverse Suffering,Cannibal Corpse
20392,TRYHPNH128F425E8DD,Fallen,30 Seconds To Mars
26980,TRSJQUJ128F934EE7A,What's Going On,Senser


In [128]:
# Single-label rows flagged g31. n=25 (rather than the 100 used for most
# groups) presumably reflects the number of rows available -- TODO confirm.
g31 = single_tag_songs.loc[single_tag_songs["g31"] == 1]
g31[['tid','title','artist']].sample(n=25, random_state=1)

Unnamed: 0,tid,title,artist
26368,TRNPWFR128F4289160,Heroin,Lou Reed
26170,TRNDDXU128E0793765,Sister Moonshine,Supertramp
39313,TRGBOPC128F421FBF1,Cherry Red,Five Horse Johnson
9768,TRIOOXM128F4295254,Let Me Love You Baby,Buddy Guy
44128,TRQQZQT128E0781AA9,Late Night,Syd Barrett
18867,TRROYBR12903CF2301,There's No Place Like a Stranger's Floor,The Lawrence Arms
40076,TRGQZDY128F1468DEB,Love It Is A Killing Thing,Sheila Chandra
40680,TRTAIFP128F92F9F5F,One Room Country Shack,Buddy Guy
10099,TRIAZSI128F4284391,Streets Of Fire,Bruce Springsteen
2183,TRANYUX128F14652EA,Who Do You Love?,George Thorogood & The Destroyers


In [129]:
# Single-label rows flagged g32.
g32 = single_tag_songs[single_tag_songs["g32"] == 1]
# Cap the draw at the group size: sample() raises ValueError when n exceeds
# the number of rows (sampling without replacement).
g32.sample(n=min(100, len(g32)), random_state=1)[['tid','title','artist']]

Unnamed: 0,tid,title,artist
28608,TRFXJRT128F934324D,Nathalie,Julio Iglesias
33068,TRLJRUT128F4261CCA,Soul Provider,Michael Bolton
9379,TRJVZFI128F92E6CEC,Por Amarte,Enrique Iglesias
17226,TRMRYXN128F4255D2A,Wild Rose,Bombay Rockers
10144,TRIBMRB128E0790827,Angie Baby,Helen Reddy
25720,TRNRSAV128F148B117,Lucky,Hoobastank
5872,TRCMNOT128F1492F6B,E...,Vasco Rossi
10411,TRIDIKS128F42BAF36,Music Of The Moon,Martha Tilston
1633,TRMQPDD128F4272A0F,United We Stand,Brotherhood of Man
15173,TROIWFK128F426719D,Love Doesn't Ask Why,Céline Dion
