In [16]:
# usual imports
import os
import sys
import time
import glob
import datetime
import pandas as pd
import numpy as np # get it at: http://numpy.scipy.org/

import pickle


In [22]:
# NOTE(review): `plotly.plotly` was moved to the separate `chart_studio` package
# in plotly 4, so this import fails on newer plotly versions; `py` is not used
# in the cells shown here — confirm before dropping it.
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
# Set up plotly's offline notebook mode for the plotly.offline.iplot calls below.
plotly.offline.init_notebook_mode(connected=True)

# track ids to get the right order

In [2]:
# Load the track ids as strings. The position of an id in this array is used
# later as the row index into the chroma / MFCC feature arrays.
track_ids_path = "./data/track_ids.txt"
track_ids = np.loadtxt(fname=track_ids_path, dtype=str)

# get track popularity and sort by play count (most played first)

In [3]:
# Read per-track play counts, keep only the id and count columns, and order
# the rows from most played to least played.
track_pop_dict_path = "data/nn_subset_popularity_dict.pkl"  # not read in the cells shown here
track_pop_df = (
    pd.read_csv("./data/nn_subset_popularity.csv")
    .loc[:, ["track_id", "play_count"]]
    .sort_values(by="play_count", ascending=False)
)
print("Shape", track_pop_df.shape)
track_pop_df.head()

Shape (32648, 2)


Unnamed: 0,track_id,play_count
14726,TRAEHHJ12903CF492F,726885
3600,TRGXQES128F42BA5EB,648239
28202,TROAQBZ128F9326213,292642
13730,TRIXAZF128F421EE64,268353
18305,TRTNDNE128F1486812,185653


## subset of the N most popular songs

In [4]:
# Number of top rows to keep; track_pop_df is already sorted by play_count
# in descending order, so these are the N most played tracks.
N = 100
subset_track_pop_df = track_pop_df.head(N)

# get mapping of track, song, artist, title

In [8]:
# Load the track / song / artist / title mapping table from a pickle file.
# The public pickle.load API is used instead of the private pickle._Unpickler
# class; encoding="latin1" is the same setting the private unpickler was given
# (presumably because the file was written by Python 2 — confirm).
song_id_path = "./data/song_track_artist.pkl"
with open(song_id_path, "rb") as f:
    # SECURITY: unpickling can execute arbitrary code — only load trusted files.
    mapping_data = pickle.load(f, encoding="latin1")

In [9]:
# Inspect the mapping table: print its shape, then display the first rows.
print(mapping_data.shape)
mapping_data.head()

(100600, 4)


Unnamed: 0,song_id,track_id,artist_id,song_name
18,SOIMMJJ12AF72AD643,TRMMMTK128F424EF7C,Emery,Rock-N-Rule
32,SOGNNYL12A6D4F910B,TRMMMQN128F4238509,Prince & The Revolution,Raspberry Beret (LP Version)
33,SOOLRHW12A8C142643,TRMMMKQ128F92EBCB5,Kreator,All of the same blood
47,SOWWQXA12A81C1F9CA,TRMMMFG128F425087B,Billy Idol,Scream
53,SOHMYGC12A6D4FAC4B,TRMMMQA128F14A454A,Diana Krall,Dancing In The Dark


In [10]:
# Re-index the mapping table by track_id, then pull the rows for the most
# popular tracks in the order they appear in subset_track_pop_df.
mapping_data_track_idx = mapping_data.set_index("track_id")
popular_track_ids = subset_track_pop_df["track_id"]
subset_mapping_data_track_idx = mapping_data_track_idx.loc[popular_track_ids]
print(subset_mapping_data_track_idx.shape)
subset_mapping_data_track_idx.head(100)

(100, 3)


Unnamed: 0_level_0,song_id,artist_id,song_name
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TRAEHHJ12903CF492F,SOBONKR12A58A7A7E0,Dwight Yoakam,You're The One
TRGXQES128F42BA5EB,SOAUWYT12A81C206F1,Björk,Undo
TROAQBZ128F9326213,SONYKOW12AB01849C9,OneRepublic,Secrets
TRIXAZF128F421EE64,SOUFTBI12AB0183F65,Tub Ring,Invalid
TRTNDNE128F1486812,SOFLJQZ12A6D4FADA6,Cartola,Tive Sim
TRKRRUV128F92F20F1,SOTWNDJ12A8C143984,Train,Marry Me
TRJRECT12903CBADA3,SOUDLVN12AAFF43658,Bill Withers,Make Love To Your Mind
TRQFXKD128E0780CAE,SOKLRPJ12A8C13C3FE,Coldplay,The Scientist
TRENTGL128E0780C8E,SOCVTLJ12A6310F0FD,Coldplay,Clocks
TRUFTBY128F93450B8,SOSCIZP12AB0181D2F,Lady GaGa,Alejandro


# get mfcc and chroma

In [11]:
# Locations of the chroma and MFCC feature arrays (.npy files).
chroma_path = "./data/chroma_mfcc/chroma.npy"
mfcc_path = "./data/chroma_mfcc/mfcc.npy"

In [12]:
# Load both feature arrays; they are indexed by track position further down.
chroma_data, mfcc_data = (
    np.load(feature_path) for feature_path in (chroma_path, mfcc_path)
)

In [13]:
# Map each popular track id to its position in `track_ids`; that position is
# the row used to index chroma_data / mfcc_data later on.
# The popular ids are collected into a set once, so each membership test is a
# hash lookup instead of a scan over the whole pandas column per track.
popular_track_id_set = set(subset_track_pop_df["track_id"])
subset_track_idx = {  # track_id -> track_idx
    tid: idx
    for idx, tid in enumerate(track_ids)
    if tid in popular_track_id_set
}

# get segment start times 

In [70]:
# Load the per-track segment start times (a mapping keyed by track id).
# The public pickle.load API is used instead of the private pickle._Unpickler
# class; encoding="latin1" is the same setting the private unpickler was given
# (presumably because the file was written by Python 2 — confirm).
segments_start_path = "data/track-segments_start.pkl"
with open(segments_start_path, "rb") as f:
    # SECURITY: unpickling can execute arbitrary code — only load trusted files.
    segments_start = pickle.load(f, encoding="latin1")

In [71]:
# Keep only the first 300 start times per track, mirroring the truncation
# applied in the neural net.
for track_key in list(segments_start):
    segments_start[track_key] = segments_start[track_key][:300]

In [74]:
# Sanity check: within each track the segment start times must never decrease.
# The key of every offending track is printed once; no output means all pass.
# Consecutive pairs are compared, so a track with zero or one segment is
# accepted instead of raising IndexError on v[0].
for k, v in segments_start.items():
    if any(later < earlier for earlier, later in zip(v, v[1:])):
        print(k)

# Get plot for song

In [14]:
# COLD PLAY SCIENTIST
# track_id = "TRQFXKD128E0780CAE"
# track_idx = subset_track_idx[track_id]

In [86]:
# Choose the track to plot and collect its feature-row index, its segment
# start times and its display names.
track_id = "TRGCHLH12903CB7352"  # Miley Cyrus - Party In The U.S.A.
# track_id = "TRQFXKD128E0780CAE"  # Coldplay - The Scientist
track_idx = subset_track_idx[track_id]
feature_segments_start = segments_start[track_id]
track_meta = subset_mapping_data_track_idx.loc[track_id]
artist_name = track_meta["artist_id"]  # this column holds the artist's name, not an id
song_name = track_meta["song_name"]
print("artist:", artist_name)
print("Song:", song_name)

artist: Miley Cyrus
Song: Party In The U.S.A.


In [87]:
# Select the chosen track's chroma and MFCC features by its row index.
chroma_feature = chroma_data[track_idx]
mfcc_feature = mfcc_data[track_idx]

In [88]:
# Display the selected track's chroma array for a quick look at its values.
chroma_feature

array([[1.   , 0.874, 0.807, ..., 0.699, 0.828, 0.804],
       [0.114, 1.   , 0.061, ..., 0.035, 0.202, 0.014],
       [0.385, 0.803, 0.21 , ..., 0.562, 1.   , 0.156],
       ...,
       [0.701, 0.19 , 0.229, ..., 0.261, 0.441, 0.237],
       [0.194, 0.19 , 0.325, ..., 0.766, 0.574, 0.195],
       [0.611, 0.911, 1.   , ..., 0.285, 0.266, 0.461]])

In [89]:
# Chroma heatmap: segment start time along x, pitch class along y, and the
# transposed chroma array as the colour values.
pitch_classes = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
axis_title_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')

trace = go.Heatmap(
    z=chroma_feature.T,
    y=pitch_classes,
    x=feature_segments_start,
)

layout = go.Layout(
    title="Chroma for " + song_name + " by " + artist_name,
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(text='Time (sec)', font=axis_title_font),
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(text='Pitch Class', font=axis_title_font),
    ),
)

data = [trace]
fig = go.Figure(data=data, layout=layout)

plotly.offline.iplot(fig, filename="chroma_heatmap")

In [85]:
# MFCC heatmap: segment start time along x, one row per MFCC coefficient
# along y, and the transposed MFCC array as the colour values.
# The y axis is labelled by coefficient number: the pitch-class names and the
# "Pitch Class" title copied from the chroma plot do not describe MFCC values.
# The export filename is likewise changed so it no longer says "chroma".
mfcc_labels = ["MFCC " + str(i + 1) for i in range(mfcc_feature.shape[1])]
mfcc_axis_title_font = dict(family='Courier New, monospace', size=18, color='#7f7f7f')

trace = go.Heatmap(
    z=mfcc_feature.T,
    y=mfcc_labels,
    x=feature_segments_start,
)

layout = go.Layout(
    title="Mfcc for " + song_name + " by " + artist_name,
    xaxis=go.layout.XAxis(
        title=go.layout.xaxis.Title(text='Time (sec)', font=mfcc_axis_title_font),
    ),
    yaxis=go.layout.YAxis(
        title=go.layout.yaxis.Title(text='MFCC coefficient', font=mfcc_axis_title_font),
    ),
)

data = [trace]

fig = go.Figure(data=data, layout=layout)

plotly.offline.iplot(fig, filename="mfcc_heatmap")

In [46]:
# Element-wise comparison of the two feature arrays — presumably a sanity
# check that the MFCC and chroma data are not the same array.
mfcc_feature == chroma_feature

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [50]:
# Display the transposed MFCC array, i.e. the values passed as `z` to the MFCC heatmap.
mfcc_feature.T

array([[   0.   ,   33.097,   31.179, ...,   42.631,   44.25 ,   50.882],
       [ 171.13 ,  -80.466, -105.683, ...,   -8.726,  -55.427,   83.552],
       [   9.469,   50.204,   15.312, ...,   61.114,   79.543,  -30.469],
       ...,
       [   0.973,   65.084,  -27.351, ...,  -24.88 ,   24.363,   -9.294],
       [ -10.64 ,   82.41 ,  -76.531, ...,   -7.987,   31.212,    4.045],
       [  -7.228,    1.146,    7.701, ...,   28.014,    1.379,   -0.927]])

In [49]:
# Display the transposed chroma array, i.e. the values passed as `z` to the chroma heatmap.
chroma_feature.T

array([[1.   , 0.114, 0.385, ..., 0.701, 0.194, 0.611],
       [0.874, 1.   , 0.803, ..., 0.19 , 0.19 , 0.911],
       [0.807, 0.061, 0.21 , ..., 0.229, 0.325, 1.   ],
       ...,
       [0.699, 0.035, 0.562, ..., 0.261, 0.766, 0.285],
       [0.828, 0.202, 1.   , ..., 0.441, 0.574, 0.266],
       [0.804, 0.014, 0.156, ..., 0.237, 0.195, 0.461]])