In [1]:
import numpy as np
from sklearn.manifold import TSNE

from plotly import tools
from matplotlib import cm
import plotly.offline as py
import plotly.graph_objs as go
py.init_notebook_mode(connected=True)

import os
import glob
from os import listdir
from os.path import isfile, join

In [2]:
def files_list(path):
    """Return the paths of every ``*.csv`` file matched by the prefix `path`.

    `path` is concatenated with ``'*.csv'`` verbatim, so it has to end with a
    path separator when it names a directory.  The result is a list in
    whatever order ``glob.glob`` produces.
    """
    csv_pattern = path + '*.csv'
    return list(glob.glob(csv_pattern))


def build_bank(path, f_names=None):
    """Merge the per-feature CSV tables found under `path` into one row per song.

    Every file returned by ``files_list(path)`` is read as one feature table:
    its first line is skipped as a header and each remaining line is split on
    commas.  A merged row starts with the first two fields of the first loaded
    table, followed by the fields from index 2 onward of every loaded table.

    Args:
        path: Prefix passed to ``files_list`` (must end with a separator to
            list a directory).
        f_names: Optional sequence of table names, i.e. file base names with
            their last four characters (``.csv``) removed.  When given, only
            those tables are loaded and their value columns appear in the
            order of `f_names`.  When omitted or empty, every table is loaded
            in sorted path order.

    Returns:
        A list with one list of whitespace-stripped strings per song, or an
        empty list when no table matches.
    """
    files = files_list(path)

    if f_names:
        # glob yields files in arbitrary order, but callers index the value
        # columns by position in f_names -- so load tables in f_names order.
        by_stem = {os.path.basename(f)[:-4]: f for f in files}
        files = [by_stem[stem] for stem in dict.fromkeys(f_names) if stem in by_stem]
    else:
        # No explicit order requested: at least make it deterministic.
        files = sorted(files)

    features_bank = []
    for feature_file in files:
        # Context manager so the handle is closed even if reading fails.
        with open(feature_file, "r") as handle:
            feature_table = handle.readlines()[1:]  # drop the header line
        features_bank.append([line.split(",") for line in feature_table])

    if not features_bank:
        # Nothing matched; previously this path hit an unbound `n_songs`.
        return []

    # Rows are aligned by position, so only rows present in every table can
    # be merged.
    n_songs = min(len(table) for table in features_bank)

    f_normal = []
    for song_idx in range(n_songs):
        merged = list(features_bank[0][song_idx][0:2])
        for table in features_bank:
            merged.extend(table[song_idx][2:])
        f_normal.append([field.strip() for field in merged])

    return f_normal

In [3]:
f_names = ["bpm", "centroid", "self-correlation", "onset_strength", "volume", "zero_cross"]


def _split_bank(rows):
    """Split merged feature rows into (first fields, value lists, "first second" labels)."""
    firsts = [row[0] for row in rows]
    values = [row[2:] for row in rows]
    labels = [row[0] + " " + row[1] for row in rows]
    return firsts, values, labels


# Feature bank loaded from the "favourite" directory.
data = build_bank("../data/features/favourite/", f_names)
f_art, f_vals, f_labels = _split_bank(data)

# Feature bank loaded from the "tags" directory, unpacked the same way.
data1 = build_bank("../data/features/tags/", f_names)
f_art1, f_vals1, f_labels1 = _split_bank(data1)

In [None]:
# Peak bin count of each bar histogram, in subplot order; add_line reads it
# to rescale its curve to the same height.
max_vals = []


def add_bar(fig, inp, n_row, n_col):
    """Draw a 20-bin histogram of `inp` as green bars in the next subplot.

    The target subplot is derived from the module-level ``chart_count``
    cursor, which is advanced by one on return.  The histogram's peak count is
    appended to ``max_vals``.

    Note: a later cell defines another ``add_bar`` with a different signature,
    which replaces this one once that cell has run.

    Args:
        fig: Subplot figure supporting ``append_trace`` and ``fig['layout']``.
        inp: Iterable of values convertible with ``float``.
        n_row: Unused; kept for a uniform drawing-callback signature.
        n_col: Number of subplot columns, used to map the cursor to (row, col).
    """
    global chart_count, max_vals

    counts, edges = np.histogram(list(map(float, inp)), bins=20)
    # np.histogram returns bins + 1 edges.  Plotting the counts against the
    # edges would centre every bar on its bin's left edge (half a bin off),
    # so plot them at the bin centres instead.
    centers = (edges[:-1] + edges[1:]) / 2.0

    max_vals.append(max(counts))
    trace = go.Bar(
            x=centers,
            y=counts,
            name="",
            marker=dict(color='green'),
            opacity=0.7
    )
    grid_row, grid_col = divmod(chart_count, n_col)
    fig.append_trace(trace, grid_row + 1, grid_col + 1)

    axis_no = chart_count + 1
    fig['layout']['yaxis{0}'.format(axis_no)].update(ticklen=3)
    # The x-axis spans the full histogram range, edge to edge.
    fig['layout']['xaxis{0}'.format(axis_no)].update(range=[min(edges), max(edges)])
    fig['layout']['yaxis{0}'.format(axis_no)]['tickfont'].update(size=11,
                                                                 color="lightgrey")
    chart_count += 1

def add_line(fig, inp, n_row, n_col):
    """Overlay a 20-bin histogram of `inp` as a blue spline in the next subplot.

    The curve is rescaled so that its peak equals ``max_vals[chart_count]``,
    i.e. the peak recorded for the same subplot index; ``max_vals`` must
    therefore already hold an entry for that index.  The module-level
    ``chart_count`` cursor selects the subplot and is advanced by one.

    Args:
        fig: Subplot figure supporting ``append_trace`` and ``fig['layout']``.
        inp: Iterable of values convertible with ``float``.
        n_row: Unused; kept for a uniform drawing-callback signature.
        n_col: Number of subplot columns, used to map the cursor to (row, col).
    """
    global chart_count

    counts, edges = np.histogram(list(map(float, inp)), bins=20)
    # np.histogram returns bins + 1 edges; place each point at its bin centre
    # rather than at the bin's left edge.
    centers = (edges[:-1] + edges[1:]) / 2.0

    peak = max(counts)
    if peak:
        scaled = max_vals[chart_count] * np.array(counts) / peak
    else:
        # All bins empty: avoid 0/0 and draw a flat line at zero.
        scaled = np.zeros(len(counts))

    trace = go.Scatter(
            x=centers,
            y=scaled,
            name="",
            marker=dict(color='blue'),
            opacity=0.7,
            mode='lines',
            hoverinfo='none',
            line=dict(shape='spline')
            )
    grid_row, grid_col = divmod(chart_count, n_col)
    fig.append_trace(trace, grid_row + 1, grid_col + 1)

    axis_no = chart_count + 1
    fig['layout']['yaxis{0}'.format(axis_no)].update(ticklen=3)
    fig['layout']['yaxis{0}'.format(axis_no)]['tickfont'].update(size=11,
                                                                 color="lightgrey")
    chart_count += 1
    
def draw_bars(fig, f_names, f_vals, dfunc):
    """Draw one chart per feature column on a 3 x 2 subplot grid.

    Note: a later cell defines another ``draw_bars`` that takes no arguments
    and replaces this one once that cell has run.

    Args:
        fig: Existing figure to draw on, or ``None`` to create a new grid
            titled with `f_names`.
        f_names: Feature names; one chart is drawn per name, using the value
            column at the same position.
        f_vals: Rows of feature values (one row per song).
        dfunc: Drawing callback invoked as ``dfunc(fig, column, n_row, n_col)``.

    Returns:
        The figure that was drawn on.
    """
    n_row = 3
    n_col = 2

    if fig is None:
        fig = tools.make_subplots(rows=n_row, cols=n_col, print_grid=False,
                                  subplot_titles=f_names)

    if len(f_names):
        # Convert once instead of rebuilding the whole array for every column.
        table = np.array(f_vals)
        for j in range(len(f_names)):
            dfunc(fig, table[:, j], n_row, n_col)

    fig['layout'].update(height=750, width=750, showlegend=False)
    return fig
  
# chart_count is the module-level subplot cursor that add_bar / add_line
# advance; it has to be rewound before each pass so that both passes walk the
# same subplots in the same order.
chart_count = 0
# First pass: create the grid (fig=None) and draw f_vals as bars.
fig = draw_bars(None, f_names, f_vals, add_bar)
chart_count = 0
# Second pass: overlay f_vals1 as lines on the figure built above.
fig = draw_bars(fig, f_names, f_vals1, add_line)

py.iplot(fig, show_link=False)

In [None]:
def color_convert(cmap, pl_entries):
    """Sample `cmap` at evenly spaced points and return a Plotly-style colorscale.

    Args:
        cmap: Callable mapping a float in [0, 1] to a sequence whose first
            three items are red/green/blue components in [0, 1].
        pl_entries: Number of colour stops to generate.

    Returns:
        A list of ``[position, 'rgb(R, G, B)']`` pairs with positions running
        from 0.0 to 1.0 and integer channel values.  A single-entry scale has
        one stop at 0.0; a non-positive `pl_entries` yields an empty list.
    """
    if pl_entries < 1:
        return []

    last = pl_entries - 1
    pl_colorscale = []

    for k in range(pl_entries):
        # Dividing (instead of multiplying by a precomputed 1/(n-1)) makes the
        # final stop exactly 1.0; `last == 0` is the single-entry case.
        position = k / float(last) if last else 0.0
        # Format plain ints explicitly: str() of a tuple of NumPy scalars
        # yields text like "np.float64(247.0)" on NumPy 2, which is not a
        # valid colour string.
        red, green, blue = (int(round(float(c) * 255)) for c in cmap(position)[:3])
        pl_colorscale.append([position, 'rgb({0}, {1}, {2})'.format(red, green, blue)])

    return pl_colorscale

def draw_heatmap():
    """Render a heatmap of pairwise correlations between the feature columns.

    Reads the module-level ``f_names`` and ``f_vals``: column ``j`` of
    ``f_vals`` is treated as the values of ``f_names[j]``.  Correlation
    coefficients are rounded to three decimals and shown on a 'Blues'
    colorscale without a colour bar.
    """
    nf = len(f_names)

    # Build the array once and convert each feature column to float.
    table = np.array(f_vals)
    columns = [table[:, j].astype(float) for j in range(nf)]

    m = np.zeros((nf, nf))
    for a in range(nf):
        for b in range(nf):
            # Off-diagonal entry of the 2 x 2 matrix is corr(columns[a], columns[b]).
            m[a, b] = np.corrcoef(columns[a], columns[b])[0][1]
    m = np.round(m, 3)

    # cm.get_cmap() was deprecated and later removed from matplotlib; the
    # colormap is also exposed as a module attribute, which works on both old
    # and new releases.
    blues_cmap = cm.Blues
    blues = color_convert(blues_cmap, 255)

    trace = go.Heatmap(x=f_names,
                       y=f_names,
                       z=m,
                       colorscale=blues,
                       showscale=False)

    layout = go.Layout(
        width=500,
        height=500,
        margin=dict(l=100))

    data = [trace]
    py.iplot({'data': data, 'layout': layout}, show_link=False)

draw_heatmap()

In [6]:
# t-SNE is stochastic: fix the seed so that Restart & Run All reproduces the
# same embedding.  f_vals holds strings parsed from CSV, so convert to a float
# array explicitly instead of relying on the estimator to coerce them.
X_reduct = TSNE(n_components=2, random_state=0).fit_transform(np.array(f_vals, dtype=float))

In [None]:
def scatter(artist):
    """Plot the 2-D embedding, colouring one artist's points apart from the rest.

    Reads the module-level ``X_reduct`` (point coordinates), ``f_art`` (value
    compared against `artist` for each point) and ``f_labels`` (hover text).
    Points whose ``f_art`` entry equals `artist` are drawn in blue, all others
    in red.
    """
    def pick(values, wanted):
        # Keep the items whose f_art entry matches (wanted=True) or does not
        # match (wanted=False) the requested artist.
        return [v for k, v in enumerate(values) if (f_art[k] == artist) == wanted]

    def make_trace(wanted, color):
        return dict(
            type='scatter',
            x=pick(X_reduct[:, 0], wanted),
            y=pick(X_reduct[:, 1], wanted),
            hoverinfo="text",
            hoveron="points",
            mode='markers',
            text=pick(f_labels, wanted),
            marker=dict(color=color),
            opacity=0.7
        )

    data = [make_trace(True, "blue"), make_trace(False, "red")]

    # Caption placed below the plot area, in paper coordinates.
    hint = go.Annotation(
        x=0.5,
        y=-0.16191064079952971,
        showarrow=False,
        text='Move cursor to watch song',
        xref='paper',
        yref='paper',
        opacity=0.6
    )
    layout = dict(
        hovermode='closest',
        xaxis=dict(title=""),
        yaxis=dict(title=""),
        annotations=go.Annotations([hint]),
    )
    py.iplot({'data': data, 'layout': layout}, validate=False, show_link=False)

# Draw the embedding with this artist's songs in blue and all others in red.
scatter("Ciggaretes_After_Sex")

In [8]:
import sys
sys.path.append("../src")
from classifier import *

In [9]:
def features_raiting(target_class, n_validation=10, topn=10):
    """Rank features by importance averaged over repeated classifier builds.

    Builds ``n_validation`` classifiers for `target_class`, sums their
    ``feature_importances_`` and averages them.  ``load_features_bank``,
    ``mark_tag``, ``binary_classifier``, ``features_names``, ``round3`` and
    ``tag_songs_stat`` are not defined in this notebook -- presumably they
    come from the wildcard import of ``classifier``; confirm there.

    Args:
        target_class: Tag passed to ``mark_tag`` to derive the target flags.
        n_validation: Number of classifiers to build and average over (>= 1).
        topn: Maximum number of features to return.

    Returns:
        Up to `topn` ``[feature_name, mean_importance]`` pairs, sorted by
        importance in descending order; importances go through ``round3``.

    Raises:
        ValueError: If `n_validation` is smaller than 1.
    """
    if n_validation < 1:
        # Previously this surfaced as an obscure TypeError while indexing.
        raise ValueError("n_validation must be at least 1")

    tags, _, tag_bank = load_features_bank(tag_songs_stat)
    flags, _, _ = mark_tag(target_class, tags)

    # The training-set score was accumulated here before but never used, so
    # the extra score() pass per run has been dropped.
    importance_sum = 0.0
    for _run in range(n_validation):
        clf = binary_classifier(tag_bank, flags, 1.0, False)
        importance_sum = importance_sum + clf.feature_importances_

    ranking = [[name, round3(importance_sum[j] / n_validation)]
               for j, name in enumerate(features_names)]
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking[:topn]

def fn_pure(fn):
    """Return the file name of `fn` without its directory and last extension."""
    base_name = os.path.basename(fn)
    stem, _extension = os.path.splitext(base_name)
    return stem

# glob returns matches in arbitrary order; sort them so that the classifier
# list -- and every chart laid out from it -- comes out in the same order on
# every run and machine.
cnames = sorted(glob.glob("../data/classifiers/*.clf"))

# imp[i] is the feature ranking computed for the classifier file cnames[i].
imp = []
for tag in cnames:
    imp.append(features_raiting(fn_pure(tag)))

In [None]:
def add_bar(fig, x, y, n_row, n_col):
    """Add one horizontal purple bar chart to the next subplot of `fig`.

    Replaces the histogram ``add_bar`` defined in an earlier cell.  The
    subplot is chosen from the module-level ``chart_count`` cursor, which is
    advanced by one afterwards.

    Args:
        fig: Subplot figure supporting ``append_trace`` and ``fig['layout']``.
        x: Category labels, placed on the vertical axis.
        y: Bar lengths, placed on the horizontal axis.
        n_row: Unused.
        n_col: Number of subplot columns, used to map the cursor to (row, col).
    """
    global chart_count

    grid_row, grid_col = divmod(chart_count, n_col)
    axis_no = chart_count + 1
    y_axis_key = 'yaxis{0}'.format(axis_no)
    x_axis_key = 'xaxis{0}'.format(axis_no)

    # Horizontal orientation: lengths go to x, labels go to y.
    bar_trace = go.Bar(x=y, y=x, name="", marker=dict(color='purple'),
                       opacity=0.7, orientation="h")
    fig.append_trace(bar_trace, grid_row + 1, grid_col + 1)

    layout = fig['layout']
    # Reversed vertical axis so the first label ends up at the top.
    layout[y_axis_key].update(ticklen=3, autorange='reversed')
    layout[y_axis_key]['tickfont'].update(size=11, color="lightgrey")
    layout[x_axis_key].update(showticklabels=False)
    # Wider left margin to leave room for the labels.
    layout['margin'].update(l=140)
    chart_count += 1

def draw_bars():
    """Render one importance bar chart per classifier on a two-column grid.

    Replaces the ``draw_bars`` defined in an earlier cell.  Reads the
    module-level ``cnames`` (classifier file paths, used for subplot titles)
    and ``imp`` (one list of ``[feature, importance]`` pairs per classifier),
    and draws each entry of ``imp`` with ``add_bar``.
    """
    n_col = 2
    # Ceiling division: with an odd number of classifiers, plain `// 2` left
    # no row for the last one.  At least one row is always requested.
    n_row = max(1, -(-len(cnames) // n_col))

    fig = tools.make_subplots(rows=n_row, cols=n_col, print_grid=False,
                              horizontal_spacing=0.3,
                              subplot_titles=[fn_pure(x) for x in cnames])

    for ranking in imp:
        labels = [fn_pure(pair[0]) for pair in ranking]
        scores = [pair[1] for pair in ranking]
        add_bar(fig, labels, scores, n_row, n_col)

    fig['layout'].update(height=700, width=750, showlegend=False)

    py.iplot(fig, show_link=False)
  
# Rewind the module-level subplot cursor that add_bar advances, then render.
chart_count = 0
draw_bars()