In [1]:
%matplotlib inline
import csv
import umap
import pickle
import gensim
import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from gensim.models.callbacks import CallbackAny2Vec

import bokeh.plotting as bp
from bokeh.models import HoverTool, BoxSelectTool, CustomJS
from bokeh.plotting import figure, show, output_notebook
from bokeh.embed import components, file_html
from bokeh.resources import CDN, Resources
from bokeh.models.widgets import TextInput
from bokeh.layouts import column
from bokeh.core.properties import Instance
from bokeh.util.compiler import TypeScript
from bokeh.models.widgets.inputs import InputWidget, String
from bokeh.models.callbacks import Callback

In [2]:
class logger(CallbackAny2Vec):
    """Empty CallbackAny2Vec subclass declared before the saved model is loaded.

    NOTE(review): presumably the saved model refers to a training callback of
    this name, so the class has to exist for loading to succeed -- confirm.
    """


# Load the saved Track2Vec model and print its one-line summary.
embedding_model = gensim.models.Word2Vec.load('../models/track2vec')
print(embedding_model)

# Matrix of learned vectors (an earlier revision of this cell read `wv.syn0`).
embedding = embedding_model.wv.vectors

Word2Vec<vocab=956918, vector_size=100, alpha=0.025>


In [3]:
# `tracks` and `urls` are lookups indexed by track key; later cells use them for the
# hover label and the preview-audio URL of each plotted point.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
# The files are opened in `with` blocks so the handles are closed deterministically.
with open("../../deej-ai.online-app/model/spotify_tracks.p", "rb") as tracks_file:
    tracks = pickle.load(tracks_file)
with open("../../deej-ai.online-app/model/spotify_urls.p", "rb") as urls_file:
    urls = pickle.load(urls_file)

In [4]:
# Per-track vectors; iterating the mapping yields the track keys.
# (An earlier revision read this file from '../spotifytovec.p'.)
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
with open('../../deej-ai.online-app/model/spotifytovec.p', 'rb') as vectors_file:
    mp3tovecs = pickle.load(vectors_file)

# Keep only the keys that have an entry in `tracks` whose element [1] is non-empty.
# Each kept entry is (track key, tracks[key], urls[key]).
# An earlier revision also stored embedding_model.wv.vocab[key].count and sorted on it (descending).
w2c = [
    (item, tracks[item], urls[item])
    for item in mp3tovecs
    if item in tracks and tracks[item][1] != ''
]
len(w2c)

956917

In [5]:
# Custom text-input widget that only has its callback run when the focus changes
# (rather than on every keystroke).

# Configure Bokeh to render its output inside the notebook.
output_notebook()

# TypeScript source for the browser-side model and view of the custom widget; the
# Python class that uses it wraps this string in `TypeScript(...)` as its
# `__implementation__`.
#   - change_input() copies the input element's text into `model.value`, then
#     defers to the parent view's change_input().
#   - change_input_oninput() copies the text into `model.value_input` only.
TS_CODE = """
import {TextInput, TextInputView} from "models/widgets/text_input"

import * as p from "core/properties"

export class MyTextInputView extends TextInputView {
  model: MyTextInput

  change_input(): void {
    this.model.value = this.input_el.value
    super.change_input()
  }
  
  change_input_oninput(): void {
    this.model.value_input = this.input_el.value
  }
}

export namespace MyTextInput {
  export type Attrs = p.AttrsOf<Props>

  export type Props = TextInput.Props
}

export interface MyTextInput extends MyTextInput.Attrs {}

export class MyTextInput extends TextInput {
  properties: MyTextInput.Props

  constructor(attrs?: Partial<MyTextInput.Attrs>) {
    super(attrs)
  }

  static init_MyTextInput(): void {
    this.prototype.default_view = MyTextInputView
  }
}
"""


class MyTextInput(InputWidget):
    ''' Single-line text input widget.

    The browser-side model and view are compiled from the TypeScript source
    held in ``TS_CODE``.
    '''

    __implementation__ = TypeScript(TS_CODE)

    # Text held by the widget.
    value = String(default="", help="""
    Initial or entered text value.
    Change events are triggered whenever <enter> is pressed.
    """)

    # Browser-side callback object; the help text previously said "Slider",
    # a leftover from another widget, and now describes this text input.
    callback = Instance(Callback, help="""
    A callback to run in the browser whenever the entered text value changes.
    """)

In [10]:
# HTML template for the hover tooltip. For each hovered point it renders an
# <audio class="preview"> element whose source is the row's `url` column, and the
# row's `track` label preceded by a play marker that starts out hidden.
tooltips = """
    <audio class="preview">
        <source src="@url" type="audio/mp3">
    </audio>
    <div style="font-family: Calibri; font-size: 12px;">
        <span class="play" style="visibility: hidden;">▶️</span>@track
    </div>
"""
# JavaScript used as the hover tool's callback. Starting at index 0 it plays the
# "preview" audio elements found in the document one after another: when a clip ends
# the next one starts, the play marker of the current clip is made visible and the
# marker of the previous clip is hidden again.
code = """
    function next_track(previews, plays, i) {
        if (i > 0) {
            plays[i-1].style.visibility = "hidden";
        }
        if (previews[i]) {
            previews[i].onended = function() {
                next_track(previews, plays, i+1);
            }
            plays[i].style.visibility = "visible";
            previews[i].play();
        }
    }
    next_track(document.getElementsByClassName("preview"), document.getElementsByClassName("play"), 0);
"""


def plot_tsne(embedding_matrix, index_to_word, num_words = None, title = 't-SNE',
              random_state=None, tsne_df=None, perplexity=30):
    """Project vectors to 2-D with t-SNE and build a searchable Bokeh scatter plot.

    Parameters
    ----------
    embedding_matrix : 2-D array
        One row per point to plot.
    index_to_word : mapping
        Row index -> ``(track label, preview url)``. Rows missing from the
        mapping are labelled ``("<UNK>", "")``.
    num_words : int, optional
        If given, only the first ``num_words`` rows are plotted.
    title : str
        Plot title.
    random_state, perplexity
        Forwarded to ``sklearn.manifold.TSNE``.
    tsne_df : pandas.DataFrame, optional
        Previously computed projection with ``x`` and ``y`` columns (for example
        the frame returned by an earlier call). When supplied, the t-SNE fit is
        skipped and these coordinates are reused; it must have one row per
        plotted point. The passed frame is not modified.

    Returns
    -------
    (layout, tsne_df)
        The Bokeh layout (figure plus search box) and a DataFrame with the
        columns ``x``, ``y``, ``track`` and ``url``.

    Raises
    ------
    ValueError
        If ``tsne_df`` is supplied with a row count different from the number
        of plotted points.
    """
    matrix = embedding_matrix
    if num_words is not None:
        matrix = matrix[:num_words,:]
    n_points = matrix.shape[0]

    # Local names deliberately differ from the notebook-level `tracks` / `urls`.
    info = [index_to_word.get(row, ("<UNK>", "")) for row in range(n_points)]
    track_labels = [entry[0] for entry in info]
    preview_urls = [entry[1] for entry in info]

    # defining the chart
    output_notebook(Resources(mode="cdn", components=["bokeh", "bokeh-gl"]))
    fig = bp.figure(output_backend="webgl", sizing_mode='stretch_both', title=title,
                    tools="pan,wheel_zoom,box_zoom,reset,undo,hover",
                    x_axis_type=None, y_axis_type=None)

    # dimensionality reduction: convert the vectors to 2-D points, unless the
    # caller already supplied a projection to reuse.
    if tsne_df is None:
        # (umap.UMAP(verbose=True) was tried here as an alternative reducer.)
        tsne_model = TSNE(n_components=2, verbose=2, random_state=random_state,
                          perplexity=perplexity, n_jobs=-1)
        tsne_df = pd.DataFrame(tsne_model.fit_transform(matrix), columns=['x', 'y'])
    else:
        if len(tsne_df) != n_points:
            raise ValueError(
                f"tsne_df has {len(tsne_df)} rows but {n_points} points are being plotted")
        # New frame with a fresh 0..n-1 index, so the caller's frame stays untouched.
        tsne_df = tsne_df[['x', 'y']].reset_index(drop=True)

    # putting everything in a dataframe
    tsne_df['track'] = track_labels
    tsne_df['url'] = preview_urls

    # plotting. the corresponding track appears when you hover on the data point.
    fig.scatter(x='x', y='y', fill_color='blue', line_color='blue', source=tsne_df, line_alpha=0.5, fill_alpha=0.5)
    hover = fig.select(dict(type=HoverTool))
    hover.tooltips = tooltips
    hover.callback = CustomJS(args={}, code=code)
    # Only the full (blue) scatter triggers tooltips, not the search-result overlay.
    hover.renderers = [fig.renderers[0]]

    # Second, initially empty, scatter that highlights search matches in red.
    fig.scatter(x='x', y='y', fill_color='red', line_color='red', source=pd.DataFrame(columns=['x', 'y', 'track', 'url']), line_alpha=1, fill_alpha=1)
    textinput = MyTextInput(value="", title="Find tracks:")
    # The callback copies every row whose track label contains all space-separated
    # search terms (case-insensitive) into the red overlay's data source.
    # Variables are declared with `var` so the snippet does not create implicit
    # globals and also runs when the callback is evaluated in strict mode.
    textinput.callback = CustomJS(args=dict(source=fig.renderers[0].data_source, source2=fig.renderers[1].data_source), code="""
        var indices = [];
        var text, found, i, j;
        if (this.value.trim() != '') {
            text = this.value.split(' ');
            for (i = 0; i < source.data['track'].length; i++) {
                found = true;
                for (j = 0; j < text.length; j++) {
                    if (!source.data['track'][i].toLowerCase().includes(text[j].toLowerCase())) {
                        found = false;
                        break;
                    }
                }
                if (found) {
                    indices.push(i);
                }
            }
        }
        source2.data['x'] = new Float32Array(indices.length);
        source2.data['y'] = new Float32Array(indices.length);
        source2.data['track'] = [];
        source2.data['url'] = [];
        source2.data['index'] = [];
        for (i = 0; i < indices.length; i++) {
            source2.data['x'][i] = source.data['x'][indices[i]];
            source2.data['y'][i] = source.data['y'][indices[i]];
            source2.data['track'].push(source.data['track'][indices[i]]);
            source2.data['url'].push(source.data['url'][indices[i]]);
            source2.data['index'].push(i);
        }
        source2.change.emit();
    """)
    layout = column(fig, textinput, sizing_mode='scale_width')
    return layout, tsne_df

In [11]:
# Row index -> (tracks[key], urls[key]) for every kept track, in `w2c` order.
track_names = {row: (meta, url) for row, (_key, meta, url) in enumerate(w2c)}

# Track2Vec vector of every kept track, in the same order as `track_names`.
# The rows are gathered with one integer-array lookup instead of building and
# concatenating one single-row array per track.
# (An earlier revision used `embedding_model.wv.vocab[key].index` for the row number.)
vector_rows = [embedding_model.wv.key_to_index[key] for key, _meta, _url in w2c]
vectors = embedding[vector_rows]

In [None]:
# Track2Vec projection of all kept tracks; the seed is passed through to t-SNE.
track2vec_title = f'Track2Vec (creativity = 0) t-SNE projection from 100 dimensions into 2 of {len(track_names):,} Spotify tracks'
layout, tsne_df = plot_tsne(
    vectors,
    track_names,
    title=track2vec_title,
    random_state=616906225,
)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 956917 samples in 25.091s...


In [None]:
# Configure Bokeh for notebook output; showing the layout inline is left disabled.
output_notebook()
#show(layout)

In [None]:
# Split the Track2Vec layout into an embeddable <script> block and its target <div>.
script, div = components(layout)

In [None]:
# Standalone page for the Track2Vec plot: BokehJS bundles from the CDN, then the
# generated <script> inside <head> and the plot's <div> inside <body>.
# The bokeh-gl bundle is now loaded from cdn.bokeh.org like the other bundles
# (it previously pointed at the legacy cdn.pydata.org host).
# NOTE(review): the BokehJS version pinned here (1.4.0) has to match the Bokeh
# version that generated `script` -- confirm against the installed package.
html = """
<!DOCTYPE html>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Track2Vec t-SNE</title>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-1.4.0.min.js"></script>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-gl-1.4.0.min.js"></script>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-widgets-1.4.0.min.js"></script>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-tables-1.4.0.min.js"></script>
""" + script + """
    </head>
    <body>
""" + div + """
    </body>
</html>
"""

In [None]:
# The page declares <meta charset="utf-8"> and contains non-ASCII text (the play
# marker, track names), so write it as UTF-8 explicitly instead of relying on the
# platform's default text encoding.
with open('tracktovec.html', 'wt', encoding='utf-8') as file:
    file.write(html)

In [None]:
# Row index -> (tracks[key], urls[key]) for every kept track, in `w2c` order.
track_names = {row: (entry[1], entry[2]) for row, entry in enumerate(w2c)}
# One mp3tovec vector per kept track, stacked along a new leading axis.
vectors = np.stack([mp3tovecs[entry[0]] for entry in w2c], axis=0)

In [18]:
# Mp3ToVec projection of all kept tracks; the seed is passed through to t-SNE.
mp3tovec_title = f'Mp3ToVec (creativity = 1) t-SNE projection from 100 dimensions into 2 of {len(track_names):,} Spotify tracks'
layout, tsne_df = plot_tsne(
    vectors,
    track_names,
    title=mp3tovec_title,
    random_state=615906225,
)

[t-SNE] Computed neighbors for 956917 samples in 1834.586s...
[t-SNE] Computed conditional probabilities for sample 1000 / 956917
[t-SNE] Computed conditional probabilities for sample 2000 / 956917
[t-SNE] Computed conditional probabilities for sample 3000 / 956917
[t-SNE] Computed conditional probabilities for sample 4000 / 956917
[t-SNE] Computed conditional probabilities for sample 5000 / 956917
[t-SNE] Computed conditional probabilities for sample 6000 / 956917
[t-SNE] Computed conditional probabilities for sample 7000 / 956917
[t-SNE] Computed conditional probabilities for sample 8000 / 956917
[t-SNE] Computed conditional probabilities for sample 9000 / 956917
[t-SNE] Computed conditional probabilities for sample 10000 / 956917
[t-SNE] Computed conditional probabilities for sample 11000 / 956917
[t-SNE] Computed conditional probabilities for sample 12000 / 956917
[t-SNE] Computed conditional probabilities for sample 13000 / 956917
[t-SNE] Computed conditional probabilities for sam

[t-SNE] Computed conditional probabilities for sample 130000 / 956917
[t-SNE] Computed conditional probabilities for sample 131000 / 956917
[t-SNE] Computed conditional probabilities for sample 132000 / 956917
[t-SNE] Computed conditional probabilities for sample 133000 / 956917
[t-SNE] Computed conditional probabilities for sample 134000 / 956917
[t-SNE] Computed conditional probabilities for sample 135000 / 956917
[t-SNE] Computed conditional probabilities for sample 136000 / 956917
[t-SNE] Computed conditional probabilities for sample 137000 / 956917
[t-SNE] Computed conditional probabilities for sample 138000 / 956917
[t-SNE] Computed conditional probabilities for sample 139000 / 956917
[t-SNE] Computed conditional probabilities for sample 140000 / 956917
[t-SNE] Computed conditional probabilities for sample 141000 / 956917
[t-SNE] Computed conditional probabilities for sample 142000 / 956917
[t-SNE] Computed conditional probabilities for sample 143000 / 956917
[t-SNE] Computed con

[t-SNE] Computed conditional probabilities for sample 254000 / 956917
[t-SNE] Computed conditional probabilities for sample 255000 / 956917
[t-SNE] Computed conditional probabilities for sample 256000 / 956917
[t-SNE] Computed conditional probabilities for sample 257000 / 956917
[t-SNE] Computed conditional probabilities for sample 258000 / 956917
[t-SNE] Computed conditional probabilities for sample 259000 / 956917
[t-SNE] Computed conditional probabilities for sample 260000 / 956917
[t-SNE] Computed conditional probabilities for sample 261000 / 956917
[t-SNE] Computed conditional probabilities for sample 262000 / 956917
[t-SNE] Computed conditional probabilities for sample 263000 / 956917
[t-SNE] Computed conditional probabilities for sample 264000 / 956917
[t-SNE] Computed conditional probabilities for sample 265000 / 956917
[t-SNE] Computed conditional probabilities for sample 266000 / 956917
[t-SNE] Computed conditional probabilities for sample 267000 / 956917
[t-SNE] Computed con

[t-SNE] Computed conditional probabilities for sample 385000 / 956917
[t-SNE] Computed conditional probabilities for sample 386000 / 956917
[t-SNE] Computed conditional probabilities for sample 387000 / 956917
[t-SNE] Computed conditional probabilities for sample 388000 / 956917
[t-SNE] Computed conditional probabilities for sample 389000 / 956917
[t-SNE] Computed conditional probabilities for sample 390000 / 956917
[t-SNE] Computed conditional probabilities for sample 391000 / 956917
[t-SNE] Computed conditional probabilities for sample 392000 / 956917
[t-SNE] Computed conditional probabilities for sample 393000 / 956917
[t-SNE] Computed conditional probabilities for sample 394000 / 956917
[t-SNE] Computed conditional probabilities for sample 395000 / 956917
[t-SNE] Computed conditional probabilities for sample 396000 / 956917
[t-SNE] Computed conditional probabilities for sample 397000 / 956917
[t-SNE] Computed conditional probabilities for sample 398000 / 956917
[t-SNE] Computed con

[t-SNE] Computed conditional probabilities for sample 511000 / 956917
[t-SNE] Computed conditional probabilities for sample 512000 / 956917
[t-SNE] Computed conditional probabilities for sample 513000 / 956917
[t-SNE] Computed conditional probabilities for sample 514000 / 956917
[t-SNE] Computed conditional probabilities for sample 515000 / 956917
[t-SNE] Computed conditional probabilities for sample 516000 / 956917
[t-SNE] Computed conditional probabilities for sample 517000 / 956917
[t-SNE] Computed conditional probabilities for sample 518000 / 956917
[t-SNE] Computed conditional probabilities for sample 519000 / 956917
[t-SNE] Computed conditional probabilities for sample 520000 / 956917
[t-SNE] Computed conditional probabilities for sample 521000 / 956917
[t-SNE] Computed conditional probabilities for sample 522000 / 956917
[t-SNE] Computed conditional probabilities for sample 523000 / 956917
[t-SNE] Computed conditional probabilities for sample 524000 / 956917
[t-SNE] Computed con

[t-SNE] Computed conditional probabilities for sample 638000 / 956917
[t-SNE] Computed conditional probabilities for sample 639000 / 956917
[t-SNE] Computed conditional probabilities for sample 640000 / 956917
[t-SNE] Computed conditional probabilities for sample 641000 / 956917
[t-SNE] Computed conditional probabilities for sample 642000 / 956917
[t-SNE] Computed conditional probabilities for sample 643000 / 956917
[t-SNE] Computed conditional probabilities for sample 644000 / 956917
[t-SNE] Computed conditional probabilities for sample 645000 / 956917
[t-SNE] Computed conditional probabilities for sample 646000 / 956917
[t-SNE] Computed conditional probabilities for sample 647000 / 956917
[t-SNE] Computed conditional probabilities for sample 648000 / 956917
[t-SNE] Computed conditional probabilities for sample 649000 / 956917
[t-SNE] Computed conditional probabilities for sample 650000 / 956917
[t-SNE] Computed conditional probabilities for sample 651000 / 956917
[t-SNE] Computed con

[t-SNE] Computed conditional probabilities for sample 772000 / 956917
[t-SNE] Computed conditional probabilities for sample 773000 / 956917
[t-SNE] Computed conditional probabilities for sample 774000 / 956917
[t-SNE] Computed conditional probabilities for sample 775000 / 956917
[t-SNE] Computed conditional probabilities for sample 776000 / 956917
[t-SNE] Computed conditional probabilities for sample 777000 / 956917
[t-SNE] Computed conditional probabilities for sample 778000 / 956917
[t-SNE] Computed conditional probabilities for sample 779000 / 956917
[t-SNE] Computed conditional probabilities for sample 780000 / 956917
[t-SNE] Computed conditional probabilities for sample 781000 / 956917
[t-SNE] Computed conditional probabilities for sample 782000 / 956917
[t-SNE] Computed conditional probabilities for sample 783000 / 956917
[t-SNE] Computed conditional probabilities for sample 784000 / 956917
[t-SNE] Computed conditional probabilities for sample 785000 / 956917
[t-SNE] Computed con

[t-SNE] Computed conditional probabilities for sample 896000 / 956917
[t-SNE] Computed conditional probabilities for sample 897000 / 956917
[t-SNE] Computed conditional probabilities for sample 898000 / 956917
[t-SNE] Computed conditional probabilities for sample 899000 / 956917
[t-SNE] Computed conditional probabilities for sample 900000 / 956917
[t-SNE] Computed conditional probabilities for sample 901000 / 956917
[t-SNE] Computed conditional probabilities for sample 902000 / 956917
[t-SNE] Computed conditional probabilities for sample 903000 / 956917
[t-SNE] Computed conditional probabilities for sample 904000 / 956917
[t-SNE] Computed conditional probabilities for sample 905000 / 956917
[t-SNE] Computed conditional probabilities for sample 906000 / 956917
[t-SNE] Computed conditional probabilities for sample 907000 / 956917
[t-SNE] Computed conditional probabilities for sample 908000 / 956917
[t-SNE] Computed conditional probabilities for sample 909000 / 956917
[t-SNE] Computed con

In [19]:
# Configure Bokeh for notebook output; showing the layout inline is left disabled.
output_notebook()
#show(layout)

In [20]:
# Split the Mp3ToVec layout into an embeddable <script> block and its target <div>.
script, div = components(layout)

In [21]:
# Standalone page for the Mp3ToVec plot: BokehJS bundles from the CDN, then the
# generated <script> inside <head> and the plot's <div> inside <body>.
# The bokeh-gl bundle is now loaded from cdn.bokeh.org like the other bundles
# (it previously pointed at the legacy cdn.pydata.org host).
# NOTE(review): the BokehJS version pinned here (1.4.0) has to match the Bokeh
# version that generated `script` -- confirm against the installed package.
html = """
<!DOCTYPE html>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Mp3ToVec t-SNE</title>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-1.4.0.min.js"></script>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-gl-1.4.0.min.js"></script>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-widgets-1.4.0.min.js"></script>
        <script src="https://cdn.bokeh.org/bokeh/release/bokeh-tables-1.4.0.min.js"></script>
""" + script + """
    </head>
    <body>
""" + div + """
    </body>
</html>
"""

In [22]:
# The page declares <meta charset="utf-8"> and contains non-ASCII text (the play
# marker, track names), so write it as UTF-8 explicitly instead of relying on the
# platform's default text encoding.
with open('mp3tovec.html', 'wt', encoding='utf-8') as file:
    file.write(html)

In [None]:
# Peek at the first rows of the projection table returned by the last plot_tsne call.
tsne_df.head()

In [None]:
# Summary statistics of the projection table.
tsne_df.describe()

In [None]:
# Spot check: rows whose track label contains "Monolink".
monolink_mask = tsne_df['track'].str.contains('Monolink')
tsne_df[monolink_mask]