# Basic D3 Visualization

Step 1: Make some chartable data

In [1]:
import gzip
import numpy as np

In [2]:
# Ask jupyter_core where the Jupyter configuration directory is.
from jupyter_core.paths import jupyter_config_dir
jupyter_dir = jupyter_config_dir()
# Bare expression: the notebook shows the resolved path as this cell's output.
jupyter_dir

'/home/srush/.jupyter'

In [3]:
def load_wvec(fname, vocab=10000):
    """
    Load the leading entries of a gzipped word2vec binary file
    (e.g. the Google/Mikolov 300-dimensional vectors).

    The file begins with a text header line ``"<vocab_size> <layer1_size>"``.
    Every entry after it is a word, one space, then ``layer1_size`` raw
    float32 values. A newline in front of a word is skipped.

    Parameters
    ----------
    fname : str
        Path to the gzip-compressed word2vec binary file.
    vocab : int
        Maximum number of entries to read. Clamped to the vocabulary size
        announced in the header so the reader never runs past the data.

    Returns
    -------
    (vecs, words) : tuple
        ``vecs`` is an array of shape ``(n, layer1_size)`` and ``words`` the
        list of the ``n`` matching words, where ``n = min(vocab, vocab_size)``.

    Raises
    ------
    EOFError
        If the file ends in the middle of an entry.
    """
    with gzip.open(fname, "rb") as f:
        header = f.readline()
        vocab_size, layer1_size = map(int, header.split())
        binary_len = np.dtype('float32').itemsize * layer1_size

        # Never try to read more entries than the header says exist.
        n_words = max(0, min(vocab, vocab_size))
        vecs = np.zeros((n_words, layer1_size))
        print(vecs.shape)
        words = []
        for i in range(n_words):
            # Collect the raw bytes of the word and decode them in one go, so
            # multi-byte UTF-8 characters survive intact.
            raw_word = bytearray()
            while True:
                ch = f.read(1)
                if not ch:
                    # read() returns b'' at end of file; without this check
                    # the loop would spin forever on a truncated file.
                    raise EOFError(
                        "unexpected end of file while reading word %d" % i)
                if ch == b' ':
                    break
                if ch != b'\n':
                    raw_word += ch
            buf = f.read(binary_len)
            if len(buf) != binary_len:
                raise EOFError(
                    "unexpected end of file while reading vector %d" % i)
            # frombuffer is the supported way to view raw bytes as float32.
            vecs[i] = np.frombuffer(buf, dtype='float32')
            words.append(raw_word.decode("utf-8", "ignore"))
    return (vecs, words)
# Read only the first 1000 entries of the GoogleNews word2vec file.
word_vecs, words = load_wvec(
    "/home/srush/data/GoogleNews-vectors-negative300.bin.gz",
    vocab=1000,
)

(1000, 300)


In [5]:
import sklearn.decomposition
import sklearn.manifold

# Use the public estimator classes: the private `decomposition.pca` and
# `manifold.t_sne` module paths are not available in newer scikit-learn.
# Fixed seeds make both projections reproducible across runs.
pca = sklearn.decomposition.PCA(n_components=2, random_state=0).fit_transform(word_vecs)

# t-SNE is run on a 5-component PCA reduction of the vectors, not on the raw
# vectors themselves.
tsne = sklearn.manifold.TSNE(n_components=2, random_state=0).fit_transform(
    sklearn.decomposition.PCA(n_components=5, random_state=0).fit_transform(word_vecs)
)

In [6]:
import json

# One record per word: the word itself plus its 2-D PCA ("vals") and t-SNE
# ("tsne") coordinates. `.tolist()` converts NumPy scalars to native Python
# floats; NumPy types such as float32 are rejected by json.dumps.
d = [
    {"word": w, "vals": vec.tolist(), "tsne": vec2.tolist()}
    for w, vec, vec2 in zip(words, pca, tsne)
]
out = json.dumps(d)

So here's where we actually hand the data to the frontend. The JSON string built in Python above is substituted
into a small script that stores it as `window.data_vecs`, so no further conversion is needed on the JavaScript side.

In [37]:
from IPython.display import Javascript
# Fill the `{}` placeholder with the JSON payload so the script assigns the
# records to `window.data_vecs`. As the last expression of the cell, the
# Javascript object is what the notebook displays.
Javascript(data="""
           window.data_vecs={};
           """.format(out))

<IPython.core.display.Javascript object>

In [38]:
%%javascript
// Register the CDN locations of the libraries so that later cells can load
// them by name, e.g. require(['d3'], ...).
var cdnPaths = {
    d3: '//cdnjs.cloudflare.com/ajax/libs/d3/4.2.1/d3.min',
    lodash: '//cdnjs.cloudflare.com/ajax/libs/lodash.js/4.14.1/lodash.min'
};
require.config({ paths: cdnPaths });

<IPython.core.display.Javascript object>

And here it is! Pretty cool, right? Notably, this chart is interactive and responds to user input (click a word to mark it in red; zoom to rescale the plot).

D3 presented below without comment. The point is your arbitrary visualization code will work, too!

In [115]:
%%javascript
// Shared state read by plot_points; both are placeholders until the chart
// cell assigns real values.
window.show = {}
window.transform = {}

/**
 * Draw (or update) one text label per entry of the global `data_vecs`.
 *
 * Reads the globals `show` (uses `d.vals` when it equals "pca", `d.tsne`
 * otherwise), `transform` (zoom transform applied to both scales), and
 * `width` / `height`.
 *
 * @param d3  the d3 module
 * @param svg d3 selection that the labels are appended to
 */
window.plot_points = function plot_points(d3, svg){
    // Which pair of coordinates to plot for a record.
    var coords = (show == "pca")
        ? function(d) { return d.vals; }
        : function(d) { return d.tsne; };
    var rawX = function(d) { return coords(d)[0]; };
    var rawY = function(d) { return coords(d)[1]; };

    // Linear scales over the data extent, adjusted by the current zoom.
    var xScale = transform.rescaleX(
        d3.scaleLinear().domain(d3.extent(data_vecs, rawX)).range([0, width/2]));
    var yScale = transform.rescaleY(
        d3.scaleLinear().domain(d3.extent(data_vecs, rawY)).range([0, height/2]));

    var placeX = function(d) { return xScale(rawX(d)); };
    var placeY = function(d) { return yScale(rawY(d)); };

    // Join on the word so existing labels are reused between redraws.
    var labels = svg.selectAll('.dots')
        .data(data_vecs, function(d){ return d.word; });

    labels.exit().remove();

    // Existing labels: animate to the new position and refresh the colour.
    labels.transition()
        .attr("x", placeX)
        .attr("y", placeY)
        .attr("fill", function(d) { return d.marked ? "red" : "black"; });

    // New labels: create them in place; clicking marks the word and redraws.
    labels.enter()
        .append("text")
        .attr("class", "dots")
        .text(function(d) { return d.word; })
        .attr("x", placeX)
        .attr("y", placeY)
        .on("click", function(d) { d.marked = true; plot_points(d3, svg); });
}

<IPython.core.display.Javascript object>

In [116]:
%%javascript 
// Chart dimensions, read as globals by plot_points.
window.width = 1000
window.height = 1000
require(['d3'], function(d3){
    // Remove anything left over from a previous run of this cell.
    $("#chart1").remove();
    $("#bod").remove();
    element.append("<div id='bod'></div>")
    element.append("<svg id='chart1' height="+height+" width="+width+"></svg>");
    var container;

    // Redraw the labels using the current global `show` / `transform`.
    function redraw() {
        plot_points(d3, container);
    }

    // Zoom handler: store the new transform where plot_points reads it.
    function zoomed() {
        window.transform = d3.event.transform;
        redraw();
    }

    // Append a clickable link with text `t` to #bod that runs `f` on click.
    function make_button(t, f) {
        return d3.select('#bod').append("div").append('a')
            .text(t)
            .on('click', f);
    }

    // Build the chart group, initialise the shared state, and wire up the
    // zoom behaviour and the projection buttons.
    function attach() {
        container = d3.select("#chart1")
                    .append('g');
        window.transform = d3.zoomIdentity;
        var zoom = d3.zoom()
            .scaleExtent([1, 10])
            .on("zoom", zoomed);
        window.show = "t-sne";

        redraw();

        container.call(zoom);
        // Each button must select its projection before redrawing; without
        // setting `show`, a click would just redraw the current view.
        make_button("sne", function(){ window.show = "t-sne"; return redraw(); })
        make_button("pca", function(){ window.show = "pca"; return redraw(); })
    }

    attach();
})

<IPython.core.display.Javascript object>