# Data visualisations

In [4]:
import matplotlib.patheffects as PathEffects
from gensim.models.doc2vec import Doc2Vec
import numpy as np
from sklearn.manifold import TSNE

import seaborn as sns
from matplotlib import pyplot as plt


In [1]:
# Utility function to visualize the outputs of t-SNE
def draw_scatter(model_name, tsne_coords, colours):
    """Draw a labelled scatter plot of 2-D t-SNE coordinates and save it as a PNG.

    Args:
        model_name: Model name; the text before the first '.' becomes the
            stem of the output file ``figs/<stem>_fig.png``.
        tsne_coords: 2-D array; column 0 is plotted on x, column 1 on y.
        colours: NumPy array of class ids, one per row of ``tsne_coords``.
            The values index into a palette of ``num_classes`` colours, so
            they must be integers in ``0 .. num_classes - 1``.

    Raises:
        AssertionError: if ``colours`` does not hold exactly 20 distinct values.
    """
    import os  # local import: only needed to create the output directory

    # Choose colour palette with seaborn, one colour per class.
    num_classes = len(np.unique(colours))
    assert num_classes == 20
    palette = np.array(sns.color_palette("hls", num_classes))

    # Create a scatter plot.
    plt.figure(figsize=(8, 8))
    ax = plt.subplot(aspect='equal')
    # Builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24; the alias always referred to the builtin.
    ax.scatter(tsne_coords[:, 0], tsne_coords[:, 1], lw=0, s=40,
               c=palette[colours.astype(int)])
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)

    plt.title('My title')

    ax.axis('off')
    # NOTE(review): 'tight' re-enables autoscaling, which presumably
    # overrides the fixed limits set above -- confirm which one is intended.
    ax.axis('tight')

    # Add a text label for each class, positioned at the median of that
    # class's data points.
    for i in range(num_classes):
        xtext, ytext = np.median(tsne_coords[colours == i, :], axis=0)
        txt = ax.text(xtext, ytext, str(i), fontsize=10)
        # White outline keeps the label readable on top of the points.
        txt.set_path_effects([
            PathEffects.Stroke(linewidth=5, foreground="w"),
            PathEffects.Normal()])

    model_fig_name = model_name.split('.')[0] + '_fig.png'
    # Make sure the output directory exists so savefig does not fail.
    os.makedirs('figs', exist_ok=True)
    plt.savefig(('figs/'+model_fig_name), dpi=120)

In [5]:
def draw_tsne(model_name, doc_vectors, labels):
    """Project document vectors to 2-D with t-SNE and plot them via draw_scatter.

    Args:
        model_name: Passed through to ``draw_scatter``.
        doc_vectors: Input passed to ``TSNE.fit_transform``.
        labels: Indexable collection with a length; each element is
            converted with ``int()`` to form the per-point class ids.
    """
    # Fixed random_state keeps the embedding reproducible between runs.
    reducer = TSNE(
        n_components=2,
        verbose=1,
        random_state=0,
        angle=0.99,
        init='pca',
    )
    embedded = reducer.fit_transform(doc_vectors)

    # Build the integer class-id array expected by draw_scatter.
    class_ids = [int(labels[idx]) for idx in range(len(labels))]
    colours = np.asarray(class_ids)

    draw_scatter(model_name, embedded, colours)