# Code to plot the cosine similarity between topic vectors of consecutive windows, for each partition of the entire dataset
* For each partition (random seed), it plots a heatmap of the cosine similarity between the topic vectors of window i and window i+1

In [None]:
import os
from time import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%pylab inline

In [None]:
from gensim import corpora, models, similarities
from scipy.spatial.distance import cosine

## INPUT PARAMETERS

In [None]:
# Settings that identify which trained topic2vec models get loaded.
n_topics = 8                    # topic count; also part of the results directory name
n_docs = 11314                  # document count; also part of the results directory name
n_window_t2v = 2                # number of windows whose models are compared pairwise
random_seeds_partition = [55]   # partition seeds; one figure is produced per seed

In [None]:
# Directory holding the trained topic2vec models for the selected
# configuration; the similarity figures are saved there as well.
cwd = os.getcwd()  # current working directory (nothing is printed here)

# The directory name encodes the run configuration, so changing any of the
# input parameters selects a different set of models.
results_dir_name = '20NG_lemmatiz_DBOW_win5_n_topics{}_n_doc{}_n_win{}'.format(
    n_topics, n_docs, n_window_t2v)

# os.path.join picks the right separator for the platform instead of
# hard-coding '/' into the string.
results_dir_path = os.path.join(cwd, 'results', 'DBOW', results_dir_name)


In [None]:
# Tags under which the topic vectors are looked up in each model's docvecs:
# 'topic_0', 'topic_1', ..., one per topic.
# range() is used instead of xrange() so the cell runs on Python 2 and 3 alike.
topic_list = ['topic_' + str(i_topic) for i_topic in range(n_topics)]

In [None]:
# For every partition seed, draw one heatmap per pair of consecutive windows
# showing the cosine similarity between the topic vectors of window i (rows)
# and window i+1 (columns), then save the figure in the results directory.

# Colour range shared by the heatmaps and the colorbar. The colorbar ticks are
# derived from it so they can never fall outside the plotted range.
similarity_limit = 0.2
colorbar_ticks = [-similarity_limit, -similarity_limit / 2, 0,
                  similarity_limit / 2, similarity_limit]

n_window_pairs = n_window_t2v - 1
if n_window_pairs < 1:
    raise ValueError('n_window_t2v must be at least 2 to compare consecutive windows')

for i_random_seed in random_seeds_partition:

    # squeeze=False always yields a 2-D array of axes, so the same indexing
    # works for a single window pair and for several.
    fig, axes = plt.subplots(nrows=1, ncols=n_window_pairs, squeeze=False)
    # With one heatmap this gives the axes rectangle [0.1, 0.15, 0.7, 0.7];
    # the strip to the right of 0.8 is left free for the colorbar.
    fig.subplots_adjust(left=0.1, bottom=0.15, right=0.8, top=0.85)
    fig.set_size_inches(8, 8)
    fig.suptitle('Topic similarity interwindows\n partition seed ' + str(i_random_seed), size = 20, y = 0.93)

    model_path_prefix = results_dir_path + '/t2v_20NG_partSEED' + str(i_random_seed)

    for i_window in range(n_window_pairs):

        fname_win1 = model_path_prefix + '_win' + str(i_window) + '.model'
        model_win1 = models.Doc2Vec.load(fname_win1)

        fname_win2 = model_path_prefix + '_win' + str(i_window + 1) + '.model'
        model_win2 = models.Doc2Vec.load(fname_win2)

        # Look every topic vector up once instead of once per matrix cell.
        vecs_win1 = [model_win1.docvecs[topic] for topic in topic_list]
        vecs_win2 = [model_win2.docvecs[topic] for topic in topic_list]

        # A fresh matrix per window pair, so a heatmap that was already drawn
        # cannot share data with the next one.
        topic_to_topic = np.zeros((n_topics, n_topics))
        for idx, vec_a in enumerate(vecs_win1):
            for jdx, vec_b in enumerate(vecs_win2):
                # scipy's cosine() is a distance; 1 - distance is the similarity.
                topic_to_topic[idx, jdx] = 1 - cosine(vec_a, vec_b)

        # Plot it out
        ax = axes[0, i_window]
        heatmap = ax.pcolor(topic_to_topic, cmap=plt.cm.Blues, alpha=0.8,
                            vmin=-similarity_limit, vmax=similarity_limit)
        ax.set_xlabel('Window '+ str(i_window+1), size = 20)
        ax.set_ylabel('Window '+ str(i_window), size = 20)

        # turn off the frame
        ax.set_frame_on(False)

        # No tick marks and no tick labels on either axis.
        ax.set_xticks([])
        ax.set_yticks([])

    # Colorbar on its own axis at the right side of the figure. All heatmaps
    # share vmin/vmax, so the last one drawn is representative.
    cax = fig.add_axes([0.85, 0.15, 0.02, 0.7])
    cbar = fig.colorbar(heatmap, cax=cax, ticks=colorbar_ticks)
    # Only the label size is set; the label text comes from the tick values,
    # so labels and ticks cannot get out of step.
    cbar.ax.tick_params(labelsize=20)

    # 'bbox' is not a savefig() option (older Matplotlib ignored it, newer
    # versions reject it), so it is dropped; the saved image keeps the full
    # figure extent as before.
    fig.savefig(results_dir_path +'/topic_similarity_interwindows_partSEED' + str(i_random_seed) + '.png', dpi = 200)