In [42]:
import os
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
import math
import numpy as np

HOME_PATH = os.path.expanduser('~')
PICKLES_PATH = HOME_PATH + '/Projects/tminm/pickles/'
HEAVY_PICKLES_PATH = HOME_PATH + '/Projects/tminm/heavy_pickles/'

ROW_LENGTH = 62
COLUMN_LENGTH = 1.25
NUMBER_OF_ROWS = 8

In [43]:
docs = pd.read_pickle(HEAVY_PICKLES_PATH + 'gen_corpus.pickle').to_dict()
# docs = pd.read_pickle(HEAVY_PICKLES_PATH + 'seq_gen_corpus.pickle').to_dict()
document_ids_map = pd.read_pickle(PICKLES_PATH + 'doc_id_mapping.pickle').to_dict()
vocab = pd.read_pickle(HEAVY_PICKLES_PATH + 'vocab.pickle').tolist()
inverted_document_ids_map = {v: int(k) for k, v in document_ids_map.iteritems()}

In [44]:
def generate_coordinates(data_size, column_size, row_size):    
    x_coord = 0
    y_coord = NUMBER_OF_ROWS - 1
    row_scan = True
    scanning_right = True
    coordinates = {}
    for j in range(0, NUMBER_OF_ROWS):
        for i in range(0, row_size):
            doc_id = i + column_size * j + row_size * j
            coordinates[doc_id] = {'x': x_coord, 'y': y_coord}
            # Switching horizontal scanning to vertical.
            if (i % (row_size - 1) == 0 and i != 0):
                y_coord -= 1
                continue
            if scanning_right:
                x_coord += 1
            else:
                x_coord -= 1         
        # Reversing the scanner's direction.
        scanning_right = not scanning_right
    return coordinates

def calculate_the_sizes(data, number_of_rows, row_length, column_length):
    data_size = len(data)
    single_scan_size = (data_size + 1) / number_of_rows
    column_size = int(math.floor(single_scan_size \
    * (column_length / (row_length + column_length))))
    row_size = single_scan_size - column_size 
    return data_size, single_scan_size, column_size, row_size

data_size, single_scan_size, column_size, row_size = \
    calculate_the_sizes(docs, NUMBER_OF_ROWS, ROW_LENGTH, COLUMN_LENGTH)
coordinates_mapping = generate_coordinates(data_size, column_size, row_size)

# word_key = "{'start': 374.97097984961994, 'end': 375.22268528175505}"
word_key = "{'start': 706.97473682118107, 'end': 709.47398627680934}"
# word_key = "{'start': 849.5159026964642, 'end': 852.00502121877867}"
print(docs)

z_matrix = np.zeros(shape=(NUMBER_OF_ROWS, row_size)) 
for i, doc in docs.iteritems():
    if i in coordinates_mapping and word_key in doc:
        doc_coord = coordinates_mapping[i]
        z_matrix[doc_coord['y']][doc_coord['x']] = doc[word_key]


{0: {"{'start': 179.72240481945596, 'end': 179.84201558601319}": 4, "{'start': 687.72336429701784, 'end': 689.82460840048145}": 2, "{'start': 257.81271068131315, 'end': 258.46418638509346}": 2, "{'start': 164.64115486647745, 'end': 164.65922933639968}": 1, "{'start': 1152.2544912885883, 'end': 1153.9168121255577}": 56, "{'start': 495.85949403352657, 'end': 495.87012126144896}": 28, "{'start': 299.66563754618085, 'end': 299.72167393285832}": 29, "{'start': 332.25995908074475, 'end': 332.3811052865687}": 2, "{'start': 378.06406035077714, 'end': 378.07585220030563}": 3, "{'start': 418.49577222358261, 'end': 418.9511583427954}": 1, "{'start': 272.99333759999467, 'end': 273.00153769729172}": 6, "{'start': 884.6214778197417, 'end': 895.03291992030927}": 2, "{'start': 197.63995697582897, 'end': 197.66283785638697}": 3, "{'start': 424.14900036462944, 'end': 426.35182965225692}": 1, "{'start': 702.10123225089603, 'end': 702.15296069915087}": 20, "{'start': 771.33147147726345, 'end': 773.8000297

In [45]:
import plotly.plotly as py
import plotly.graph_objs as go

x_vector = np.zeros(shape=(row_size, 1))
for i in range(0, row_size):
    x_vector[i] = i
y_vector = np.zeros(shape=(NUMBER_OF_ROWS, 1))
for i in range(0, NUMBER_OF_ROWS):
    y_vector[i] = i

layout = go.Layout(
    height=350,
    xaxis=dict(title='x'),
    yaxis=dict(title='y')
)
data = [
    go.Heatmap(
        z=z_matrix,
        x=x_vector,
        y=y_vector,
        colorscale='Electric',
        colorbar=dict(title='Number of Occurences')
    )
]
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)