In [3]:
import os
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
import math
import numpy as np

HOME_PATH = os.path.expanduser('~')
PICKLES_PATH = HOME_PATH + '/Projects/tminm/pickles/'
HEAVY_PICKLES_PATH = HOME_PATH + '/Projects/tminm/heavy_pickles/'

ROW_LENGTH = 62
COLUMN_LENGTH = 1.25
NUMBER_OF_ROWS = 8

In [19]:
docs = pd.read_pickle(HEAVY_PICKLES_PATH + 'gen_corpus.pickle').to_dict()
# docs = pd.read_pickle(HEAVY_PICKLES_PATH + 'seq_gen_corpus.pickle').to_dict()
document_ids_map = pd.read_pickle(PICKLES_PATH + 'doc_id_mapping.pickle').to_dict()
vocab = pd.read_pickle(HEAVY_PICKLES_PATH + 'vocab.pickle').tolist()
inverted_document_ids_map = {v: int(k) for k, v in document_ids_map.iteritems()}

In [20]:
def generate_coordinates(data_size, column_size, row_size):    
    x_coord = 0
    y_coord = NUMBER_OF_ROWS - 1
    row_scan = True
    scanning_right = True
    coordinates = {}
    for j in range(0, NUMBER_OF_ROWS):
        for i in range(0, row_size):
            doc_id = i + column_size * j + row_size * j
            coordinates[doc_id] = {'x': x_coord, 'y': y_coord}
            # Switching horizontal scanning to vertical.
            if (i % (row_size - 1) == 0 and i != 0):
                y_coord -= 1
                continue
            if scanning_right:
                x_coord += 1
            else:
                x_coord -= 1         
        # Reversing the scanner's direction.
        scanning_right = not scanning_right
    return coordinates

def calculate_the_sizes(data, number_of_rows, row_length, column_length):
    data_size = len(data)
    single_scan_size = (data_size + 1) / number_of_rows
    column_size = int(math.floor(single_scan_size \
    * (column_length / (row_length + column_length))))
    row_size = single_scan_size - column_size 
    return data_size, single_scan_size, column_size, row_size

data_size, single_scan_size, column_size, row_size = \
    calculate_the_sizes(docs, NUMBER_OF_ROWS, ROW_LENGTH, COLUMN_LENGTH)
coordinates_mapping = generate_coordinates(data_size, column_size, row_size)

# word_key = "{'start': 374.97097984961994, 'end': 375.22268528175505}"
word_key = "{'start': 183.2662009945702, 'end': 183.45152875032315}"
print(docs)

z_matrix = np.zeros(shape=(NUMBER_OF_ROWS, row_size)) 
for i, doc in docs.iteritems():
    if i in coordinates_mapping and word_key in doc:
        doc_coord = coordinates_mapping[i]
        z_matrix[doc_coord['y']][doc_coord['x']] = doc[word_key]


{0: {"{'start': 941.97620219674525, 'end': 943.22028181914675}": 3, "{'start': 271.29523612200592, 'end': 271.48026038031566}": 2, "{'start': 954.76533510951174, 'end': 956.65472865418838}": 3, "{'start': 428.18302162658159, 'end': 428.5262208435787}": 25, "{'start': 639.72581665868427, 'end': 640.03565423900648}": 25, "{'start': 273.41829932217257, 'end': 273.6301843088026}": 4, "{'start': 234.1014252034775, 'end': 234.69856757186398}": 4, "{'start': 1713.4809322811316, 'end': 1714.2776255492458}": 3, "{'start': 154.03161217386258, 'end': 154.11300801643134}": 8, "{'start': 586.25549849996855, 'end': 587.00761614604767}": 1, "{'start': 545.12406667289031, 'end': 545.59665975758139}": 41, "{'start': 463.88737518159661, 'end': 464.10541442241555}": 12, "{'start': 208.90313702883239, 'end': 208.94479559709015}": 1, "{'start': 524.15076855060522, 'end': 524.19311892031033}": 2, "{'start': 427.78535110949451, 'end': 427.94908636035296}": 1, "{'start': 398.70426010673503, 'end': 398.7204362

In [18]:
import plotly.plotly as py
import plotly.graph_objs as go

x_vector = np.zeros(shape=(row_size, 1))
for i in range(0, row_size):
    x_vector[i] = i
y_vector = np.zeros(shape=(NUMBER_OF_ROWS, 1))
for i in range(0, NUMBER_OF_ROWS):
    y_vector[i] = i

layout = go.Layout(
    height=350,
    xaxis=dict(title='x'),
    yaxis=dict(title='y')
)
data = [
    go.Heatmap(
        z=z_matrix,
        x=x_vector,
        y=y_vector,
        colorscale='Electric',
        colorbar=dict(title='Number of Occurences')
    )
]
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)