In [8]:
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
import math
import numpy as np

# Geometry of the scan path used to place documents on a 2-D grid.
# ROW_LENGTH and COLUMN_LENGTH are passed to calculate_the_sizes(), which only
# uses the ratio COLUMN_LENGTH / (ROW_LENGTH + COLUMN_LENGTH); the physical
# unit is not stated in this notebook -- TODO confirm.
ROW_LENGTH = 62
COLUMN_LENGTH = 1.25
# Number of horizontal passes; also the number of rows of the heatmap matrix.
NUMBER_OF_ROWS = 8

In [9]:
# Load the topic probabilities (presumably one entry per document -- TODO
# confirm the pickled layout) and the document-id mapping.
# NOTE(review): unpickling can execute arbitrary code, so these files must
# come from a trusted source.
document_topic_probabilities = pd.read_pickle('../pickles/document_topic_probabilities.pickle')
document_ids_map = pd.read_pickle('../pickles/doc_id_mapping.pickle').to_dict()
# Invert the mapping to value -> int(key). dict.items() replaces the
# Python-2-only dict.iteritems() so the cell runs on Python 2 and Python 3.
inverted_document_ids_map = {v: int(k) for k, v in document_ids_map.items()}

In [10]:
def calculate_the_sizes(data, number_of_rows, row_length, column_length):
    """Split the documents evenly across the scan passes.

    Every one of the ``number_of_rows`` passes gets the same number of
    documents. That share is then divided between a horizontal "row" part and
    a vertical "column" part in proportion to ``row_length`` and
    ``column_length``.

    Args:
        data: Any sized collection; only ``len(data)`` is used.
        number_of_rows: Number of passes the documents are divided into.
        row_length: Length of the horizontal part of one pass.
        column_length: Length of the vertical part of one pass, in the same
            unit as ``row_length`` (only the ratio matters).

    Returns:
        Tuple ``(data_size, single_scan_size, column_size, row_size)`` with
        ``row_size + column_size == single_scan_size``. All values are
        integers when ``number_of_rows`` is an integer.

    Raises:
        ZeroDivisionError: If ``number_of_rows`` is 0, or if ``row_length``
            and ``column_length`` are both 0.
    """
    data_size = len(data)
    # Explicit floor division: plain `/` yields a float on Python 3, which
    # would make row_size unusable in range() and as an array dimension.
    single_scan_size = (data_size + 1) // number_of_rows
    # float() forces true division even when both lengths are ints on Python 2.
    column_share = float(column_length) / (row_length + column_length)
    column_size = int(math.floor(single_scan_size * column_share))
    row_size = single_scan_size - column_size
    return data_size, single_scan_size, column_size, row_size

In [12]:
def generate_coordinates(data_size, column_size, row_size, number_of_rows=None):
    """Map document ids to grid positions along a back-and-forth scan path.

    The path starts at ``x = 0`` on the highest ``y`` and moves right. After
    ``row_size`` documents it drops one row and reverses direction, so rows
    alternate left-to-right and right-to-left. Between two consecutive rows
    ``column_size`` document ids are skipped and get no coordinate.

    Args:
        data_size: Unused; kept so existing callers keep working.
        column_size: Number of document ids skipped between two rows.
        row_size: Number of documents placed on each row.
        number_of_rows: Number of rows to generate. Defaults to the
            module-level ``NUMBER_OF_ROWS``.

    Returns:
        Dict mapping document id to ``{'x': int, 'y': int}``.
    """
    if number_of_rows is None:
        number_of_rows = NUMBER_OF_ROWS
    x_coord = 0
    y_coord = number_of_rows - 1
    scanning_right = True
    coordinates = {}
    last_index = row_size - 1
    for j in range(number_of_rows):
        # Each pass consumes row_size + column_size ids; only the first
        # row_size of them are placed on the grid.
        first_doc_id = j * (row_size + column_size)
        for i in range(row_size):
            coordinates[first_doc_id + i] = {'x': x_coord, 'y': y_coord}
            if i == last_index:
                # End of the row: step down and keep x for the next row.
                # Comparing with the last index (instead of a modulo by
                # row_size - 1) also works when row_size == 1.
                y_coord -= 1
            elif scanning_right:
                x_coord += 1
            else:
                x_coord -= 1
        # Reverse the horizontal direction for the next row.
        scanning_right = not scanning_right
    return coordinates

In [14]:
# Work out how many documents go on each row/column of the scan path, then
# assign a grid position to every document that lies on a row.
(data_size, single_scan_size, column_size, row_size) = calculate_the_sizes(
    inverted_document_ids_map, NUMBER_OF_ROWS, ROW_LENGTH, COLUMN_LENGTH
)
coordinates_mapping = generate_coordinates(data_size, column_size, row_size)

In [67]:
# Topic whose probabilities are written into the grid.
topic_id = 0

# One matrix cell per grid position; positions without a document stay 0.
probability_matrix = np.zeros((NUMBER_OF_ROWS, row_size))
for i, doc in enumerate(document_topic_probabilities):
    doc_id = inverted_document_ids_map[i]
    doc_coord = coordinates_mapping.get(doc_id)
    if doc_coord is None:
        # This document has no grid position, so it is left out of the plot.
        continue
    probability_matrix[doc_coord['y'], doc_coord['x']] = doc[topic_id]

In [68]:
import plotly.plotly as py
import plotly.graph_objs as go

# Axis coordinates 0..n-1 as float column vectors of shape (n, 1).
# NOTE(review): these are 2-D column vectors rather than flat sequences --
# confirm plotly renders the axes as intended.
x_vector = np.arange(row_size, dtype=float).reshape(row_size, 1)
y_vector = np.arange(NUMBER_OF_ROWS, dtype=float).reshape(NUMBER_OF_ROWS, 1)

# Single heatmap trace showing the selected topic's value at each position.
data = [
    go.Heatmap(
        z=probability_matrix,
        x=x_vector,
        y=y_vector,
        colorscale='Electric',
        colorbar={'title': 'Percentage'},
    )
]
layout = go.Layout(
    title='\'Topic %s\' probability distribution.' % topic_id,
    height=350,
    xaxis={'title': 'x'},
    yaxis={'title': 'y'},
)
fig = go.Figure(data=data, layout=layout)

py.iplot(fig)