In [1]:
import os
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
import math
import numpy as np

# --- Scan-grid geometry -----------------------------------------------------
# Number of horizontal scan lines in the acquisition grid.
NUMBER_OF_ROWS = 8
# Relative lengths of one horizontal pass and of the vertical step between
# two passes; only their ratio is used when splitting documents.
ROW_LENGTH = 62
COLUMN_LENGTH = 1.25

# --- Pickle locations -------------------------------------------------------
# Both directory paths keep their trailing '/' because file names are
# appended to them directly later on.
HOME_PATH = os.path.expanduser('~')
AUX_DATA_PATH = HOME_PATH + '/Projects/ssmsi/pickles/corpora/a-to-h/'
DATA_PATH = HOME_PATH + '/Projects/ssmsi/pickles/corpora/generated-corpora/'

# Vocabulary pickle of the a-to-h corpus, converted with `.tolist()`.
# AUX_DATA_PATH already ends with a separator, so joining yields the same path
# as plain concatenation.
# NOTE(review): unpickling executes arbitrary code - only load trusted files.
vocab = pd.read_pickle(
    os.path.join(AUX_DATA_PATH, 'a-to-h_vocab.pickle')
).tolist()

### Option: A-to-h corpus 

In [2]:
# Load the a-to-h corpus pickle and turn it into a dict keyed by document id.
# AUX_DATA_PATH already ends with a separator, so joining yields the same path
# as plain concatenation.
docs = pd.read_pickle(
    os.path.join(AUX_DATA_PATH, 'a-to-h_corpus.pickle')
).to_dict()
# Key looked up inside every document; its value is what gets plotted.
# NOTE(review): presumably an m/z interval - confirm against the corpus builder.
word_key = '(374.97097984961994, 375.22268528175505)'

### Option: Naively generated corpus

In [2]:
# Load the naive corpus pickle and turn it into a dict keyed by document id.
# DATA_PATH already ends with a separator, so joining yields the same path
# as plain concatenation.
docs = pd.read_pickle(
    os.path.join(DATA_PATH, 'naive_corpus.pickle')
).to_dict()
# Key looked up inside every document; its value is what gets plotted.
# NOTE(review): presumably an m/z interval - confirm against the corpus builder.
word_key = "{'start': 764.02336015008007, 'end': 764.1950241267549}"

### Option: Dynamically generated corpus

In [2]:
# Load the sequential corpus pickle and turn it into a dict keyed by document id.
# DATA_PATH already ends with a separator, so joining yields the same path
# as plain concatenation.
docs = pd.read_pickle(
    os.path.join(DATA_PATH, 'seq_corpus.pickle')
).to_dict()
# Key looked up inside every document; its value is what gets plotted.
# NOTE(review): presumably an m/z interval - confirm against the corpus builder.
word_key = "{'start': 638.58361506523988, 'end': 638.66990759572423}"

### Creating the coordinate map

In [3]:
def calculate_the_sizes(data, number_of_rows, row_length, column_length):
    """Split the documents of one scan into a row part and a column part.

    Args:
        data: Sized collection of documents; only ``len(data)`` is used.
        number_of_rows: Number of scans the documents are divided into.
        row_length: Relative length of the horizontal part of one scan.
        column_length: Relative length of the vertical part of one scan.

    Returns:
        A tuple ``(data_size, single_scan_size, column_size, row_size)`` of
        integers, where ``single_scan_size == column_size + row_size``.

    Raises:
        ZeroDivisionError: If ``number_of_rows`` is 0 or
            ``row_length + column_length`` is 0.
    """
    data_size = len(data)
    # Floor division keeps the sizes integral on both Python 2 and Python 3;
    # plain `/` yields a float on Python 3, which later breaks `range()` and
    # the array shapes built from `row_size`.
    # NOTE(review): the reason for the `+ 1` is not evident from this cell.
    single_scan_size = (data_size + 1) // number_of_rows
    # Force a true (float) ratio so integer lengths do not truncate to 0 on
    # Python 2.
    column_fraction = column_length / float(row_length + column_length)
    column_size = int(math.floor(single_scan_size * column_fraction))
    row_size = single_scan_size - column_size
    return data_size, single_scan_size, column_size, row_size

# Derive the grid dimensions from the loaded corpus and the scan geometry.
(data_size,
 single_scan_size,
 column_size,
 row_size) = calculate_the_sizes(
    data=docs,
    number_of_rows=NUMBER_OF_ROWS,
    row_length=ROW_LENGTH,
    column_length=COLUMN_LENGTH
)

In [4]:
def generate_coordinates(data_size, column_size, row_size, number_of_rows=None):
    """Map document ids to grid cells along an alternating-direction scan.

    The first row is placed at ``y = number_of_rows - 1`` and filled from
    ``x = 0`` upwards; every following row sits one ``y`` lower and is
    traversed in the opposite ``x`` direction. Each row contributes
    ``row_size`` consecutive document ids, after which ``column_size`` ids are
    skipped (they receive no coordinate) before the next row starts.

    Args:
        data_size: Unused; kept so existing callers keep working.
        column_size: Number of document ids skipped between two rows.
        row_size: Number of document ids (grid cells) per row.
        number_of_rows: Number of rows to generate. Defaults to the
            module-level ``NUMBER_OF_ROWS`` when omitted.

    Returns:
        dict mapping ``doc_id`` (int) to ``{'x': int, 'y': int}``.
    """
    if number_of_rows is None:
        number_of_rows = NUMBER_OF_ROWS

    x_coord = 0
    y_coord = number_of_rows - 1
    scanning_right = True
    # Ids consumed by one full scan: the row itself plus the skipped column.
    scan_stride = row_size + column_size
    last_in_row = row_size - 1
    coordinates = {}
    for j in range(number_of_rows):
        for i in range(row_size):
            coordinates[i + scan_stride * j] = {'x': x_coord, 'y': y_coord}
            # Switching horizontal scanning to vertical: on the last cell of
            # a row only y changes, so the next row starts in the same column.
            # A direct comparison (instead of a modulo by `row_size - 1`)
            # also works for single-cell rows.
            if i == last_in_row:
                y_coord -= 1
                continue
            x_coord += 1 if scanning_right else -1
        # Reversing the scanner's direction.
        scanning_right = not scanning_right
    return coordinates

# Lookup table from document id to its {'x', 'y'} cell on the scan grid.
coordinates_mapping = generate_coordinates(
    data_size=data_size,
    column_size=column_size,
    row_size=row_size
)

### Mapping the documents to the coordinates

In [5]:
# Mapping to the z values: one cell per grid position, left at 0 where no
# document carries `word_key` or the document has no coordinate.
z_matrix = np.zeros(shape=(NUMBER_OF_ROWS, row_size))
# `.items()` works on Python 2 and Python 3 dicts; `.iteritems()` exists on
# Python 2 only.
for i, doc in docs.items():
    if word_key not in doc:
        continue
    # Document ids skipped between rows have no entry in the mapping.
    doc_coord = coordinates_mapping.get(int(i))
    if doc_coord is not None:
        z_matrix[doc_coord['y'], doc_coord['x']] = doc[word_key]

# Creating the x and y axes as float column vectors holding 0..n-1.
x_vector = np.arange(row_size, dtype=float).reshape(-1, 1)
y_vector = np.arange(NUMBER_OF_ROWS, dtype=float).reshape(-1, 1)

In [6]:
import plotly.plotly as py
import plotly.graph_objs as go

# Single heatmap trace: z holds the per-cell values, x/y the axis coordinates.
data = [
    go.Heatmap(
        x=x_vector,
        y=y_vector,
        z=z_matrix,
        colorscale='Electric',
        colorbar={'title': 'Total Intensity'},
    )
]

# Fixed-height layout with labelled axes.
layout = go.Layout(
    height=350,
    xaxis={'title': 'x'},
    yaxis={'title': 'y'},
)

fig = go.Figure(data=data, layout=layout)

# Render the figure inline in the notebook.
py.iplot(fig)