In [1]:
import os
import pandas as pd
import numpy as np

# Root of the project checkout, expected under the current user's home directory.
HOME_PATH = os.path.expanduser('~') + '/Projects/ssmsi/'
# Directory the generated corpora pickles are read from.
DATA_PATH = HOME_PATH + 'pickles/corpora/generated-corpora/'
# Directory the a-to-h pickles (vocabulary, coordinate map, corpus) are read from.
AUX_DATA_PATH = HOME_PATH + 'pickles/corpora/a-to-h/'

# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted source.
# Vocabulary, converted to a plain Python list.
vocab = pd.read_pickle(AUX_DATA_PATH + 'a-to-h_vocab.pickle').tolist()
# Lookup table used below with integer document ids as keys; each entry is
# indexed with 'x' and 'y' to place a document on the heatmap grid.
coordinates_mapping = pd.read_pickle(AUX_DATA_PATH + 'a-to-h_coord-map.pickle').to_dict()

### Option: A-to-h corpus 

In [2]:
# Load the a-to-h corpus pickle and convert it to a dict of documents.
docs = pd.read_pickle(AUX_DATA_PATH + 'a-to-h_corpus.pickle').to_dict()
# Key of the word whose per-document value gets plotted. In this corpus the key
# is a stringified pair of floats — presumably a (low, high) bin range, TODO confirm.
word_key = '(374.97097984961994, 375.22268528175505)'

### Option: Naively generated corpus

In [2]:
# Load the naive corpus pickle and convert it to a dict of documents.
docs = pd.read_pickle(DATA_PATH + 'naive_corpus.pickle').to_dict()
# Key of the word whose per-document value gets plotted. In this corpus the key
# is a stringified dict with 'start' and 'end' floats — presumably a bin range, TODO confirm.
word_key = "{'start': 764.02336015008007, 'end': 764.1950241267549}"

### Option: Dynamically generated corpus

In [2]:
# Load the seq corpus pickle and convert it to a dict of documents.
docs = pd.read_pickle(DATA_PATH + 'seq_corpus.pickle').to_dict()
# Key of the word whose per-document value gets plotted. In this corpus the key
# is a stringified dict with 'start' and 'end' floats — presumably a bin range, TODO confirm.
word_key = "{'start': 638.58361506523988, 'end': 638.66990759572423}"

### Mapping the documents to the coordinates

In [3]:
# Size of the heatmap grid that the documents are laid out on.
NUMBER_OF_COLUMNS = 776
NUMBER_OF_ROWS = 8

# Heatmap z values: each document that contains the selected word writes its
# value for that word into the grid cell given by its coordinates. Cells with
# no matching document keep the initial 0.
z_matrix = np.zeros(shape=(NUMBER_OF_ROWS, NUMBER_OF_COLUMNS))
# dict.items() works on both Python 2 and Python 3; iteritems() exists only on
# Python 2 and raises AttributeError on Python 3.
for doc_id, doc in docs.items():
    if word_key not in doc:
        continue
    # Document ids are normalised to int before the coordinate lookup.
    doc_index = int(doc_id)
    if doc_index not in coordinates_mapping:
        continue
    doc_coord = coordinates_mapping[doc_index]
    z_matrix[doc_coord['y'], doc_coord['x']] = doc[word_key]

# Axis values 0..N-1, kept as float column vectors of shape (N, 1) exactly as
# the previous element-by-element fill produced them.
x_vector = np.arange(NUMBER_OF_COLUMNS, dtype=float).reshape(NUMBER_OF_COLUMNS, 1)
y_vector = np.arange(NUMBER_OF_ROWS, dtype=float).reshape(NUMBER_OF_ROWS, 1)

### Plotting the corpus

In [4]:
import plotly.plotly as py
import plotly.graph_objs as go

# Single heatmap trace: z holds the per-cell values, x/y hold the axis values.
data = [
    go.Heatmap(
        x=x_vector,
        y=y_vector,
        z=z_matrix,
        colorscale='Electric',
        colorbar={'title': 'Total Intensity'},
    )
]

# Fixed-height layout with labelled axes.
layout = go.Layout(
    height=350,
    xaxis={'title': 'x'},
    yaxis={'title': 'y'},
)

fig = go.Figure(data=data, layout=layout)

# Left as the cell's last expression so the notebook renders the returned plot.
py.iplot(fig)

In [8]:
import pprint as pp

# pprint() writes the formatted structure to stdout itself and returns None,
# so it must be called on its own: wrapping it in print emits a stray "None"
# after the dump (and the print statement form is a syntax error on Python 3).
pp.pprint(docs)

{0: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 198},
 1: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 225},
 2: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 193},
 3: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 177},
 4: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 193},
 5: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 190},
 6: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 186},
 7: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 218},
 8: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 184},
 9: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 199},
 10: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 190},
 11: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 197},
 12: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 220},
 13: {"{'start': 590.59195875787316, 'end': 591.31399616631097}": 183},
 1