In [1]:
import sys
import os
import psycopg2
import pandas as pd

In [2]:
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.charts import Scatter, output_file, show
from bokeh.models import HoverTool
import matplotlib as mpl
import matplotlib.cm as cmx

In [3]:
# Send Bokeh output to the notebook.
output_notebook()

# Toolbar tools, listed individually and joined into the comma-separated
# string that is later handed to figure().
TOOLS = ",".join([
    "pan", "wheel_zoom", "box_zoom",
    "undo", "redo", "reset",
    "tap", "save",
    "box_select", "poly_select", "lasso_select",
])

In [4]:
# Connection string for the local 'ind' database.
dsn = "host='localhost' dbname='ind'"

# Open the connection and a cursor that the following cells reuse.
conn = psycopg2.connect(dsn)
cursor = conn.cursor()

In [5]:
# Join the two embedding coordinates (X1, X2) with every company_dets column,
# matching rows on id.
embedding_query = 'select E.X1, E.X2, C.* from embedded E inner join company_dets C on E.id = C.id'
cursor.execute(embedding_query)

# Pull the full result set into memory.
SNE_vecs = cursor.fetchall()

# The first element of each cursor.description entry is the column name.
colnames = [column[0] for column in cursor.description]

In [6]:
# Wrap the fetched rows in a DataFrame labelled with the query's column names.
vecs = pd.DataFrame(data=SNE_vecs, columns=colnames)

# Preview the first five rows.
vecs.head(5)

Unnamed: 0,x1,x2,id,sec_header,former_conformed_name,film_number,sec_file_number,central_index_key,sic_cd,public_document_count,...,street_2,sec_act,date_of_name_change,conformed_submission_type,company_conformed_name,state_of_incorporation,form_type,name,business_description,raw_description
0,0.096793,-4.24718,38725_10-K_2015-03-04.txt,,,15672604,000-00362,38725,3621,19,...,,1934 Act,,10-K,FRANKLIN ELECTRIC CO INC,IN,10-K,FRANKLIN ELECTRIC CO INC,item 0 . business general franklin electric co...,ITEM 1. BUSINESS General Franklin Electric ...
1,-0.840741,3.99862,1444598_10-K_2015-03-16.txt,,,15703610,001-34435,1444598,7389,19,...,SUITE 1000,1934 Act,,10-K,Emdeon Inc.,DE,10-K,EMDEON INC,item 0 . business overview we are a leading pr...,ITEM 1. BUSINESS \n Overview We are a lead...
2,9.429,5.5567,860413_10-K_2015-03-02.txt,,FIRST INTERSTATE BANCSYSTEM OF MONTANA INC,15661208,001-34653,860413,6022,13,...,401 NO 31ST STREET,1934 Act,19930615.0,10-K,FIRST INTERSTATE BANCSYSTEM INC,MT,10-K,FIRST INTERSTATE BANCSYSTEM INC,item 0 . business the disclosures set forth in...,Item 1. Business The disclosures set forth in...
3,9.04832,-6.08241,1453001_10-K_2015-04-15.txt,,,15772607,000-54997,1453001,2834,15,...,,1934 Act,,10-K,VACCINOGEN INC,MD,10-K,VACCINOGEN INC,item 0 . description of business overview vacc...,Item 1. DESCRIPTION OF BUSINESS \n\n \n\n \...
4,-4.32277,7.35713,1022469_10-K_2015-03-03.txt,,,15668624,001-12147,1022469,2421,13,...,200 EAST ELM,1934 Act,,10-K,DELTIC TIMBER CORP,DE,10-K,DELTIC TIMBER CORP,item 0 . business introduction deltic timber c...,Item 1. Business Introduction \n Deltic T...


In [7]:
# Build a value -> RGBA mapper used to colour points by SIC code.
theme = cmx.get_cmap('viridis')

# Normalise onto 0..9999 before the colormap lookup.
# NOTE(review): presumably chosen because SIC codes are at most four digits - confirm.
cNorm = mpl.colors.Normalize(vmin=0, vmax=9999)
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=theme)

# Sanity-check the colour limits. The call form of print emits the same text
# on Python 2 and Python 3, whereas the print statement is a SyntaxError on 3.
print(scalarMap.get_clim())

(0, 9999)


In [8]:
# Translate each row's SIC code into a "#rrggbb" hex colour via scalarMap.
# Rows whose SIC code cannot be read as an integer get light grey.
colors = []
for s in vecs['sic_cd']:
    try:
        sic = int(s)
    except (TypeError, ValueError, OverflowError):
        # None, NaN, infinities and non-numeric text all land here. Only the
        # conversion is guarded, so real errors in the colour mapping are no
        # longer swallowed.
        colors.append("#d3d3d3")
        continue

    # to_rgba gives floats in 0..1; the alpha channel is not used.
    red, green, blue = scalarMap.to_rgba(sic)[:3]

    # %x needs integers: Python 3 raises TypeError for floats, which the old
    # bare except turned into "every point is grey". int() truncates, which
    # matches what Python 2 did with the float arguments.
    colors.append("#%02x%02x%02x" % (int(red * 255), int(green * 255), int(blue * 255)))

In [9]:
# Columns backing the scatter plot: the two embedding coordinates, the SIC
# code (exposed as "desc") and the title-cased company name.
source = ColumnDataSource(data={
    'x': list(vecs['x1']),
    'y': list(vecs['x2']),
    'desc': list(vecs['sic_cd']),
    'name': [v.title() for v in vecs['name']],
})

# Hover tooltip showing the name and SIC columns of the point under the cursor.
hover = HoverTool(tooltips=[
    ("Name", "@name"),
    ("SIC", "@desc"),
])

In [10]:
# Figure of width 800 carrying the hover tool plus the tools named in TOOLS.
plot = figure(
    tools=[hover, TOOLS],
    plot_width=800,
)

In [11]:
# Store the per-point colours as a column of the data source and reference it
# by name. Passing source= together with a literal Python sequence for a glyph
# property is rejected by newer Bokeh releases; a column reference works in
# both old and new versions. Re-running this cell just overwrites the column.
source.add(colors, name='sic_color')

# Small, half-transparent markers at the embedding coordinates.
plot.scatter('x', 'y', source=source, color='sic_color', alpha=.5, size=3)

# Remove the toolbar logo and hide the axes and grid lines.
plot.toolbar.logo = None
plot.axis.visible = False
plot.grid.visible = False

In [12]:
# Display the figure; output_notebook() was called in an earlier cell.
show(plot)