In [1]:
import random
import pandas
import bokeh
import bokeh.io

import numpy as np

from bokeh.plotting import figure,  show
from bokeh.models import HoverTool, ColumnDataSource

In [2]:
# Send Bokeh output to the notebook so that show() renders plots inline.
bokeh.io.output_notebook()

In [3]:
# Load the two input tables from the local data/ directory:
#   aoa       <- AoA_ratings_Kuperman_et_al_BRM.csv
#   semantics <- Parameters.csv
aoa, semantics = (
    pandas.read_csv(csv_path)
    for csv_path in (
        "data/AoA_ratings_Kuperman_et_al_BRM.csv",
        "data/Parameters.csv",
    )
)

In [4]:
# Clean up the input data.

# Lower-case the AoA column labels. The vectorised .str accessor returns a real
# Index; assigning a bare map(...) hands pandas a one-shot iterator on
# Python 3, which not every pandas version accepts as column labels.
aoa.columns = aoa.columns.str.lower()
# Index the ratings by word so they can be joined onto the semantic entries.
aoa = aoa.set_index("word")
aoa = aoa.drop("dunno", axis=1)

# Lower-case the entry names; `name` is the key used to look words up in the
# AoA index when the tables are joined.
# NOTE(review): presumably the AoA words are already lower-case -- confirm.
semantics["name"] = semantics["name"].str.lower()
# Drop the columns that are not used by the analysis, in a single call.
semantics = semantics.drop(
    ["markup_description", "id", "pk", "jsondata", "representation"],
    axis=1,
)

In [5]:
# Build the analysis table in one pipeline:
#   1. look up each semantic entry's `name` in the AoA index,
#   2. discard entries that have no mean rating,
#   3. shorten the column names,
#   4. index the result by word.
df = (
    semantics
    .join(aoa, on="name")
    .dropna(subset=["rating.mean"])
    .rename(columns={
        'ontological_category': 'category',
        'semanticfield': 'field',
        'description': 'meaning',
        'rating.mean': "mean",
        'rating.sd': 'sd',
    })
    .set_index("name")
)

# Sorted distinct values, used later for the plot's categorical axis and
# for assigning one colour per category.
semantic_fields = sorted(df["field"].unique())
ontological_categories = sorted(df["category"].unique())

In [None]:
# Preview the first two rows of the joined table.
df.head(2)

Unnamed: 0_level_0,meaning,category,field,occurtotal,occurnum,freq_pm,mean,sd
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
contemptible,Deserving of contempt or scorn.,Property,Emotions and values,18.0,12.0,0.78,13.33,2.02
dust,Any kind of solid material divided in particle...,Person/Thing,The physical world,18.0,18.0,23.84,5.06,1.39


In [None]:
# Scatter plot of every word: mean age-of-acquisition rating on the x axis,
# semantic field on the (categorical) y axis, coloured by ontological category.

# Per-word series that feed the plot's data source.
mean = df["mean"]
category = df["category"]
field = df["field"]

# Rating scaled down by 20; stored in the source as the "radius" column.
radii = df["mean"]/20

color_choices = ["#FFFC31", "#5C415D", "#E94F37", "#5EB1BF"]

# Pair each category (in sorted order) with one palette colour, once, rather
# than running a list.index() search for every row.
color_by_category = dict(zip(ontological_categories, color_choices))
colors = [color_by_category[word_category] for word_category in category]

# NOTE(review): the "resize" tool, plot_width/plot_height and the legend=
# keyword belong to older Bokeh releases -- confirm against the installed
# version before upgrading.
TOOLS = "resize,crosshair,pan,wheel_zoom,box_zoom,reset,box_select,lasso_select"

# create a new plot with the tools above, and explicit ranges
p = figure(
    tools=TOOLS,
    x_range=(0, 18),
    y_range=semantic_fields,
    plot_width=900,
    plot_height=700,
)

# Every column must hold one value per plotted word. The legend column is
# therefore the per-row category series; the list of unique categories is
# shorter than the other columns and would attach labels to the wrong points.
source = ColumnDataSource(
    data=dict(
        x=mean,
        y=field,
        name=df.index,
        radius=radii,
        colors=colors,
        legend=category,
        age=df["mean"],
        desc=df["meaning"],
    )
)

# Tooltip fields refer to columns of `source` by name.
hover = HoverTool(
    tooltips=[
        ("Word", "@name"),
        ("Meaning", "@desc"),
        ("Age of Acquisition", "@age"),
    ]
)

p.add_tools(hover)

# Add a circle renderer: fill colour and legend label are read per point from
# the source, while the radius is a fixed 0.1 (the source's "radius" column is
# not used by the glyph).
p.circle(
    "x",
    "y",
    radius=.1,
    source=source,
    fill_color="colors",
    fill_alpha=0.6,
    line_color=None,
    legend="legend",
)

# show the results
show(p)