In [55]:
import pandas as pd
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import ColumnDataSource

# Direct Bokeh output to the notebook so that later show() calls render
# their figures inline below the calling cell.
output_notebook()

In [15]:
# Read the CSV and append the lower / upper edge of a one-standard-deviation
# band around the average ('avg' -/+ 'std') as two derived columns.
df = pd.read_csv('../data/aa_filt.csv').assign(**{
    'avg-std': lambda frame: frame['avg'] - frame['std'],
    'avg+std': lambda frame: frame['avg'] + frame['std'],
})

In [3]:
# Sorted distinct values of the two identifier columns of `df`:
# `aminoacids` from 'comp_id', `atoms` from 'atom_id'.
aminoacids, atoms = (
    sorted(df[column].unique()) for column in ('comp_id', 'atom_id')
)

In [4]:
# Atom-name lists, split by the leading letter of the name (C / H / N).
# They are used to select rows of the table by their 'atom_id' value.
# Each list is laid out one name-prefix per row; element order is significant
# because the lists are later used as ordered factors.

# Names starting with 'C'.
catoms = [
    'C', 'CA', 'CB',
    'CD', 'CD1', 'CD2',
    'CE', 'CE1', 'CE2', 'CE3',
    'CG', 'CG1', 'CG2',
    'CH2',
    'CZ', 'CZ2', 'CZ3',
]

# Names starting with 'H'.
hatoms = [
    'H',
    'HA', 'HA2', 'HA3',
    'HB', 'HB2', 'HB3',
    'HD1', 'HD2', 'HD21', 'HD22', 'HD3',
    'HE', 'HE1', 'HE2', 'HE21', 'HE22', 'HE3',
    'HG', 'HG1', 'HG12', 'HG13', 'HG2', 'HG3',
    'HH', 'HH11', 'HH12', 'HH2', 'HH21', 'HH22',
    'HZ', 'HZ2', 'HZ3',
]

# Names starting with 'N'.
natoms = [
    'N',
    'ND1', 'ND2',
    'NE', 'NE1', 'NE2',
    'NH1', 'NH2',
    'NZ',
]

In [106]:
# Range plot for the atoms listed in `catoms`: one row per
# (amino acid, atom) pair showing the avg +/- std band as a bar, the average
# as a circle and the min / max as triangles.

# Keep only the rows whose atom is in `catoms`, then group per pair.
catomdf = df[df['atom_id'].isin(catoms)]
gp = catomdf.groupby(['comp_id', 'atom_id'])

# A ColumnDataSource built from a GroupBy holds the per-group summary
# statistics: the group key becomes the 'comp_id_atom_id' column and every
# statistic is exposed as '<column>_<stat>' (e.g. 'avg_mean', 'avg-std_mean').
cds = ColumnDataSource(data=gp)

tooltips = [
    ('current position', '$x'),
    ('amino acid, atom', '@comp_id_atom_id'),
    ('avg', '@avg_mean'),
    ('std', '@std_mean'),
    ('max', '@max_mean'),
    ('min', '@min_mean'),
]

# NOTE: plot_width / plot_height are kept for compatibility with the Bokeh
# version this notebook was written against; Bokeh 3.x only accepts
# width / height.
p = figure(title='Chemical Shift distribution by amino acid',
           y_range=gp,  # the (comp_id, atom_id) groups become the y factors
           # One tool name per entry (previously 'crosshair, hover' was a
           # single fused string that only worked because Bokeh re-splits
           # tool strings on commas).
           tools=['pan', 'wheel_zoom', 'reset', 'crosshair', 'hover'],
           plot_width=800, plot_height=1600,
           # TODO(review): add the unit to the x label (presumably ppm) - confirm.
           x_axis_label='Chemical shift',
           y_axis_label='amino acid, atom',
           tooltips=tooltips)

# Draw the x axis with values decreasing from left to right.
p.x_range.flipped = True

# avg - std .. avg + std band.
p.hbar(y='comp_id_atom_id', left='avg-std_mean', right='avg+std_mean',
       height=0.75, alpha=0.5, source=cds)
# Average (default scatter marker).
p.scatter(y='comp_id_atom_id', x='avg_mean', source=cds,
          alpha=1, size=10)
# Minimum and maximum, drawn fainter so the average stands out.
p.scatter(y='comp_id_atom_id', x='min_mean', source=cds,
          alpha=0.25, size=8, marker='inverted_triangle')
p.scatter(y='comp_id_atom_id', x='max_mean', source=cds,
          alpha=0.25, size=8, marker='triangle')

show(p)

In [87]:
from bokeh.transform import factor_cmap
from bokeh.palettes import viridis

# Per-amino-acid view of the same rows: one y position per amino acid, with
# the avg +/- std bars of all its atoms overlaid and coloured by atom.
# NOTE: relies on `catomdf`, `catoms` and `aminoacids` defined in earlier
# cells - run those first on a fresh kernel.

# One viridis colour per entry of `catoms`, looked up by the 'atom_id' column.
fcmap = factor_cmap('atom_id', palette=viridis(len(catoms)), factors=catoms)

p = figure(title='Chemical Shift distribution by amino acid',
           y_range=aminoacids,
           # One tool name per entry (previously 'crosshair, hover' was a
           # single fused string that only worked because Bokeh re-splits
           # tool strings on commas).
           tools=['pan', 'wheel_zoom', 'reset', 'crosshair', 'hover'],
           # TODO(review): add the unit to the x label (presumably ppm) - confirm.
           x_axis_label='Chemical shift',
           y_axis_label='amino acid')

# Draw the x axis with values decreasing from left to right.
p.x_range.flipped = True

# The DataFrame is passed directly as `source`; Bokeh converts it to a
# ColumnDataSource internally.
p.hbar(y='comp_id', left='avg-std', right='avg+std', height=0.75,
       alpha=0.5, fill_color=fcmap, source=catomdf)
show(p)