Python for Bioinformatics
-----------------------------

![title](https://s3.amazonaws.com/py4bio/tapabiosmall.png)

This Jupyter notebook is intended to be used alongside the book [Python for Bioinformatics](http://py3.us/).



**Note:** The sample files must be accessible from this Jupyter notebook before they are opened. To make them available, the following commands download the files from GitHub and extract them into a directory called `samples`.

Chapter 14: Graphics in Python
-----------------------------

**USING BOKEH**

In [None]:
!curl https://raw.githubusercontent.com/Serulab/Py4Bio/master/samples/samples.tar.bz2 -o samples.tar.bz2
!mkdir samples
!tar xvfj samples.tar.bz2 -C samples

In [None]:
import bokeh

In [None]:
import IPython

**Listing 14.1:** basiccircle.py: A circle made with Bokeh

In [None]:
from bokeh.plotting import figure, output_file, show

# Send the rendered figure to a standalone HTML page.
output_file("out.html")

# A 400x400 canvas holding one half-transparent circle centred on (2, 3).
p = figure(width=400, height=400)
p.circle(x=2, y=3, radius=0.5, alpha=0.5)

show(p)

In [None]:
# Embed out.html (the page the previous listing wrote via output_file/show)
# in this cell's output area.
IPython.display.HTML(filename='out.html')

**Listing 14.2:** fourcircles.py: 4 circles made with Bokeh

In [None]:
from bokeh.plotting import figure, output_file, show

# Centres of the four circles, paired element-wise: (1, 1), (1, 2), (2, 1), (2, 2).
x = [1, 1, 2, 2]
y = [1, 2, 1, 2]

# Send the rendered figure to a standalone HTML page.
output_file("out.html")

p = figure(width=500, height=500)
p.circle(x=x, y=y, radius=0.35, color='red', alpha=0.5)

show(p)

**Listing 14.3:** plot1.py: A minimal plot

In [None]:
from bokeh.plotting import figure, output_file, show

# One data series: x holds the time points, y the matching measurements
# (axis meanings are given by the axis labels below).
x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [.7, 1.4, 2.1, 3, 3.85, 4.55, 5.8, 6.45]

p = figure(title='Mean wt increased vs. time',
           x_axis_label='Time in days',
           y_axis_label='% Mean WT increased')
# legend_label is the supported spelling of the old ``legend`` keyword, which
# Bokeh deprecated and later dropped; the other cells in this notebook already
# use legend_label / legend_field.
p.circle(x, y, legend_label='Subject 1', size=10)
output_file('test.html')
show(p)

**Listing 14.4:** plot2.py: Two data series plot

In [None]:

from bokeh.plotting import figure, output_file, show

# Two data series sharing the same x values: y for subject 1, z for subject 2.
x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [.7, 1.4, 2.1, 3, 3.85, 4.55, 5.8, 6.45]
z = [.5, 1.1, 1.9, 2.5, 3.1, 3.9, 4.85, 5.2]

p = figure(title='Mean wt increased vs. time',
           x_axis_label='Time in days',
           y_axis_label='% Mean WT increased')
# legend_label is the supported spelling of the old ``legend`` keyword, which
# Bokeh deprecated and later dropped; the other cells in this notebook already
# use legend_label / legend_field.
p.circle(x, y, legend_label='Subject 1', size=10)
# Second series drawn as hollow red-outlined circles so it stands apart.
p.circle(x, z, legend_label='Subject 2', size=10, line_color='red',
         fill_color='white')
p.legend.location = 'top_left'
output_file('test.html')
show(p)

**Listing 14.5:** fishpc.py: Scatter plot

In [None]:
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.transform import factor_cmap, factor_mark

# The original listing relied on bokeh.charts.Scatter and DataFrame.from_csv.
# bokeh.charts is no longer shipped with Bokeh and from_csv was removed from
# pandas, so the same plot is built here with bokeh.plotting and read_csv.
df = pd.read_csv('samples/fishdata.csv')

# Sorted level lists give every run the same colour/marker assignment.
# NOTE(review): assumes 'feeds' and 'species' hold string labels - confirm against the CSV.
feed_levels = sorted(df['feeds'].dropna().unique())
species_levels = sorted(df['species'].dropna().unique())
MARKERS = ['hex', 'circle_x', 'triangle', 'square']

scatter = figure(
    title='Metabolic variations based on 1H NMR profiling of fishes',
    x_axis_label='Principal Component 1: 35.8%',
    y_axis_label='Principal Component 2: 15.1%')
# Colour encodes the feed, marker shape encodes the species.
scatter.scatter('PC1', 'PC2', source=df, legend_field='species',
                marker=factor_mark('species', MARKERS, species_levels),
                color=factor_cmap('feeds', 'Category10_4', feed_levels))
scatter.legend.background_fill_alpha = 0.3
output_file('scatter.html')
show(scatter)

**Listing 14.6:** heatmap.py: Plot a gene expression file

In [None]:
import pandas as pd
from bokeh.models import ColumnDataSource, LinearColorMapper
from bokeh.plotting import figure, output_file, show
from bokeh.transform import transform

# The original listing relied on bokeh.charts.HeatMap, which is no longer
# shipped with Bokeh; the heat map is drawn here with one rect glyph per cell.
DATA_FILE = 'samples/GSM188012.CEL'
dtype = {'x': int, 'y': int, 'lux': float}
dataset = pd.read_csv(DATA_FILE, sep='\t', dtype=dtype)

# Average rows that share a coordinate, mirroring the stat='mean' aggregation
# of the original chart (per coordinate; the original's automatic binning of
# x and y is not reproduced).
cells = dataset.groupby(['x', 'y'], as_index=False)['lux'].mean()

# Light-to-dark blue ramp spanning the observed intensity range.
colors = ["#f0f3fe", "#cadbed", "#a6c9df", "#79add2", "#4682b8", "#205297"]
mapper = LinearColorMapper(palette=colors,
                           low=cells['lux'].min(), high=cells['lux'].max())

hm = figure(title='Expression')
hm.rect(x='x', y='y', width=1, height=1, source=ColumnDataSource(cells),
        line_color=None, fill_color=transform('lux', mapper))
output_file("heatmap7.html", title="heatmap.py example")
show(hm)

**Listing 14.7:** chord.py: A Chord diagram

In [None]:
import holoviews as hv
import pandas as pd
from bokeh.io import output_file, show
from holoviews import dim, opts

# The original listing relied on bokeh.charts.Chord, which is no longer shipped
# with Bokeh; HoloViews (already used further down in this notebook) provides
# a Chord element rendered through Bokeh.
hv.extension('bokeh')

data = pd.read_csv('samples/test3.csv')

# Name the source, target and weight columns explicitly rather than relying on
# the column order of the CSV.
chord_from_df = hv.Chord(data, kdims=['name_x', 'name_y'], vdims=['value'])
# NOTE(review): 'index' is presumably the node dimension HoloViews generates
# when no explicit node table is passed - confirm labels render as expected.
chord_from_df = chord_from_df.opts(
    opts.Chord(cmap='Category20', edge_color=dim('name_x').str(),
               node_color=dim('index').str(), labels='index',
               width=800, height=800))

output_file('chord.html')
# hv.render turns the HoloViews element into a Bokeh figure that show() can write.
show(hv.render(chord_from_df))

In [None]:
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
# Needs the Bokeh sample datasets (see bokeh.sampledata.download()).
from bokeh.sampledata.commits import data
from bokeh.transform import jitter

output_file("bars.html")

# Categorical y axis, listed bottom-to-top.
DAYS = ['Sun', 'Sat', 'Fri', 'Thu', 'Wed', 'Tue', 'Mon']

source = ColumnDataSource(data)

# width/height replace plot_width/plot_height, matching the other figures in
# this notebook; the plot_* spellings are not accepted by Bokeh 3.
p = figure(width=800, height=300, y_range=DAYS, x_axis_type='datetime',
           title="Commits by Time of Day (US/Central) 2012—2016")

# jitter spreads the points vertically inside each day's band so that
# commits made at the same time of day do not hide each other.
p.circle(x='time', y=jitter('day', width=0.6, range=p.y_range),  source=source, alpha=0.3)

# NOTE(review): newer Bokeh releases (3.x) appear to expect a plain string
# ('%Hh') rather than a list here - confirm against the installed version.
p.xaxis[0].formatter.days = ['%Hh']
p.x_range.range_padding = 0
p.ygrid.grid_line_color = None

show(p)

In [None]:
# Fish PCA scatter plot with every point labelled by its feed.
from bokeh.models import ColumnDataSource, Label, LabelSet, Range1d
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.transform import factor_cmap, factor_mark
from pandas import DataFrame
import pandas

df = pandas.read_csv('samples/fishdata.csv')
source = ColumnDataSource(df)

TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"
MARKERS = ['hex', 'circle_x', 'triangle', 'square']

# Sorted so every run assigns the same colour/marker to the same feed.
# NOTE(review): assumes 'feeds' holds string labels - confirm against the CSV.
feed_levels = sorted(df['feeds'].dropna().unique())

# The title previously read "Iris Morphology", a leftover from the iris example.
p = figure(title='Metabolic variations based on 1H NMR profiling of fishes',
           tools=TOOLS)

# Passing color='feeds' / marker='feeds' made Bokeh read the raw column values
# as colours and marker names (presumably feed names, which are neither), so
# the column is mapped explicitly onto a palette and a marker list instead.
p.scatter("PC1", "PC2", source=source,
          color=factor_cmap('feeds', 'Category10_4', feed_levels),
          marker=factor_mark('feeds', MARKERS, feed_levels),
          legend_field='feeds')

# Text label next to every point; render_mode is omitted because 'canvas' was
# the default and the argument no longer exists in Bokeh 3.
labels = LabelSet(x='PC1', y='PC2', text='feeds', level='glyph',
                  x_offset=5, y_offset=5, source=source)
p.add_layout(labels)

output_file('scatter.html')
show(p)

In [None]:
import numpy as np

from bokeh.plotting import figure, output_file, show

N = 4000
SEED = 42  # fixed seed so the figure is the same on every run

# Random positions in [0, 100) and radii in [0, 1.5).
# (The earlier print(x) dumped all 4000 values into the cell output.)
rng = np.random.default_rng(SEED)
x = rng.random(size=N) * 100
y = rng.random(size=N) * 100
radii = rng.random(size=N) * 1.5

# Colour follows position: red grows with x, green with y, blue is fixed at 150.
colors = [
    "#%02x%02x%02x" % (int(r), int(g), 150) for r, g in zip(50+2*x, 30+2*y)
]

TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"

p = figure(tools=TOOLS)

# circle() is the glyph method that takes a radius; scatter() markers are
# sized with ``size`` instead.
p.circle(x, y, radius=radii,
         fill_color=colors, fill_alpha=0.6,
         line_color=None)

output_file("color_scatter.html", title="color_scatter.py example")

show(p)  # open a browser


In [None]:
from bokeh.plotting import figure, output_file, show
from bokeh.sampledata.iris import flowers
from bokeh.transform import factor_cmap, factor_mark

SPECIES = ['setosa', 'versicolor', 'virginica']
MARKERS = ['hex', 'circle_x', 'triangle']

# One marker shape and one palette colour per species, both keyed on the
# 'species' column.
species_marker = factor_mark('species', MARKERS, SPECIES)
species_color = factor_cmap('species', 'Category10_3', SPECIES)

output_file("scatter_factor.html", title="color_scatter.py example")

p = figure(title="Iris Morphology")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Sepal Width'

p.scatter(
    x="petal_length",
    y="sepal_width",
    source=flowers,
    legend_field="species",
    fill_alpha=0.4,
    size=12,
    marker=species_marker,
    color=species_color,
)

show(p)

In [None]:
# Load BokehJS into the notebook so that show() can render plots inline in
# the cell output.
from bokeh.io import output_notebook
output_notebook()

In [None]:
# scatter plot with color and legend
from bokeh.models import ColumnDataSource, Label, LabelSet, Range1d
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.transform import factor_cmap, factor_mark
from pandas import DataFrame
import pandas

df = pandas.read_csv('samples/fishdata.csv')
source = ColumnDataSource(df)
MARKERS = ['hex', 'circle_x', 'triangle','square']
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"

# Sorted level lists replace list(set(...)), whose order can change between
# kernel sessions and silently reshuffle which colour/marker each level gets.
# NOTE(review): assumes both columns hold string labels - confirm against the CSV.
feeds = sorted(df['feeds'].dropna().unique())
species = sorted(df['species'].dropna().unique())
print(feeds)
print(species)

# The title previously read "Iris Morphology", a leftover from the iris example.
p = figure(title='Metabolic variations based on 1H NMR profiling of fishes',
           tools=TOOLS)

# Marker shape encodes the species, colour encodes the feed.
p.scatter("PC1", "PC2", legend_field="species",
          marker=factor_mark('species', MARKERS, species),
          color=factor_cmap('feeds', 'Category10_4', feeds),
          source=source)
p.legend.location = "top_left"

output_file('scatter.html')
show(p)

In [None]:
# scatter plot with color and legend
from bokeh.models import ColumnDataSource, Label, LabelSet, Range1d
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.transform import factor_cmap, factor_mark
from pandas import DataFrame
import pandas

df = pandas.read_csv('samples/fishdata.csv')

MARKERS = ['hex', 'circle_x', 'triangle','square']
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"

# Sorted level lists replace list(set(...)), whose order can change between
# kernel sessions and silently reshuffle which colour/marker each level gets.
# NOTE(review): assumes both columns hold string labels - confirm against the CSV.
species_levels = sorted(df['species'].dropna().unique())
feed_levels = sorted(df['feeds'].dropna().unique())

p = figure(title = 'Metabolic variations based on 1H NMR profiling of fishes',tools=TOOLS)
p.xaxis.axis_label = 'Principal Component 1: 35.8%'
p.yaxis.axis_label = 'Principal Component 2: 15.1%'
# Marker shape encodes the species, colour encodes the feed.
p.scatter("PC1", "PC2",legend_field="species",
           marker=factor_mark('species', MARKERS, species_levels),
           color=factor_cmap('feeds', 'Category10_4', feed_levels),
           source=df)
p.legend.location = "top_left"
p.legend.background_fill_alpha = 0.3

output_file('scatter.html')
show(p)

In [None]:
# heat map of the expression file, coloured through a linear colour mapper
import pandas as pd
from pandas import DataFrame
from bokeh.io import output_file, show
from bokeh.models import (BasicTicker, ColorBar, ColumnDataSource, Label,
                          LabelSet, LinearColorMapper, PrintfTickFormatter,
                          Range1d)
from bokeh.plotting import figure
from bokeh.transform import factor_cmap, factor_mark, transform
# The unused bokeh.sampledata.unemployment1948 import was dropped: nothing in
# this cell reads it, and it fails when the sample data has not been downloaded.

DATA_FILE = 'samples/GSM188012.CEL'
dtype = {'x': int, 'y': int, 'lux': float}
dataset = pd.read_csv(DATA_FILE, sep='\t', dtype=dtype)
source = ColumnDataSource(dataset)

colors = ["#f0f3fe", "#cadbed", "#a6c9df", "#79add2", "#4682b8","#205297"]

# Fixed limits of the colour scale.
# NOTE(review): presumably chosen from the min/max printed below - confirm, or
# switch to dataset.lux.min() / dataset.lux.max().
LUX_LOW = 161
LUX_HIGH = 2452.2
mapper = LinearColorMapper(palette=colors, low=LUX_LOW, high=LUX_HIGH)

# Observed intensity range, for comparison with the fixed limits above.
print(dataset.lux.min())
print(dataset.lux.max())

p = figure( title='Expression')

# One unit square per (x, y) row, filled according to its 'lux' value.
p.rect(x="x", y="y", width=1, height=1, source=source,
       line_color=None, fill_color=transform('lux', mapper))
output_file("heatmap7.html", title="heatmap.py example")
show(p)

In [None]:
import pandas as pd

from bokeh.io import output_file, show
from bokeh.models import (BasicTicker, ColorBar, ColumnDataSource,
                          LinearColorMapper, PrintfTickFormatter,)
from bokeh.plotting import figure
from bokeh.sampledata.unemployment1948 import data
from bokeh.transform import transform

output_file("unemploymemt.html")

# Build a Year x Month table as a new frame. The previous version assigned to
# data.Year directly, which modified the DataFrame object shared by every
# import of bokeh.sampledata.unemployment1948 in this kernel.
data = (
    data.assign(Year=data.Year.astype(str))
        .set_index('Year')
        .drop(columns='Annual')
        .rename_axis(columns='Month')
)

# reshape to 1D array of rates with a month and year for each row.
df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()

source = ColumnDataSource(df)

# this is the colormap from the original NYTimes plot
colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]
mapper = LinearColorMapper(palette=colors, low=df.rate.min(), high=df.rate.max())

# width/height replace plot_width/plot_height, matching the other figures in
# this notebook; the plot_* spellings are not accepted by Bokeh 3.
p = figure(width=800, height=300, title="US Unemployment 1948—2016",
           x_range=list(data.index), y_range=list(reversed(data.columns)),
           toolbar_location=None, tools="", x_axis_location="above")

# One cell per (Year, Month), filled according to its rate.
p.rect(x="Year", y="Month", width=1, height=1, source=source,
       line_color=None, fill_color=transform('rate', mapper))

# Colour scale legend with one tick per palette entry, formatted as a percentage.
color_bar = ColorBar(color_mapper=mapper, location=(0, 0),
                     ticker=BasicTicker(desired_num_ticks=len(colors)),
                     formatter=PrintfTickFormatter(format="%d%%"))

p.add_layout(color_bar, 'right')

# Strip the axis chrome so that only the small tick labels remain.
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = 1.0

show(p)

In [None]:
# Download the Bokeh sample datasets used by the bokeh.sampledata imports in
# this notebook.
# NOTE(review): cells above already import from bokeh.sampledata; on a fresh
# environment this download presumably has to run before them - confirm.
import bokeh
bokeh.sampledata.download()

In [None]:
import holoviews as hv
from holoviews import opts, dim
from bokeh.sampledata.airport_routes import routes, airports

hv.extension('bokeh')

# Edge table: one row per (SourceID, DestinationID) pair, with "Stops" holding
# the number of route rows for that pair.
route_counts = (
    routes.groupby(['SourceID', 'DestinationID'])
          .Stops.count()
          .reset_index()
)
# Node table keyed by AirportID, carrying the city name for labelling.
nodes = hv.Dataset(airports, 'AirportID', 'City')
chord = hv.Chord((route_counts, nodes), ['SourceID', 'DestinationID'], ['Stops'])

# Keep only the 20 source airports with the most route rows.
routes_per_source = routes.groupby('SourceID').count().sort_values('Stops')
busiest = list(routes_per_source.tail(20).index.values)
busiest_airports = chord.select(AirportID=busiest, selection_mode='nodes')

# Colour edges by their source airport and nodes by their own id.
chord_style = opts.Chord(
    cmap='Category20',
    edge_color=dim('SourceID').str(),
    node_color=dim('AirportID').str(),
    labels='City',
    width=800,
    height=800,
)
busiest_airports.opts(chord_style)

In [None]:
import pandas as pd
import holoviews as hv
from holoviews import opts, dim
from bokeh.io import show, output_file
from bokeh.models import (BasicTicker, ColorBar, ColumnDataSource,
                          LinearColorMapper, PrintfTickFormatter,)
from bokeh.sampledata.airport_routes import routes, airports

# Edge list for the chord diagram; preview the first rows.
data = pd.read_csv('samples/test3.csv')
print(data.head(3))

hv.extension('bokeh')

links = pd.DataFrame(data)

# Node table built from the name_y column; it is only printed here and is not
# passed to the Chord element below.
nodes = hv.Dataset(data[['name_y']], 'index')
print(nodes.data.head())

# Chord built from the edge table alone, with edges coloured by name_x.
chord = hv.Chord(links)
chord_style = opts.Chord(
    cmap='Category20',
    edge_color=dim('name_x').str(),
    labels='name_x',
    width=800,
    height=800,
)
chord.opts(chord_style)