In [1]:
#Interactive proteome volcano plots comparing two treatment levels (low vs. high Zn, and low vs. high Co)
#Updated 1/8/21 by MK

In [2]:
#Import all needed packages
import pandas as pd
import bokeh
from bokeh.io import push_notebook, show, output_notebook, export_png, export_svgs
from bokeh.layouts import row 
from bokeh.plotting import figure, show, save
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource, Span
from bokeh.models.tools import HoverTool
from bokeh.resources import CDN, INLINE
from bokeh.embed import file_html

In [3]:
#Load the 'ValuesSorted' sheet of the proteomics workbook into the dataframe (df)
df = pd.read_excel(
    r'C:\Users\kello\Desktop\MakVolcanoPlotExample\ChProteinsRedo.xlsx',
    sheet_name='ValuesSorted',
    engine='openpyxl',
)
#Preview the first rows to confirm the import worked
df.head()

Unnamed: 0,Accession,Identified Proteins (1064),Pfam_Description,KOG_Description,Molecular Weight,0 Co Av,1 Co Av,3 Co Av,10 Co Av,30 Co Av,...,P_100Zn_10Co_B,Unnamed: 59,Pval_3Co_vs_100Co,PvalCo,LFC_3Co_vs_100Co,Unnamed: 63,Pval_3Zn_vs_30Zn,PvalZn,LFC_3Zn_vs_30Zn,Index
0,contig_5003_174_937_+,contig_5003_174_937_+,Aminotransferase class I and II,Alanine aminotransferase,28 kDa,0.0,0.0,0.1,0.0,0.63587,...,0.0,,0.14747,0.831297,-5.738646,,0.34728,0.459321,0.885098,748.0
1,contig_9864_1_509_+,contig_9864_1_509_+,PF13451 Zinc knuckle,0,17 kDa,1.157585,1.05644,0.1,1.09071,1.2717,...,3.727,,0.42265,0.374019,-4.356918,,3.4e-05,4.471345,-4.673415,20.0
2,contig_33956_88_2413_+,contig_33956_88_2413_+,PF13637,0,86 kDa,0.383445,0.70338,0.1,0.0,0.1,...,0.0,,0.42265,0.374019,-3.617357,,0.42265,0.374019,-2.096481,820.0
3,contig_12198_241_557_-,contig_12198_241_557_-,unknown protein,0,12 kDa,1.93895,1.7598,0.65295,0.37976,1.2717,...,0.0,,0.057339,1.241552,-3.342473,,1.0,0.0,1.0,1029.0
4,contig_4457_11_1162_-,contig_4457_11_1162_-,PF13847,0,43 kDa,2.70585,1.05644,0.350125,1.47045,0.63587,...,0.7454,,0.124418,0.905118,-2.934594,,0.344707,0.46255,-0.573978,746.0


In [4]:
#Functions section
#A function that will allow us to add, color, and outline groups of differentially expressed proteins
def add_points(p, df, x, y, color, alpha=0.2, outline=False):
    """Overlay a highlighted group of points on an existing figure.

    Parameters
    ----------
    p : figure object exposing a ``circle`` glyph method
        The plot the markers are added to.
    df : data accepted by ``bokeh.models.ColumnDataSource``
        Rows to draw (e.g. a filtered DataFrame).
    x, y : str
        Names of the columns used for the x and y coordinates.
    color : str
        Colour of the filled markers.
    alpha : float, default 0.2
        Opacity of the filled markers.
    outline : bool, default False
        When True, a second, unfilled black ring is drawn around every marker.

    Returns
    -------
    The same figure ``p`` that was passed in.
    """
    # One data source shared by the filled markers and the optional outlines
    shared_source = bokeh.models.ColumnDataSource(df)
    marker_size = 7

    # Filled markers
    p.circle(x=x, y=y, source=shared_source,
             size=marker_size, color=color, alpha=alpha,
             name='circles')

    if not outline:
        return p

    # Fully opaque black rings with no fill, drawn on top of the filled markers
    p.circle(x=x, y=y, source=shared_source,
             size=marker_size, color='black', fill_color=None, alpha=1,
             name='outlines')
    return p

#Functions to separate significantly upregulated and downregulated proteins
def _select_significant(df, pval_col, lfc_col, pval_cutoff=1.3, lfc_cutoff=2):
    """Split a dataframe into significantly up- and down-regulated rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the two columns named below.
    pval_col : str
        Column compared against ``pval_cutoff``; a row qualifies only when its
        value is strictly greater than the cutoff (the notebook plots these
        columns on a -log10(p-value) axis, where 1.3 corresponds to p = 0.05).
    lfc_col : str
        Column holding the fold change compared against ``lfc_cutoff``.
    pval_cutoff : float, default 1.3
        Minimum (exclusive) value of ``pval_col`` for a row to be significant.
    lfc_cutoff : float, default 2
        Rows need ``lfc_col > lfc_cutoff`` to count as up and
        ``lfc_col < -lfc_cutoff`` to count as down (both exclusive).

    Returns
    -------
    (ups, downs) : tuple of pandas.DataFrame
        Row subsets of ``df``. Rows with NaN in either column land in neither,
        because comparisons against NaN evaluate to False.
    """
    significant = df[pval_col] > pval_cutoff
    ups = df[significant & (df[lfc_col] > lfc_cutoff)]
    downs = df[significant & (df[lfc_col] < -lfc_cutoff)]
    return ups, downs


def selector(df, pval_cutoff=1.3, lfc_cutoff=2):
    """Return (ups, downs) for the Zn comparison.

    Uses the ``PvalZn`` and ``LFC_3Zn_vs_30Zn`` columns. The default cutoffs
    reproduce the original hard-coded thresholds (1.3 and +/-2).
    """
    return _select_significant(df, 'PvalZn', 'LFC_3Zn_vs_30Zn',
                               pval_cutoff=pval_cutoff, lfc_cutoff=lfc_cutoff)


def selectorCo(df, pval_cutoff=1.3, lfc_cutoff=2):
    """Return (upsCo, downsCo) for the Co comparison.

    Uses the ``PvalCo`` and ``LFC_3Co_vs_100Co`` columns. The default cutoffs
    reproduce the original hard-coded thresholds (1.3 and +/-2).
    """
    return _select_significant(df, 'PvalCo', 'LFC_3Co_vs_100Co',
                               pval_cutoff=pval_cutoff, lfc_cutoff=lfc_cutoff)

In [7]:
#Create the figure comparing 3nM Zn vs 30nM Zn proteins (p)
p = bokeh.plotting.figure(x_range=[-8,7], y_range=[0,5.5], title = "Chaetoceros RS19 Proteins: 3nM Zn vs. 30nM Zn")
p.xaxis.axis_label = 'Log2 (Fold Change (3nM Zn / 30nM Zn))'
p.yaxis.axis_label = '-log10(p-value)'

#Add the p value =0.05 cutoff line, note -log10(0.05) = 1.3
p.line( x=[-8,7],y=[1.3,1.3],
       color='grey', line_dash='dashed', line_width=1,)

#Add the Log Fold Change <-2 or >2 cutoff lines
p.line( x=[-2,-2],y=[0,5.5],
       color='grey', line_dash='dashed', line_width=1)
p.line( x=[2,2],y=[0,5.5],
       color='grey', line_dash='dashed', line_width=1)

#Plot every protein in grey; keep the renderer so the hover tool can be bound to it
zn_points = p.circle('LFC_3Zn_vs_30Zn','PvalZn',source=df,fill_alpha=0.1,size=10,color='grey')

# Add the hover tool, restricted to the all-proteins layer.
# Without an explicit renderer list the tool inspects every glyph on the figure:
# the dashed cutoff lines would show "???" tooltips, and significant proteins
# (drawn again below as coloured and outlined markers) would show duplicates.
hover = HoverTool(
    renderers=[zn_points],
    tooltips=[
        ('Protein', '@Pfam_Description'),
        ('Contig_ID', '@Accession')])
p.add_tools(hover)

#Split out the significantly up- and down-regulated proteins
ups,downs = selector(df)

#Highlight them on top of the grey layer: green = up, red = down
p = add_points(p, ups, 'LFC_3Zn_vs_30Zn','PvalZn', color='green', alpha=0.6, outline=True)
p = add_points(p, downs, 'LFC_3Zn_vs_30Zn','PvalZn', color='red', alpha=0.6, outline=True)

#Keeps the created plot in-line in notebook
bokeh.io.output_notebook(INLINE)
show(p)

In [6]:
#Create the figure comparing 3nM Co vs 100nM Co proteins (p2)
p2 = bokeh.plotting.figure(x_range=[-8,7], y_range=[0,5.5], title = "Chaetoceros RS19 Proteins: 3nM Co vs. 100nM Co")
p2.xaxis.axis_label = 'Log2 (Fold Change (3nM Co / 100nM Co))'
p2.yaxis.axis_label = '-log10(p-value)'

#Add the p value =0.05 cutoff line, note -log10(0.05) = 1.3
p2.line( x=[-8,7],y=[1.3,1.3],
       color='gray', line_dash='dashed', line_width=1,)

#Add the Log Fold Change <-2 or >2 cutoff lines
p2.line( x=[-2,-2],y=[0,5.5],
       color='gray', line_dash='dashed', line_width=1)
p2.line( x=[2,2],y=[0,5.5],
       color='gray', line_dash='dashed', line_width=1)

#Plot every protein in grey; keep the renderer so the hover tool can be bound to it
co_points = p2.circle('LFC_3Co_vs_100Co','PvalCo',source=df,fill_alpha=0.1,size=10,color='grey')

# Add the hover tool, restricted to the all-proteins layer.
# Without an explicit renderer list the tool inspects every glyph on the figure:
# the dashed cutoff lines would show "???" tooltips, and significant proteins
# (drawn again below as coloured and outlined markers) would show duplicates.
hover = HoverTool(
    renderers=[co_points],
    tooltips=[
        ('Protein', '@Pfam_Description'),
        ('Contig_ID', '@Accession')])
p2.add_tools(hover)

#Split out the significantly up- and down-regulated proteins
upsCo,downsCo = selectorCo(df)

#Highlight them on top of the grey layer: green = up, red = down
p2 = add_points(p2, upsCo, 'LFC_3Co_vs_100Co','PvalCo', color='green', alpha=0.6, outline=True)
p2 = add_points(p2, downsCo, 'LFC_3Co_vs_100Co','PvalCo', color='red', alpha=0.6, outline=True)

#Keeps the created plot in-line in notebook
bokeh.io.output_notebook(INLINE)
show(p2)