In [1]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
# from bokeh.charts import Scatter, output_file, show
from bkcharts import Scatter, output_file, show
from bokeh.palettes import Spectral4
from bokeh.layouts import gridplot
from bokeh.models import HoverTool
# Display the NumPy version so the environment used for this run is on record.
np.__version__

'1.13.2'

### Overview

- [Match Contacts Function](#Match-Contacts-Function)
- [Output Ratio Function](#Output-Ratio-Function)
- [Plot Ratio Function](#Plot-Ratio-Function)
- [Main Function](#Main-Function)

### Load files into DataFrames

In [2]:
# Input contact files; paths are relative to the notebook's working directory.
# (Presumably PDB-derived monomer/interface contacts and plmDCA predictions,
# judging by the path names -- confirm against the data source.)
pdb_file_1 = "sbm/pdb_maps_12A/cutoff/all_pdb_monomer_12A.contacts"
pdb_file_2 = "sbm/pdb_maps_12A/cutoff/all_pdb_interface_12A.contacts"
dca_file_1 = "plmDCA/pfam/act_pfam_f10_mapped.contacts"
dca_file_2 = "plmDCA/pfam/act_pfam_f20_mapped.contacts"
dca_file_3 = "plmDCA/pfam/act_pfam_f50_mapped.contacts"

# Column names given to every contacts table; also used as the merge keys.
indices = ['residue_i', 'residue_j']


def _read_contacts(path, columns=indices):
    """Read a header-less, whitespace-delimited contacts file into a DataFrame."""
    # sep=r'\s+' is the documented equivalent of the deprecated
    # delim_whitespace=True and works on old and new pandas alike.
    return pd.read_csv(path, sep=r'\s+', names=columns)


monomer_pdb = _read_contacts(pdb_file_1)
interface_pdb = _read_contacts(pdb_file_2)

dca_pairs_f10 = _read_contacts(dca_file_1)
dca_pairs_f20 = _read_contacts(dca_file_2)
dca_pairs_f50 = _read_contacts(dca_file_3)
dca_dataframes = [dca_pairs_f10, dca_pairs_f20, dca_pairs_f50]

In [3]:
# Row counts of the loaded tables. Note that only the first DCA DataFrame
# is counted for the "DCA pairs" figure.
total_mon_pairs = monomer_pdb.shape[0]
total_inter_pairs = interface_pdb.shape[0]
total_dca_pairs = dca_dataframes[0].shape[0]

# Emit the three summary lines in a single print call.
print("\n".join([
    "Total Monomer pairs: %d" % total_mon_pairs,
    "Total Inter pairs  : %d" % total_inter_pairs,
    "Total DCA pairs    : %d" % total_dca_pairs,
]))

Total Monomer pairs: 15965
Total Inter pairs  : 3141
Total DCA pairs    : 65014


### Match Contacts Function

[Back to Overview](#Overview)

In [4]:
def matchContacts(dca_pairs, monomer_pdb, interface_pdb, N,
                  on=('residue_i', 'residue_j')):
    """
    Compute the interface-to-monomer match ratio for the first N pairs.

    The first ``N`` rows of ``dca_pairs`` are inner-merged with
    ``monomer_pdb`` and, separately, with ``interface_pdb`` on the ``on``
    columns. The ratio of the two merged row counts is returned.

    Parameters
    ----------
    dca_pairs : pandas.DataFrame
        Residue pairs; rows are taken in their existing order
        (presumably already ranked by DCA score -- confirm with the input).
    monomer_pdb : pandas.DataFrame
        Monomer contact pairs.
    interface_pdb : pandas.DataFrame
        Interface contact pairs.
    N : int
        Number of leading rows of ``dca_pairs`` to consider.
    on : sequence of str, optional
        Column names used as merge keys; must exist in all three frames.

    Returns
    -------
    float
        ``interface matches / monomer matches``. When there are no monomer
        matches the quotient is undefined: 0.0 is returned if there are no
        interface matches either, otherwise ``inf``.
    """
    top_dca_pairs = dca_pairs.iloc[:N]
    keys = list(on)

    num_monomer_match = len(
        pd.merge(top_dca_pairs, monomer_pdb, how='inner', on=keys)
    )
    num_interface_match = len(
        pd.merge(top_dca_pairs, interface_pdb, how='inner', on=keys)
    )

    if num_monomer_match == 0:
        # Guard against ZeroDivisionError when none of the selected pairs is
        # a monomer contact (always the case for N == 0).
        return float('inf') if num_interface_match else 0.0

    # Explicit float() keeps this a true division under Python 2 as well.
    return float(num_interface_match) / float(num_monomer_match)

### Output Ratio Function

[Back to Overview](#Overview)

In [8]:
def outputRatio(dca_dataframes, monomer_pdb, interface_pdb, N):
    """
    Compute match ratios for every DCA DataFrame and every cutoff below N.

    For each DataFrame in ``dca_dataframes`` and each ``i`` in ``1 .. N-1``,
    ``matchContacts()`` is called with the first ``i`` pairs and the result
    is stored at ``[row, i]``. Column 0 (no pairs selected) is left at 0.0.

    Parameters
    ----------
    dca_dataframes : list of pandas.DataFrame
        One DataFrame of residue pairs per row of the result.
    monomer_pdb, interface_pdb : pandas.DataFrame
        Passed through unchanged to ``matchContacts()``.
    N : int
        Number of columns in the result (cutoffs ``0 .. N-1``).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(dca_dataframes), N)``.
    """
    ratio = np.zeros((len(dca_dataframes), N))
    for row, dca_pairs in enumerate(dca_dataframes):
        # Start at 1: np.zeros already holds the 0.0 used for the empty
        # selection, so no special case is needed inside the loop.
        # range() (not xrange) keeps this runnable on Python 2 and 3.
        for i in range(1, N):
            ratio[row, i] = matchContacts(
                dca_pairs, monomer_pdb, interface_pdb, i
            )
    return ratio

### Plot Ratio Function

[Back to Overview](#Overview)

In [9]:
def plotRatio(ratio_array, x_range, labels=None):
    """
    Plot one line per row of ``ratio_array`` in a Bokeh figure.

    Parameters
    ----------
    ratio_array : sequence of sequences
        One series of ratios per row; each row is plotted against
        ``0 .. x_range-1``.
    x_range : int
        Number of x values (should equal the length of each row).
    labels : sequence of str, optional
        Legend label for each row. Defaults to ``'f0', 'f1', ...``
        (the row index); must have at least ``len(ratio_array)`` entries
        when given.

    The figure is shown inline via ``output_notebook()`` / ``show()``;
    nothing is returned.
    """
    num_files = len(ratio_array)
    color_list = ['teal', 'coral', 'green']
    if labels is None:
        labels = ['f%d' % i for i in range(num_files)]

    hover = HoverTool(tooltips=[('Top Pairs, IMatch Ratio', '$x{0}, $y{0.00}')])
    TOOLS = "crosshair, pan, wheel_zoom, reset, save, box_select"
    p = figure(width=800, plot_height=400,
               tools=[TOOLS, hover], toolbar_location="above",
               x_axis_label='Top DCA pairs',
               y_axis_label='IMatch Ratio (Inter:Mon)')

    # Materialise the x values once; range() works on Python 2 and 3.
    x_values = list(range(x_range))
    for i in range(num_files):
        # Cycle through the palette so more than three series do not
        # raise IndexError.
        p.line(x_values, ratio_array[i],
               color=color_list[i % len(color_list)],
               legend=labels[i], line_width=1.25)

    p.background_fill_color = 'beige'
    p.legend.location = 'top_left'
    p.xgrid.grid_line_color = 'navy'
    p.xgrid.grid_line_dash = 'dashed'
    p.xgrid.grid_line_alpha = 0.1
    p.ygrid.band_fill_alpha = 0.1
    p.ygrid.band_fill_color = "navy"
    p.legend.orientation = 'horizontal'
    # Clicking a legend entry toggles the visibility of its line.
    p.legend.click_policy = 'hide'
    output_notebook()
    show(p)

### Main Function

[Back to Overview](#Overview)

In [10]:
# Number of cutoffs to evaluate (columns of the ratio array).
N = 4000
# One row of ratios per DataFrame in dca_dataframes.
r = outputRatio(
    dca_dataframes,
    monomer_pdb=monomer_pdb,
    interface_pdb=interface_pdb,
    N=N,
)


In [11]:
# Draw one line per DCA DataFrame over the N evaluated cutoffs.
plotRatio(ratio_array=r, x_range=N)

[Back to Overview](#Overview)