In [169]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook
# from bokeh.charts import Scatter, output_file, show
from bkcharts import Scatter, output_file, show
from bokeh.palettes import *
from bokeh.layouts import gridplot
from bokeh.models import HoverTool
np.__version__

'1.13.3'

### Overview

- [Match Contacts Function](#Match-Contacts-Function)
- [Output Ratio Function](#Output-Ratio-Function)
- [Plot Ratio Function](#Plot-Ratio-Function)
- [Main Function](#Main-Function)

### Load files into DataFrames

In [163]:
# Contact lists derived from PDB structures.
pdb_file_1 = "/home/kmm5/actin/sbm/pdb_maps_12A/cutoff/all_pdb_monomer_12A.contacts"
pdb_file_2 = "/home/kmm5/actin/sbm/pdb_maps_12A/cutoff/all_pdb_interface_12A.contacts"
# Earlier pfam30 inputs, kept for reference:
# dca_file_1 = "/home/kmm5/actin/plmDCA/pfam/pfam30/act_pfam_f10_mapped.contacts"
# dca_file_2 = "/home/kmm5/actin/plmDCA/pfam/pfam30/act_pfam_f20_mapped.contacts"
# dca_file_3 = "/home/kmm5/actin/plmDCA/pfam/pfam30/act_pfam_f50_mapped.contacts"
# DCA pair lists (pfam31 f10..f50, plus the cytoplasm actin-actin set).
dca_file_1 = "/home/kmm5/actin/plmDCA/pfam/pfam31/mapped_act_pfam31_f10.contacts"
dca_file_2 = "/home/kmm5/actin/plmDCA/pfam/pfam31/mapped_act_pfam31_f20.contacts"
dca_file_3 = "/home/kmm5/actin/plmDCA/pfam/pfam31/mapped_act_pfam31_f30.contacts"
dca_file_4 = "/home/kmm5/actin/plmDCA/pfam/pfam31/mapped_act_pfam31_f40.contacts"
dca_file_5 = "/home/kmm5/actin/plmDCA/pfam/pfam31/mapped_act_pfam31_f50.contacts"
dca_file_6 = "/home/kmm5/actin/plmDCA/cytoplasm/from_actin-actin_subseq/cyt_oda_mapped.contacts"

# Column names given to every contacts table; also the merge keys used later.
indices = ['residue_i', 'residue_j']


def _readContacts(path):
    """Read one whitespace-delimited contacts file into a DataFrame named by `indices`."""
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
    # which newer pandas releases deprecate.
    return pd.read_table(path, sep=r'\s+', names=indices)


monomer_pdb = _readContacts(pdb_file_1)
interface_pdb = _readContacts(pdb_file_2)

# Order matters: later cells index this list positionally.
dca_dataframes = [
    _readContacts(path)
    for path in (dca_file_1, dca_file_2, dca_file_3,
                 dca_file_4, dca_file_5, dca_file_6)
]
# Keep the individual names available, bound to the same objects as the list.
(dca_pairs_f10, dca_pairs_f20, dca_pairs_f30,
 dca_pairs_f40, dca_pairs_f50, dca_pairs_actx2) = dca_dataframes

In [164]:
# Row counts of the two PDB tables and of the first DCA table.
total_mon_pairs, total_inter_pairs = len(monomer_pdb), len(interface_pdb)
total_dca_pairs = len(dca_dataframes[0])

# Emit the three summary lines in a single print call.
print("\n".join(
    template % total
    for template, total in (
        ("Total Monomer pairs: %d", total_mon_pairs),
        ("Total Inter pairs  : %d", total_inter_pairs),
        ("Total DCA pairs    : %d", total_dca_pairs),
    )
))

Total Monomer pairs: 15965
Total Inter pairs  : 3141
Total DCA pairs    : 65014


### Match Contacts Function

[Back to Overview](#Overview)

In [160]:
def matchContacts(dca_pairs, monomer_pdb, interface_pdb, num_top_dca,
                  pair_columns=('residue_i', 'residue_j')):
    """
    Ratio of interface matches to monomer matches among the first
    `num_top_dca` rows of `dca_pairs`.

    The selected rows are inner-merged with `monomer_pdb` and with
    `interface_pdb` on the residue-pair columns; the ratio is built from
    the number of rows in each merge result.

    Parameters
    ----------
    dca_pairs : pandas.DataFrame
        Residue pairs. Only row order decides which pairs count as "top"
        (presumably the file is already sorted by DCA rank -- confirm).
    monomer_pdb, interface_pdb : pandas.DataFrame
        Reference pair tables containing the `pair_columns` columns.
    num_top_dca : int
        Number of leading rows of `dca_pairs` to consider.
    pair_columns : sequence of str, optional
        Column names used as merge keys. The default matches the column
        names the notebook assigns when loading the files, so the function
        no longer depends on a notebook-level global.

    Returns
    -------
    float
        num_interface_match / (num_monomer_match + 0.0001). The small
        constant avoids division by zero; with zero monomer matches the
        result is therefore 10000 times the interface match count.
    """
    merge_keys = list(pair_columns)
    top_dca_pairs = dca_pairs[:num_top_dca]

    monomer_match = pd.merge(top_dca_pairs, monomer_pdb,
                             how='inner', on=merge_keys)
    num_monomer_match = len(monomer_match)

    interface_match = pd.merge(top_dca_pairs, interface_pdb,
                               how='inner', on=merge_keys)
    num_interface_match = len(interface_match)

    # avoids dividing by zero
    z_factor = 0.0001
    ratio = float(num_interface_match) / float(num_monomer_match + z_factor)
    return ratio

In [161]:
# Quick check: ratio computed from only the first row of the first DCA DataFrame.
matchContacts(dca_dataframes[0], monomer_pdb, interface_pdb, 1)

0.0

### Output Ratio Function

[Back to Overview](#Overview)

In [165]:
def _cumulativeMatchCounts(top_pairs, pdb_pairs, length, pair_columns):
    """Return an int array whose entry k-1 is the inner-merge row count of the first k rows of top_pairs with pdb_pairs."""
    ranked = top_pairs[pair_columns].reset_index(drop=True)
    ranked = ranked.assign(_rank=np.arange(len(ranked)))
    # One merge for the whole prefix: every result row remembers which
    # top-pair row produced it, so any shorter prefix can be counted from it.
    hits = pd.merge(ranked, pdb_pairs[pair_columns],
                    how='inner', on=pair_columns)
    hit_ranks = hits['_rank'].values.astype(np.int64)
    # Rows past the end of top_pairs get zero hits, so the running total
    # simply stays flat there.
    hits_per_row = np.bincount(hit_ranks, minlength=length)
    return hits_per_row.cumsum()


def outputRatio(dca_dataframes, monomer_pdb, interface_pdb, num_top_dca):
    """
    Interface:monomer match ratios for growing prefixes of each DCA table.

    Entry [d, i] of the result equals what
    matchContacts(dca_dataframes[d], monomer_pdb, interface_pdb, i)
    returns, i.e. interface matches / (monomer matches + 0.0001) over the
    first i rows; column 0 is always 0.0. Instead of re-merging every
    prefix, each table is merged once against each reference table and the
    per-row hit counts are accumulated.

    Parameters
    ----------
    dca_dataframes : list of pandas.DataFrame
        Pair tables with 'residue_i' and 'residue_j' columns.
    monomer_pdb, interface_pdb : pandas.DataFrame
        Reference pair tables with the same two columns.
    num_top_dca : int
        Number of columns of the result (prefix sizes 0 .. num_top_dca-1).

    Returns
    -------
    numpy.ndarray
        Array of shape (len(dca_dataframes), num_top_dca).
    """
    pair_columns = ['residue_i', 'residue_j']
    # Same zero-division guard as matchContacts.
    z_factor = 0.0001

    num_df = len(dca_dataframes)
    ratio = np.zeros((num_df, num_top_dca))
    if num_top_dca < 2:
        # Only the all-zero first column (or nothing) is requested.
        return ratio

    largest_prefix = num_top_dca - 1
    for row, dca_pairs in enumerate(dca_dataframes):
        top_pairs = dca_pairs.iloc[:largest_prefix]
        monomer_counts = _cumulativeMatchCounts(
            top_pairs, monomer_pdb, largest_prefix, pair_columns)
        interface_counts = _cumulativeMatchCounts(
            top_pairs, interface_pdb, largest_prefix, pair_columns)
        ratio[row, 1:] = interface_counts / (monomer_counts + z_factor)
    return ratio

### Plot Ratio Function

[Back to Overview](#Overview)

In [174]:
def plotRatio(ratio_array, x_range, labels=None):
    """
    Plot one line per row of `ratio_array` in a Bokeh figure shown inline.

    Parameters
    ----------
    ratio_array : sequence of sequences
        One series of ratios per row; each row is drawn against
        x = 0 .. x_range-1.
    x_range : int
        Number of x positions (should equal the length of each row).
    labels : sequence of str, optional
        Legend text for each row. When omitted the historical labels
        'f10', 'f20', ... are generated, which is only meaningful if the
        rows really are the f10, f20, ... series in that order.

    Raises
    ------
    ValueError
        If `labels` is given but its length differs from the row count.
    """
    num_files = len(ratio_array)
    if labels is None:
        labels = ['f%d' % (10 + 10 * i) for i in range(num_files)]
    elif len(labels) != num_files:
        raise ValueError("expected %d labels, got %d"
                         % (num_files, len(labels)))
    color_list = Set1_9

    hover = HoverTool(tooltips=[('Top Pairs, IMatch Ratio', '$x{0}, $y{0.00}')])
    TOOLS = "crosshair, pan, wheel_zoom, reset, save, box_select"
    p = figure(width=800, plot_height=400,
              tools=[TOOLS, hover], toolbar_location="above",
              x_axis_label='Top DCA pairs',
              y_axis_label='IMatch Ratio (Inter:Mon)')

    x_values = list(range(x_range))
    for i in range(num_files):
        # Wrap around the palette so more series than colors does not raise.
        p.line(x_values, ratio_array[i],
               color=color_list[i % len(color_list)],
               legend=labels[i], line_width=1.25)
    p.background_fill_color='beige'
    p.xgrid.grid_line_color = 'navy'
    p.xgrid.grid_line_dash = 'dashed'
    p.xgrid.grid_line_alpha = 0.1
    p.ygrid.band_fill_alpha = 0.1
    p.ygrid.band_fill_color = "navy"
    p.legend.location='bottom_right'
    p.legend.orientation='horizontal'
    # Clicking a legend entry toggles the visibility of its line.
    p.legend.click_policy='hide'
    output_notebook()
    show(p)

### Main Function

[Back to Overview](#Overview)

In [None]:
# Number of columns in the ratio array: outputRatio fills prefix sizes 0 .. N-1.
N = 4000
# One row of ratios per DataFrame in dca_dataframes.
r = outputRatio(dca_dataframes, monomer_pdb, interface_pdb, N)

In [175]:
# Draw every row of r against x = 0 .. N-1.
plotRatio(r, N)

[Back to Overview](#Overview)