# Evaluate PIPE-Sites

## Import PIPE-Sites output

In [1]:
from __future__ import division

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

In [2]:
PIPE_columns = ['protein_a', 'protein_b', 'average_score', 'max_score', 'compute_time', 'weighted_score',
                'weighted_score_old', 'site1_height', 'site1a_start', 'site1a_end', 'site1b_start', 'site1b_end',
                'site2_height', 'site2a_start', 'site2a_end', 'site2b_start', 'site2b_end', 'site3_height',
                'site3a_start', 'site3a_end', 'site3b_start', 'site3b_end']

In [3]:
len(PIPE_columns)

22

In [4]:
PIPESites_df = pd.read_csv('data/PIPE_output/output/yeast-yeast.out',
                           index_col=None,
                           header=None,
                           names=PIPE_columns,
                           sep='\t')
PIPESites_df

Unnamed: 0,protein_a,protein_b,average_score,max_score,compute_time,weighted_score,weighted_score_old,site1_height,site1a_start,site1a_end,...,site2_height,site2a_start,site2a_end,site2b_start,site2b_end,site3_height,site3a_start,site3a_end,site3b_start,site3b_end
0,Q99394,Q03630,0.999887,14639,0.899638,139.022723,139.022723,25,200,225,...,23,167,192,140,160,19,147,172,140,160
1,Q99380,P39007,0.999683,14639,0.126009,99.790238,99.790238,47,17,37,...,43,17,37,641,666,19,17,37,666,693
2,Q99394,P32613,0.999881,14639,1.010490,94.885224,94.885224,32,249,269,...,25,200,225,133,153,23,167,192,133,153
3,Q99344,Q03919,0.999759,14639,0.230951,63.894927,63.894927,88,280,300,...,80,280,300,0,28,72,23,48,58,78
4,Q99288,Q99288,0.999960,14639,0.387843,196.945361,196.945361,45,200,224,...,31,248,273,315,335,28,315,335,118,144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1601,O94742,Q12250,0.999868,14639,0.639819,73.632483,73.632483,86,70,90,...,62,70,90,69,99,59,23,52,426,446
1602,O94742,Q04062,0.999850,14639,0.261285,73.465315,73.465315,90,70,90,...,59,23,52,374,394,46,70,90,53,80
1603,O94742,P46674,0.999956,14639,0.579728,86.389229,86.389229,248,70,90,...,219,70,90,699,725,113,70,90,139,174
1604,O74700,O74700,0.999160,14639,1.112819,159.153959,159.153959,58,0,27,...,12,68,88,23,48,11,33,62,68,88


In [5]:
PIPESites_df = PIPESites_df[['protein_a', 'protein_b', 'site1a_start', 'site1a_end', 'site1b_start', 'site1b_end', 'site2a_start', 'site2a_end', 'site2b_start', 'site2b_end', 'site3a_start', 'site3a_end', 'site3b_start', 'site3b_end']]
PIPESites_df

Unnamed: 0,protein_a,protein_b,site1a_start,site1a_end,site1b_start,site1b_end,site2a_start,site2a_end,site2b_start,site2b_end,site3a_start,site3a_end,site3b_start,site3b_end
0,Q99394,Q03630,200,225,140,160,167,192,140,160,147,172,140,160
1,Q99380,P39007,17,37,562,591,17,37,641,666,17,37,666,693
2,Q99394,P32613,249,269,81,115,200,225,133,153,167,192,133,153
3,Q99344,Q03919,280,300,17,63,280,300,0,28,23,48,58,78
4,Q99288,Q99288,200,224,315,335,248,273,315,335,315,335,118,144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1601,O94742,Q12250,70,90,166,194,70,90,69,99,23,52,426,446
1602,O94742,Q04062,70,90,260,284,23,52,374,394,70,90,53,80
1603,O94742,P46674,70,90,688,715,70,90,699,725,70,90,139,174
1604,O74700,O74700,0,27,68,88,68,88,23,48,33,62,68,88


## Import known protein sites test set

In [6]:
with open('data/yeast_processed_norm_area_50/test/coords.json', 'r') as f:
    train_coords = json.load(f)
len(train_coords)

111

Filter for test set in PIPE-Sites

In [7]:
PIPESites_df = PIPESites_df[PIPESites_df.apply(lambda x: f'{x.protein_a}_{x.protein_b}' in train_coords.keys(), axis=1)].reset_index(drop=True)
PIPESites_df

Unnamed: 0,protein_a,protein_b,site1a_start,site1a_end,site1b_start,site1b_end,site2a_start,site2a_end,site2b_start,site2b_end,site3a_start,site3a_end,site3b_start,site3b_end
0,Q12446,P60010,274,299,356,376,253,282,356,376,509,534,356,376
1,Q12438,Q12438,212,232,65,89,212,232,71,95,65,89,212,232
2,Q12329,Q12329,82,107,356,376,356,376,79,107,160,190,356,376
3,Q12223,Q12223,219,239,173,199,173,199,219,239,159,185,219,239
4,Q12189,Q12189,181,205,239,259,239,259,159,185,239,259,20,51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,P08018,P53599,301,327,1560,1580,472,500,1560,1580,649,669,1368,1409
107,P06782,P38717,156,202,1210,1230,438,469,1210,1230,226,255,1210,1230
108,P06780,P51862,190,210,231,262,190,210,243,270,190,210,318,350
109,P06245,P05986,21,59,379,399,175,218,379,399,361,381,193,236


In [8]:
PIPESites_df['ranges_pred1'] = PIPESites_df.apply(lambda x: [[x['site1a_start'], x['site1a_end']],[x['site1b_start'], x['site1b_end']]], axis= 1)
PIPESites_df['ranges_pred2'] = PIPESites_df.apply(lambda x: [[x['site2a_start'], x['site2a_end']],[x['site2b_start'], x['site2b_end']]], axis= 1)
PIPESites_df['ranges_pred3'] = PIPESites_df.apply(lambda x: [[x['site3a_start'], x['site3a_end']],[x['site3b_start'], x['site3b_end']]], axis= 1)
PIPESites_df = PIPESites_df[['protein_a', 'protein_b', 'ranges_pred1', 'ranges_pred2', 'ranges_pred3']]
PIPESites_df

Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]"
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]"
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]"
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]"
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]"
...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]"
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]"
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]"
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]"


In [9]:
PIPESites_df['ranges_true'] =  PIPESites_df.apply(lambda x: train_coords[x['protein_a'] + '_' + x['protein_b']], axis=1)
PIPESites_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PIPESites_df['ranges_true'] =  PIPESites_df.apply(lambda x: train_coords[x['protein_a'] + '_' + x['protein_b']], axis=1)


Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3,ranges_true
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]","[[544, 574], [2, 375]]"
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]","[[128, 193], [128, 193]]"
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]","[[247, 356], [247, 356]]"
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]","[[47, 201], [47, 201]]"
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]","[[74, 251], [74, 251]]"
...,...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]","[[360, 623], [1266, 1558]]"
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]","[[55, 306], [310, 423]]"
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]","[[12, 185], [663, 844]]"
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]","[[70, 324], [88, 342]]"


## Dice

In [10]:
# Formula adapted from https://stackoverflow.com/questions/25349178/calculating-percentage-of-bounding-box-overlap-for-image-detector-evaluation
def get_dice_iou(bb1, bb2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.

    Parameters
    ----------
    bb1 : list: [[y1, y2], [x1, x2]]
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner
    bb1 : list: [[y1, y2], [x1, x2]]
        The (x1, y1) position is at the top left corner,
        the (x2, y2) position is at the bottom right corner

    Returns
    -------
    float
        in [0, 1]
    """

    assert bb1[0][0] < bb1[0][1]
    assert bb1[1][0] < bb1[1][1]
    assert bb2[0][0] < bb2[0][1]
    assert bb2[1][0] < bb2[1][1]

    # determine the coordinates of the intersection rectangle
    x_left = max(bb1[1][0], bb2[1][0])
    y_top = max(bb1[0][0], bb2[0][0])
    x_right = min(bb1[1][1], bb2[1][1])
    y_bottom = min(bb1[0][1], bb2[0][1])

    if x_right < x_left or y_bottom < y_top:
        return 0.0, 0.0

    # The intersection of two axis-aligned bounding boxes is always an
    # axis-aligned bounding box
    intersection_area = (x_right - x_left + 1) * (y_bottom - y_top + 1)

    # compute the area of both AABBs
    bb1_area = (bb1[1][1] - bb1[1][0] + 1) * (bb1[0][1] - bb1[0][0] + 1)
    bb2_area = (bb2[1][1] - bb2[1][0] + 1) * (bb2[0][1] - bb2[0][0] + 1)

    # compute the intersection over union by taking the intersection
    # area and dividing it by the sum of prediction + ground-truth
    # areas - the interesection area
    iou = intersection_area / float(bb1_area + bb2_area - intersection_area)
    
    # dice
    dice =  2 * intersection_area / float(bb1_area + bb2_area)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou, dice

In [11]:
PIPESites_df['iou_dice1'] = PIPESites_df.apply(lambda x: get_dice_iou(x.ranges_pred1, x.ranges_true), axis=1)
PIPESites_df['iou_dice2'] = PIPESites_df.apply(lambda x: get_dice_iou(x.ranges_pred2, x.ranges_true), axis=1)
PIPESites_df['iou_dice3'] = PIPESites_df.apply(lambda x: get_dice_iou(x.ranges_pred3, x.ranges_true), axis=1)
PIPESites_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PIPESites_df['iou_dice1'] = PIPESites_df.apply(lambda x: get_dice_iou(x.ranges_pred1, x.ranges_true), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PIPESites_df['iou_dice2'] = PIPESites_df.apply(lambda x: get_dice_iou(x.ranges_pred2, x.ranges_true), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-v

Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3,ranges_true,iou_dice1,iou_dice2,iou_dice3
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]","[[544, 574], [2, 375]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]","[[128, 193], [128, 193]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]","[[247, 356], [247, 356]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]","[[47, 201], [47, 201]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]","[[74, 251], [74, 251]]","(0.010193200351273366, 0.020180694836846844)","(0.011003134796238245, 0.021766766921955907)","(0.0, 0.0)"
...,...,...,...,...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]","[[360, 623], [1266, 1558]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]","[[55, 306], [310, 423]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]","[[12, 185], [663, 844]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]","[[70, 324], [88, 342]]","(0.0, 0.0)","(0.0, 0.0)","(0.0, 0.0)"


In [12]:
PIPESites_df[['iou1', 'dice1']] = pd.DataFrame(PIPESites_df['iou_dice1'].tolist(), index=PIPESites_df.index)
PIPESites_df[['iou2', 'dice2']] = pd.DataFrame(PIPESites_df['iou_dice2'].tolist(), index=PIPESites_df.index)
PIPESites_df[['iou3', 'dice3']] = pd.DataFrame(PIPESites_df['iou_dice3'].tolist(), index=PIPESites_df.index)
PIPESites_df.drop(['iou_dice1', 'iou_dice2', 'iou_dice3'], inplace=True, axis=1)
PIPESites_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3,ranges_true,iou1,dice1,iou2,dice2,iou3,dice3
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]","[[544, 574], [2, 375]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]","[[128, 193], [128, 193]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]","[[247, 356], [247, 356]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]","[[47, 201], [47, 201]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]","[[74, 251], [74, 251]]",0.010193,0.020181,0.011003,0.021767,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]","[[360, 623], [1266, 1558]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]","[[55, 306], [310, 423]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]","[[12, 185], [663, 844]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]","[[70, 324], [88, 342]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0


In [13]:
PIPESites_df[['iou1', 'dice1', 'iou2', 'dice2', 'iou3', 'dice3']].describe()

Unnamed: 0,iou1,dice1,iou2,dice2,iou3,dice3
count,111.0,111.0,111.0,111.0,111.0,111.0
mean,0.001687,0.003246,0.000867,0.001691,0.00355,0.006632
std,0.008019,0.015353,0.004665,0.009013,0.015664,0.028578
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0
max,0.051921,0.098717,0.042484,0.081505,0.129024,0.228558


## Distance Measure

The distance measure is as described in PIPE-Sites: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-12-225

Get lengths of proteins

In [14]:
protein_sequence = "data/uniprot-proteome_UP000002311_stripped.fasta"
protein_sequence_CD = pd.read_csv(protein_sequence, header = None, sep = "\t")
print("Loaded UniProt proteome")
new_df_ps = pd.DataFrame(index = protein_sequence_CD[0].iloc[::2].map(lambda x: str(x)[1:].strip()))
new_df_ps['Sequence'] = protein_sequence_CD[0].iloc[1::2].to_numpy()
new_df_ps['Length'] = new_df_ps['Sequence'].map(lambda x: len(x))
new_df_ps

Loaded UniProt proteome


Unnamed: 0_level_0,Sequence,Length
0,Unnamed: 1_level_1,Unnamed: 2_level_1
P21192,MDNVVDPWYINPSGFAKDTQDEEYVQHHDNVNPTIPPPDNYILNNE...,770
P46993,MTTLASSIEHKTKHLAAPFENDENPWMKKYCCQCKSCKMSVPVQPW...,209
P47117,MSYLNNPAVVMDNGTGLTKLGFAGNDSPSWVFPTAIATAAPSNTKK...,449
P22768,MSKGKVCLAYSGGLDTSVILAWLLDQGYEVVAFMANVGQEEDFDAA...,420
P29311,MSTSREDSVYLAKLAEQAERYEEMVENMKTVASSGQELSVEERNLL...,267
...,...,...
P47049,MYEMSGIDSLFHDRVVHDYSHTSEQVIVVYISSAAGDNSWLHQWFK...,396
P53142,MNRILSSASLLSNVSMPRQNKHKITKALCYAIIVASIGSIQFGYHL...,486
Q05919,MKRFLLSRRQRHLRMICFHNISLFRANGDSKLIKEYGDGFIPCFFI...,439
Q04170,MSFENKLPTPLENNDAKGHMVCTLNKTTDARRAAETLSIAFSNSPA...,232


In [15]:
PIPESites_df['len_a'] = PIPESites_df['protein_a'].apply(lambda x: new_df_ps.Length.loc[x])
PIPESites_df['len_b'] = PIPESites_df['protein_b'].apply(lambda x: new_df_ps.Length.loc[x])
PIPESites_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PIPESites_df['len_a'] = PIPESites_df['protein_a'].apply(lambda x: new_df_ps.Length.loc[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PIPESites_df['len_b'] = PIPESites_df['protein_b'].apply(lambda x: new_df_ps.Length.loc[x])


Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3,ranges_true,iou1,dice1,iou2,dice2,iou3,dice3,len_a,len_b
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]","[[544, 574], [2, 375]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,633,375
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]","[[128, 193], [128, 193]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,231,231
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]","[[247, 356], [247, 356]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,375,375
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]","[[47, 201], [47, 201]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,238,238
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]","[[74, 251], [74, 251]]",0.010193,0.020181,0.011003,0.021767,0.0,0.0,258,258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]","[[360, 623], [1266, 1558]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,668,1579
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]","[[55, 306], [310, 423]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,633,1229
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]","[[12, 185], [663, 844]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,209,1356
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]","[[70, 324], [88, 342]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,380,398


In [16]:

def dm(pred, true, len_a, len_b):
    
    assert pred[0][0] < pred[0][1]
    assert pred[1][0] < pred[1][1]
    assert true[0][0] < true[0][1]
    assert true[1][0] < true[1][1]
    
    delta_a = max(true[0][0]-pred[0][0], pred[0][1]-true[0][1], 0)/len_a
    delta_b = max(true[1][0]-pred[1][0], pred[1][1]-true[1][1], 0)/len_b
    
    result = ((delta_a**2 + delta_b**2)**0.5)/(2**0.5)
    
    return result

In [17]:
PIPESites_df['dm1'] = PIPESites_df.apply(lambda x: dm(x.ranges_pred1, x.ranges_true, x.len_a, x.len_b), axis=1)
PIPESites_df['dm2'] = PIPESites_df.apply(lambda x: dm(x.ranges_pred2, x.ranges_true, x.len_a, x.len_b), axis=1)
PIPESites_df['dm3'] = PIPESites_df.apply(lambda x: dm(x.ranges_pred3, x.ranges_true, x.len_a, x.len_b), axis=1)
PIPESites_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PIPESites_df['dm1'] = PIPESites_df.apply(lambda x: dm(x.ranges_pred1, x.ranges_true, x.len_a, x.len_b), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  PIPESites_df['dm2'] = PIPESites_df.apply(lambda x: dm(x.ranges_pred2, x.ranges_true, x.len_a, x.len_b), axis=1)


Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3,ranges_true,iou1,dice1,iou2,dice2,iou3,dice3,len_a,len_b,dm1,dm2,dm3
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]","[[544, 574], [2, 375]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,633,375,0.301615,0.325074,0.039143
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]","[[128, 193], [128, 193]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,231,231,0.226808,0.211413,0.226808
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]","[[247, 356], [247, 356]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,375,375,0.313404,0.319021,0.168328
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]","[[47, 201], [47, 201]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,238,238,0.112899,0.112899,0.112899
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]","[[74, 251], [74, 251]]",0.010193,0.020181,0.011003,0.021767,0.0,0.0,258,258,0.021926,0.021926,0.149614
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]","[[360, 623], [1266, 1558]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,668,1579,0.063226,0.009852,0.048693
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]","[[55, 306], [310, 423]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,633,1229,0.464309,0.498735,0.464309
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]","[[12, 185], [663, 844]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,209,1356,0.240628,0.234780,0.198797
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]","[[70, 324], [88, 342]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,380,398,0.136269,0.101269,0.106066


In [18]:
PIPESites_df[['iou1', 'dice1', 'dm1', 'iou2', 'dice2', 'dm2', 'iou3', 'dice3', 'dm3']].describe()

Unnamed: 0,iou1,dice1,dm1,iou2,dice2,dm2,iou3,dice3,dm3
count,111.0,111.0,111.0,111.0,111.0,111.0,111.0,111.0,111.0
mean,0.001687,0.003246,0.255006,0.000867,0.001691,0.277161,0.00355,0.006632,0.245483
std,0.008019,0.015353,0.1826,0.004665,0.009013,0.182791,0.015664,0.028578,0.191292
min,0.0,0.0,0.004064,0.0,0.0,0.005022,0.0,0.0,0.019999
25%,0.0,0.0,0.112468,0.0,0.0,0.130054,0.0,0.0,0.094496
50%,0.0,0.0,0.21774,0.0,0.0,0.231468,0.0,0.0,0.183419
75%,0.0,0.0,0.356788,0.0,0.0,0.424448,0.0,0.0,0.363129
max,0.051921,0.098717,0.807256,0.042484,0.081505,0.871023,0.129024,0.228558,0.70984


In [19]:
PIPESites_df.columns

Index(['protein_a', 'protein_b', 'ranges_pred1', 'ranges_pred2',
       'ranges_pred3', 'ranges_true', 'iou1', 'dice1', 'iou2', 'dice2', 'iou3',
       'dice3', 'len_a', 'len_b', 'dm1', 'dm2', 'dm3'],
      dtype='object')

## Use best of 3 DM

In [20]:
PIPESites_df['bestdm'] = PIPESites_df.apply(lambda x: np.argmin([x.dm1, x.dm2, x.dm3]) + 1, axis=1)
PIPESites_df

Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3,ranges_true,iou1,dice1,iou2,dice2,iou3,dice3,len_a,len_b,dm1,dm2,dm3,bestdm
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]","[[544, 574], [2, 375]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,633,375,0.301615,0.325074,0.039143,3
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]","[[128, 193], [128, 193]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,231,231,0.226808,0.211413,0.226808,2
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]","[[247, 356], [247, 356]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,375,375,0.313404,0.319021,0.168328,3
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]","[[47, 201], [47, 201]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,238,238,0.112899,0.112899,0.112899,1
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]","[[74, 251], [74, 251]]",0.010193,0.020181,0.011003,0.021767,0.0,0.0,258,258,0.021926,0.021926,0.149614,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]","[[360, 623], [1266, 1558]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,668,1579,0.063226,0.009852,0.048693,2
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]","[[55, 306], [310, 423]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,633,1229,0.464309,0.498735,0.464309,1
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]","[[12, 185], [663, 844]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,209,1356,0.240628,0.234780,0.198797,3
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]","[[70, 324], [88, 342]]",0.000000,0.000000,0.000000,0.000000,0.0,0.0,380,398,0.136269,0.101269,0.106066,2


In [21]:
def bestof(df):
    if df.bestdm == 1:
        return df[['iou1', 'dice1', 'dm1']].values
    elif df.bestdm == 2:
        return df[['iou2', 'dice2', 'dm2']].values
    elif df.bestdm == 3:
        return df[['iou3', 'dice3', 'dm3']].values

In [22]:
PIPESites_df[['iou_best', 'dice_best', 'dm_best']] = pd.DataFrame(PIPESites_df.apply(lambda x: bestof(x), axis=1).tolist(), index = PIPESites_df.index)

In [23]:
PIPESites_df

Unnamed: 0,protein_a,protein_b,ranges_pred1,ranges_pred2,ranges_pred3,ranges_true,iou1,dice1,iou2,dice2,...,dice3,len_a,len_b,dm1,dm2,dm3,bestdm,iou_best,dice_best,dm_best
0,Q12446,P60010,"[[274, 299], [356, 376]]","[[253, 282], [356, 376]]","[[509, 534], [356, 376]]","[[544, 574], [2, 375]]",0.000000,0.000000,0.000000,0.000000,...,0.0,633,375,0.301615,0.325074,0.039143,3,0.000000,0.000000,0.039143
1,Q12438,Q12438,"[[212, 232], [65, 89]]","[[212, 232], [71, 95]]","[[65, 89], [212, 232]]","[[128, 193], [128, 193]]",0.000000,0.000000,0.000000,0.000000,...,0.0,231,231,0.226808,0.211413,0.226808,2,0.000000,0.000000,0.211413
2,Q12329,Q12329,"[[82, 107], [356, 376]]","[[356, 376], [79, 107]]","[[160, 190], [356, 376]]","[[247, 356], [247, 356]]",0.000000,0.000000,0.000000,0.000000,...,0.0,375,375,0.313404,0.319021,0.168328,3,0.000000,0.000000,0.168328
3,Q12223,Q12223,"[[219, 239], [173, 199]]","[[173, 199], [219, 239]]","[[159, 185], [219, 239]]","[[47, 201], [47, 201]]",0.000000,0.000000,0.000000,0.000000,...,0.0,238,238,0.112899,0.112899,0.112899,1,0.000000,0.000000,0.112899
4,Q12189,Q12189,"[[181, 205], [239, 259]]","[[239, 259], [159, 185]]","[[239, 259], [20, 51]]","[[74, 251], [74, 251]]",0.010193,0.020181,0.011003,0.021767,...,0.0,258,258,0.021926,0.021926,0.149614,1,0.010193,0.020181,0.021926
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,P08018,P53599,"[[301, 327], [1560, 1580]]","[[472, 500], [1560, 1580]]","[[649, 669], [1368, 1409]]","[[360, 623], [1266, 1558]]",0.000000,0.000000,0.000000,0.000000,...,0.0,668,1579,0.063226,0.009852,0.048693,2,0.000000,0.000000,0.009852
107,P06782,P38717,"[[156, 202], [1210, 1230]]","[[438, 469], [1210, 1230]]","[[226, 255], [1210, 1230]]","[[55, 306], [310, 423]]",0.000000,0.000000,0.000000,0.000000,...,0.0,633,1229,0.464309,0.498735,0.464309,1,0.000000,0.000000,0.464309
108,P06780,P51862,"[[190, 210], [231, 262]]","[[190, 210], [243, 270]]","[[190, 210], [318, 350]]","[[12, 185], [663, 844]]",0.000000,0.000000,0.000000,0.000000,...,0.0,209,1356,0.240628,0.234780,0.198797,3,0.000000,0.000000,0.198797
109,P06245,P05986,"[[21, 59], [379, 399]]","[[175, 218], [379, 399]]","[[361, 381], [193, 236]]","[[70, 324], [88, 342]]",0.000000,0.000000,0.000000,0.000000,...,0.0,380,398,0.136269,0.101269,0.106066,2,0.000000,0.000000,0.101269


In [24]:
PIPESites_df[['iou_best', 'dice_best', 'dm_best']].describe()

Unnamed: 0,iou_best,dice_best,dm_best
count,111.0,111.0,111.0
mean,0.005023,0.009466,0.189915
std,0.017092,0.031436,0.173476
min,0.0,0.0,0.004064
25%,0.0,0.0,0.056092
50%,0.0,0.0,0.139391
75%,0.0,0.0,0.266704
max,0.129024,0.228558,0.667071


In [25]:
PIPESites_df[['iou1', 'dice1', 'dm1']].describe()

Unnamed: 0,iou1,dice1,dm1
count,111.0,111.0,111.0
mean,0.001687,0.003246,0.255006
std,0.008019,0.015353,0.1826
min,0.0,0.0,0.004064
25%,0.0,0.0,0.112468
50%,0.0,0.0,0.21774
75%,0.0,0.0,0.356788
max,0.051921,0.098717,0.807256
