## Retrieve interactions from BioGrid

In [4]:
import numpy as np
import pandas as pd
import os
import sys
import datetime
import time
import pickle
# import idconversion as idc
# from idconversion import idconversion as idc
from urllib.error import HTTPError

## Helper Functions


In [5]:
def time_stamper(tdate=datetime.datetime.now()):
    fmtdate = tdate.strftime('%Y-%m-%d.%H:%M:%S')
    return fmtdate

In [6]:
time_stamper(datetime.datetime.now())

'2019-06-28.15:22:28'

In [7]:
# dir(idc)


In [8]:
!ls 

2018-12-18.210000_result_mapped.txt  bioigrid_list.txt
BioGridDataDec2018.ipynb	       mapping_table.csv
biogrid_to_uniprot.tab.gz	       result_mapped.txt


In [9]:
os.getcwd()

'/home/user/Notebooks2019/PhD/BioGrid/BioGrid'

In [10]:
!cat result_mapped.txt | cut -f 1,2 | head

yourlist:M201812188471C63D39733769F8E060B506551E1201F3EBJ	Entry
131072	Q86XK7
131079	Q5VW00
131082	Q6ZR62
131091	Q5GFL6
131092	Q8WY41
131098	A6NHT5
131110	Q6ZRI0
131112	P59894
131113	Q8IXP5


In [11]:
# idc.show_mapping_table()

In [12]:
# Datadir = '/Users/ken/Documents/From_KEN_MAC/2018Data'
Datadir = '/media/user/FAT_BOY/2018LITTLEMAN_EXT/2018Data' 

In [13]:
biogridfile = os.path.join(Datadir, "BIOGRID-ALL-3.5.165.tab2.txt")

In [14]:
df = pd.read_csv(biogridfile, sep='\t')

  interactivity=interactivity, compiler=compiler, result=result)


In [15]:
df.head(2)

Unnamed: 0,#BioGRID Interaction ID,Entrez Gene Interactor A,Entrez Gene Interactor B,BioGRID ID Interactor A,BioGRID ID Interactor B,Systematic Name Interactor A,Systematic Name Interactor B,Official Symbol Interactor A,Official Symbol Interactor B,Synonyms Interactor A,...,Pubmed ID,Organism Interactor A,Organism Interactor B,Throughput,Score,Modification,Phenotypes,Qualifications,Tags,Source Database
0,103,6416,2318,112315,108607,-,-,MAP2K4,FLNC,JNKK|JNKK1|MAPKK4|MEK4|MKK4|PRKMK4|SAPKK-1|SAP...,...,9006895,9606,9606,Low Throughput,-,-,-,-,-,BIOGRID
1,117,84665,88,124185,106603,-,-,MYPN,ACTN2,CMD1DD|CMH22|MYOP|RCM4,...,11309420,9606,9606,Low Throughput,-,-,-,-,-,BIOGRID


## Get interactors

In [16]:
df.columns

Index(['#BioGRID Interaction ID', 'Entrez Gene Interactor A',
       'Entrez Gene Interactor B', 'BioGRID ID Interactor A',
       'BioGRID ID Interactor B', 'Systematic Name Interactor A',
       'Systematic Name Interactor B', 'Official Symbol Interactor A',
       'Official Symbol Interactor B', 'Synonyms Interactor A',
       'Synonyms Interactor B', 'Experimental System',
       'Experimental System Type', 'Author', 'Pubmed ID',
       'Organism Interactor A', 'Organism Interactor B', 'Throughput', 'Score',
       'Modification', 'Phenotypes', 'Qualifications', 'Tags',
       'Source Database'],
      dtype='object')

In [17]:
len(df['Organism Interactor A'].unique().tolist())

62

In [18]:
df.shape

(1583787, 24)

## Get Human Interactions

In [19]:
human_mask = (df['Organism Interactor A'] == 9606) & (df['Organism Interactor B'] == 9606)

In [20]:
df_human = df.loc[human_mask].reset_index()

In [21]:
df_human = df_human.drop(['index'], axis=1)

In [22]:
df_human.shape

(409173, 24)

In [23]:
df_human.head(2)

Unnamed: 0,#BioGRID Interaction ID,Entrez Gene Interactor A,Entrez Gene Interactor B,BioGRID ID Interactor A,BioGRID ID Interactor B,Systematic Name Interactor A,Systematic Name Interactor B,Official Symbol Interactor A,Official Symbol Interactor B,Synonyms Interactor A,...,Pubmed ID,Organism Interactor A,Organism Interactor B,Throughput,Score,Modification,Phenotypes,Qualifications,Tags,Source Database
0,103,6416,2318,112315,108607,-,-,MAP2K4,FLNC,JNKK|JNKK1|MAPKK4|MEK4|MKK4|PRKMK4|SAPKK-1|SAP...,...,9006895,9606,9606,Low Throughput,-,-,-,-,-,BIOGRID
1,117,84665,88,124185,106603,-,-,MYPN,ACTN2,CMD1DD|CMH22|MYOP|RCM4,...,11309420,9606,9606,Low Throughput,-,-,-,-,-,BIOGRID


In [24]:
df_human['Experimental System'].unique()

array(['Two-hybrid', 'Affinity Capture-Western', 'Reconstituted Complex',
       'FRET', 'Co-purification', 'Protein-peptide', 'Co-localization',
       'Affinity Capture-MS', 'Biochemical Activity',
       'Affinity Capture-RNA', 'Co-crystal Structure', 'Far Western',
       'Phenotypic Enhancement', 'Phenotypic Suppression',
       'Co-fractionation', 'Protein-RNA', 'Synthetic Rescue',
       'Affinity Capture-Luminescence', 'PCA', 'Dosage Rescue',
       'Negative Genetic', 'Dosage Lethality', 'Synthetic Growth Defect',
       'Proximity Label-MS', 'Synthetic Lethality', 'Positive Genetic'],
      dtype=object)

In [25]:
sub_mask = ['BioGRID ID Interactor A','BioGRID ID Interactor B', 
            'Experimental System Type', 'Score', 'Pubmed ID']

In [26]:
df_human_exp = df_human.loc[:,sub_mask]

In [27]:
df_human_exp.columns = ["BiogridA", "BiogridB", "ExperimentalSystem", "Score" ,"PubmedId"]

In [28]:
df_human_exp.head(2)

Unnamed: 0,BiogridA,BiogridB,ExperimentalSystem,Score,PubmedId
0,112315,108607,physical,-,9006895
1,124185,106603,physical,-,11309420


In [29]:
# df_human_exp['Score'].unique().tolist()

In [30]:
lsa = df_human_exp['BiogridA'].unique().tolist()
lsb = df_human_exp['BiogridB'].unique().tolist()

In [31]:
lsfull = set(lsa).union(set(lsb))

In [32]:
len(lsfull)

17299

In [33]:
lsN = list(set(lsa + lsb))

In [34]:
len(lsN)

17299

## Write BioGRID ids to a file

In [35]:
with open("bioigrid_list.txt", "w") as infile:
    for i in list(lsfull):
        infile.write("{}\n".format(i))
        

## Read mapped BioGRID IDs

In [36]:
mappedfile = os.path.join(os.path.curdir, "biogrid_to_uniprot.tab.gz")

In [37]:
dfmap = pd.read_csv(mappedfile, compression='gzip', sep='\t')

In [38]:
dfmap.head(2)

Unnamed: 0,yourlist:M20190627E5A08BB0B2D1C45B0C7BC3B55FD265562430386,Entry,Entry name,Status,Protein names,Gene names,Organism,Length
0,131072,Q86XK7,VSIG1_HUMAN,reviewed,V-set and immunoglobulin domain-containing pro...,VSIG1 GPA34,Homo sapiens (Human),387
1,131079,Q5VW00,DC122_HUMAN,reviewed,DDB1- and CUL4-associated factor 12-like prote...,DCAF12L2 WDR40C,Homo sapiens (Human),463


In [39]:
dfmap.rename(columns={'yourlist:M20190627E5A08BB0B2D1C45B0C7BC3B55FD265562430386':'BiogridID', 
                     'Entry':"UniprotID"}, inplace=True)

dfmap.shape

In [40]:
dfmap.shape

(16145, 8)

In [41]:
dfmap.head(3)

Unnamed: 0,BiogridID,UniprotID,Entry name,Status,Protein names,Gene names,Organism,Length
0,131072,Q86XK7,VSIG1_HUMAN,reviewed,V-set and immunoglobulin domain-containing pro...,VSIG1 GPA34,Homo sapiens (Human),387
1,131079,Q5VW00,DC122_HUMAN,reviewed,DDB1- and CUL4-associated factor 12-like prote...,DCAF12L2 WDR40C,Homo sapiens (Human),463
2,131082,Q6ZR62,RTL4_HUMAN,reviewed,Retrotransposon Gag-like protein 4 (Mammalian ...,RTL4 MAR3 MART4 ZCCHC16,Homo sapiens (Human),310


In [42]:
# dfmap.set_index('BiogridID', inplace=True)

In [43]:
dfmap.head(2)

Unnamed: 0,BiogridID,UniprotID,Entry name,Status,Protein names,Gene names,Organism,Length
0,131072,Q86XK7,VSIG1_HUMAN,reviewed,V-set and immunoglobulin domain-containing pro...,VSIG1 GPA34,Homo sapiens (Human),387
1,131079,Q5VW00,DC122_HUMAN,reviewed,DDB1- and CUL4-associated factor 12-like prote...,DCAF12L2 WDR40C,Homo sapiens (Human),463


## Obtain data with repeating IDs

In [44]:
df_repeats = dfmap.loc[dfmap['BiogridID'].apply(lambda x: len(x.split(',')) >1)]

In [45]:
df_repeats.shape

(92, 8)

In [46]:
df_repeats.head()

Unnamed: 0,BiogridID,UniprotID,Entry name,Status,Protein names,Gene names,Organism,Length
205,132127132129,P60411,KR109_HUMAN,reviewed,Keratin-associated protein 10-9 (High sulfur k...,KRTAP10-9 KAP10.9 KAP18-9 KRTAP10.9 KRTAP18-9 ...,Homo sapiens (Human),292
221,132320132321,A5PLL7,TM189_HUMAN,reviewed,Transmembrane protein 189,TMEM189 KUA,Homo sapiens (Human),270
544,608768610778,Q6P1K8,T2H2L_HUMAN,reviewed,General transcription factor IIH subunit 2-lik...,GTF2H2C; GTF2H2C_2 GTF2H2D,Homo sapiens (Human),395
551,1320441529300,Q8N2Z9,CENPS_HUMAN,reviewed,Centromere protein S (CENP-S) (Apoptosis-induc...,CENPS APITD1 FAAP16 MHF1,Homo sapiens (Human),138
552,1315071529312,Q7Z6K5,ARPIN_HUMAN,reviewed,Arpin (Arp2/3 inhibition protein),ARPIN C15orf38,Homo sapiens (Human),226


## Split repeats



In [47]:
df_repeats_split = df_repeats.BiogridID.apply(pd.Series).\
    merge(df_repeats, right_index = True, left_index = True).\
    drop(['BiogridID'], axis=1).\
    melt(id_vars = ['UniprotID', 0], value_name = 'Gene')
#     drop('variable', axis=1)

In [48]:
df_repeats_split.head()

Unnamed: 0,UniprotID,0,variable,Gene
0,P60411,132127132129,Entry name,KR109_HUMAN
1,A5PLL7,132320132321,Entry name,TM189_HUMAN
2,Q6P1K8,608768610778,Entry name,T2H2L_HUMAN
3,Q8N2Z9,1320441529300,Entry name,CENPS_HUMAN
4,Q7Z6K5,1315071529312,Entry name,ARPIN_HUMAN


In [49]:
dict_repeats ={}
for _, row in df_repeats.iterrows():
    ids, protein = str(row.BiogridID).split(','), row.UniprotID
    for  id in ids:
        dict_repeats[id]=[protein]
        print(id, protein)
    print("="*10)
    
    

132127 P60411
132129 P60411
132320 A5PLL7
132321 A5PLL7
608768 Q6P1K8
610778 Q6P1K8
132044 Q8N2Z9
1529300 Q8N2Z9
131507 Q7Z6K5
1529312 Q7Z6K5
106773 P04745
106774 P04745
106775 P04745
1529410 P35226
107116 P35226
107181 P0C0L4
107182 P0C0L4
139227 P12532
107579 P12532
107829 P0DML3
107830 P0DML3
755774 P30046
108018 P30046
608786 P59665
108031 P59665
1529403 P49356
108627 P49356
608768 Q13888
109221 Q13888
109272 P84243
109273 P84243
109289 P69905
109290 P69905
109662 P01562
109670 P01562
110005 P43631
110008 P43631
608698 P43362
110282 P43362
938509 Q02080
110371 Q02080
1529413 P41271
110761 P41271
3195276 Q15269
111780 Q15269
1529331 P18621
112059 P18621
112490 Q16637
112491 Q16637
608281 Q16385
112635 Q16385
138644 P0DMM9
112687 P0DMM9
138644 P0DMN0
112687 P0DMN0
132321 Q13404
113183 Q13404
112145 P62854
3193504 P62854
109266 P04908
113931 P04908
593221 Q6FI13
113933 Q6FI13
109270 P62807
113935 P62807
113939 P62807
113940 P62807
113942 P62807
113943 P62807
936685 P0C869
114229 P0C86

In [50]:
dict_repeats

{'132127': ['P60411'],
 '132129': ['P60411'],
 '132320': ['A5PLL7'],
 '132321': ['Q13404'],
 '608768': ['Q13888'],
 '610778': ['Q6P1K8'],
 '132044': ['Q8N2Z9'],
 '1529300': ['Q8N2Z9'],
 '131507': ['Q7Z6K5'],
 '1529312': ['Q7Z6K5'],
 '106773': ['P04745'],
 '106774': ['P04745'],
 '106775': ['P04745'],
 '1529410': ['P35226'],
 '107116': ['P35226'],
 '107181': ['P0C0L4'],
 '107182': ['P0C0L4'],
 '139227': ['P12532'],
 '107579': ['P12532'],
 '107829': ['P0DML3'],
 '107830': ['P0DML3'],
 '755774': ['P30046'],
 '108018': ['P30046'],
 '608786': ['P59665'],
 '108031': ['P59665'],
 '1529403': ['P49356'],
 '108627': ['P49356'],
 '109221': ['Q13888'],
 '109272': ['P84243'],
 '109273': ['P84243'],
 '109289': ['P69905'],
 '109290': ['P69905'],
 '109662': ['P01562'],
 '109670': ['P01562'],
 '110005': ['P43631'],
 '110008': ['P43631'],
 '608698': ['P43362'],
 '110282': ['P43362'],
 '938509': ['Q02080'],
 '110371': ['Q02080'],
 '1529413': ['P41271'],
 '110761': ['P41271'],
 '3195276': ['Q15269'],
 '111

## Create dictionary from single ids

In [51]:
dfsingle = dfmap.loc[~dfmap['BiogridID'].apply(lambda x: len(x.split(',')) >1)]

In [52]:
dfsingle.shape

(16053, 8)

In [53]:
idgroup = dfsingle.groupby(['BiogridID'])['UniprotID']

In [54]:
dict_singles = {k:list(v) for k, v in idgroup}

In [55]:
len(dict_singles)

16010

In [56]:
dict_singles

{'106523': ['P04217'],
 '106524': ['P01023'],
 '106527': ['P18440'],
 '106528': ['P11245'],
 '106530': ['P01011'],
 '106532': ['Q13685'],
 '106533': ['Q16613'],
 '106534': ['P49588'],
 '106536': ['P80404'],
 '106537': ['O95477'],
 '106538': ['Q9BZC7'],
 '106539': ['Q99758'],
 '106540': ['O75027'],
 '106541': ['Q8NE71'],
 '106542': ['P78363'],
 '106543': ['P00519'],
 '106544': ['P19801'],
 '106545': ['P42684'],
 '106546': ['P16442'],
 '106547': ['Q12979'],
 '106548': ['P09110'],
 '106549': ['Q13085'],
 '106550': ['O00763'],
 '106551': ['P28330'],
 '106552': ['P11310'],
 '106553': ['P16219'],
 '106554': ['P45954'],
 '106555': ['P49748'],
 '106556': ['P24752'],
 '106557': ['Q9BWD1'],
 '106558': ['Q16515'],
 '106559': ['P78348'],
 '106561': ['P22303'],
 '106563': ['P53396'],
 '106564': ['P21399'],
 '106565': ['P10323'],
 '106566': ['Q99798'],
 '106567': ['Q15067'],
 '106568': ['P24666'],
 '106569': ['P11117'],
 '106570': ['P13686'],
 '106571': ['P15309'],
 '106573': ['P68133'],
 '106574': 

In [57]:
## Merge dictionaries

In [58]:
dict_repeats.update(dict_singles)

In [59]:
len(dict_repeats)

16194

In [60]:
dict_biogrid = dict_repeats

In [61]:
del dict_repeats
del dict_singles

## Convert BioGrid ids to uniprot

In [62]:
df_human_exp.head()

Unnamed: 0,BiogridA,BiogridB,ExperimentalSystem,Score,PubmedId
0,112315,108607,physical,-,9006895
1,124185,106603,physical,-,11309420
2,106605,108625,physical,-,8599089
3,108894,111384,physical,-,10938104
4,112038,112651,physical,-,10875894


In [63]:
def getProtein(x):
    try:
        return dict_biogrid[x]
    except KeyError:
        return "NNN"

In [167]:
df_human_exp['ProtA']= df_human_exp['BiogridA'].apply(lambda x: getProtein(str(x)))

In [65]:
df_human_exp['ProtB']= df_human_exp['BiogridB'].apply(lambda x: getProtein(str(x)))

In [178]:
df_human_exp['P1'] = df_human_exp['ProtA'].apply(lambda x: str(x).strip('[]') if len(x)==1 else x)

In [180]:
df_human_exp['P2'] = df_human_exp['ProtB'].apply(lambda x: str(x).strip('[]') if len(x)==1 else x)

In [181]:
df_human_exp.head(30)

Unnamed: 0,BiogridA,BiogridB,ExperimentalSystem,Score,PubmedId,ProtA,ProtB,NumProteins,P1,P2
0,112315,108607,physical,-,9006895,[P45985],[Q14315],"(1, 1)",'P45985','Q14315'
1,124185,106603,physical,-,11309420,[Q86TC9],[P35609],"(1, 1)",'Q86TC9','P35609'
2,106605,108625,physical,-,8599089,[Q04771],[P49354],"(1, 1)",'Q04771','P49354'
3,108894,111384,physical,-,10938104,[P23769],[P29590],"(1, 1)",'P23769','P29590'
4,112038,112651,physical,-,10875894,[P15927],[P40763],"(1, 1)",'P15927','P40763'
5,106870,116775,physical,-,10747089,[P84077],[Q9NZ52],"(1, 1)",'P84077','Q9NZ52'
6,106872,117174,physical,-,9038142,[P61204],[P53365],"(1, 1)",'P61204','P53365'
7,106872,118084,physical,-,9038142,[P61204],[P53367],"(1, 1)",'P61204','P53367'
8,119970,106728,physical,-,15231747,[Q8IZH2],[P04075],"(1, 1)",'Q8IZH2','P04075'
9,106848,115769,physical,-,9843960,[P05067],[Q92624],"(1, 1)",'P05067','Q92624'


In [182]:
df_human_exp.to_csv("biogrid_human_ppi_28_06_2019.tab", encoding='utf8', sep='\t')

In [172]:
df_human_exp.dtypes

BiogridA               int64
BiogridB               int64
ExperimentalSystem    object
Score                 object
PubmedId               int64
ProtA                 object
ProtB                 object
NumProteins           object
P1                    object
dtype: object

In [72]:
df_human_exp["NumProteins"] = df_human_exp[['ProtA', 'ProtB']].\
    apply(lambda x: (len(x[0]), len(x[1])), axis=1)

In [75]:
df_human_exp.NumProteins.unique()

array([(1, 1), (1, 2), (1, 3), (2, 1), (3, 1), (2, 3), (2, 2), (3, 3),
       (4, 1), (1, 4), (3, 2), (3, 4), (4, 4), (4, 3), (2, 4)],
      dtype=object)

In [68]:
# df_human_exp.loc[df_human_exp['Score'] > '0.5']

In [70]:
len(df_human_exp["PubmedId"].unique())

26529

In [66]:
%who

Datadir	 HTTPError	 biogridfile	 datetime	 df	 df_human	 df_human_exp	 df_repeats	 df_repeats_split	 
dfmap	 dfsingle	 dict_biogrid	 getProtein	 human_mask	 i	 id	 idgroup	 ids	 
infile	 lsN	 lsa	 lsb	 lsfull	 mappedfile	 np	 os	 pd	 
pickle	 protein	 row	 sub_mask	 sys	 time	 time_stamper	 


In [67]:
df_human_exp.dtypes

BiogridA               int64
BiogridB               int64
ExperimentalSystem    object
Score                 object
PubmedId               int64
ProtA                 object
ProtB                 object
dtype: object

In [68]:
df_human_exp['Score']=df_human_exp.Score.str.replace("-", "100.0", regex=True)

In [76]:
# df_human_exp.loc[df_human_exp.Score == '100.0']

## Create a mapping dictionary object

In [None]:
idgroup = dfmap.groupby(['BiogridID'])['UniprotID']

In [None]:
dict_biogrid = {k:list(v) for k, v in idgroup}

In [None]:
dict_biogrid

In [None]:
# dfmap.loc[(dfmap['BiogridID'] > '107180') & (dfmap['BiogridID'] < '107185')]

In [None]:
lsboth = set(lsa).intersection(set(lsb))

In [None]:
lsBiogridProts = list(lsfull)[:]

In [None]:
len(lsboth)

In [None]:
lsBiogridProts.__len__()

## Break Long Lists into Chunks

In [None]:
def chunks(l, n=1000):
    for i in range(0, len(l), n):
        yield l[i:i+n]

In [None]:
lschunks = chunks(lsBiogridProts)
topchunks = (i[:10] for i in lschunks)

try:
    mapped = (idc.idmapping('BIOGRID_ID', 'ACC', lsids=j) for j in topchunks)
except KeyboardInterrupt as ke:
    print("Interrupted by the user")


In [None]:
# next(mapped)

In [None]:
!ls

## Process chunks

In [None]:
%%time
def process_chunks(idfro, idto, lstchunks):
    count = 0
    for chunk in lstchunks:
        print(len(chunk))
#         time.sleep(2)
        count +=1
#         yield len(chunk)
#         print("processing chunk_{}...".format(count))
# #         yield chunk[:10]
#         yield idc.idmapping(idfro,idto, lsids=chunk)
        

In [None]:
%%time
my_chunks =  process_chunks('BIOGRID_ID', 'ACC', lsChunks)

In [None]:
my_chunks

In [None]:
list(p)

In [None]:
!ls

In [None]:
%%time
idc.idmapping('BIOGRID_ID','ACC', lsids=lsBiogridProts)

In [None]:
!ls

In [None]:
!cut -f 1,2 result_mapped.txt | head 

In [None]:
" ".join(list(map(lambda x: str(x), IDLIST)))

In [None]:
idc.idconversion.IDLIST

In [None]:
la = (i for i in range(10))

In [None]:
next(la)

In [None]:
squares = (i**2 for i in la)

In [116]:
data = [['1','2', 0.5], ['2','3',0.6], ['1','3', 0.8], ['3','69', 0.9]]

In [117]:
dataB = [['1','2', 0.2], ['2','3',0.9], ['1','3',0.7], ['4','20',0.8]]

In [119]:
dfA = pd.DataFrame(data, columns=['prot1', 'prot2', 'score'])

In [120]:
dfA

Unnamed: 0,prot1,prot2,score
0,1,2,0.5
1,2,3,0.6
2,1,3,0.8
3,3,69,0.9


In [121]:
dfB = pd.DataFrame(dataB, columns=['prot1', 'prot2', 'score'])

In [122]:
dfB

Unnamed: 0,prot1,prot2,score
0,1,2,0.2
1,2,3,0.9
2,1,3,0.7
3,4,20,0.8


In [159]:
dfC = pd.merge(dfA, dfB,  how='left', left_on=list(set(['prot1','prot2'])), right_on = list(set(['prot2','prot1'])))

In [160]:
dfC

Unnamed: 0,prot1,prot2,score_x,score_y
0,1,2,0.5,0.2
1,2,3,0.6,0.9
2,1,3,0.8,0.7
3,3,69,0.9,


In [149]:
from functools import reduce

In [None]:
## Aggregate scores from different methods

In [154]:
def naive_score(*args):
    scores = []
    for arg in args:
        scores.append(1.0-arg)
    return 1-reduce(lambda x, y: (x*y), scores )

In [156]:
naive_score(0.6, 0.9)

0.96

In [147]:
# dfC["total_score"] = dfC[['score_x', 'score_y']].apply(lambda x: (1-(1-x[0])*(1-x[1])), axis=1)

In [161]:
dfC["total_score"] = dfC[['score_x', 'score_y']].apply(lambda x: naive_score(*x), axis=1)

In [162]:
dfC

Unnamed: 0,prot1,prot2,score_x,score_y,total_score
0,1,2,0.5,0.2,0.6
1,2,3,0.6,0.9,0.96
2,1,3,0.8,0.7,0.94
3,3,69,0.9,,
