In [1]:
import pandas as pd

import networkx as nx

In [2]:
protein_list = pd.read_csv('protein_list.csv')
protein_list

Unnamed: 0.1,Unnamed: 0,Protein1_ID
0,0,P03428
1,1,P03431
2,2,P03433
3,3,P03452
4,4,P03466
...,...,...
15680,15680,Q6NUS8
15681,15681,P12018
15682,15682,Q96IU2
15683,15683,Q6PEW1


In [3]:
# protein_class =  pd.read_csv('protein_class.csv')
# protein_class

In [4]:
## Read dataframe
preds = pd.read_csv('hvppi_preds/concat_all_predictions.csv').sort_values(by=['Score'], ascending=False)
preds

Unnamed: 0,Pro1ID,Pro2ID,Score,Interaction,Segment,IAV Strain,human_pname_raw
390327,Q99729,P03496,0.990,yes,NS1,PR8,ROAA_HUMAN
113249,P51991,P03496,0.988,yes,NS1,PR8,ROA3_HUMAN
453098,Q14103,P03496,0.988,yes,NS1,PR8,HNRPD_HUMAN
423947,Q12906,P03496,0.985,yes,NS1,PR8,ILF3_HUMAN
42114,P09651,P03496,0.983,yes,NS1,PR8,ROA1_HUMAN
...,...,...,...,...,...,...,...
44556,Q0VDI3,P03437,0.000,no,HA,Aichi,TM267_HUMAN
160340,P54849,P03437,0.000,no,HA,Aichi,EMP1_HUMAN
629585,Q96RI0,P03437,0.000,no,HA,Aichi,PAR4_HUMAN
556031,Q9Y5I7,P03437,0.000,no,HA,Aichi,CLD16_HUMAN


In [5]:
pos = preds[preds['Score'] >= 0.143].reset_index(drop=True)
neg = preds[preds['Score'] < 0.143].reset_index(drop=True)

print(len(pos))
print(len(neg))

91217
550187


In [6]:
t1 = pos[pos['Score'] >= 0.375].reset_index(drop=True)
t2 = pos[(pos['Score'] >= 0.212) & (pos['Score'] < 0.375)].reset_index(drop=True)
t3 = pos[(pos['Score'] >= 0.143) & (pos['Score'] < 0.212)].reset_index(drop=True)

print(len(t1))
print(len(t2))
print(len(t3))

3738
26044
61435


In [7]:
## Positive samples
preds_f = pos[['Pro1ID', 'Pro2ID', 'Score']]
preds_f = preds_f.rename(columns = {'Pro1ID': 'Protein1_ID', 'Pro2ID': 'Protein2_ID'}).sort_values(by=['Score'], ascending=False)
preds_f

Unnamed: 0,Protein1_ID,Protein2_ID,Score
0,Q99729,P03496,0.990
2,Q14103,P03496,0.988
1,P51991,P03496,0.988
3,Q12906,P03496,0.985
4,P09651,P03496,0.983
...,...,...,...
89799,O15162,I6TAH8,0.143
89798,Q7Z460,C3W5R9,0.143
89797,Q9UI30,P03431,0.143
89796,P42262,P03508,0.143


In [8]:
## Map to index
def map_to_index(ip_df):
    
    m_df = pd.merge(ip_df, protein_list, on=['Protein1_ID'])
    m_df = pd.merge(m_df, protein_list, left_on=['Protein2_ID'], right_on=['Protein1_ID'])

    m_df = m_df[['Unnamed: 0_x', 'Unnamed: 0_y']]
    m_df.columns = ['Protein1_ID', 'Protein2_ID']
    
    return m_df

In [9]:
## All positives
preds_idx = map_to_index(preds_f)
preds_idx

Unnamed: 0,Protein1_ID,Protein2_ID
0,9561,7
1,11092,7
2,2803,7
3,10381,7
4,1068,7
...,...,...
91212,11443,30
91213,1201,30
91214,4141,30
91215,3817,30


In [10]:
## Get number of nodes involved in interaction

print(len(preds_idx['Protein2_ID'].unique()))
print(len(preds_idx['Protein1_ID'].unique()))

num_nodes = len(preds_idx['Protein1_ID'].unique()) + len(preds_idx['Protein2_ID'].unique())
print(num_nodes)

41
12397
12438


In [11]:
## Create edgelist
edge_arr = preds_idx.to_numpy()
edge_arr

array([[ 9561,     7],
       [11092,     7],
       [ 2803,     7],
       ...,
       [ 4141,    30],
       [ 3817,    30],
       [  178,    30]], dtype=int64)

## Edge list

In [77]:
tt_df = pd.read_csv('./hvppi_preds/hvppi_edgelist_idx.csv').sort_values(by=['Score'], ascending=False)
tt_df

Unnamed: 0.1,Unnamed: 0,Protein1_ID,Protein2_ID,Score,Class,Label,Class_2,Class_3,Label_3,Label_2
0,0,9561,7,0.990,0.99,2,0.99,pos,1,5
1,1,11092,7,0.988,0.99,2,0.99,pos,1,5
2,2,2803,7,0.988,0.99,2,0.99,pos,1,5
3,3,10381,7,0.985,0.99,2,0.99,pos,1,5
4,4,1068,7,0.983,0.99,2,0.99,pos,1,5
...,...,...,...,...,...,...,...,...,...,...
271999,271999,14710,30,0.001,neg,0,<=0.047,neg,0,0
271998,271998,4733,11,0.001,neg,0,<=0.047,neg,0,0
271997,271997,1422,11,0.001,neg,0,<=0.047,neg,0,0
271996,271996,14299,11,0.001,neg,0,<=0.047,neg,0,0


In [79]:
e_pos = tt_df[tt_df['Score'] >= 0.143].reset_index(drop=True)
e_pos

Unnamed: 0.1,Unnamed: 0,Protein1_ID,Protein2_ID,Score,Class,Label,Class_2,Class_3,Label_3,Label_2
0,0,9561,7,0.990,0.99,2,0.99,pos,1,5
1,1,11092,7,0.988,0.99,2,0.99,pos,1,5
2,2,2803,7,0.988,0.99,2,0.99,pos,1,5
3,3,10381,7,0.985,0.99,2,0.99,pos,1,5
4,4,1068,7,0.983,0.99,2,0.99,pos,1,5
...,...,...,...,...,...,...,...,...,...,...
48877,48300,8841,9,0.143,0.90,3,0.90,pos,1,3
48878,48301,7721,9,0.143,0.90,3,0.90,pos,1,3
48879,48302,3274,9,0.143,0.90,3,0.90,pos,1,3
48880,48303,3431,9,0.143,0.90,3,0.90,pos,1,3


In [80]:
e_pos_idx = e_pos[['Protein1_ID', 'Protein2_ID']]
e_pos_idx

Unnamed: 0,Protein1_ID,Protein2_ID
0,9561,7
1,11092,7
2,2803,7
3,10381,7
4,1068,7
...,...,...
48877,8841,9
48878,7721,9
48879,3274,9
48880,3431,9


In [81]:
## Create edgelist
edge_arr = e_pos_idx.to_numpy()
edge_arr

array([[ 9561,     7],
       [11092,     7],
       [ 2803,     7],
       ...,
       [ 3274,     9],
       [ 3431,     9],
       [ 3191,     9]], dtype=int64)

## networkx

In [82]:
## Construct graph
G = nx.from_edgelist(edge_arr)

In [83]:
## Get Degree of each node

my_degrees = G.degree()
degree_values = dict(my_degrees).values() ## Convert to dictionary

list_my_degs = list(my_degrees)
print(list_my_degs)

[(9561, 13), (7, 10982), (11092, 12), (2803, 14), (10381, 16), (1068, 16), (6280, 15), (13956, 15), (8653, 15), (1665, 11), (1249, 16), (10363, 13), (6973, 10), (1841, 13), (10138, 16), (9, 6411), (1641, 12), (11386, 10), (6779, 13), (2002, 13), (3833, 13), (15351, 11), (6259, 14), (5171, 12), (478, 19), (13380, 10), (15256, 14), (1382, 13), (9575, 13), (1400, 13), (6162, 14), (10814, 15), (4583, 10), (7756, 11), (5375, 17), (1248, 14), (15262, 15), (5763, 11), (1844, 12), (8326, 6), (15263, 15), (791, 10), (4596, 14), (951, 14), (5885, 13), (3041, 12), (759, 12), (10928, 10), (4597, 14), (14994, 13), (10790, 12), (1379, 13), (8381, 12), (1754, 11), (2056, 12), (2055, 17), (1921, 12), (3925, 12), (294, 9), (1126, 15), (6763, 14), (3907, 14), (4879, 8), (15101, 8), (1540, 9), (7748, 15), (8310, 9), (3087, 13), (10755, 17), (4, 1487), (952, 15), (11924, 6), (12976, 16), (743, 17), (6868, 17), (953, 6), (8961, 15), (10742, 11), (2556, 9), (12680, 5), (9011, 9), (3410, 7), (1983, 10), (390

In [84]:
degree_values

dict_values([13, 10982, 12, 14, 16, 16, 15, 15, 15, 11, 16, 13, 10, 13, 16, 6411, 12, 10, 13, 13, 13, 11, 14, 12, 19, 10, 14, 13, 13, 13, 14, 15, 10, 11, 17, 14, 15, 11, 12, 6, 15, 10, 14, 14, 13, 12, 12, 10, 14, 13, 12, 13, 12, 11, 12, 17, 12, 12, 9, 15, 14, 14, 8, 8, 9, 15, 9, 13, 17, 1487, 15, 6, 16, 17, 17, 6, 15, 11, 9, 5, 9, 7, 10, 9, 11, 17, 14, 12, 8, 9, 8, 13, 6, 4, 11, 16, 17, 7, 15, 10, 9, 14, 12, 15, 15, 16, 14, 14, 14, 13, 8, 14, 15, 13, 8, 5, 11, 11, 16, 18, 13, 9721, 9, 6, 4978, 13, 11, 8, 15, 15, 13, 8, 11, 12, 11, 14, 14, 16, 12, 18, 10, 12, 6, 10, 8, 13, 9, 6, 15, 7, 7, 10, 10, 8, 6, 12, 11, 9, 12, 8, 6, 1741, 9, 18, 12, 5, 7, 14, 11, 16, 13, 8, 7205, 11, 9, 13, 6, 9, 10, 12, 5, 9, 17, 12, 18, 13, 14, 14, 10, 7, 13, 11, 7, 7, 17, 7, 14, 8, 15, 5, 16, 17, 8, 14, 13, 14, 8, 9, 4, 872, 13, 7, 8, 692, 11, 12, 10, 17, 14, 3, 5, 7, 12, 13, 16, 5, 9, 15, 15, 5, 18, 4, 15, 14, 11, 7, 11, 16, 7, 9, 10, 10, 9, 16, 14, 13, 14, 7, 6, 9, 3, 11, 13, 4, 15, 8, 6, 14, 7, 13, 9, 5, 6,

In [85]:
list_degree_values = list(degree_values)
list_degree_values

[13,
 10982,
 12,
 14,
 16,
 16,
 15,
 15,
 15,
 11,
 16,
 13,
 10,
 13,
 16,
 6411,
 12,
 10,
 13,
 13,
 13,
 11,
 14,
 12,
 19,
 10,
 14,
 13,
 13,
 13,
 14,
 15,
 10,
 11,
 17,
 14,
 15,
 11,
 12,
 6,
 15,
 10,
 14,
 14,
 13,
 12,
 12,
 10,
 14,
 13,
 12,
 13,
 12,
 11,
 12,
 17,
 12,
 12,
 9,
 15,
 14,
 14,
 8,
 8,
 9,
 15,
 9,
 13,
 17,
 1487,
 15,
 6,
 16,
 17,
 17,
 6,
 15,
 11,
 9,
 5,
 9,
 7,
 10,
 9,
 11,
 17,
 14,
 12,
 8,
 9,
 8,
 13,
 6,
 4,
 11,
 16,
 17,
 7,
 15,
 10,
 9,
 14,
 12,
 15,
 15,
 16,
 14,
 14,
 14,
 13,
 8,
 14,
 15,
 13,
 8,
 5,
 11,
 11,
 16,
 18,
 13,
 9721,
 9,
 6,
 4978,
 13,
 11,
 8,
 15,
 15,
 13,
 8,
 11,
 12,
 11,
 14,
 14,
 16,
 12,
 18,
 10,
 12,
 6,
 10,
 8,
 13,
 9,
 6,
 15,
 7,
 7,
 10,
 10,
 8,
 6,
 12,
 11,
 9,
 12,
 8,
 6,
 1741,
 9,
 18,
 12,
 5,
 7,
 14,
 11,
 16,
 13,
 8,
 7205,
 11,
 9,
 13,
 6,
 9,
 10,
 12,
 5,
 9,
 17,
 12,
 18,
 13,
 14,
 14,
 10,
 7,
 13,
 11,
 7,
 7,
 17,
 7,
 14,
 8,
 15,
 5,
 16,
 17,
 8,
 14,
 13,
 14,
 8,
 9,
 

### Filter degree range

In [86]:
from statistics import mean

'''
    IAV node 
'''
## Filter IAV (index, degree) tuple
f_iav_nodes = filter(lambda c: c[0] <= 40, list_my_degs)
iav_nodes_lst = list(f_iav_nodes)
print(iav_nodes_lst)

# print(iav_nodes_lst[0][1]) ## Get degree of each node

### Average degree
all_iav_nodes = []

for i in range(len(iav_nodes_lst)):
    all_iav_nodes.append(iav_nodes_lst[i][1])
    
print(mean(all_iav_nodes))

[(7, 10982), (9, 6411), (4, 1487), (1, 9721), (0, 4978), (8, 1741), (2, 7205), (31, 872), (6, 692), (11, 482), (5, 382), (3, 350), (38, 324), (35, 151), (10, 626), (28, 301), (19, 239), (25, 138), (17, 109), (27, 45), (22, 73), (40, 64), (30, 79), (34, 49), (33, 185), (18, 42), (12, 73), (36, 114), (24, 90), (14, 74), (20, 311), (32, 62), (16, 73), (21, 100), (39, 72), (26, 41), (37, 36), (23, 45), (13, 49), (29, 8), (15, 6)]
1192.2439024390244


In [87]:
'''
    Human node
'''

f_human_nodes = filter(lambda c: c[0] > 40, list_my_degs)
human_nodes_lst = list(f_human_nodes)
print(len(human_nodes_lst)) ## 12, 438 - 41 = 12, 397

### Average degree
all_human_nodes = []

for i in range(len(human_nodes_lst)):
    all_human_nodes.append(human_nodes_lst[i][1])
    
print(mean(all_human_nodes))

12396
3.9433688286544046


In [46]:
## Filter list
filtered = filter(lambda f_deg: f_deg >= 10, list_degree_values)
list_filtered = list(filtered)

# print(list_filtered)
print(len(list_filtered))

3292


In [47]:
## Loop get index 
idx_arr = []

for i in range(len(list_filtered)):
    node_idx = list_degree_values.index(list_filtered[i]) ## Get tuple
    
#     print(list(my_degrees)[node_idx][0]) ## (index, degree)
    print(list(my_degrees)[node_idx])
    idx_arr.append(list(my_degrees)[node_idx][0])
    
# print(idx_arr)

(9561, 40)
(7, 10985)
(11092, 39)
(11092, 39)
(9561, 40)
(9561, 40)
(6280, 41)
(9561, 40)
(6280, 41)
(1665, 30)
(6280, 41)
(1841, 35)
(6973, 31)
(9561, 40)
(6280, 41)
(1641, 38)
(11386, 26)
(6779, 36)
(11092, 39)
(11386, 26)
(15351, 32)
(5171, 28)
(6259, 37)
(13380, 25)
(6280, 41)
(1641, 38)
(11092, 39)
(11386, 26)
(9561, 40)
(11092, 39)
(1641, 38)
(4583, 23)
(13380, 25)
(6779, 36)
(6280, 41)
(6259, 37)
(4583, 23)
(6259, 37)
(8326, 15)
(6259, 37)
(6779, 36)
(11092, 39)
(11092, 39)
(11092, 39)
(10928, 24)
(759, 34)
(4583, 23)
(11092, 39)
(14994, 33)
(6973, 31)
(759, 34)
(6779, 36)
(1841, 35)
(9561, 40)
(5171, 28)
(1921, 22)
(1921, 22)
(6779, 36)
(6280, 41)
(6280, 41)
(4879, 20)
(6280, 41)
(15101, 21)
(11386, 26)
(11092, 39)
(8310, 17)
(14994, 33)
(6280, 41)
(9561, 40)
(8326, 15)
(11092, 39)
(6280, 41)
(6280, 41)
(953, 14)
(6280, 41)
(1921, 22)
(12680, 11)
(10928, 24)
(4879, 20)
(4879, 20)
(4583, 23)
(13380, 25)
(9561, 40)
(1641, 38)
(6779, 36)
(1921, 22)
(12180, 27)
(9561, 40)
(1921, 22

(759, 34)
(4583, 23)
(11386, 26)
(11060, 18)
(6470, 19)
(6470, 19)
(8326, 15)
(13380, 25)
(13380, 25)
(9561, 40)
(6838, 12)
(6838, 12)
(11060, 18)
(12680, 11)
(11092, 39)
(8326, 15)
(5855, 16)
(8326, 15)
(12680, 11)
(8310, 17)
(6838, 12)
(6470, 19)
(6838, 12)
(13380, 25)
(1841, 35)
(8310, 17)
(14994, 33)
(4879, 20)
(5855, 16)
(7090, 13)
(1665, 30)
(8310, 17)
(1641, 38)
(5855, 16)
(6838, 12)
(8326, 15)
(5855, 16)
(5171, 28)
(1841, 35)
(7090, 13)
(8326, 15)
(759, 34)
(12680, 11)
(6838, 12)
(12680, 11)
(8326, 15)
(6470, 19)
(7090, 13)
(5855, 16)
(11092, 39)
(12680, 11)
(15351, 32)
(15101, 21)
(603, 10)
(1665, 30)
(1641, 38)
(4879, 20)
(13380, 25)
(7090, 13)
(953, 14)
(11092, 39)
(6838, 12)
(8326, 15)
(5855, 16)
(6838, 12)
(12180, 27)
(5855, 16)
(6838, 12)
(8326, 15)
(9561, 40)
(9561, 40)
(9561, 40)
(6973, 31)
(5171, 28)
(6838, 12)
(11092, 39)
(6838, 12)
(6280, 41)
(6779, 36)
(6838, 12)
(11060, 18)
(1641, 38)
(7090, 13)
(11092, 39)
(1921, 22)
(12680, 11)
(953, 14)
(12180, 27)
(603, 10)
(95

(759, 34)
(603, 10)
(9561, 40)
(6973, 31)
(7090, 13)
(603, 10)
(603, 10)
(12680, 11)
(12680, 11)
(603, 10)
(1665, 30)
(11060, 18)
(6838, 12)
(603, 10)
(1665, 30)
(7090, 13)
(6470, 19)
(6838, 12)
(8326, 15)
(603, 10)
(11092, 39)
(6838, 12)
(953, 14)
(6470, 19)
(603, 10)
(12680, 11)
(6259, 37)
(5855, 16)
(12680, 11)
(603, 10)
(6779, 36)
(603, 10)
(12680, 11)
(603, 10)
(6838, 12)
(12680, 11)
(12180, 27)
(6838, 12)
(603, 10)
(12680, 11)
(6280, 41)
(603, 10)
(12680, 11)
(603, 10)
(1641, 38)
(8326, 15)
(6838, 12)
(8326, 15)
(9561, 40)
(1665, 30)
(603, 10)
(603, 10)
(6838, 12)
(6779, 36)
(6838, 12)
(4583, 23)
(603, 10)
(6779, 36)
(603, 10)
(12680, 11)
(603, 10)
(5855, 16)
(7090, 13)
(8310, 17)
(603, 10)
(759, 34)
(12680, 11)
(603, 10)
(15351, 32)
(12680, 11)
(6470, 19)
(6838, 12)
(10928, 24)
(603, 10)
(603, 10)
(603, 10)
(11060, 18)
(6259, 37)
(603, 10)
(12680, 11)
(4879, 20)
(5855, 16)
(7090, 13)
(6838, 12)
(10928, 24)
(6838, 12)
(12680, 11)
(12680, 11)
(11060, 18)
(603, 10)
(6779, 36)
(953,

(14994, 33)
(6838, 12)
(603, 10)
(12680, 11)
(6838, 12)
(953, 14)
(12680, 11)
(12680, 11)
(12680, 11)
(6838, 12)
(953, 14)
(12680, 11)
(603, 10)
(6838, 12)
(12680, 11)
(7090, 13)
(603, 10)
(15351, 32)
(11386, 26)
(12680, 11)
(5855, 16)
(6838, 12)
(12180, 27)
(1841, 35)
(15101, 21)
(603, 10)
(12680, 11)
(12680, 11)
(12680, 11)
(6838, 12)
(12680, 11)
(8326, 15)
(1665, 30)
(8310, 17)
(12680, 11)
(6838, 12)
(12680, 11)
(8310, 17)
(953, 14)
(603, 10)
(603, 10)
(8326, 15)
(6838, 12)
(6838, 12)
(603, 10)
(6838, 12)
(12680, 11)
(6838, 12)
(603, 10)
(6838, 12)
(953, 14)
(603, 10)
(12680, 11)
(12680, 11)
(1641, 38)
(603, 10)
(603, 10)
(953, 14)
(603, 10)
(603, 10)
(603, 10)
(6838, 12)
(603, 10)
(11060, 18)
(6838, 12)
(12680, 11)
(6838, 12)
(603, 10)
(15326, 29)
(12680, 11)
(6779, 36)
(603, 10)
(4879, 20)
(953, 14)
(1841, 35)
(6838, 12)
(12680, 11)
(953, 14)
(603, 10)
(12680, 11)
(12680, 11)
(12180, 27)
(603, 10)
(12680, 11)
(12680, 11)
(7090, 13)
(603, 10)
(953, 14)
(1921, 22)
(7090, 13)
(4583, 

(12180, 27)
(12680, 11)
(12680, 11)
(603, 10)
(6838, 12)
(6838, 12)
(7090, 13)
(12680, 11)
(603, 10)
(12680, 11)
(603, 10)
(603, 10)
(6838, 12)
(603, 10)
(12680, 11)
(12680, 11)
(5855, 16)
(12680, 11)
(603, 10)
(12680, 11)
(603, 10)
(12680, 11)
(603, 10)
(7090, 13)
(603, 10)
(6838, 12)
(6838, 12)
(6838, 12)
(603, 10)
(6838, 12)
(603, 10)
(603, 10)
(6838, 12)
(953, 14)
(1921, 22)
(7090, 13)
(15351, 32)
(7090, 13)
(11060, 18)
(12680, 11)
(12680, 11)
(603, 10)
(603, 10)
(603, 10)
(6838, 12)
(6838, 12)
(6838, 12)
(6838, 12)
(603, 10)
(12680, 11)
(12680, 11)
(8310, 17)
(8326, 15)
(603, 10)
(6838, 12)
(12680, 11)
(11060, 18)
(7090, 13)
(953, 14)
(6838, 12)
(12680, 11)
(12680, 11)
(6838, 12)
(6838, 12)
(603, 10)
(603, 10)
(12680, 11)
(12680, 11)
(603, 10)
(603, 10)
(603, 10)
(6838, 12)
(12680, 11)
(603, 10)
(12680, 11)
(12680, 11)
(12680, 11)
(953, 14)
(12680, 11)
(603, 10)
(7090, 13)
(603, 10)
(603, 10)
(12680, 11)
(5855, 16)
(12680, 11)
(12680, 11)
(8310, 17)
(603, 10)
(6838, 12)
(603, 10)


In [None]:
## Get IAV index
count_iav = filter(lambda f_iav: f_iav <= 40, idx_arr)
list_count_iav = list(count_iav)
print(list_count_iav)
print(len(list_count_iav))

## Get Human index
count_human = filter(lambda f_human: f_human > 40, idx_arr)
list_count_human = list(count_human)
# print(list_count_human)
print(len(list_count_human))

In [None]:
'''
    IAV (idx 0 to 40)
    Human (idx 41 to 15, 684)
'''

In [25]:
get_max = max(degree_values)
get_min = min(degree_values)

print(get_max, get_min)

## Get index
idx_max = list_degree_values.index(get_max)
idx_min = list_degree_values.index(get_min)
print(idx_max, idx_min)

print(list(my_degrees)[idx_max], list(my_degrees)[idx_min])

10985 1
1 3019
(7, 10985) (7998, 1)


In [18]:
get = list((my_degrees))[idx_max]

## Retrieve protein UniProt ID
protein_list[protein_list['Unnamed: 0'] == get[0]]

Unnamed: 0.1,Unnamed: 0,Protein1_ID
7,7,P03496


In [19]:
## Average degree

print(sum(degree_values))
print(len(degree_values))

avg_degree = sum(degree_values)/len(degree_values)
avg_degree

182434
12438


14.667470654446053

In [20]:
# edgelist = [(0,1)] # single edge (0,1)
# G = nx.from_edgelist(edgelist)

# G.degree[0]  # node 0 has degree 1

# list(G.degree([0, 1, 2]))

## Edgelist

In [None]:
## All edges
tt_df = pd.read_csv('./hvppi_preds/hvppi_edgelist_idx.csv').sort_values(by=['Score'], ascending=False)
tt_df

In [None]:
e_pos = tt_df[tt_df['Score'] >= 0.143]
e_pos

In [None]:
print(len(e_pos['Protein2_ID'].unique()))
print(len(e_pos['Protein1_ID'].unique()))

num_nodes = len(e_pos['Protein1_ID'].unique()) + len(e_pos['Protein2_ID'].unique())
print(num_nodes)

In [None]:
t1 = e_pos[e_pos['Score'] >= 0.375].reset_index(drop=True)
t2 = e_pos[(e_pos['Score'] >= 0.212) & (e_pos['Score'] < 0.375)].reset_index(drop=True)
t3 = e_pos[(e_pos['Score'] >= 0.143) & (e_pos['Score'] < 0.212)].reset_index(drop=True)

print(len(t1))
print(len(t2))
print(len(t3))

In [None]:
e_neg = tt_df[tt_df['Score'] < 0.143].reset_index(drop=True)
e_neg

In [None]:
edgelist = e_pos[['Protein1_ID', 'Protein2_ID']].to_numpy()
edgelist

In [None]:
## networkx
print(len(edgelist))
G = nx.from_edgelist(edgelist)

my_degrees = G.degree()
degree_values = dict(my_degrees).values()

avg_degree = sum(degree_values)/len(degree_values)
print(avg_degree)

In [None]:
degree_values

In [None]:
list_degree_values = list(degree_values)
# list_degree_values

In [None]:
get_max = max(degree_values)
get_min = min(degree_values)

print(get_max, get_min)

## Get index
idx_max = list_degree_values.index(get_max)
idx_min = list_degree_values.index(get_min)
print(idx_max, idx_min)

print(list((my_degrees))[idx_max], list((my_degrees))[idx_min])

In [None]:
degree_values

In [None]:
print(len(degree_values))

In [None]:
avg_degree = sum(degree_values)/len(degree_values)
avg_degree

In [None]:
# test_df = pd.read_csv('edges/Sept_2022_new/balanced/idx/b_test_idx.csv') ## Network Reconstruction
test_df = pd.read_csv('edges/Experimentally_Verified_Test/testset_1.csv')
test_df

In [None]:
p1 = test_df[test_df['label'] == 0]
p1

In [None]:
nodes = list(p1['Protein1_ID'].unique()) + list(p1['Protein2_ID'].unique())
nodes

In [None]:
nodes_f = set(nodes)
len(nodes_f)

In [None]:
neg_nodes = pd.concat

In [None]:
print(len(p1['Protein1_ID'].unique()), len(p1['Protein2_ID'].unique()))

In [None]:
len(p1['Protein1_ID'].unique()) + len(p1['Protein2_ID'].unique())

In [None]:
pos_org = pd.read_csv('edges/Sept_2022_new/balanced/idx/b_pos_idx.csv')
pos_org

In [None]:
print(len(pos_org['Protein1_ID'].unique()), len(pos_org['Protein2_ID'].unique()))

In [None]:
len(pos_org['Protein1_ID'].unique()) + len(pos_org['Protein2_ID'].unique())

In [None]:
test_df = pd.read_csv('edges/Sept_2022_new/balanced/idx/b_test_idx.csv') ## Network Reconstruction
# test_df = pd.read_csv('edges/Experimentally_Verified_Test/testset_1.csv')
test_df

In [None]:
test_df_pos = test_df[test_df['label'] == 1]
test_df_pos

In [None]:
edgelist = test_df_pos[['Protein1_ID', 'Protein2_ID']].to_numpy()
edgelist

In [None]:
## networkx
G = nx.from_edgelist(edgelist)

my_degrees = G.degree()
degree_values = dict(my_degrees).values()
# print(degree_values)
print(len(degree_values))

avg_degree = sum(degree_values)/len(degree_values)
avg_degree

In [None]:
pos_t = pd.read_csv('./edges/unbalanced_dataset/Dec_2022/NEW_pos_test.csv')
pos_t

In [None]:
pos_raw = pd.read_csv('./edges/unbalanced_dataset/Dec_2022/NEW_pos_train_val.csv')
pos_raw

In [None]:
edgelist = pos_t[['Protein1_ID', 'Protein2_ID']].to_numpy()
edgelist

In [None]:
## networkx
G = nx.from_edgelist(edgelist)

my_degrees = G.degree()
degree_values = dict(my_degrees).values()
# print(degree_values)
print(len(degree_values))

avg_degree = sum(degree_values)/len(degree_values)
avg_degree