# Topological Features of Human PPI Network

Source: Human Protein Reference Database (HPRD), Release 9 — http://hprd.org/RELEASE9/

In [1]:
import os

import pandas as pd
import numpy as np
import networkx as nx

In [2]:
# Resolve data locations relative to the parent of the current working directory
parent_dir = os.path.dirname(os.getcwd())

dir_out = os.path.join(parent_dir, 'data')  # results are written here
dir_in = os.path.join(dir_out, 'HPRD')      # HPRD input files are read from here

## Preprocessing of HPRD data

In [3]:
# Load the human PPI binary interaction table
f_in = os.path.join(dir_in, 'BINARY_PROTEIN_PROTEIN_INTERACTIONS.txt')

# Parse only columns 1 and 4, i.e. the HPRD IDs of the two interactors
df = pd.read_csv(f_in, sep='\t', header=None, usecols=[1, 4])

In [4]:
# Load the HPRD ID mapping table
f_in = os.path.join(dir_in, 'HPRD_ID_MAPPINGS.txt')

# Keep the HPRD (column 0) and SwissProt (column 6) columns
df_map = pd.read_csv(f_in, sep='\t', header=None)[[0, 6]]
# Lookup table HPRD ID -> Uniprot ID; a later row wins if an HPRD ID repeats
map_dict = dict(df_map.values.tolist())

# Translate the HPRD IDs of both interactors into Uniprot IDs
## Note: entries with multiple Uniprot IDs are left as they are
df = df.replace(map_dict)
df.columns = ['UniprotID_1', 'UniprotID_2']

In [5]:
# Drop every interaction in which either partner has no Uniprot ID (marked '-')
n = len(df)
has_missing_id = df.eq('-').any(axis=1)
df = df.loc[~has_missing_id]
print('Removed %i interactions with no available Uniprot IDs' % (n - len(df)))

df.head()

Removed 121 interactions with no available Uniprot IDs


Unnamed: 0,UniprotID_1,UniprotID_2
0,P00352,P00352
1,"B4E3U0,Q13683,Q4LE35","P02708,Q53SH4"
2,"Q9ULJ8,A1L494,B7ZLX4",P63261
3,P10124,"P16070,O95370"
4,"Q14451,Q53YD3",P04626


## Calculation of Human Interactome graph properties

Graph topological properties computed for every protein (node):
- Eigenvector centrality
- Degree centrality
- Clustering coefficient
- Betweenness centrality

In [6]:
# Build an undirected graph whose edges are the rows of the interaction table
# NOTE(review): a row pairing a protein with itself becomes a self-loop —
# confirm this is intended for the centrality measures computed later.
G = nx.Graph()
G.add_edges_from(df.values.tolist())
print('Human PPI network summary:\n%i proteins and %i interactions'\
      % (G.number_of_nodes(), G.number_of_edges()))

Human PPI network summary:
9588 proteins and 39046 interactions


In [7]:
# Compute topological properties
# Maps each output column name to the networkx function that computes it.
# Every function is called as func(G) and its result is wrapped in a Series
# keyed by node, so it aligns with the node index of the results table.
graph_props = {
    'Eigenvector_centrality': nx.eigenvector_centrality,
    'Degree_centrality': nx.degree_centrality,
    'Clustering_coefficient': nx.clustering,
    'Betweenness_centrality': nx.betweenness_centrality
}

# One row per node of G; columns are added one property at a time
df = pd.DataFrame(index=list(G.nodes))

# Add computed properties to DataFrame
for prop, func in graph_props.items():
    df[prop] = pd.Series(func(G))
    # Build the message from the key with underscores turned into spaces, so
    # a key with any number of '_'-separated words formats correctly (the
    # previous two-placeholder format raised TypeError otherwise).
    print('%s calculations complete' % prop.replace('_', ' '))

df.head()

Eigenvector centrality calculations complete
Degree centrality calculations complete
Clustering coefficient calculations complete
Betweenness centrality calculations complete


Unnamed: 0,Eigenvector_centrality,Degree_centrality,Clustering_coefficient,Betweenness_centrality
P00352,2.207378e-32,0.000209,0.0,0.0
"B4E3U0,Q13683,Q4LE35",0.001048165,0.000626,0.166667,0.000298
"P02708,Q53SH4",0.0001204406,0.000417,0.0,0.000401
"Q9ULJ8,A1L494,B7ZLX4",0.00192215,0.000939,0.238095,3.8e-05
P63261,0.006808116,0.004381,0.025641,0.002309


In [8]:
# Write the per-protein topology table as a tab-separated file
f_out = os.path.join(dir_out, 'human_ppi_topology.tsv')
df.to_csv(path_or_buf=f_out, sep='\t')