# 6. Analysis of the Predicted HP-PPI Network

Network analysis of the predicted human–*Streptococcus pneumoniae* strain D39 protein–protein interaction network, using the NetworkX module.

In [1]:
import os
import joblib

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import networkx as nx
from networkx.algorithms import bipartite
from pyvis.network import Network

In [2]:
# Resolve the 'analyses' folder that sits next to the current working directory
parent_dir = os.path.dirname(os.getcwd())
dir_in = os.path.join(parent_dir, 'analyses')

# Results are written to the same folder the inputs are read from
dir_out = dir_in

In [3]:
# Read the tab-separated table of predicted interactions into a DataFrame
f_in = os.path.join(dir_in, 'predicted_interactions.tsv')
df = pd.read_csv(f_in, sep='\t')

# Collect the distinct proteins of each organism
pathogen_proteins = list(set(df['Pathogen_Protein']))
human_proteins = list(set(df['Human_Protein']))

# Report the size of the loaded network, then preview the first rows
counts = (len(df), len(pathogen_proteins), len(human_proteins))
print('Loaded %i interactions involving %i STRP2 proteins and %i human proteins\n' % counts)
df.head()

Loaded 5823 interactions involving 30 STRP2 proteins and 324 human proteins



Unnamed: 0,Pathogen_Protein,Human_Protein
0,ENO_STRP2,FCAR_HUMAN
1,ENO_STRP2,FCG2B_HUMAN
2,ENO_STRP2,FCERG_HUMAN
3,ENO_STRP2,FCGRB_HUMAN
4,ENO_STRP2,CLUS_HUMAN


In [4]:
# Save list of proteins
# Writes one file per organism with one protein identifier per line
list_dir = os.path.join(dir_out, 'protein_lists')

# The folder may not exist yet on a fresh run; open() would fail without it
os.makedirs(list_dir, exist_ok=True)

for organism in ['Pathogen', 'Human']:
    f_out = os.path.join(list_dir, '%s_protein_list' % organism)

    # Sort the unique identifiers so the file content is the same on every run
    # (iteration order of a set of strings is not stable between sessions)
    protein_list = sorted(set(df['%s_Protein' % organism]))
    with open(f_out, 'w') as f:
        f.write('\n'.join(protein_list))

477

3725

## Create graph of protein interactions

In [5]:
# Rename proteins by gene names
# Each mapping table is applied only to the column of its own organism, so an
# identifier from one organism can never be rewritten by the other's table
mapping_columns = {'strp2': 'Pathogen_Protein', 'human': 'Human_Protein'}

for organism, column in mapping_columns.items():
    f_in = os.path.join(dir_in, 'protein_lists', '%s_gene_mapping.tab' % organism)

    # The first two columns are read as (protein identifier, gene name) pairs.
    # Rows with a missing value are dropped so that a protein without a gene
    # name keeps its original identifier instead of becoming NaN.
    df_map = pd.read_csv(f_in, sep='\t').iloc[:, :2].dropna()
    map_dict = dict(zip(df_map.iloc[:, 0], df_map.iloc[:, 1]))

    # Assign a new column rather than replacing in place; unmatched values are kept
    df[column] = df[column].replace(map_dict)

df.head(5)

Unnamed: 0,Pathogen_Protein,Human_Protein
0,eno,FCAR
1,eno,FCGR2B
2,eno,FCER1G
3,eno,FCGR1B
4,eno,CLU


In [6]:
# Initialize a bipartite graph
# Take the proteins in order of first appearance in the table: unlike the
# iteration order of a set, this is the same on every run, so node order and
# anything downstream that depends on it are reproducible
pathogen_proteins = df['Pathogen_Protein'].unique().tolist()
human_proteins = df['Human_Protein'].unique().tolist()

# A name present on both sides would have its bipartite attribute overwritten
# by the second add_nodes_from call, silently breaking the two-set structure
shared_names = set(pathogen_proteins) & set(human_proteins)
assert not shared_names, 'Names found in both node sets: %s' % shared_names

B = nx.Graph()

# Add proteins as nodes with a bipartite attribute (0 = pathogen, 1 = human)
B.add_nodes_from(pathogen_proteins, bipartite=0)
B.add_nodes_from(human_proteins, bipartite=1)

# Add interactions as edges between nodes; columns are named explicitly so
# each edge is always a (pathogen, human) pair
B.add_edges_from(
    df[['Pathogen_Protein', 'Human_Protein']].itertuples(index=False, name=None)
)

In [7]:
# Centrality measures to compute, keyed by the column name they are stored under
properties = {
    'Degree_centrality': bipartite.degree_centrality,
    'Betweenness_centrality': bipartite.betweenness_centrality
}

# Each function returns a score for every node of the graph;
# only the scores of the pathogen proteins are kept
pathogen_scores = {}
for prop, func in properties.items():
    scores = pd.Series(func(B, nodes=pathogen_proteins), name=prop)
    pathogen_scores[prop] = scores[scores.index.isin(pathogen_proteins)]

# One row per pathogen protein, one column per centrality measure
df = pd.DataFrame(pathogen_scores, index=pathogen_proteins)

In [8]:
# Label the index, rank the proteins from highest to lowest degree centrality,
# and display the ten highest-ranked rows
df = df.rename_axis('Pathogen_protein')
df = df.sort_values(by='Degree_centrality', ascending=False)
df.head(10)

Unnamed: 0_level_0,Degree_centrality,Betweenness_centrality
Pathogen_protein,Unnamed: 1_level_1,Unnamed: 2_level_1
bgaA,1.0,0.451853
SPD_0250,0.775316,0.081955
prtA,0.699367,0.02925
pcpA,0.693038,0.026479
pck,0.68038,0.02239
SPD_0537,0.670886,0.019983
eno,0.648734,0.015419
gatA,0.639241,0.013786
nanB,0.636076,0.013316
SPD_1321,0.636076,0.013316


In [9]:
# Write the centrality table, index included, as a tab-separated file
f_out = os.path.join(dir_out, 'network_centrality')
df.to_csv(path_or_buf=f_out, sep='\t')

In [10]:
# Get enriched human proteins
f_in = os.path.join(dir_in, 'KEGG_analysis', 'KEGG_pathway.txt')

df_kegg = pd.read_csv(f_in, sep='\t')

# Row of the pathway to examine
i = 0
pathway = df_kegg.loc[i]

# Genes listed for this pathway; the Genes field is split on ', '
human_kegg = pathway.Genes.split(', ')

# Pathway name is the text after the first ':' of the Term field;
# splitting once keeps names that themselves contain a colon intact
term = pathway.Term.split(':', 1)[1]

# Last expression of the cell, so the selected pathway name is displayed
term

'Complement and coagulation cascades'

In [11]:
# Create subgraph
# Takes the first rows of df as the top pathogen proteins, so df must already
# be ordered by the ranking of interest when this cell runs
m = len(human_kegg) # number of human proteins
n = min(10, len(df)) # number of pathogen proteins (at most 10, fewer if df is shorter)
pathogen_top10 = df.index[:n].tolist()

# Nodes that are not present in B are ignored by subgraph()
subgraph = B.subgraph(pathogen_top10 + human_kegg)

# Visualize the network
net = Network()
net.inherit_edge_colors_from(False)

# The per-node attribute lists must be exactly as long as the node lists,
# which is why n is derived from the rows actually available
net.add_nodes(pathogen_top10, value=[10]*n, color=['blue']*n)
net.add_nodes(human_kegg, value=[10]*m, color=['orange']*m)
net.add_edges(subgraph.edges())

net.show_buttons()
net.show('subgraph.html')

<hr></hr>