In [1]:
import pandas as pd 
import networkx as nx
from matplotlib.pyplot import figure

In [2]:
# Load the tab-separated interactome table into a DataFrame.
# The path is relative rather than an absolute, user-specific Windows path so
# the notebook can run from another checkout or machine.
# NOTE(review): assumes the kernel's working directory is the repository root
# (the folder that contains `data/`) - confirm before relying on it.
DATA_PATH = 'data/disease_interactome.tsv'
data = pd.read_csv(DATA_PATH, sep='\t')

In [3]:
# Display the first rows of the loaded table as a quick check of the columns.
data.head()

Unnamed: 0,interactor A gene symbol,interactor B gene symbol,interactor A Uniprot AC,interactor B Uniprot AC
0,A2M,ENO2,P01023,P09104
1,ABCA1,IKBKG,O95477,Q9Y6K9
2,ABL1,BCL2L1,P00519,Q07817
3,ABL1,PRDX1,P00519,Q06830
4,ABL1,RB1,P00519,P06400


In [4]:
# Keep only the two gene-symbol columns; they are the endpoints of each interaction.
edge_columns = ['interactor A gene symbol', 'interactor B gene symbol']
nx_data = data[edge_columns]

In [5]:
# Display the first rows of the two-column edge table.
nx_data.head()

Unnamed: 0,interactor A gene symbol,interactor B gene symbol
0,A2M,ENO2
1,ABCA1,IKBKG
2,ABL1,BCL2L1
3,ABL1,PRDX1
4,ABL1,RB1


# 2.1.a

In [6]:
# Create an empty undirected graph.
# NOTE(review): G is reassigned by the nx.from_pandas_edgelist call in the
# following cell, so this empty graph is never used - the cell looks redundant.
G = nx.Graph()

In [9]:
# Build the graph from the edge table, using the two gene-symbol columns
# as the endpoints of each edge.
G = nx.from_pandas_edgelist(
    nx_data,
    source='interactor A gene symbol',
    target='interactor B gene symbol',
)

In [12]:
# Export the graph to the file g.graphml (path is relative to the working directory).
nx.write_graphml(G, "g.graphml")

In [13]:
# Size of the graph: number of nodes and number of edges.
nodes_cardinality, edges_cardinality = G.number_of_nodes(), G.number_of_edges()

In [14]:
# Count the connected components by iterating over them once.
connected_componets_cardinality = sum(1 for _component in nx.connected_components(G))

In [15]:
# Count the isolated nodes without materialising them in a list.
isolated_nodes_cardinality = sum(1 for _node in nx.isolates(G))

In [16]:
# Average shortest-path length of G, as computed by networkx.
# NOTE(review): presumably requires G to be connected - verify against the
# networkx docs if the input data changes.
average_path_len = nx.average_shortest_path_length(G)

In [65]:
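# Not used: nx.average_degree_connectivity(G) returns, for each degree value,
# the average degree of the neighbours of nodes with that degree (a dict),
# not the mean degree of the network. The mean degree is computed in the next cell.
#average_degree = nx.average_degree_connectivity(G)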

In [17]:
# Mean degree: the sum of all node degrees divided by the number of nodes.
degree_total = sum(deg for _node, deg in G.degree())
average_degree = degree_total / float(G.number_of_nodes())

In [18]:
# Average clustering coefficient of G, as computed by nx.average_clustering.
average_clustering_coefficient = nx.average_clustering(G)

In [19]:
# Diameter of G, as returned by nx.diameter.
network_diameter = nx.diameter(G)

In [20]:
# Radius of G, as returned by nx.radius.
network_radius = nx.radius(G)

In [21]:
# Degree centralization of the network:
#     n / (n - 2) * (max_degree / (n - 1) - density)
# The density term belongs inside the parentheses so that the n / (n - 2)
# factor scales the whole difference, not only the max-degree term.
n_nodes = float(nodes_cardinality)  # float so the divisions are never integer divisions
max_degree = max(deg for _node, deg in G.degree())  # largest degree, no full sort needed
centralization = (n_nodes / (n_nodes - 2)) * (
    max_degree / (n_nodes - 1) - nx.density(G)
)

In [22]:
# Labels and values of the global network metrics.
# The two lists are index-aligned: keys[i] is the label of values[i], and each
# metric appears exactly once in both lists.
keys = [
    'nodes_cardinality',
    'edges_cardinality',
    'connected_componets_cardinality',
    'isolated_nodes_cardinality',
    'average_path_len',
    'average_degree',
    'average_clustering_coefficient',
    'network_diameter',
    'network_radius',
    'centralization',
]
values = [
    nodes_cardinality,
    edges_cardinality,
    connected_componets_cardinality,
    isolated_nodes_cardinality,
    average_path_len,
    average_degree,
    average_clustering_coefficient,
    network_diameter,
    network_radius,
    centralization,
]

In [23]:
# Pair every label with the value at the same position, then build a
# one-column frame whose index is the metric label.
metrics_by_name = {label: values[pos] for pos, label in enumerate(keys)}
final_values = pd.DataFrame.from_dict(metrics_by_name, orient='index')

In [24]:
# Display the table of global metrics.
final_values

Unnamed: 0,0
average_degree,4.183919
connected_componets_cardinality,1.0
centralization,0.309076
nodes_cardinality,6318.0
"network_radius,centralization",4.0
network_diameter,8.0
edges_cardinality,13217.0
average_path_len,3.163104
average_clustering_coefficient,0.191274
isolated_nodes_cardinality,0.0


In [29]:
# Save the global metrics as a tab-separated file in the working directory.
final_values.to_csv('graph_final_values.tsv', sep ='\t')

# 2.1.b

## 2.1.b.i

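Since the network consists of a single connected component, the largest connected component (LCC) coincides with the whole network, so the results for 2.1.b.i are the same as those for 2.1.a.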

## 2.1.b.ii

In [30]:
# Map every node to its degree.
nodes_degree = {node: deg for node, deg in G.degree()}

In [31]:
# Betweenness centrality of every node, computed with the networkx default arguments.
nodes_betweenness_centrality = nx.betweenness_centrality(G)

In [32]:
# Eigenvector centrality of every node, computed with the networkx default arguments.
eigen_vector_centrality = nx.eigenvector_centrality(G)

In [33]:
# Closeness centrality of every node, computed with the networkx default arguments.
closeness_centrality = nx.closeness_centrality(G)

In [34]:
# Ratio of betweenness centrality to degree for every node.
# A node whose degree is 0 (or that is missing from nodes_degree) gets 0.0
# instead of raising ZeroDivisionError.
betweennes_degree_ratio = {
    node: (betweenness / nodes_degree[node]) if nodes_degree.get(node) else 0.0
    for node, betweenness in nodes_betweenness_centrality.items()
}

In [35]:
# Assemble the per-node metrics into one table: one row per node, one column per metric.
dicts = [
    nodes_degree,
    nodes_betweenness_centrality,
    eigen_vector_centrality,
    closeness_centrality,
    betweennes_degree_ratio,
]
metric_names = [
    'nodes_degree',
    'nodes_betweenness_centrality',
    'eigen_vector_centrality',
    'closeness_centrality',
    'betweennes_degree_ratio',
]
# Align the metrics on the node labels, fill gaps with 0 and store every
# column as float.
local = pd.concat([pd.Series(metric) for metric in dicts], axis=1).fillna(0).astype(float)
local.columns = metric_names

In [36]:
# Display the first rows of the per-node metrics table.
local.head()

Unnamed: 0,nodes_degree,nodes_betweenness_centrality,eigen_vector_centrality,closeness_centrality,betweennes_degree_ratio
A1BG,1.0,0.0,0.000635,0.285115,0.0
A2M,5.0,0.000741,0.006783,0.350535,0.000148
A2ML1,1.0,0.0,0.008514,0.330111,0.0
AAAS,3.0,0.000167,0.01618,0.386054,5.6e-05
AADAT,1.0,0.0,0.000232,0.249427,0.0


In [42]:
# Rank the nodes from highest to lowest betweenness centrality.
sorted_local = local.sort_values('nodes_betweenness_centrality', ascending=False)

In [46]:
# Keep only the first 20 rows of the ranked table.
sorted_local = sorted_local.iloc[:20]

In [48]:
# Save the current table as CSV inside the data/ folder.
# A forward slash is used because '\s' in a normal string literal is an invalid
# escape sequence, and a backslash is a path separator on Windows only.
sorted_local.to_csv('data/sorted_local.csv')

In [52]:
# Round every numeric column to five decimal places.
sorted_local = sorted_local.round(decimals=5)

In [61]:
# Store the degree column as integers.
# NOTE(review): this assigns into sorted_local in place, so running the cell
# modifies the frame produced by the previous cell rather than creating a new one.
sorted_local['nodes_degree'] = sorted_local['nodes_degree'].astype(int)

In [64]:
# Save the rounded table as CSV inside the data/ folder.
# A forward slash is used because '\s' in a normal string literal is an invalid
# escape sequence, and a backslash is a path separator on Windows only.
sorted_local.to_csv('data/sorted_local.csv')

In [65]:
# Display the ranked table.
sorted_local

Unnamed: 0,nodes_degree,nodes_betweenness_centrality,eigen_vector_centrality,closeness_centrality,betweennes_degree_ratio
MYC,1956,0.35205,0.47437,0.49275,0.00018
KRAS,1593,0.27662,0.30863,0.44779,0.00017
TP53,1328,0.22744,0.28933,0.47593,0.00017
CTNNB1,708,0.08584,0.18391,0.45121,0.00012
CDH1,657,0.08236,0.13068,0.41339,0.00013
HRAS,633,0.07918,0.11843,0.42877,0.00013
IKBKG,393,0.06577,0.06384,0.41209,0.00017
MAPK1,330,0.04663,0.07341,0.44735,0.00014
STAT3,294,0.03999,0.05674,0.39772,0.00014
PML,276,0.03591,0.07554,0.44891,0.00013
