In [17]:
import itertools
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
%matplotlib inline

In [18]:
doc = pd.read_csv('../Data(net)/doc_sample.csv')
doc_net = pd.read_csv('../Data(net)/doc_network.csv')

## Initializing Graph G1

In [19]:
# Graph G1 with nodes as doctors and edges as Hospitals
G1 = nx.Graph()
G1.name = 'G1 (Node:Doctor, Edge:Hospital)'

In [20]:
# making a list of all hospitals for which each hospital is associated

grp_list = list(doc.groupby('DOC_ID'))
total_doc_ids = 6789
doc_hosp = []

for doc_id in range(total_doc_ids+1):
    listing = list(grp_list[doc_id][1]['HOSPITAL'])
    doc_hosp.append(listing)

In [21]:
#Creating Nodes

nodes_attr = []
for row in range(len(doc)):
    attr = list(zip(doc.columns[1:], doc.iloc[row,1:]))
    attr = dict(attr)
    nodes_attr.append(attr)

nodes = list(zip(doc['DOC_ID'], nodes_attr))
G1.add_nodes_from(nodes)

#modifying hospital attribute of nodes to add all the hospitals
for node in G1.nodes():
    G1.node[node]['HOSPITAL'] = doc_hosp[node]

In [22]:
print('Total Doctors(with repeatition): %d\nTotal Doctors(without repeatition): %d' %(len(doc), len(G1.nodes())))

Total Doctors(with repeatition): 7415
Total Doctors(without repeatition): 6790


In [23]:
G1.node[12]

{'EXPERIENCE': nan,
 'HOSPITAL': ['Mata Chanan Devi Hospital', 'Primus Super Speciality Hospital'],
 'NAME': 'Dr Rajat Goel',
 'QUALIFICATION': nan,
 'SPECIALTIES': 'GeneralSurgeon,LaparoscopicSurgeon,BariatricSurgeon'}

In [24]:
#Creating Edges

for i,group in doc_net.groupby('HOSP_ID')['DOC_ID']:
    for u,v in itertools.combinations(group, 2):
        set_u = set(G1.node[u]['HOSPITAL'])
        set_v = set(G1.node[v]['HOSPITAL'])
        common_hosp = list(set_u.intersection(set_v))
        G1.add_edge(u, v, attr_dict={'HOSPITAL':common_hosp})


In [25]:
#Summary of Graph G1
print(nx.info(G1))

Name: G1 (Node:Doctor, Edge:Hospital)
Type: Graph
Number of nodes: 6790
Number of edges: 440777
Average degree: 129.8312


In [26]:
# nx.write_edgelist(G1, 'G1.edges')

## Proving Small World Analysis for Indian Doctors Network
Any two people in the world taken at random could be connected through a chain of six links (on average). The small-world phenomenon formalises the anecdotal notion that ‘you are only ever “six degrees of separation” away from anyone else on the planet.

In [27]:
random_doctor_id = np.random.randint(len(G1.nodes()))
G1.node[random_doctor_id]

{'EXPERIENCE': nan,
 'HOSPITAL': ['Premier Hospital'],
 'NAME': 'Dr. Raghunath',
 'QUALIFICATION': nan,
 'SPECIALTIES': 'Pediatrician'}

In [28]:
# closeness centrality
closeness_centrality = nx.closeness_centrality(G1)[random_doctor_id]
#avg_distance = 1/closeness_centrality
#avg_distance

Average distance of above doctor is less than 6 !!

### Local Clustering or Community Detection 
*using Gephi 0.91*


<center>
    <img src="images/G1.png">
</center>


    - different local clusters doctors are represented with different colors
    - size of the node is directly proportional to the betweeness of that doctor in the whole network

In [35]:
k = len(G1.edges())/len(G1.nodes())
k

64.91561119293078

In [30]:
larg_conn_comp = max(nx.connected_component_subgraphs(G1), key=len)
L_actual = nx.average_shortest_path_length(larg_conn_comp)
L_actual

3.4887507883332343

In [31]:
n = len(G1.nodes())
edge_creation_prob = np.log(n)/np.log(k)
#random_larg_conn_comp = nx.fast_gnp_random_graph(n, edge_creation_prob)
#L_random = nx.average_shortest_path_length(random_larg_conn_comp)
L_random = edge_creation_prob
L_random

2.1143110163959897

In [32]:
C_actual = nx.average_clustering(larg_conn_comp, weight='weight')
C_actual

0.9711208282161774

In [33]:
edge_creation_prob = k/n
#random_larg_conn_comp = nx.fast_gnp_random_graph(n, edge_creation_prob)
#C_random = nx.average_clustering(random_larg_conn_comp, weight='weight')
C_random = edge_creation_prob
C_random

0.009560472929739439

In [34]:
sw = (C_actual/L_actual)*(L_random/C_random)
sw

61.559178795807618

Calculating degree cetrality

In [38]:
# top 5 doctors sorted according to their degrees
sorted(G1.degree().items(), key=lambda x:x[1], reverse=True)[:5]

[(3636, 839), (1857, 746), (232, 708), (2946, 691), (772, 689)]

In [39]:
degree_centrality = nx.degree_centrality(G1)
print(sorted(degree_centrality.items(), key=lambda x:x[1], reverse=True)[:5])
G1.node[17]

[(3636, 0.12358226542937105), (1857, 0.10988363529238475), (232, 0.1042863455589925), (2946, 0.10178229488879069), (772, 0.10148770069229636)]


{'EXPERIENCE': 23.0,
 'HOSPITAL': ['Max Super Speciality Hospital'],
 'NAME': 'Dr Sunil Kumar Chakravarty',
 'QUALIFICATION': 'MBBS MD - Dermatology , Venereology & Leprosy DNB - Dermatology & Venereology ',
 'SPECIALTIES': 'Gastroenterologist'}

2. Closeness Centrality

In [41]:
closeness_centrality = nx.closeness_centrality(G1)
sorted(closeness_centrality.items(), key=lambda x:x[1], reverse=True)[:5]    #top5 

[(3636, 0.4004551440976643),
 (2993, 0.38039245512341135),
 (772, 0.37961244526428367),
 (2946, 0.37785772331174733),
 (122, 0.3763890423325413)]

Betweenness centrality

In [44]:
betweeness_centrality = nx.betweenness_centrality(G1)
sorted(betweeness_centrality.items(), key=lambda x:x[1], reverse=True)[:5]    #top5 

[(4295, 0.06307412969632163),
 (747, 0.061583123651714554),
 (3913, 0.04960398710107837),
 (3384, 0.046661307328883045),
 (1862, 0.04517116404930446)]