In [16]:
import itertools
import pandas as pd
import numpy as np
import networkx as nx
import nxviz as nv

import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
%matplotlib inline

In [17]:
# Load the two input tables:
#   doc     - doctor records (the later cells read its DOC_ID and HOSPITAL columns)
#   doc_net - doctor/hospital links (the later cells read its HOSP_ID and DOC_ID columns)
doc_sample_path = '../Data(net)/doc_sample.csv'
doc_network_path = '../Data(net)/doc_network.csv'

doc = pd.read_csv(doc_sample_path)
doc_net = pd.read_csv(doc_network_path)

## Initializing Graph G1

In [18]:
# G1 is an undirected graph: each node is a doctor, and an edge links two
# doctors who are attached to the same hospital.
# Keyword arguments given to the constructor become graph attributes, so the
# name is set at creation time.
G1 = nx.Graph(name='G1 (Node:Doctor, Edge:Hospital)')

In [19]:
# Build, for every doctor, the list of all hospitals that doctor is associated with.
# Grouping `doc` by DOC_ID gathers every row (and therefore every HOSPITAL value)
# that belongs to the same doctor.

grp_list = list(doc.groupby('DOC_ID'))

# Highest positional index in grp_list. Derived from the data rather than
# hard-coded, so the cell keeps working when the input file changes.
total_doc_ids = len(grp_list) - 1

# The node-creation cell looks hospitals up with doc_hosp[DOC_ID], which is only
# correct when the (sorted) DOC_IDs are exactly 0..N-1. Fail loudly otherwise
# instead of silently attaching the wrong hospitals to a doctor.
assert [doc_id for doc_id, _ in grp_list] == list(range(len(grp_list))), \
    "DOC_ID values must be the consecutive integers 0..N-1"

# doc_hosp[k] is the hospital list of the k-th group (groupby sorts by DOC_ID).
doc_hosp = [list(group['HOSPITAL']) for _, group in grp_list]

In [20]:
#Creating Nodes

# Every column after the first one becomes a node attribute; one attribute
# dictionary is built per row of `doc`.
attr_names = doc.columns[1:]
nodes_attr = [
    dict(zip(attr_names, doc.iloc[position, 1:]))
    for position in range(len(doc))
]

# Pair each row's DOC_ID with its attribute dictionary and add them to the graph.
# A DOC_ID that occurs in several rows ends up as a single node.
nodes = list(zip(doc['DOC_ID'], nodes_attr))
G1.add_nodes_from(nodes)

# Replace the single HOSPITAL value taken from one row with the complete list
# of hospitals collected for that doctor in doc_hosp.
for doctor_id in G1.nodes():
    hospitals = doc_hosp[doctor_id]
    G1.node[doctor_id]['HOSPITAL'] = hospitals

In [21]:
# Compare the number of rows in `doc` with the number of distinct doctor nodes in G1.
rows_in_doc = len(doc)
nodes_in_graph = len(G1.nodes())
print('Total Doctors(with repeatition): %d\nTotal Doctors(without repeatition): %d' % (rows_in_doc, nodes_in_graph))

Total Doctors(with repeatition): 7415
Total Doctors(without repeatition): 6790


In [22]:
# Show the attribute dictionary stored on one example node.
sample_doctor_id = 12
G1.node[sample_doctor_id]

{'EXPERIENCE': nan,
 'HOSPITAL': ['Mata Chanan Devi Hospital', 'Primus Super Speciality Hospital'],
 'NAME': 'Dr Rajat Goel',
 'QUALIFICATION': nan,
 'SPECIALTIES': 'GeneralSurgeon,LaparoscopicSurgeon,BariatricSurgeon'}

In [23]:
#Creating Edges

# All doctors listed under the same HOSP_ID in doc_net are connected pairwise.
for _, doctors in doc_net.groupby('HOSP_ID')['DOC_ID']:
    for u, v in itertools.combinations(doctors, 2):
        hospitals_u = set(G1.node[u]['HOSPITAL'])
        hospitals_v = set(G1.node[v]['HOSPITAL'])
        # Hospitals that appear in the HOSPITAL lists of both doctors.
        common_hosp = list(hospitals_u & hospitals_v)
        # Pass the attribute as a keyword argument. `attr_dict=` is only
        # understood by networkx 1.x; from 2.0 on it would be stored as a
        # literal edge attribute called "attr_dict" and HOSPITAL would never
        # be set. The keyword form gives the same result on every version.
        G1.add_edge(u, v, HOSPITAL=common_hosp)


In [24]:
# Summary of graph G1 (name, type, node/edge counts, average degree)
g1_summary = nx.info(G1)
print(g1_summary)

Name: G1 (Node:Doctor, Edge:Hospital)
Type: Graph
Number of nodes: 6790
Number of edges: 440777
Average degree: 129.8312


In [25]:
# nx.write_edgelist(G1, 'G1.edges')

## Small-World Analysis of the Indian Doctors Network
Any two people in the world, taken at random, can be connected through a chain of six links (on average). The small-world phenomenon formalises the anecdotal notion that ‘you are only ever “six degrees of separation” away from anyone else on the planet’.

In [26]:
# Pick one doctor at random and show that node's attributes.
# A random *position* in the list of existing node IDs is drawn, so the result
# is always a real node even if the IDs are not exactly 0..N-1.
# No seed is set here, so a different doctor is selected on every run.
doctor_ids = list(G1.nodes())
random_doctor_id = doctor_ids[np.random.randint(len(doctor_ids))]
G1.node[random_doctor_id]

{'EXPERIENCE': 41.0,
 'HOSPITAL': ['Apollo Hospital'],
 'NAME': 'Dr. R. Charumathi',
 'QUALIFICATION': 'MBBS MS - General Surgery MCh - Plastic Surgery DNB - Plastic Surgery ',
 'SPECIALTIES': 'Gynecologist,Obstetrician'}

In [27]:
# closeness centrality
# Request the value for the selected doctor only (u=...). Computing it for all
# nodes and then indexing one entry repeats the shortest-path search for every
# doctor in the graph without changing the result.
closeness_centrality = nx.closeness_centrality(G1, u=random_doctor_id)

# The reciprocal of closeness is the mean shortest-path distance to the other
# doctors. NOTE: with networkx's default scaling this holds exactly only when
# every other doctor is reachable from the selected one.
# A closeness of 0 means no other doctor is reachable; report an infinite
# distance instead of raising ZeroDivisionError.
avg_distance = 1/closeness_centrality if closeness_centrality > 0 else float('inf')
avg_distance

3.3252022606095166

The average distance from the doctor above to the other doctors in the network is less than 6!

### Local Clustering or Community Detection 
*using Gephi 0.9.1*


<center>
    <img src="images/G1.png">
</center>


    - doctors in different local clusters are shown in different colors
    - the size of a node is directly proportional to the betweenness of that doctor in the whole network