In [1]:
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import plotly.graph_objects as go
import cufflinks as cf
import seaborn as sns
import plotly.express as px
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import networkx as nx
import matplotlib.pyplot as plt
from copy import deepcopy
init_notebook_mode(connected=True)
cf.go_offline()
%matplotlib inline

In [2]:
#Utilities to be used.
def count_el_df(df, col, el):
    """Return the number of rows of ``df`` whose ``col`` value equals ``el``."""
    return len(df[df[col] == el])


def style_df(df):
    """Return a styled view of the first five rows of ``df``.

    The returned pandas Styler uses a pale-yellow header, verdana fonts,
    alternating light-blue / white rows and a pale-yellow hover highlight.
    """
    return df.head(5).style.set_table_styles(
        [{
            'selector':
            'th',
            'props': [('background', '#FFFEE3'), ('color', 'black'),
                      ('font-family', 'verdana')]
        }, {
            'selector': 'td',
            'props': [('font-family', 'verdana')]
        }, {
            'selector': 'tr:nth-of-type(odd)',
            'props': [('background', '#ADD8E6')]
        }, {
            'selector': 'tr:nth-of-type(even)',
            'props': [('background', 'white')]
        }, {
            'selector': 'tr:hover',
            'props': [('background-color', '#FFFEE3')]
        }])


# Show every row when a frame is displayed. The fully qualified key is needed:
# the short pattern 'max_rows' also matches 'styler.render.max_rows' on recent
# pandas releases, which makes set_option raise OptionError.
pd.set_option('display.max_rows', None)


#Helper that loads a csv file into a dataframe.
class process_csv:
    """Reader for a csv file whose first column is not a feature."""

    def __init__(self, path):
        # Location of the csv file; the feature names are filled in on read.
        self.path = path
        self.features = []

    def read_csv_to_df(self):
        """Load the csv at ``self.path`` without its first column.

        Only the first data row is read at first, to discover the column
        names. The first name is discarded and the remaining names are stored
        in ``self.features``; the file is then read again restricted to those
        columns.

        Returns:
            The dataframe holding the feature columns.
        """
        header = list(pd.read_csv(self.path, nrows=1).columns)
        del header[0]  # The first column is not a feature.
        self.features = header
        return pd.read_csv(self.path, usecols=list(self.features))

In [3]:
# Build the reader for the outbreak csv, load it (the reader drops the first
# column) and preview the first five rows.
outbreak_data = process_csv(path=r"Outbreak_India.csv")
outbreak_df = outbreak_data.read_csv_to_df()
outbreak_df.head(5)

Unnamed: 0,Name of \n State/UT,Name of \n District,Disease/ Illness,No. of Cases,No. of Deaths,Date of \n start of \n outbreak,Date Reported,Current Status,Comments
0,Karnataka,? Shimoga,Typhoid Fever,38,0,10-05-14,Reported late,Under \n Control,Cases of fever reported from Surya Nursing Col...
1,Jammu & \n Kashmir,09 Districts,Dengue,66,0,02-09-15,Reported late,Under Surveillance,Cases of fever with headache and body ache wer...
2,Jharkhand,10 Districts,Japanease Encephalitis,180,2,18-04-15,Reported late,Under surveillance,"Cases of fever reported from Ranchi,East, Sing..."
3,Jharkhand,20 Districts,Dengue,169,0,21-07-15,Reported late,Under surveillance,Cases of fever with joint pain reported from R...
4,West Bengal,24 South Paragnas,Food Poisoning,65,0,30-04-18,01-05-18,Under Surveillanc e,Cases of vomiting with loose stools reported a...


In [4]:
# Keep only the columns needed for the case/death analysis and give them short
# names. rename() returns a new frame, so no in-place mutation is needed.
case_df = outbreak_df.filter([
    'Name of \n State/UT', 'Disease/ Illness', 'No. of Cases', 'No. of Deaths'
]).rename(columns={
    'Name of \n State/UT': 'State',
    'Disease/ Illness': 'Disease',
    'No. of Cases': 'Cases',
    'No. of Deaths': 'Deaths'
})

# Counts that cannot be parsed as numbers become <NA>; rows with any missing
# value are then dropped. The explicit copy makes case_df an independent frame
# before the column assignments below.
case_df = case_df.assign(
    Cases=pd.to_numeric(case_df['Cases'], errors='coerce').astype('Int64'),
    Deaths=pd.to_numeric(case_df['Deaths'], errors='coerce').astype('Int64'),
).dropna().copy()

# State names can contain embedded newlines and runs of spaces (e.g.
# 'Jammu & \n Kashmir'). Collapse every whitespace run to a single space and
# trim the ends, so that every spelling of a state lands in the same group.
# Removing only runs of exactly three spaces left double spaces behind and
# split such states into separate groups.
case_df['State'] = (case_df['State']
                    .replace(r'\s+', ' ', regex=True)
                    .replace(r'^ | $', '', regex=True)
                    .replace('M anipur', 'Manipur', regex=True))

# Disease names are canonicalised by removing newlines and all spaces, then
# fixing a known misspelling.
case_df['Disease'] = (case_df['Disease']
                      .replace('\n', '', regex=True)
                      .replace(' +', '', regex=True)
                      .replace('FoodPoisioning', 'FoodPoisoning', regex=True))

# Total cases and deaths per (state, disease) pair.
case_df = case_df.groupby(['State',
                           'Disease'])[['Cases',
                                        'Deaths']].sum().reset_index()

# Deaths as a percentage of cases.
case_df['Fatality_Ratio'] = case_df['Deaths'] / case_df['Cases'] * 100
style_df(case_df)

Unnamed: 0,State,Disease,Cases,Deaths,Fatality_Ratio
0,Andaman & Nicobar Islands,AcuteDiarrhoealDisease,210,0,0.0
1,Andaman & Nicobar Islands,"Uneasiness,VomitingandDiarrhea",25,0,0.0
2,Andhra Pradesh,AcuteDiarrhoealDisease,11476,48,0.418264
3,Andhra Pradesh,AlcoholPoisoning,99,18,18.181818
4,Andhra Pradesh,AmoebicDysentry,17,0,0.0


In [5]:
#Keep the (state, disease) pairs whose fatality ratio is above 0.6.
#Fatality_Ratio is expressed as a percentage of cases.
fatal_df = case_df.loc[case_df['Fatality_Ratio'] > 0.6,
                       ['State', 'Disease', 'Fatality_Ratio']]
style_df(fatal_df)

Unnamed: 0,State,Disease,Fatality_Ratio
3,Andhra Pradesh,AlcoholPoisoning,18.181818
8,Andhra Pradesh,Cholera,1.5
11,Andhra Pradesh,Diarrhoea,4.081633
12,Andhra Pradesh,Diphtheria,33.333333
14,Andhra Pradesh,Fever,0.997862


In [6]:
#Distinct states and distinct diseases present in the high-fatality table.
states_tot = list(set(fatal_df['State']))
diseases = list(set(fatal_df['Disease']))

In [7]:
#Maps each disease to the states where its fatality ratio is above 0.6.
disease_dict = {}
for disease in diseases:
    affected = fatal_df.loc[fatal_df['Disease'] == disease, 'State'].tolist()
    if len(affected) == 1:
        #A disease seen in a single state cannot link two states; skip it.
        continue
    disease_dict[disease] = affected

In [8]:
#Unique states that appear in at least one entry of disease_dict.
states = list(
    set([state for group in disease_dict.values() for state in group]))

In [9]:
#Defining a graph.
G = nx.Graph()

#Adding nodes
G.add_nodes_from(states)

#Adding edges: every pair of states listed under the same disease is linked.
#The loop variable is deliberately not named `states`, so the node list built
#earlier is not overwritten by the last disease's states. Slicing replaces
#the deepcopy + pop(0) pairing and yields the same edges in the same order.
for affected_states in disease_dict.values():
    for idx, first in enumerate(affected_states):
        G.add_edges_from(
            (first, other) for other in affected_states[idx + 1:])

In [10]:
#Adding coordinates to nodes. The layout starts from random positions, so a
#fixed seed is passed to get the same figure on every run.
pos1 = nx.fruchterman_reingold_layout(G, seed=42)


In [11]:
def get_nodes(Graph, pos):
    """Collect the x and y coordinates of every node of ``Graph``.

    Args:
        Graph: object whose ``nodes()`` method yields the node keys.
        pos: mapping from node key to an indexable ``(x, y, ...)`` position.

    Returns:
        A ``(node_x, node_y)`` tuple of lists, in ``Graph.nodes()`` order.
    """
    nodes = list(Graph.nodes())
    xs = [pos[name][0] for name in nodes]
    ys = [pos[name][1] for name in nodes]
    return xs, ys


def get_edge(Graph, pos):
    """Collect the end-point coordinates of every edge of ``Graph``.

    Each edge contributes three entries to each list: the coordinate of its
    first end point, the coordinate of its second end point, and ``None``.

    Args:
        Graph: object whose ``edges()`` method yields ``(node, node)`` pairs.
        pos: mapping from node key to an indexable ``(x, y, ...)`` position.

    Returns:
        A ``(edge_x, edge_y)`` tuple of lists.
    """
    xs, ys = [], []
    for start, end in Graph.edges():
        p_start, p_end = pos[start], pos[end]
        xs.extend((p_start[0], p_end[0], None))
        ys.extend((p_start[1], p_end[1], None))
    return xs, ys

In [12]:
# Marker trace for the states. Colours are filled in later, once the number
# of connections per node has been computed.
node_x, node_y = get_nodes(G, pos1)
node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='Viridis',
        reversescale=True,
        color=[],
        size=10,
        # The colorbar title side is given through the nested `title` object;
        # the flat `titleside` attribute is deprecated and rejected by
        # current plotly releases.
        colorbar=dict(thickness=15,
                      title=dict(text='Node Connections', side='right'),
                      xanchor='left'),
        line_width=2))

In [13]:
# One line trace holds every edge; the None entries produced by get_edge
# separate consecutive segments.
edge_x, edge_y = get_edge(G, pos1)
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line={'width': 0.5, 'color': '#888'},
)

In [14]:
# Number of neighbours per node (used as marker colour) and the hover label.
node_adjacencies = []
node_text = []
# Node names are taken from the graph itself, so every label is guaranteed to
# belong to the marker at the same position. Indexing a separately rebuilt
# list(set(...)) by position only matched the graph's node order by accident.
states = list(G.nodes())
for node_name, neighbours in G.adjacency():
    n_links = len(neighbours)
    node_adjacencies.append(n_links)
    node_text.append('{} has {} connections.'.format(node_name, n_links))

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

In [17]:
# Assemble the network figure: edges are drawn first so the node markers sit
# on top of them.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        showlegend=False,
        paper_bgcolor='rgb(250,250,200)',
        hovermode='closest',
        height=800,
        width=800,
        margin=dict(b=20, l=10, r=5, t=40),
        annotations=[
            dict(
                text=
                "Nodes represents states",
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.005,
                y=-0.002)
        ],
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
# The title font is set through the nested `title.font` object; the flat
# `titlefont` attribute is deprecated and rejected by current plotly releases.
fig.update_layout(title=dict(text='Network of Disease Outbreak in India',
                             font=dict(size=16),
                             x=0.45))
fig.show()