## Libraries

In [None]:
# numerical analysis
import numpy as np
# processing and storing in dataframe
import pandas as pd

# basic plotting
import matplotlib.pyplot as plt
# advanced plotting
import seaborn as sns
# interactive plotting
import plotly.express as px

# dealing with geographic data
import geopandas as gpd
# to get geolocation 
from geopandas.tools import geocode

## Utility Functions

In [None]:
def plot_cfr(epidemic, mid, end):
    '''Plot a case fatality rate as a 5 x 20 grid of 100 squares.

    Every square stands for one percentage point. Squares ``1..mid`` are
    drawn in 'orangered', squares ``mid+1..end`` in 'coral' and all
    remaining squares in 'darkgray'. Both bounds are inclusive, so
    ``plot_cfr(title, 25, 90)`` highlights exactly 90 squares.

    Parameters
    ----------
    epidemic : str
        Title shown above the grid (left aligned).
    mid : int
        Last square of the strongly highlighted band.
    end : int
        Last square of the lightly highlighted band.
    '''
    squares = np.arange(1, 101)
    # 2 -> up to `mid`, 1 -> up to `end` (inclusive), 0 -> everything else
    levels = np.where(squares <= mid, 2, np.where(squares <= end, 1, 0))
    # fill the grid column by column: 20 columns holding 5 squares each
    grid = levels.reshape(20, 5).T

    fig, ax = plt.subplots(figsize=(10, 5))
    # vmin/vmax pin level 0/1/2 to darkgray/coral/orangered even when one of
    # the levels does not occur in the grid (e.g. mid == end)
    sns.heatmap(grid, vmin=0, vmax=2, linewidths=2, square=True,
                xticklabels=False, yticklabels=False, ax=ax, cbar=False,
                cmap=['darkgray', 'coral', 'orangered'])
    ax.set_title(epidemic, fontdict={'fontsize': 16}, loc='left', pad=15)

    plt.show()

In [None]:
def plot_cal(title, start, end):
    '''Draw a 4 x 7 calendar grid (28 days) with days start..end highlighted.

    Days are numbered 1 to 28, row by row. Days inside the inclusive
    range ``start..end`` are coloured 'royalblue', all others 'whitesmoke'.
    '''
    day_numbers = np.arange(1, 29)
    highlighted = (day_numbers >= start) & (day_numbers <= end)
    # four weeks of seven days, 1 = highlighted, 0 = not highlighted
    calendar = highlighted.astype(int).reshape(4, 7)

    fig, ax = plt.subplots(figsize=(6, 3))
    ax = sns.heatmap(calendar,
                     cmap=['whitesmoke', 'royalblue'], cbar=False,
                     ax=ax, square=True, linewidths=2,
                     xticklabels='', yticklabels='')
    ax.set_title(title, loc='left', pad=15, fontdict={'fontsize': 16})

    plt.show()

## Dataset

### Import Data

In [None]:
# read the Ebola case counts; the 'Date' column is parsed into datetimes
csv_path = '../input/ebola-outbreak-20142016-complete-dataset/ebola_2014_2016_clean.csv'
df = pd.read_csv(csv_path, parse_dates=['Date'])

# preview of the raw data
df.head()

### Data Cleaning

In [None]:
# give the four columns descriptive names
df.columns = ['Country', 'Date', 'Cumulative no. cases', 'Cumulative no. deaths']

count_cols = ['Cumulative no. cases', 'Cumulative no. deaths']

# missing counts are treated as 0 so the columns can be stored as integers
df[count_cols] = df[count_cols].fillna(0).astype('int')

# one row per (date, country).
# The columns are selected with a *list*: tuple-style selection
# (`groupby(...)['a', 'b']`) raises on pandas >= 2.0.
df = df.groupby(['Date', 'Country'])[count_cols].sum().reset_index()

# CFR = deaths / cases in percent, rounded to 2 decimals.
# Zero cases are mapped to NaN first so the ratio is NaN instead of inf.
reported_cases = df['Cumulative no. cases'].replace(0, np.nan)
df['CFR'] = (df['Cumulative no. deaths'] / reported_cases * 100).round(2)

# first few rows
df.head()

In [None]:
# new cases / new deaths ========================================
cum_cols = ['Cumulative no. cases', 'Cumulative no. deaths']
new_cols = ['New cases', 'New deaths']

# change between consecutive reports of the same country.
# Sorting by country and date makes the difference chronological; the
# result keeps the original row labels, so it aligns with `df` on assignment.
# (A list is used for column selection; tuple selection raises on pandas >= 2.0.)
daily = (df.sort_values(['Country', 'Date'])
           .groupby('Country')[cum_cols]
           .diff())

for cum_col, new_col in zip(cum_cols, new_cols):
    # - the first report of a country has no predecessor -> NaN -> 0
    # - a cumulative count that decreases between reports would give a
    #   negative value; both new cases and new deaths are clipped at 0
    df[new_col] = daily[cum_col].fillna(0).astype('int').clip(lower=0)

# any remaining NaN (e.g. CFR of rows without reported cases) becomes 0
df = df.fillna(0)

df.head()

### Geo Map

In [None]:
# country outlines used as the base layer of the static maps
world_map = gpd.read_file('../input/human-development-index-hdi/countries.geojson')

# leave out Antarctica and keep only the columns needed for plotting
not_antarctica = world_map['name'] != 'Antarctica'
world_map = world_map.loc[not_antarctica, ['name', 'continent', 'geometry']]

# first few rows
world_map.head()

### Location Data
> (sometimes geocode doesn't work)

In [None]:
# First EVD reported locations
# ============================

# print(geocode('Nzara, South Sudan'))
# # > 4.6340° N, 28.2644° E

# print(geocode('Yambuku, DRC'))
# # > 2.8250° N, 22.2250° E

In [None]:
# First cases of the Western Africa Ebola outbreak of 2013-2014
# =============================================================

# print(geocode('Meliandou, Guinea'))
# # > 8.6161° N, 10.0612° W

## Ebola Virus Disease (EVD)

> ### First Found 
> * The Ebola virus causes an acute, serious illness which is often fatal if untreated.  
> * EVD first appeared in 1976 in 2 simultaneous outbreaks, one in what is now **Nzara, South Sudan**, and the other in **Yambuku, DRC**.  
> * The latter occurred in a village near the **Ebola River**, from which the disease takes its name.  

In [None]:
fig, ax = plt.subplots(figsize=(18, 14))
sns.set_style('whitegrid')

# light grey base map
world_map.plot(ax=ax, color='gainsboro')

title_font = {'fontsize': 16,
              'fontfamily': 'monospace',
              'fontweight': 'bold',
              'color': 'black'}
ax.set_title('EVD first appeared in 1976 in 2 simultaneous outbreaks, \n  1. Nzara, South Sudan, \n  2. Yambuku, DRC',
             loc='left', fontdict=title_font)

# text style shared by both location labels
label_style = dict(fontfamily='monospace', fontsize=12, fontweight='bold',
                   color='white', backgroundcolor='black')

# (marker x, marker y, label x, label y, label text); x is the longitude
# and y the latitude. The label position is offset from the marker.
first_outbreaks = [
    (28.2644, 4.6340, 34.2644, 5.6340, 'Nzara, South Sudan'),
    (22.2250, 2.8250, -18.2250, -1.8250, 'Yambuku, DRC'),
]
for marker_x, marker_y, label_x, label_y, place in first_outbreaks:
    ax.scatter(marker_x, marker_y, color='orangered', s=150, alpha=0.8)
    ax.text(label_x, label_y, place, **label_style)

ax.set_axis_off()

### Transmission
> One can get EVD  
> * through direct contact with an infected animal (bat or nonhuman primate), or  
> * through contact with a sick or dead person infected with EVD.

### Vaccines
> * Vaccines to protect against Ebola are under development and have been used to help control the spread of Ebola outbreaks in Guinea and in the Democratic Republic of the Congo (DRC).

### CFR (the proportion of people who die from a specified disease among all individuals diagnosed with the disease)

> * The average CFR of EVD is around 50%. CFRs have varied from 25% to 90% in past outbreaks.

In [None]:
# both bounds at 50 -> a single highlighted band covering squares 1 to 50
plot_cfr(epidemic='The average CFR of EVD is around 50%', mid=50, end=50)

### Incubation period (time interval from infection with the virus to onset of symptoms)
> Incubation period of Ebola is from 2 to 21 days. A person infected with Ebola cannot spread the disease until they develop symptoms.

In [None]:
# highlight day 2 through day 21 of the 28-day calendar
plot_cal(title='Incubation period of Ebola is from 2 to 21 days', start=2, end=21)

## 2014-2016 Ebola outbreak in West Africa

### First found
> * On **March 23, 2014**, the WHO reported cases of Ebola Virus Disease (EVD) in the forested rural region of southeastern **Guinea**. 
> * The identification of these early cases marked the beginning of the West Africa Ebola epidemic, the largest in history. 
> * The initial case, or **index patient**, was reported in December 2013. 
> * An 18-month-old boy from a small village in Guinea is believed to have been infected by bats. 


### Symptoms  
> * Symptoms of EVD can be sudden and include: Fever, Fatigue, Muscle pain, Headache, Sore throat.  
> * This is followed by: Vomiting, Diarrhoea, Rash, Symptoms of impaired kidney and liver function  
> * In some cases, both internal and external bleeding (for example, oozing from the gums, or blood in the stools).  
> * Laboratory findings include low white blood cell and platelet counts and elevated liver enzymes.  

## Exploring the Dataset

In [None]:
# rows of the most recent report date, highest cumulative case count first.
# `drop=True` discards the old row labels instead of keeping them as an
# extra 'index' column in `latest`.
latest = (df[df['Date'] == df['Date'].max()]
          .sort_values('Cumulative no. cases', ascending=False)
          .reset_index(drop=True))
latest

In [None]:
# world map coloured by the cumulative case count on the latest report date
fig = px.choropleth(
    latest,
    locations="Country",
    locationmode='country names',
    color="Cumulative no. cases",
    hover_name="Country",
    color_continuous_scale="Sunsetdark",
    title='Geographical distribution of Cumulative no. of cases',
)
# hide the colour bar
fig.update_layout(coloraxis_showscale=False)
fig.show()

In [None]:
# one animation frame per report date, labelled as YYYY-MM-DD
frame_labels = df["Date"].dt.strftime('%Y-%m-%d')

fig = px.choropleth(
    df,
    locations="Country",
    locationmode='country names',
    color="Cumulative no. cases",
    hover_name="Country",
    animation_frame=frame_labels,
    color_continuous_scale="Sunsetdark",
    title='Ebola spread over time',
)
# hide the colour bar
fig.update_layout(coloraxis_showscale=False)
fig.show()

In [None]:
# countries ordered by cumulative case count, largest first
treemap_data = (latest
                .sort_values(by="Cumulative no. cases", ascending=False)
                .reset_index(drop=True))

fig = px.treemap(treemap_data,
                 path=["Country"],
                 values="Cumulative no. cases",
                 title='Proportion of Cumulative no. of cases',
                 color_discrete_sequence=px.colors.qualitative.Dark2)
# show the country name together with its value inside each tile
fig.data[0].update(textinfo='label+text+value')
fig.show()

In [None]:
# fig, ax = plt.subplots(figsize=(15, 7))
# sns.set_style('darkgrid')

# sns.barplot(data=latest, x='Cumulative no. cases', y='Country', color='midnightblue', ax=ax)
# sns.barplot(data=latest, x='Cumulative no. deaths', y='Country', color='royalblue', ax=ax)

# for ind, row in latest.iterrows():
#     s = str(row['Cumulative no. cases']) + ' (' + str(row['Cumulative no. deaths']) + ')'
#     ax.text(row['Cumulative no. cases']+100, ind+0.1, s, fontsize=12)
    
# ax.set_title('No. of Cases (No. of Deaths in brackets)', loc='left', pad=10, fontsize=16)
# ax.set_xlabel('')
# ax.set_ylabel('')

# plt.show()

In [None]:
def plot_barh(col):
    '''Plot a horizontal bar chart of `col` for every country in `latest`.

    Reads the notebook-level `latest` frame. Bars are ordered so that the
    country with the largest value is drawn at the top, matching the other
    bar charts of this notebook.

    Parameters
    ----------
    col : str
        Name of the column of `latest` to plot; also used as chart title
        and as the text shown on each bar.
    '''
    # descending order fixes which colour of the palette each country gets
    temp_df = latest.sort_values(col, ascending=False)
    fig = px.bar(temp_df, x=col, y='Country', orientation='h',
                 color='Country', text=col, title=col, width=700,
                 hover_data=['Cumulative no. cases',
                             'Cumulative no. deaths',
                             'CFR'],
                 color_discrete_sequence=px.colors.qualitative.Dark2)
    fig.update_traces(textposition='auto')
    # plotly draws categories bottom-up; 'total ascending' therefore places
    # the largest bar at the top of the chart
    fig.update_layout(xaxis_title="", yaxis_title="", showlegend=False,
                      uniformtext_minsize=8, uniformtext_mode='hide',
                      yaxis_categoryorder='total ascending')
    fig.show()

In [None]:
# cumulative cases per country
plot_barh(col='Cumulative no. cases')

In [None]:
# cumulative deaths per country
plot_barh(col='Cumulative no. deaths')

In [None]:
# case fatality rate per country
plot_barh(col='CFR')

## Over time

In [None]:
count_cols = ['Cumulative no. cases', 'Cumulative no. deaths']

# totals over all countries per report date.
# The columns are selected with a list; tuple-style selection
# (`groupby(...)['a', 'b']`) raises on pandas >= 2.0.
temp = df.groupby('Date')[count_cols].sum().reset_index()

# long format: one row per (date, kind of count) for the faceted line plot
temp = temp.melt(id_vars='Date', value_vars=count_cols,
                 var_name='Reported', value_name='Count')

fig = px.line(temp, x="Date", y="Count", color='Reported', facet_col="Reported",
              title='Number of cases')
fig.show()

In [None]:
def plot_stacked(col):
    '''Plot a stacked area chart of `col` per country, one point per month.

    Reads the notebook-level `df` frame. Report dates are bucketed by
    calendar month (labelled with the first day of the month) and the
    monthly value of each country is the maximum of `col` in that month.

    Parameters
    ----------
    col : str
        Name of the numeric column of `df` to plot; also used as the chart
        title so that charts of different columns can be told apart.
    '''
    # NOTE(review): for the 'New ...' columns the maximum is the largest
    # single-report increase of the month, not the monthly total - confirm
    # that this is the intended aggregation.
    month = df['Date'].dt.strftime('%Y-%m-01')
    # only the plotted column is aggregated; selecting it by name (not by a
    # tuple of names) keeps this working on pandas >= 2.0
    monthly = df.groupby([month, 'Country'])[col].max().reset_index()

    fig = px.area(monthly, x="Date", y=col, color='Country', orientation='v',
                  title=col, color_discrete_sequence=px.colors.qualitative.Prism)
    fig.show()

In [None]:
# monthly cumulative cases, stacked by country
plot_stacked(col='Cumulative no. cases')

In [None]:
# monthly new cases, stacked by country
plot_stacked(col='New cases')

## Highly affected countries

In [None]:
# first three rows of `latest` (it is sorted by cumulative cases, descending)
high_df = latest.head(3)

# long format: one row per (country, kind of count)
high_melt = pd.melt(high_df,
                    id_vars='Country',
                    value_vars=['Cumulative no. cases', 'Cumulative no. deaths'],
                    var_name='Case',
                    value_name='Count')

In [None]:
# grouped horizontal bars: cases (dark grey) next to deaths (orange-red)
fig = px.bar(high_melt, x='Count', y='Country',
             color='Case', text='Count', orientation='h',
             barmode='group', width=700, height=400,
             color_discrete_sequence=['#444444', 'orangered'])
# title, empty axis titles and country order set in a single layout update
fig.update_layout(title='No. of cases and deaths',
                  xaxis_title="", yaxis_title="",
                  yaxis_categoryorder='total ascending')
fig.show()

## Less affected countries

In [None]:
# every row of `latest` after the first three
low_df = latest.iloc[3:]

# long format: one row per (country, kind of count)
low_melt = pd.melt(low_df,
                   id_vars='Country',
                   value_vars=['Cumulative no. cases', 'Cumulative no. deaths'],
                   var_name='Case',
                   value_name='Count')

In [None]:
# grouped horizontal bars: cases (dark grey) next to deaths (orange-red)
fig = px.bar(low_melt, x='Count', y='Country',
             color='Case', text='Count', orientation='h',
             barmode='group', width=700,
             color_discrete_sequence=['#444444', 'orangered'])
# title, empty axis titles, bar mode and country order in one layout update
fig.update_layout(title='No. of cases and deaths',
                  xaxis_title="", yaxis_title="",
                  barmode='group',
                  yaxis_categoryorder='total ascending')
fig.show()

## References
> * Wikipedia Link : https://en.wikipedia.org/wiki/Western_African_Ebola_virus_epidemic  
> * WHO Link : https://www.who.int/csr/disease/ebola/en/  
> * CDC Link : https://www.cdc.gov/vhf/ebola/index.html  
> * Kaggle Dataset : https://www.kaggle.com/imdevskp/ebola-outbreak-20142016-complete-dataset