# Statistics and Line Graphs of Ebola Virus
#### The purpose of this activity is to show the top 5 countries that suffered the most from the Ebola virus, over the years, months, and weeks

### Dependencies and starter code

In [None]:
# Select matplotlib's interactive "notebook" backend for the figures created in this notebook.
%matplotlib notebook

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress


In [None]:
# Load the Ebola dataset from the CSV file produced by the previous notebook.
reference = "outputs/ebola1.csv"
metadata = pd.read_csv(reference)

# Preview only the first rows instead of rendering the whole frame.
metadata.head()

## Statistics

In [None]:
# Summary statistics of deaths per country: sum, mean, median, variance and
# standard deviation.
stats = metadata.groupby(["Country"])["Deaths"].agg(['sum', 'mean', 'median', 'var', 'std'])

# Give the columns presentation-friendly labels.
stats.columns = ['Sum', 'Mean', 'Median', 'Variance', 'STD']

# Format the fractional statistics as fixed two-decimal strings.
# Series.map is applied column by column instead of DataFrame.applymap, which
# is deprecated in recent pandas releases; the resulting strings are the same.
# Note: after this step these four columns hold strings, not numbers.
decimals = ['Mean', 'Median', 'Variance', 'STD']
stats[decimals] = stats[decimals].apply(lambda column: column.map('{:.2f}'.format))

# Persist the table, then show the first 20 countries.
stats.to_csv("outputs/ebola_stats.csv")
stats.head(20)

## Top Five

In [None]:
# Total deaths per country, kept as a one-column frame whose column is named "sum".
deaths_by_country = metadata.groupby(["Country"])["Deaths"]
deaths = deaths_by_country.agg(['sum'])

# Keep the five countries with the largest death totals.
top5 = deaths.nlargest(n=5, columns='sum')
top5

In [None]:
# Restrict the dataset to the five countries with the most deaths.
# The country names are read from the index of `top5` (the five largest death
# totals computed from the data) instead of being typed in by hand, so this
# filter stays in sync with the data if the input file changes.
top_five = top5.index.tolist()

# Columns kept for the weekly / monthly breakdowns.
top_columns = ["Country", "Cases", "Deaths", "Recovered", "week_number", "month", "year"]

# Filter rows and select columns in a single .loc step.
top_deaths = metadata.loc[metadata["Country"].isin(top_five), top_columns]

top_deaths.to_csv("outputs/ebola_top5.csv")
top_deaths.head(10)

#### Ebola Deaths by Week Number

In [None]:
# Weekly Ebola deaths for the selected countries, one line per country.
# "Deaths" is selected before aggregating so only that column is summed
# (the other columns were previously summed and then discarded).
# unstack() pivots the Country level into columns, giving one line each.
# NOTE(review): rows are grouped by week_number only, so the same week number
# from different years is pooled together - confirm this is intended.
weekly_deaths = top_deaths.groupby(['week_number', 'Country'])['Deaths'].sum().unstack()

fig, ax = plt.subplots(figsize=(10, 4))
weekly_deaths.plot(ax=ax)
ax.set_xlabel('Week Number')
ax.set_ylabel('Number of Deaths')
ax.set_title('Ebola Deaths by Country by Week')
fig.savefig("plots/Week - EbolaDeathsCountry.png")

#### Ebola Deaths by Month

In [None]:
# Monthly Ebola deaths for the selected countries, one line per country.
# "Deaths" is selected before aggregating so only that column is summed
# (the other columns were previously summed and then discarded).
# unstack() pivots the Country level into columns, giving one line each.
# NOTE(review): rows are grouped by month only, so the same month from
# different years is pooled together - confirm this is intended.
monthly_deaths = top_deaths.groupby(['month', 'Country'])['Deaths'].sum().unstack()

fig, ax = plt.subplots(figsize=(10, 4))
monthly_deaths.plot(ax=ax)
ax.set_xlabel('Month Number')
ax.set_ylabel('Number of Deaths')
ax.set_title('Ebola Deaths by Country by Month')
fig.savefig("plots/Months - EbolaDeathsCountry.png")