https://www.kaggle.com/imdevskp/cholera-dataset/tasks?taskId=1242

**Task Details**

Use EDA and visualization tools to find and accurately show which countries haven't had a cholera case reported in the past 10 years.

In [None]:
# import os
# import numpy as np
import pandas as pd
# import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Location of the cholera CSV, relative to the notebook's working directory.
data_path = '../input/cholera-dataset/data.csv'
df = pd.read_csv(data_path)

In [None]:
# Summarise column dtypes and non-null counts.
# `show_counts` replaces the `null_counts` keyword, which was deprecated in
# pandas 1.2 and removed in pandas 2.0 (where it raises TypeError).
df.info(show_counts=True)

### Renaming columns to more convenient names

In [None]:
# Map the verbose source column names to short labels used in the rest of
# the notebook.
short_names = {
    'Number of reported cases of cholera': 'Cases',
    'Number of reported deaths from cholera': 'Deaths',
    'Cholera case fatality rate': 'Fatality Rate',
}
df = df.rename(columns=short_names)

# Preview the first two rows to confirm the new column labels.
df.head(2)

### Converting the Cases and Deaths columns' values to a numeric type

In [None]:
# Parse both count columns as numbers; with errors='coerce', any value that
# cannot be parsed becomes NaN instead of raising.
for column in ('Cases', 'Deaths'):
    df[column] = pd.to_numeric(df[column], errors='coerce')

### Showing the overall range of available years (not per country)

In [None]:
# Earliest and latest year found anywhere in the dataset.
first_year = df['Year'].min()
last_year = df['Year'].max()
print(f'{first_year} - {last_year}')

### Summing reported cases and deaths for each country since 2006

In [None]:
# Keep only the rows from 2006 onwards.
since_2006 = df['Year'] >= 2006

# Total cases and deaths per (country, region) pair over that period.
# `as_index=False` keeps Country and WHO Region as ordinary columns.
df_sum = (
    df[since_2006]
    .groupby(by=['Country', 'WHO Region'], as_index=False)
    .agg(dict.fromkeys(['Cases', 'Deaths'], pd.Series.sum))
)
df_sum

### Showing available regions

In [None]:
# Distinct WHO regions, in order of first appearance in the data.
regions = pd.unique(df['WHO Region'])
list(regions)

### Plotting shares of total cases per country for each region

In [None]:
# One pie chart per WHO region, showing each country's share of the cases
# summed in df_sum.

# Size the two-column grid from the number of regions, so that an unexpected
# extra region cannot index past the grid. With six regions this is the
# same 3x2, 20x20 figure as before.
n_cols = 2
n_rows = max(1, -(-len(regions) // n_cols))  # ceiling division
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(20, 20 * n_rows / 3), squeeze=False
)

# Fill the grid column by column: first column top to bottom, then the second.
panels = axes.flatten(order='F')

# Only countries above the 25th percentile of Cases (taken over all countries
# in df_sum) are drawn. The threshold does not depend on the region, so it is
# computed once outside the loop.
cases_threshold = df_sum['Cases'].quantile(0.25)

for ax, region in zip(panels, regions):
    ax.set_title(region)
    df_pie = df_sum \
        .loc[(df_sum['WHO Region'] == region) & (df_sum['Cases'] > cases_threshold)] \
        .sort_values(by='Cases', ascending=False)
    ax.pie(df_pie['Cases'], labels=df_pie['Country'], startangle=0)

# Hide any panels that did not receive a region.
for ax in panels[len(regions):]:
    ax.set_visible(False)

fig.subplots_adjust(wspace=.2);

### The countries that haven't had a cholera case reported in the past 10 years (since 2006)

In [None]:
# Countries with no reported cholera cases since 2006.
#
# Start from every country that appears anywhere in the dataset. A country
# with no rows at all since 2006 has nothing reported in that period and must
# be part of the answer; filtering a since-2006 aggregate alone would drop it.
all_countries = df[['Country', 'WHO Region']].drop_duplicates()

# Per-country totals since 2006. sum() skips NaN, so values that could not be
# parsed as numbers contribute nothing to the total.
totals_since_2006 = df \
    .loc[df['Year'] >= 2006] \
    .groupby('Country')[['Cases', 'Deaths']] \
    .sum()

# Left-join the totals onto the full country list. Countries without any row
# since 2006 get NaN totals, which are filled with 0 before filtering.
df_2006_no_cases = all_countries \
    .join(totals_since_2006, on='Country') \
    .fillna({'Cases': 0, 'Deaths': 0}) \
    .loc[lambda frame: frame['Cases'] == 0] \
    .reset_index(drop=True)
df_2006_no_cases