# Exploratory Data Analysis of Causes of Death Around the World

Just a quick exploration of the data, using plotly to create interactive plots.
Thanks for viewing!

Be happy and keep learning!

In [1]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
# set up plotly's offline notebook rendering; per the plotly docs, connected=True
# loads plotly.js from a CDN rather than embedding it in the notebook
init_notebook_mode(connected=True)
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation warnings from pandas/plotly - consider a narrower filter
warnings.filterwarnings('ignore')

In [2]:
# load the cause-of-deaths CSV into a DataFrame and preview five random rows
DATA_PATH = '../input/cause-of-deaths-around-the-world/cause_of_deaths.csv'
df = pd.read_csv(DATA_PATH)
df.sample(5)

Unnamed: 0,Country/Territory,Code,Year,Meningitis,Alzheimer's Disease and Other Dementias,Parkinson's Disease,Nutritional Deficiencies,Malaria,Drowning,Interpersonal Violence,...,Diabetes Mellitus,Chronic Kidney Disease,Poisonings,Protein-Energy Malnutrition,Road Injuries,Chronic Respiratory Diseases,Cirrhosis and Other Chronic Liver Diseases,Digestive Diseases,"Fire, Heat, and Hot Substances",Acute Hepatitis
5646,Turkmenistan,TKM,1996,160,285,55,32,1,547,261,...,391,443,64,14,493,815,1200,1481,356,116
924,Cambodia,KHM,2014,257,1461,352,581,118,1113,514,...,2984,1886,73,506,2828,4379,7369,10941,211,349
4965,Solomon Islands,SLB,2005,24,14,14,33,220,101,44,...,277,96,14,33,127,282,101,152,17,13
5097,South Sudan,SSD,2017,2083,436,92,2485,4983,251,1065,...,1080,892,170,2442,955,1293,1634,2612,214,196
1105,Chile,CHL,2015,117,4061,1168,512,0,362,821,...,3368,4473,37,487,2314,5685,4803,7708,333,5


In [3]:
# count the missing entries in every column, largest counts first
missing_per_column = df.isna().sum()
missing_per_column.sort_values(ascending=False)

Country/Territory                             0
Chronic Kidney Disease                        0
Exposure to Forces of Nature                  0
Diarrheal Diseases                            0
Environmental Heat and Cold Exposure          0
Neoplasms                                     0
Conflict and Terrorism                        0
Diabetes Mellitus                             0
Poisonings                                    0
Code                                          0
Protein-Energy Malnutrition                   0
Road Injuries                                 0
Chronic Respiratory Diseases                  0
Cirrhosis and Other Chronic Liver Diseases    0
Digestive Diseases                            0
Fire, Heat, and Hot Substances                0
Self-harm                                     0
Alcohol Use Disorders                         0
Neonatal Disorders                            0
Lower Respiratory Infections                  0
Cardiovascular Diseases                 

In [4]:
# list every distinct country/territory name, in order of first appearance
country_names = df['Country/Territory']
country_names.unique()

array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
       'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Comoros', 'Congo', 'Cook Islands', 'Costa Rica', "Cote d'Ivoire",
       'Croatia', 'Cuba', 'Cyprus', 'Czechia',
       'Democratic Republic of Congo', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia',
       'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia',
       'Germany', 'Ghana', 'Greece', 'Greenland', 'G

In [5]:
# how many distinct countries/territories the dataset covers
# (dropna=False keeps the count identical to len(unique()), which counts NaN too)
df['Country/Territory'].nunique(dropna=False)

204

In [6]:
# plot top 10 causes of death in a country, summed over all years
def top_10_illnesses(country):
    """Show a bar chart of the ten deadliest causes in ``country`` over all years.

    Death counts are summed per cause across every row (year) available for
    the country, and the ten largest totals are plotted with plotly express.

    Parameters
    ----------
    country : str
        Value to match exactly against the 'Country/Territory' column of the
        notebook-level ``df``.

    Raises
    ------
    ValueError
        If no row of ``df`` matches ``country``.
    """
    df_country = df[df['Country/Territory'] == country]
    if df_country.empty:
        raise ValueError('No data found for country {!r}'.format(country))

    min_year, max_year = df_country['Year'].min(), df_country['Year'].max()

    # Drop the identifier columns by name. Slicing with iloc[:, 2:] kept the
    # 'Year' column, so the summed years were ranked as if they were a cause
    # of death and could show up in the top 10.
    cause_totals = df_country.drop(columns=['Country/Territory', 'Code', 'Year']).sum()
    top_causes = cause_totals.sort_values(ascending=False).head(10)

    fig = px.bar(
        top_causes,
        title='Top 10 Cause of Deaths in {} between {} and {}'.format(country, min_year, max_year),
        color_discrete_sequence=px.colors.qualitative.Pastel,
        labels={'value': 'Amount of deaths', 'index': 'Cause of Deaths'},
    )
    # only one series is plotted, so the legend carries no information
    fig.update_layout(showlegend=False)
    fig.show()

In [7]:
# top-10 bar chart for Mexico over all years in the data
top_10_illnesses('Mexico')

In [8]:
# top-10 bar chart for the United States over all years in the data
top_10_illnesses('United States')

In [9]:
# top-10 bar chart for Canada over all years in the data
top_10_illnesses('Canada')

In [10]:
# plot top 10 causes of death in a country in a specific year
def top_10_illnesses_year(country, year):
    """Show a bar chart of the ten deadliest causes in ``country`` during ``year``.

    Parameters
    ----------
    country : str
        Value to match exactly against the 'Country/Territory' column of the
        notebook-level ``df``.
    year : int
        Value to match exactly against the 'Year' column.

    Raises
    ------
    ValueError
        If no row of ``df`` matches the given country and year.
    """
    df_country = df[df['Country/Territory'] == country]
    df_country_year = df_country[df_country['Year'] == year]
    if df_country_year.empty:
        raise ValueError('No data found for country {!r} in year {!r}'.format(country, year))

    # Drop the identifier columns by name. Slicing with iloc[:, 2:] kept the
    # 'Year' column, so the year value itself (e.g. 2019) was ranked as if it
    # were a death count.
    cause_totals = df_country_year.drop(columns=['Country/Territory', 'Code', 'Year']).sum()
    top_causes = cause_totals.sort_values(ascending=False).head(10)

    fig = px.bar(
        top_causes,
        title='Top 10 Cause of Deaths in {} in {}'.format(country, year),
        color_discrete_sequence=px.colors.qualitative.Pastel,
        labels={'value': 'Amount of deaths', 'index': 'Cause of Deaths'},
    )
    # only one series is plotted, so the legend carries no information
    fig.update_layout(showlegend=False)
    fig.show()

In [11]:
# top-10 bar chart for Mexico, restricted to 2019
top_10_illnesses_year('Mexico', 2019)

In [12]:
# top-10 bar chart for the United States, restricted to 2019
top_10_illnesses_year('United States', 2019)

In [13]:
# top-10 bar chart for Canada, restricted to 2019
top_10_illnesses_year('Canada', 2019)