# In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import warnings
import plotly.graph_objects as go

# SettingWithCopyWarning lives in pandas.errors since pandas 1.5; the old
# pandas.core.common location is kept as a fallback for earlier releases.
# If neither location provides it, there is nothing to silence.
try:
    from pandas.errors import SettingWithCopyWarning
except ImportError:
    try:
        from pandas.core.common import SettingWithCopyWarning
    except ImportError:
        SettingWithCopyWarning = None

# Silence the chained-assignment warning for the column assignments below.
if SettingWithCopyWarning is not None:
    warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# Load the cause-of-death table.
df = pd.read_csv("cause_of_deaths.csv")

# Every column except the identifier columns goes into the row total.
# 'Country/Territory' holds names rather than counts, so it is excluded as
# well; leaving it in makes the row-wise sum mix strings with numbers.
identifier_columns = {'Country/Territory', 'Code', 'Year'}
col_list = [name for name in df.columns if name not in identifier_columns]

df['total'] = df[col_list].sum(axis=1)

# Read the population table and discard its two series-descriptor columns.
populatie = pd.read_csv("populatie.csv")
populatie = populatie.drop(columns=['Series Name', 'Series Code'])

# Column bookkeeping: the first two columns plus the last one. These names are
# kept because they are module-level; the exploratory melt that used them was
# discarded immediately and has been dropped.
column_list = populatie.columns.tolist()
id_vars_list = column_list[:2] + column_list[-1:]

# Year columns are labelled like "1990 [YR1990]". Build the 1990-2021 labels
# once instead of repeating the same literal list twice.
year_columns = ["{0} [YR{0}]".format(year) for year in range(1990, 2022)]

populatie_jaren = populatie[year_columns]

# Reshape from one column per year to one row per (country, year label).
populatie2 = pd.melt(
    populatie,
    id_vars=['Country Name', 'Country Code'],
    value_vars=year_columns,
    var_name='Year',
    value_name='populatie',
)

populatie2

# The melted labels look like "1990 [YR1990]", so the leading four characters
# are the year. The result is assigned back to the column instead of calling
# an in-place method on the selected column, which may act on a temporary copy
# and leave the frame unchanged.
populatie2["Year"] = populatie2["Year"].str[:4].astype(int)

populatie2

# Attach the population figure to each row of df by country code and year,
# keeping every row of df (left join).
df2 = pd.merge(
    df,
    populatie2,
    how='left',
    left_on=['Code', 'Year'],
    right_on=['Country Code', 'Year'],
)
df2

# Discard every row that has a missing value in any column.
df2 = df2.dropna()

# Animated choropleth of the Malaria column, one animation frame per year.
malaria_map_options = dict(
    locations="Code",
    animation_frame="Year",
    color="Malaria",
    hover_name="Country/Territory",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig = px.choropleth(df2, **malaria_map_options)
fig.show()

df2.info()

# The .str accessor below assumes the population values were read as text;
# trim surrounding whitespace first.
df2['populatie'] = df2['populatie'].str.strip()

# Inspect the first value by position: after dropna() the label 0 may no
# longer be present in the index, so a label lookup could raise KeyError.
type(df2['populatie'].iloc[0])

# Remove every non-digit character. regex=True must be explicit: since
# pandas 2.0 the default is literal matching, which would look for the two
# characters "\D" and leave the values untouched.
df2["populatie"] = df2['populatie'].str.replace(r'\D', '', regex=True)

# Values that contained no digits are now empty strings; treat them as
# missing and drop those rows.
df2["populatie"] = df2['populatie'].replace('', np.nan)
df2.dropna(subset=['populatie'], inplace=True)

# Digit strings -> float -> int.
df2['populatie'] = df2['populatie'].astype(float)
df2['populatie'] = df2['populatie'].astype(int)

# Add a rate per 100,000 population for two of the cause columns.
for cause in ('Meningitis', "Alzheimer's Disease and Other Dementias"):
    df2[cause + ' per 100k'] = df2[cause] / df2['populatie'] * 100000

df2.columns

# Cause columns that get a "per 100k" counterpart.
list_of_columns = [
    'Meningitis',
    "Alzheimer's Disease and Other Dementias",
    "Parkinson's Disease",
    'Nutritional Deficiencies',
    'Malaria',
    'Drowning',
    'Interpersonal Violence',
    'Maternal Disorders',
    'HIV/AIDS',
    'Drug Use Disorders',
    'Tuberculosis',
    'Cardiovascular Diseases',
    'Lower Respiratory Infections',
    'Neonatal Disorders',
    'Alcohol Use Disorders',
    'Self-harm',
    'Exposure to Forces of Nature',
    'Diarrheal Diseases',
    'Environmental Heat and Cold Exposure',
    'Neoplasms',
    'Conflict and Terrorism',
    'Diabetes Mellitus',
    'Chronic Kidney Disease',
    'Poisonings',
    'Protein-Energy Malnutrition',
    'Road Injuries',
    'Chronic Respiratory Diseases',
    'Cirrhosis and Other Chronic Liver Diseases',
    'Digestive Diseases',
    'Fire, Heat, and Hot Substances',
    'Acute Hepatitis',
]

def ziekte_per_aantal(df, list_of_columns):
    """Return a new frame with each listed column scaled to a rate per 100,000.

    Args:
        df: DataFrame containing 'Country/Territory', 'Code', 'Year',
            'populatie' and every column named in ``list_of_columns``.
        list_of_columns: Iterable of column names to convert.

    Returns:
        A new DataFrame holding the three identifier columns followed by one
        "<name> per 100k" column per requested name, computed as
        ``df[name] / df['populatie'] * 100000``. ``df`` is not modified.
    """
    # Start from a copy of the identifier columns so the index of df is kept
    # and the caller's frame is never written to.
    new_df = df[['Country/Territory', 'Code', 'Year']].copy()
    for column in list_of_columns:
        new_df[column + " per 100k"] = df[column] / df['populatie'] * 100000

    return new_df


# Per-100k rates for every cause named in list_of_columns.
df_100k = ziekte_per_aantal(df2, list_of_columns)
df_100k

# Animated choropleth of the raw Drowning column, one frame per year.
fig = px.choropleth(df2, locations="Code",
                    animation_frame="Year",
                    color="Drowning",
                    hover_name="Country/Territory",
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.show()

# The same map for the rate. "Drowning per 100k" exists only in df_100k
# (df2 carries per-100k columns for Meningitis and Alzheimer's only), so the
# figure must be built from df_100k, which also holds Code, Year and
# Country/Territory.
fig = px.choropleth(df_100k, locations="Code",
                    animation_frame="Year",
                    color="Drowning per 100k",
                    hover_name="Country/Territory",
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.show()

# Bar chart of the Meningitis column per year for the Afghanistan rows.
is_afghanistan = df2['Country/Territory'] == 'Afghanistan'
country_data = df2.loc[is_afghanistan]

country_data.head()

fig = px.bar(
    country_data,
    x="Year",
    y="Meningitis",
    color="Country/Territory",
)
fig.show()

df2.head()

# Distribution of the Meningitis column: a 20-bin histogram, then a box plot.
for make_figure, extra_options in ((px.histogram, {"nbins": 20}), (px.box, {})):
    fig = make_figure(df2, x="Meningitis", **extra_options)
    fig.show()

# Strip plot of the Meningitis column, coloured by country, with the strips
# overlaid on one another.
fig = px.strip(df2,
               x='Meningitis',
               color='Country/Territory',
               stripmode='overlay')

# Overlay a box summarising the same Meningitis values. The earlier filter
# `Meningitis == "trace 0"` compared a numeric column with a label string, so
# it never selected any rows and the box stayed empty; it also placed the data
# on y while the strips use x, and read the unfiltered df instead of df2.
fig.add_trace(go.Box(x=df2['Meningitis'], name='trace 0'))

fig.update_layout(showlegend=False)

fig.show()

# Meningitis against population and then against year, coloured by country,
# each with an OLS trendline and the legend hidden.
for x_column in ("populatie", "Year"):
    fig = px.scatter(
        df2,
        x=x_column,
        y="Meningitis",
        color="Country/Territory",
        trendline="ols",
    )
    fig.update_layout(showlegend=False)
    fig.show()

# The same two Meningitis scatter plots restricted to the India rows.
india_rows = df2['Country/Territory'] == 'India'
country_data = df2.loc[india_rows]
for x_column in ("populatie", "Year"):
    fig = px.scatter(
        country_data,
        x=x_column,
        y="Meningitis",
        color="Country/Territory",
        trendline="ols",
    )
    fig.show()

# Meningitis per 100k against population and then against year, coloured by
# country, each with an OLS trendline and the legend hidden.
for x_column in ("populatie", "Year"):
    fig = px.scatter(
        df2,
        x=x_column,
        y="Meningitis per 100k",
        color="Country/Territory",
        trendline="ols",
    )
    fig.update_layout(showlegend=False)
    fig.show()

# df2 has no "Conflict and Terrorism per 100k" column (only the Meningitis and
# Alzheimer's rates are added to it), so derive the rate here on a copy of the
# Afghanistan rows. Those rows also carry the populatie column that the first
# plot uses on its x axis.
country_data = df2.loc[df2['Country/Territory'] == 'Afghanistan'].copy()
country_data["Conflict and Terrorism per 100k"] = (
    country_data["Conflict and Terrorism"] / country_data["populatie"] * 100000
)

# Rate against population.
fig = px.scatter(country_data, x="populatie", y="Conflict and Terrorism per 100k", color="Country/Territory", trendline="ols")
fig.show()

# Rate against year.
fig = px.scatter(country_data, x="Year", y="Conflict and Terrorism per 100k", color="Country/Territory", trendline="ols")
fig.show()

df2






# Per-100k table without the code and year columns.
df_100k_deseases = df_100k.drop(columns=['Code', 'Year'])
df_100k_deseases

# Sum the Meningitis rate per country and keep the five largest sums.
meningitis_by_country = (
    df_100k_deseases
    .groupby(['Country/Territory'])['Meningitis per 100k']
    .sum()
)
data = (
    meningitis_by_country
    .sort_values(ascending=False)
    .head(5)
    .reset_index()
)

# Bar chart of those five countries with slanted x tick labels.
fig = px.bar(
    data,
    x='Country/Territory',
    y='Meningitis per 100k',
    color='Country/Territory',
)
fig.update_layout(title=go.layout.Title(text="COUNTRIES WITH HIGHEST Meningitis per 100k DEATHS"))
fig.update_xaxes(tickangle=45)
fig.show()

data

