In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Africa malaria dataset from the Kaggle input directory into a DataFrame.
path = '/kaggle/input/malaria-in-africa/DatasetAfricaMalaria.csv'
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Preview the first five rows (five is the default for head()).
df.head()

In [None]:
# Shape of the data as a (rows, columns) tuple
df.shape

In [None]:
# Show the dtype pandas inferred for each column when reading the CSV
df.dtypes

In [None]:
# Parse the Year column (four-digit years, per the '%Y' format) into pandas datetimes.
from datetime import datetime

df['Year'] = pd.to_datetime(df['Year'], format='%Y')

# Display the dtypes again to confirm the conversion took effect.
df.dtypes

In [None]:
#import regular packages
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#import the plotly packages
import plotly.offline as py
from plotly import tools
import plotly.graph_objs as go
import plotly.express as px

In [None]:
# Subset of df used for the choropleth maps below.
# .copy() makes Malaria_data an independent frame: it is modified later in the
# notebook (Year is cast to string), and modifying a plain slice of df raises
# pandas' SettingWithCopyWarning and leaves it ambiguous whether df is affected.
Malaria_data = df[[
    'Country Name',
    'Year',
    'Country Code',
    'Incidence of malaria (per 1,000 population at risk)',
    'Malaria cases reported',
    'Use of insecticide-treated bed nets (% of under-5 population)',
    'Children with fever receiving antimalarial drugs (% of children under age 5 with fever)',
    'Intermittent preventive treatment (IPT) of malaria in pregnancy (% of pregnant women)',
    'latitude',
    'longitude',
    'geometry',
]].copy()

In [None]:
# Show the dtype of each column in the plotting subset
Malaria_data.dtypes

In [None]:
# Convert Year to string (presumably so plotly treats each year as a discrete
# animation frame label -- confirm against the rendered slider).
# assign() returns a new frame instead of writing into a slice of df, which
# avoids SettingWithCopyWarning and keeps this cell safe to re-run.
Malaria_data = Malaria_data.assign(Year=Malaria_data['Year'].astype(str))

In [None]:
# Animated choropleth of Africa, one frame per Year, coloured by
# malaria incidence per 1,000 population at risk.
fig1 = px.choropleth(
    Malaria_data,
    locations=Malaria_data['Country Code'],
    locationmode='ISO-3',
    scope='africa',
    color=Malaria_data['Incidence of malaria (per 1,000 population at risk)'],
    color_continuous_scale='Blues',
    animation_frame=Malaria_data['Year'],
    title="Incidence of Malaria at risk in Africa",
    labels={'color':'Incidence of Malaria'},
)

fig1.show()

In [None]:
# Animated choropleth of Africa, one frame per Year, coloured by
# the number of malaria cases reported in each country.
fig2 = px.choropleth(
    Malaria_data,
    locations=Malaria_data['Country Code'],
    locationmode='ISO-3',
    scope='africa',
    color=Malaria_data['Malaria cases reported'],
    color_continuous_scale='Blues',
    animation_frame=Malaria_data['Year'],
    title="Malaria Cases in Africa",
    labels={'color':'number of Malaria cases'},
)

fig2.show()

In [None]:
# Animated choropleth of Africa, one frame per Year, coloured by
# the use of insecticide-treated bed nets (% of under-5 population).
fig3 = px.choropleth(
    Malaria_data,
    locations=Malaria_data['Country Code'],
    locationmode='ISO-3',
    scope='africa',
    color=Malaria_data['Use of insecticide-treated bed nets (% of under-5 population)'],
    color_continuous_scale='Blues',
    animation_frame=Malaria_data['Year'],
    title="Malaria in Africa: Use of Insecticide-treated Bed Nets",
    labels={'color':'Use of insecticide-treated bed nets'},
)

fig3.show()

In [None]:
# Animated choropleth of Africa, one frame per Year, coloured by the share of
# children under 5 with fever who received antimalarial drugs.
fig4 = px.choropleth(
    Malaria_data,
    locations=Malaria_data['Country Code'],
    locationmode='ISO-3',
    scope='africa',
    color=Malaria_data['Children with fever receiving antimalarial drugs (% of children under age 5 with fever)'],
    color_continuous_scale='Blues',
    animation_frame=Malaria_data['Year'],
    title="Malaria in Africa: Children with Fever receiving Antimalarial Drugs",
    labels={'color':'Children with fever receiving antimalarial drugs'},
)

fig4.show()

In [None]:
# Animated choropleth of Africa, one frame per Year, coloured by the share of
# pregnant women receiving intermittent preventive treatment (IPT) of malaria.
fig5 = px.choropleth(
    Malaria_data,
    locations=Malaria_data['Country Code'],
    locationmode='ISO-3',
    scope='africa',
    color=Malaria_data['Intermittent preventive treatment (IPT) of malaria in pregnancy (% of pregnant women)'],
    color_continuous_scale='Blues',
    animation_frame=Malaria_data['Year'],
    title="Malaria in Africa: Intermittent Preventive Treatment of Malaria in Pregnancy",
    labels={'color':'Intermittent preventive treatment (IPT) of malaria in pregnancy'},
)

fig5.show()

In [None]:
# Some more data exploration: column names, non-null counts and dtypes
df.info()

In [None]:
# Summary statistics for the columns of df
df.describe()

In [None]:
# Distinct Year values present in the dataset
df['Year'].unique()

In [None]:
# Distinct country names present in the dataset
df['Country Name'].unique()

In [None]:
# Number of rows in the dataset (first entry of the shape tuple)
length_dataset = df.shape[0]
print(length_dataset)

In [None]:
# Per-column count of missing values: flag NaNs, then sum the flags column-wise
missing_values_count = df.isna().sum()
print(missing_values_count)

In [None]:
# For each Year, count how many rows have no value for 'Malaria cases reported'
(
    df['Malaria cases reported']
    .isna()
    .groupby(df['Year'])
    .sum()
)

In [None]:
# Step 1: keep only rows with a known, non-zero malaria incidence.
new_malaria_data_v1 = df[
    df['Incidence of malaria (per 1,000 population at risk)'].notna()
    & (df['Incidence of malaria (per 1,000 population at risk)'] != 0)
]
print(new_malaria_data_v1.head(5))

# Step 2: from the rows kept in step 1, additionally drop rows whose reported
# case count is missing or zero. The filter starts from new_malaria_data_v1
# (not df) so that v2 really is "v1 minus rows without reported cases";
# starting from df would silently discard the incidence filter.
new_malaria_data_v2 = new_malaria_data_v1[
    new_malaria_data_v1['Malaria cases reported'].notna()
    & (new_malaria_data_v1['Malaria cases reported'] != 0)
]
print(new_malaria_data_v2.head(5))

In [None]:
# Scatterplot of malaria incidence (x) against malaria cases reported (y).
plt.scatter(
    x=df['Incidence of malaria (per 1,000 population at risk)'],
    y=df['Malaria cases reported'],
    color='red',
)

plt.title("Correlation Incidence of Malaria and Malaria Cases Reported")
# Axis labels must follow the data passed above: incidence is on x, reported
# cases on y (the labels were previously swapped).
plt.xlabel("Incidence of Malaria")
plt.ylabel("Malaria Cases Reported")
plt.show()


In [None]:
# Trend in reported malaria cases in Africa: total over all rows for each Year.
Malaria = df[['Year','Malaria cases reported']].rename(
    columns={'Malaria cases reported':'Malaria_cases_reported'}
)

# Sum per year and keep the result as a one-column frame indexed by Year.
# (A former reset_index() call here discarded its result and had no effect,
# so it has been removed.)
# NOTE(review): sum() skips missing values, so years with many unreported
# rows may look artificially low -- confirm before interpreting the trend.
Malaria_cases_yearly = (
    Malaria.groupby('Year')['Malaria_cases_reported']
    .sum()
    .to_frame()
)

# Plot the yearly totals; the Year index is used for the x axis.
sns.lineplot(data=Malaria_cases_yearly['Malaria_cases_reported'])

plt.title("Malaria cases in Africa per Year")
plt.xlabel("Year")
plt.ylabel("Malaria cases")
plt.show()

In [None]:
# Per country, count the rows with no bed-net usage value and sort ascending,
# so the countries with the fewest gaps come first.
(
    df['Use of insecticide-treated bed nets (% of under-5 population)']
    .isna()
    .groupby(df['Country Name'])
    .sum()
    .sort_values()
)

In [None]:
# Regression plots of reported malaria cases against each intervention column.
nets_col = 'Use of insecticide-treated bed nets (% of under-5 population)'
drugs_col = 'Children with fever receiving antimalarial drugs (% of children under age 5 with fever)'
ipt_col = 'Intermittent preventive treatment (IPT) of malaria in pregnancy (% of pregnant women)'

# One frame per intervention, keeping only rows where that intervention has a value.
Malaria_treated_nets = df.loc[df[nets_col].notna()]
Malaria_antimalarial_medication = df.loc[df[drugs_col].notna()]
Malaria_IPT = df.loc[df[ipt_col].notna()]

# Three stacked panels, one per intervention.
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(20,20))

for frame, column, axis in (
    (Malaria_treated_nets, nets_col, ax1),
    (Malaria_antimalarial_medication, drugs_col, ax2),
    (Malaria_IPT, ipt_col, ax3),
):
    sns.regplot(x=frame[column], y=frame['Malaria cases reported'], data=frame, ax=axis)

plt.show()

In [None]:
# Regression plots of malaria incidence against each intervention column,
# using the per-intervention frames built in the previous cell.
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(20,20))

for frame, column, axis in (
    (Malaria_treated_nets, 'Use of insecticide-treated bed nets (% of under-5 population)', ax1),
    (Malaria_antimalarial_medication, 'Children with fever receiving antimalarial drugs (% of children under age 5 with fever)', ax2),
    (Malaria_IPT, 'Intermittent preventive treatment (IPT) of malaria in pregnancy (% of pregnant women)', ax3),
):
    sns.regplot(
        x=frame[column],
        y=frame['Incidence of malaria (per 1,000 population at risk)'],
        data=frame,
        ax=axis,
    )

plt.show()

In [None]:
# Histograms (counts, no KDE curve) of each intervention column, using the
# per-intervention frames built earlier in the notebook.
# sns.distplot is deprecated; sns.histplot is its replacement for plain
# histograms. Bin edges may differ slightly from distplot's default rule.
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10,20))

for frame, column, axis in (
    (Malaria_treated_nets, 'Use of insecticide-treated bed nets (% of under-5 population)', ax1),
    (Malaria_antimalarial_medication, 'Children with fever receiving antimalarial drugs (% of children under age 5 with fever)', ax2),
    (Malaria_IPT, 'Intermittent preventive treatment (IPT) of malaria in pregnancy (% of pregnant women)', ax3),
):
    sns.histplot(x=frame[column], kde=False, ax=axis)

plt.show()