In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the two CSV files of the dataset: the estimates table (`est`) and the
# country-wise averages table (`avg_data`).
est = pd.read_csv(
    "/kaggle/input/malnutrition-across-the-globe/malnutrition-estimates.csv"
)
avg_data = pd.read_csv(
    "/kaggle/input/malnutrition-across-the-globe/country-wise-average.csv"
)

 #  Some terms related to malnutrition

### wasting: 
Low weight-for-height is known as wasting. It usually indicates recent and severe weight loss, because a person has not had enough food to eat and/or they have had an infectious disease.

### severe wasting :
This is characterized by a massive loss of body fat and muscle tissue. Children who are severely wasted look almost elderly and their bodies are extremely thin and skeletal.

### stunting : 
Stunting is when a child has a low height for their age, usually due to malnutrition, repeated infections, and/or poor social stimulation. Whereas stunting is low height for a child's age, wasting is low weight for a child's height.

### Underweight : 
Children with low weight-for-age are known as underweight. A child who is underweight may be stunted, wasted, or both.

### Overweight: 
Overweight and obesity are when a person is too heavy for his or her height.


In [None]:
# Work on an independent copy so the raw estimates in `est` stay untouched.
df = est.copy(deep=True)

# Per-column flag: True where the raw table has at least one missing value.
est.isna().any()

In [None]:
# Replace NaN with zero in each indicator column of the working copy.
# NOTE(review): a zero-filled value is indistinguishable from a measured 0%,
# so any mean or ranking computed from `df` treats missing data as 0.
for indicator in ('Severe Wasting', 'Wasting', 'Overweight', 'Stunting', 'Underweight'):
    df[indicator] = df[indicator].fillna(0.0)


#  -> Malnutrition in India
###  We have data from years 1989, 1992, 1993, 1997, 1999, 2006, 2014, 2015, 2017

In [None]:
# list of undernutrition parameters
param_list = ['Overweight', 'Severe Wasting', 'Stunting', 'Underweight', 'Wasting']

# Survey years to include in the India chart.
years = [1989, 1992, 1993, 1997, 1999, 2006, 2014, 2015, 2017]

# Rows of the working copy that belong to India.
df2 = df.loc[df.Country == 'INDIA']

# Reshape to long form (one row per survey/parameter pair) in a single step.
# The previous loop stored `df2.loc[...][param]` -- a Series, not a number --
# in every 'value' cell and grew the frame with pd.concat one row at a time,
# which also left every row with index 0. melt() yields plain numeric values
# and a clean RangeIndex that seaborn can plot directly.
ind_data = (
    df2.loc[df2.Year.isin(years), ['Year'] + param_list]
    .sort_values('Year')
    .melt(id_vars='Year', value_vars=param_list,
          var_name='parameter', value_name='value')
    .loc[:, ['parameter', 'Year', 'value']]
)

fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=ind_data, x='Year', y='value', hue='parameter', ax=ax)
ax.set(title='Malnutrition indicators in India by survey year',
       xlabel='Year', ylabel='value (%)')
plt.show()

## Observations from this plotting:
A significant %  of children still have 'Stunting','Underweight' and 'Wasting' undernutrition problems in  our country.

There is a huge decrease in 'Stunting %' (from 65% to 35%) and 'Underweight %'(from 55% to 38%) over the years.
'Overweight %' is very low in the country, at around 2-3%. Also, only 5% of children suffer from 'Severe Wasting' in the country.

But the strangest pattern we can observe here is that there is very little change in 'Wasting %' over the years.

# -> Malnutrition across different income groups

### undernutrition  categories : 
stunting, wasting, overweight, Underweight, and severe wasting

### Income Classification:
Low income = 0, 
Lower middle income = 1, 
Upper middle income = 2, 
High income = 3,

In [None]:

# Mean of every indicator within each income class (0 = low ... 3 = high),
# taken from the country-wise averages table.
# The records are collected first and the frame is built once: growing it with
# pd.concat inside the loop copies the frame on every iteration, starts from an
# empty object-dtype frame, and leaves every row with the same index label 0.
income_records = [
    {
        'parameter': param,
        'Income Classification': income_class,
        'average': avg_data.loc[
            avg_data['Income Classification'] == income_class, param
        ].mean(),
    }
    for param in param_list
    for income_class in range(4)
]
data = pd.DataFrame(
    income_records, columns=['parameter', 'Income Classification', 'average']
)

fig, ax = plt.subplots(figsize=(8, 8))
sns.barplot(data=data, x='Income Classification', y='average', hue='parameter', ax=ax)
ax.set(title='Average malnutrition indicators by income classification',
       ylabel='average (%)')
plt.show()


## Observations from this plotting:
### Every country in the world is affected by one or more forms of malnutrition.

We can observe that 'Stunting %' and 'Underweight %' are very high in LOW and LOWER MIDDLE income countries, whereas 'Overweight %' is comparatively higher in UPPER MIDDLE and HIGH income countries.

'Wasting %' and 'Severe Wasting %' are also higher in LOW and LOWER MIDDLE income countries.

Overall we can see that LOW and LOWER MIDDLE income countries are the ones which are most affected by malnutrition.

# -> Analysis of countries which are both LDC and LIFD
### based on recent survey (survey year= 2018 or 2019)
 >Low Income Food Deficient (LIFD)(true=1, false=0)
  , Least Developed Countries (LDC)(true=1, false=0)
  
Our data, malnutrition-estimates.csv, has recent surveys for only 32 countries.

In [None]:
# Recent surveys (2018 onwards) of countries flagged as both LDC and LIFD.
# The slice gets its own name instead of overwriting `df`, so cells that read
# the full working copy still see every year if they are re-run afterwards.
recent_df = df.loc[df.Year >= 2018]
temp = recent_df.loc[(recent_df.LDC == 1) & (recent_df.LIFD == 1)]

# One panel per indicator, each showing the 10 rows with the highest value.
panel_params = ['Wasting', 'Severe Wasting', 'Stunting', 'Underweight', 'Overweight']

fig = plt.figure(figsize=(20, 11))
for position, param in enumerate(panel_params, start=1):
    ax = fig.add_subplot(3, 2, position)
    top10 = temp.sort_values(by=param, ascending=False).head(10)
    sns.barplot(data=top10, ax=ax, y='Country', x=param, orient='h')
    ax.set_title(f'Top 10 by {param}')
fig.tight_layout()
plt.show()

## Observations from this plotting:
### Countries which are most affected by malnutrition are:
>Niger, Ethiopia, Afghanistan, Mauritania, Guinea, Mali, Burkina Faso, Bangladesh, Senegal, Central African Republic, Madagascar

###  Highest % of undernutrition categories:
> Stunting: 56%, Wasting: 14%, Severe Wasting: 36%, Underweight: 37%, Overweight: 6.8%
 Overweight % is very low in such countries (<= 8%).

# -> Malnutrition in LLDCs and SIDSs:
 Land Locked Developing Countries (LLDC)  ('LLDC or SID2' =1),
 Small Island Developing States (SIDS)  ('LLDC or SID2'= 2),

In [None]:
# Surveys from 2017 onwards, taken from the raw estimates so that missing
# values stay NaN and are skipped by .mean() rather than counted as zero.
est_recent = est.loc[est.Year >= 2017]

# Mean of every indicator per 'LLDC or SID2' code (0, 1, 2). A code with no
# matching rows yields NaN and therefore no bar.
# The records are collected first and the frame is built once instead of
# concatenating onto an empty frame inside the loop.
group_records = [
    {
        'parameter': param,
        'LLDC or SID2': group_code,
        'average': est_recent.loc[
            est_recent['LLDC or SID2'] == group_code, param
        ].mean(),
    }
    for param in param_list
    for group_code in [0.0, 1.0, 2.0]
]
data2 = pd.DataFrame(group_records, columns=['parameter', 'LLDC or SID2', 'average'])

fig, ax = plt.subplots(figsize=(8, 8))
sns.barplot(data=data2, y='average', x='LLDC or SID2', hue='parameter', ax=ax)
ax.set(title="Average malnutrition indicators by 'LLDC or SID2' group",
       ylabel='average (%)')
plt.show()

# Recent surveys do not have data for SIDS countries

## Observations from this plotting:
LLDC countries have a higher percentage of 'Stunting' undernutrition (about 30% of the children suffer from 'Stunting'). 'Severe Wasting %' and 'Overweight %' are very low in LLDC countries. About 16% of children are underweight.

# -> Countries which are least affected by malnutrition
Countries in which the percentage of each undernutrition parameter is very low or close to zero.


In [None]:
# Sort ascending on the indicators in priority order (later columns only break
# ties in earlier ones) and display the first six rows.
ascending_keys = ['Severe Wasting', 'Stunting', 'Wasting', 'Underweight', 'Overweight']
avg_data.sort_values(by=ascending_keys).head(6)

# ->Most malnourished countries

In [None]:
# Sort descending on the indicators in priority order (later columns only
# break ties in earlier ones) and display the first ten rows.
descending_keys = ['Severe Wasting', 'Stunting', 'Wasting', 'Underweight', 'Overweight']
avg_data.sort_values(by=descending_keys, ascending=False).head(10)

Feel free to drop any suggestions regarding improvements. 

Do upvote if you found this analysis interesting :).