# Analysing Life Expectancy

## Importing packages

In [None]:
#Import python libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import svm #Import svm model
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
import seaborn as sns
import matplotlib.pyplot as plt

## Importing data

**Import the file 'life_expectancy.csv'**

In [None]:
# Load the life-expectancy dataset from the CSV file and preview the first rows.
data = pd.read_csv("../input/life-expectancy/life_expectancy.csv")
data.head()

## Basic Statistics

In [None]:
# Print a concise summary of the DataFrame (columns, dtypes, non-null counts).
data.info()

<font color='blue'>**OBSERVATION :** There is no missing data</font>

In [None]:
# Dimensions of the dataset as (number of rows, number of columns).
data.shape

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

<font color='blue'>**OBSERVATION :** <br>
Minimum Life expectancy is 36 and maximum is 89 with average of 69 years<br>
The data ranges from 2000 to 2015 <br>
...<br></font>

In [None]:
# Describe only the object-dtype (text) columns: count, unique values, most frequent value and its frequency.
data.select_dtypes(include=['object']).describe(include='all')

<font color='blue'>**OBSERVATION :** <br>
The data relates to 193 countries <br>
...<br></font>

## Correlation

**Find the correlation of 'data'**

In [None]:
# Pairwise correlation matrix of the numeric features.
# The dataset also holds text columns (e.g. 'Country', 'Status'); on
# pandas >= 2.0 DataFrame.corr() no longer drops them silently and raises
# instead, so restrict to numeric columns explicitly before correlating.
data.select_dtypes(include='number').corr()

**Find the Heatmap of correlation of 'data'**

In [None]:
# Annotated heatmap of the correlation matrix on a 12x8 inch canvas.
# plt.subplots returns a (figure, axes) pair, so unpack it and hand the axes
# to seaborn explicitly instead of relying on the implicit current axes.
fig, ax = plt.subplots(figsize=(12, 8))
# Correlate numeric columns only: text columns such as 'Country'/'Status'
# make DataFrame.corr() raise on pandas >= 2.0.
ax = sns.heatmap(data.select_dtypes(include='number').corr(), annot=True, ax=ax)

### Correlation of 'Life_expectancy' with other features

In [None]:
# Correlation of every numeric feature with 'Life_expectancy', strongest
# positive first. Text columns are excluded up front because
# DataFrame.corr() raises on non-numeric data on pandas >= 2.0.
data.select_dtypes(include='number').corr()['Life_expectancy'].sort_values(ascending=False)

<font color='blue'>**OBSERVATION :** Features with a correlation value near 1 have a strong positive correlation with Life expectancy, and those near -1 have a strong negative correlation</font>

### Comparing 'Schooling','Income composition of resources','Life_expectancy' using pairplots

In [None]:
# Pairwise scatter/distribution grid for schooling, income composition and life expectancy.
sns.pairplot(
    data=data.loc[:, ['Schooling', 'Income composition of resources', 'Life_expectancy']],
    height=3,
)

<font color='blue'>**OBSERVATION :** <br>
    People who have higher Life expectancy have higher Schooling and higher Income composition <br>
    Life expectancy of most of the countries is between 70 to 80 years<br>
    Most of the people have schooling between 10 to 15 years<br>
</font>

### Comparing 'BMI','Diphtheria','Life_expectancy' using pairplots

In [None]:
# Pairwise scatter/distribution grid for BMI, diphtheria and life expectancy.
sns.pairplot(
    data=data.loc[:, ['BMI', 'Diphtheria', 'Life_expectancy']],
    height=3,
)

<font color='blue'>**OBSERVATION :** <br>
    People who have higher Life expectancy have Diphtheria on the higher side <br>
    People who have higher Life expectancy have BMI on the lower side<br>
    There is significant population whose BMI is between 50 to 70<br>
</font>

### Comparing 'Polio','GDP','Life_expectancy' using pairplots

In [None]:
# Pairwise scatter/distribution grid for polio, GDP and life expectancy.
sns.pairplot(
    data=data.loc[:, ['Polio', 'GDP', 'Life_expectancy']],
    height=3,
)

<font color='blue'>**OBSERVATION :** <br>
    People with higher Life expectancy have Polio on the higher side<br>
    Life expectancy is proportional to GDP<br>
   <br>
</font>

### Comparing 'Alcohol','percentage expenditure','Life_expectancy' using pairplots

In [None]:
# Pairwise scatter/distribution grid for alcohol, percentage expenditure and life expectancy.
sns.pairplot(
    data=data.loc[:, ['Alcohol', 'percentage expenditure', 'Life_expectancy']],
    height=3,
)

<font color='blue'>**OBSERVATION :** <br>
    Life expectancy is directly proportional to percentage expenditure<br>
</font>

### Comparing 'HIV/AIDS','Adult Mortality','Life_expectancy' using pairplots

In [None]:
# Pairwise scatter/distribution grid for HIV/AIDS, adult mortality and life expectancy.
sns.pairplot(
    data=data.loc[:, ['HIV/AIDS', 'Adult Mortality', 'Life_expectancy']],
    height=3,
)

<font color='blue'>**OBSERVATION :** <br>
    The impact of HIV/AIDS is less on people with higher Life expectancy<br>
    Adult mortality is directly proportional to HIV/AIDS<br>
</font>

### Comparing 'thinness  1-19 years','thinness 5-9 years','Life_expectancy' using pairplots

In [None]:
# Pairwise scatter/distribution grid for the two thinness columns and life expectancy.
sns.pairplot(
    data=data.loc[:, ['thinness  1-19 years', 'thinness 5-9 years', 'Life_expectancy']],
    height=3,
)

<font color='blue'>**OBSERVATION :** <br>
    Thinness is not correlated with Life expectancy<br>
</font>

### Comparing 'infant deaths','under-five deaths','Life_expectancy' using pairplots

In [None]:
# Pairwise scatter/distribution grid for infant deaths, under-five deaths and life expectancy.
sns.pairplot(
    data=data.loc[:, ['infant deaths', 'under-five deaths', 'Life_expectancy']],
    height=3,
)

<font color='blue'>**OBSERVATION :** Infant deaths are directly proportional to under-five deaths</font>

### Boxplot of 'Status' versus 'Life_expectancy'

In [None]:
# Distribution of life expectancy per development status, drawn on a 15x8 inch figure.
plt.figure(figsize=(15, 8))
sns.boxplot(
    data=data,
    x='Status',
    y='Life_expectancy',
)

<font color='blue'>**OBSERVATION :** Developed countries have higher life expectancy</font>

### Barplot of the 50 highest mean 'Life_expectancy' values, group by "Country"

In [None]:
# Mean life expectancy per country; bar chart of the 50 highest means, descending.
plt.figure(figsize=(15, 8))
(
    data.groupby(by="Country")['Life_expectancy']
    .mean()
    .sort_values(ascending=False)
    .head(50)
    .plot.bar()
)

<font color='blue'>**OBSERVATION :** Japan, Sweden and Switzerland have highest Life expectancy</font>

### Barplot of the 50 lowest mean 'Life_expectancy' values, group by "Country"

In [None]:
# Mean life expectancy per country; bar chart of the 50 lowest means, ascending.
plt.figure(figsize=(15, 8))
(
    data.groupby(by="Country")['Life_expectancy']
    .mean()
    .sort_values(ascending=True)
    .head(50)
    .plot.bar()
)

<font color='blue'>**OBSERVATION :** Life expectancy is minimum in Sierra Leone and Central African Republic</font>

### Barplot of 'Life_expectancy' of 'Developed' countries, group by "Country"

In [None]:
# Developed countries only: mean life expectancy per country, up to 50 highest, descending.
plt.figure(figsize=(15, 8))
(
    data.loc[data['Status'] == 'Developed']
    .groupby(by="Country")['Life_expectancy']
    .mean()
    .sort_values(ascending=False)
    .head(50)
    .plot.bar()
)

<font color='blue'>**OBSERVATION :** Developed countries have overall higher Life expectancy</font>

### Barplot of 'Life_expectancy' of 'Developing' countries, group by "Country"

In [None]:
# Developing countries only: mean life expectancy per country, the 50 lowest, ascending.
plt.figure(figsize=(15, 8))
(
    data.loc[data['Status'] == 'Developing']
    .groupby(by="Country")['Life_expectancy']
    .mean()
    .sort_values(ascending=True)
    .head(50)
    .plot.bar()
)

<font color='blue'>**OBSERVATION :** Developing countries have overall lower Life expectancy</font>

### Barplot of 'Life_expectancy' , group by "Year"

In [None]:
# Mean life expectancy per year across all countries.
plt.figure(figsize=(15, 8))
# Keep the bars in chronological order (groupby returns the years sorted).
# Sorting by value instead would make the chart look monotonically
# increasing whatever the real year-to-year trend is.
data.groupby(by="Year")['Life_expectancy'].mean().plot.bar()

<font color='blue'>**OBSERVATION :** Life expectancy gradually increased from 2000 to 2015</font>

### Barplot of 'Life_expectancy' of 'India' , group by "Year"

In [None]:
# Mean life expectancy per year for India only.
plt.figure(figsize=(15, 8))
# Keep the bars in chronological order (groupby returns the years sorted).
# Sorting by value instead would make the chart look monotonically
# increasing whatever the real year-to-year trend is.
data[data['Country'] == 'India'].groupby(by="Year")['Life_expectancy'].mean().plot.bar()

<font color='blue'>**OBSERVATION :** Life expectancy in India gradually increased from 2000 to 2015, however it is less than average Life expectancy of the world</font>

### Barplot of 'Life_expectancy' of 'Developed'  countries, group by "Year"

In [None]:
# Mean life expectancy per year for countries whose Status is 'Developed'.
plt.figure(figsize=(15, 8))
# Keep the bars in chronological order (groupby returns the years sorted).
# Sorting by value instead would make the chart look monotonically
# increasing whatever the real year-to-year trend is.
data[data['Status'] == 'Developed'].groupby(by="Year")['Life_expectancy'].mean().plot.bar()

<font color='blue'>**OBSERVATION :** Life expectancy of developed countries gradually increased from 2000 to 2015</font>

### Barplot of 'Life_expectancy' of 'Developing'  countries, group by "Year"

In [None]:
# Mean life expectancy per year for countries whose Status is 'Developing'.
plt.figure(figsize=(15, 8))
# Keep the bars in chronological order (groupby returns the years sorted).
# Sorting by value instead would make the chart look monotonically
# increasing whatever the real year-to-year trend is.
data[data['Status'] == 'Developing'].groupby(by="Year")['Life_expectancy'].mean().plot.bar()

<font color='blue'>**OBSERVATION :** Life expectancy of developing countries gradually increased from 2000 to 2015, however it is lower than that of developed countries</font>