In [None]:
# Life expectancy at birth indicates the number of years a newborn infant would live if 
# prevailing patterns of mortality at the time of its birth were to stay the same throughout its life.

In [None]:
#Dependencies

import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import linregress
import seaborn as sns
from scipy import stats

In [None]:
# Data sources (paths are relative to the notebook's working directory):
#   * Life_Expectancy.csv   - WHO life expectancy, all countries, 2000-2015
#                             https://www.kaggle.com/kumarajarshi/life-expectancy-who/version/1
#   * GDP_1970_to_2018.csv  - GDP per country, one column per year
#   * Life_1970.csv         - life expectancy since 1970 (44 countries)

# Life expectancy, all countries, 2000-2015
csv_path = "Resources/Life_Expectancy.csv"
life_exp = pd.read_csv(csv_path)

# GDP by country and year
csv_path_2 = "Resources/GDP_1970_to_2018.csv"
gdp_data = pd.read_csv(csv_path_2)

# Life expectancy since 1970 (44 countries)
csv_path_3 = "Resources/Life_1970.csv"
life_exp_1970 = pd.read_csv(csv_path_3)


In [None]:
# Trim the raw life expectancy frame to the columns used downstream and
# normalise their names. Note the raw "Life expectancy " header carries a
# trailing space, which is removed by the rename.
columns_to_keep = ["Country", "Year", "Status", "Life expectancy "]
column_renames = {"Life expectancy ": "Life Expectancy", "Country": "Country Name"}

reduced_life = life_exp[columns_to_keep]
cleaned_life = reduced_life.rename(columns=column_renames)
cleaned_life.head()

In [None]:
# Mean life expectancy across all countries for each year, 2000-2015.
yearly_mean_life = cleaned_life.groupby('Year')['Life Expectancy'].mean()

# Draw through the Axes returned by pandas instead of the pyplot state machine.
ax = yearly_mean_life.plot(kind='line')
ax.set_title("Life Expectancy Between 2000-2015")
ax.set_xlabel("Year")
ax.set_ylabel("Life Expectancy (Age in Years)")

# Uncomment to write the figure to disk:
#plt.savefig("LifeExp_overtime.png", bbox_inches = 'tight', pad_inches = 1.0)
plt.show()

In [None]:
# Mean life expectancy per year across all countries.
# cleaned_life also holds string columns ("Country Name", "Status");
# numeric_only=True restricts the aggregation to numeric columns so the call
# does not raise TypeError on pandas >= 2.0, where non-numeric columns are no
# longer dropped silently.
cleaned_life.groupby(["Year"]).mean(numeric_only=True)

In [None]:
# Build per-year GDP summary statistics.
# describe() yields one column per numeric column of gdp_data; transposing
# turns each of those into a row, and reset_index() exposes the original
# column label as a regular column, renamed to 'Year' for the later merge.
# The dropna() result is now kept: the original called dropna() without
# assigning it, so rows with missing statistics were never actually removed.
gdp_summary_df = (
    gdp_data
    .describe()
    .T
    .reset_index()
    .rename(columns={'index': 'Year'})
    .dropna()
)
gdp_summary_df.head()

In [None]:
# Yearly mean life expectancy with Year as a regular column (not the index),
# so it can be merged with the GDP summary on 'Year'.
# numeric_only=True skips the string columns ("Country Name", "Status"), which
# would otherwise raise TypeError on pandas >= 2.0. reset_index() is chained
# rather than applied in place so re-running the cell is idempotent.
new_life = (
    cleaned_life
    .groupby("Year")
    .mean(numeric_only=True)
    .reset_index()
)
new_life.head()


In [None]:
# The 'Year' labels in the GDP summary come from column headers; cast them to
# integers so they match the integer Year used by the life expectancy frame
# when the two are merged.
gdp_summary_df = gdp_summary_df.astype({'Year': int})

In [None]:
# Join yearly mean life expectancy with the yearly GDP summary.
# Default inner join: only years present in both frames are kept.
life_gdp_merge = new_life.merge(gdp_summary_df, on='Year')
life_gdp_merge.head()

In [None]:
#Graph average life expectancy over time versus average gdp
# Each point is one year: x is the yearly mean life expectancy and y is the
# 'mean' column of the GDP summary for that year.

# Drop years where either value is missing instead of substituting 0:
# a GDP of 0 is not a real observation and would bias the fitted line.
regression_df = life_gdp_merge[['Life Expectancy', 'mean']].dropna()
x_values = regression_df['Life Expectancy']
y_values = regression_df['mean']

# Least-squares fit of GDP (y) against life expectancy (x).
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values, alpha=0.5)
plt.plot(x_values,regress_values,"r-")
# Annotation position is in data coordinates (hand-picked for this data set).
plt.annotate(line_eq,(69,20),fontsize=14,color="red")
plt.title("Life Expectancy versus GDP")
plt.xlabel("Life Expectancy")
plt.ylabel("GDP (Billion USD)")
# linregress returns the correlation coefficient r; square it to get r-squared.
print(f"The r-squared is: {rvalue**2}")

#plt.savefig("LifeExp_vs_GDP.png", bbox_inches = 'tight', pad_inches = 1.0)

plt.show()

In [None]:
#Optional add (for fun)
# Extrapolate the fitted line (GDP = slope * life expectancy + intercept) to
# life expectancies beyond the observed data. Uses whichever slope/intercept
# were computed most recently, so run this right after the regression cell.
# NOTE: this is an extrapolation of a correlation, not a causal estimate.

age = 90
age2 = 100
for target_age in (age, age2):
    required_gdp = round(slope * target_age + intercept, 2)
    # Both messages now share one format (the second had a stray period before the unit).
    print(f"On average, for the life expectancy of {target_age} years old, the country GDP would have to be {required_gdp} billion USD.")

In [None]:
# Mean of 'Value' per 'TIME' across every row of the 1970+ file
# (44 countries, 1970-2017).
mean_value_by_time = life_exp_1970.groupby('TIME')['Value'].mean()

ax = mean_value_by_time.plot(kind='line')
ax.set_title("Life Expectancy Over Time")
ax.set_xlabel("Year")
ax.set_ylabel("Life Expectancy (in Years)")

# Uncomment to write the figure to disk:
#plt.savefig("LifeExp_all.png", bbox_inches = 'tight', pad_inches = 1.0)
plt.show()

In [None]:
# Restrict the 1970+ life expectancy file (44 countries only) to the USA rows
# so the USA trend can be shown on its own.
is_usa = life_exp_1970['LOCATION'].eq('USA')
usa_1970 = life_exp_1970[is_usa]
usa_1970.head()

In [None]:
# USA life expectancy over time, 1970-2017, using the rows whose SUBJECT is
# 'TOT' (presumably men and women combined - confirm against the data source).
is_total = usa_1970['SUBJECT'].eq('TOT')
total_1970 = usa_1970[is_total]

ax = total_1970.plot(x="TIME", y="Value", kind='line', legend=False)
ax.set_title("Life Expectancy in USA from 1970")
ax.set_xlabel("Year")
ax.set_ylabel("Life Expectancy (in Years)")
# Uncomment to write the figure to disk:
#plt.savefig("LifeExp_USA.png", bbox_inches = 'tight', pad_inches = 1.0)

plt.show()

In [None]:
# Keep only the year-2000 rows of the 2000-2015 life expectancy data.
# The mask is built from cleaned_life itself; the original built it from
# life_exp, which only worked because both frames happen to share an index.
year_2000 = cleaned_life.loc[cleaned_life['Year'] == 2000]
year_2000.head()

In [None]:
# Join year-2000 life expectancy with the GDP table on country name.
# Default inner join: countries whose names are not spelled identically in
# both files are silently dropped.
# NOTE(review): confirm country naming matches between the two data sets.
merge_2000_gdp = pd.merge(year_2000, gdp_data, on='Country Name')

# Preview only; the full merged frame is too large to display usefully.
merge_2000_gdp.head()


In [None]:
# Exclude the USA and Japan: their GDP values are outliers for this plot.
# NOTE(review): these names must match the 'Country Name' spelling that
# survived the merge, otherwise the filter removes nothing - confirm.
gdp_outlier_countries = ['United States of America', 'Japan']
is_outlier = merge_2000_gdp['Country Name'].isin(gdp_outlier_countries)
merge_2000_gdp = merge_2000_gdp[~is_outlier]

In [None]:
#Life expectancy versus GDP in 2000
# Each point is one country: x is its life expectancy in 2000 and y is the
# '2000' column of the GDP table.

# Drop countries where either value is missing instead of substituting 0:
# a GDP of 0 is not a real observation and would bias the fitted line.
regression_df = merge_2000_gdp[['Life Expectancy', '2000']].dropna()
x_values = regression_df['Life Expectancy']
y_values = regression_df['2000']

# Least-squares fit of GDP (y) against life expectancy (x).
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values, alpha=0.5)
plt.plot(x_values,regress_values,"r-")
# Annotation position is in data coordinates (hand-picked for this data set).
plt.annotate(line_eq,(40,50),fontsize=14,color="red")
plt.title("Life Expectancy versus GDP in 2000")
plt.xlabel("Life Expectancy (in Years)")
plt.ylabel("GDP (in Billions USD)")
# linregress returns the correlation coefficient r; square it to get r-squared.
print(f"The r-squared is: {rvalue**2}")

print(f"Slope is: {slope}")

#plt.savefig("LifeExp_vs_GDP_2000.png", bbox_inches = 'tight', pad_inches = 1.0)

plt.show()