In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
from scipy.stats import linregress
from scipy import stats
import pingouin as pg # Install pingouin stats package (pip install pingouin)
import seaborn as sns # Install seaborn data visualization library (pip install seaborn)
from scipy.stats import pearsonr

# Study window: every calendar year from 2005 through 2015 inclusive
yr_list = list(range(2005, 2016))

# Silence all warning output for the rest of the notebook session
import warnings
warnings.filterwarnings(action='ignore')

# Location of the population health CSV
data_to_load = "data.csv"

# Load the population health data into a DataFrame
health_data_pd = pd.read_csv(filepath_or_buffer=data_to_load)

# Leave the loaded table as the cell's displayed value
health_data_pd

In [None]:
# Narrow the table to the identifying columns plus the 2005-2015 year columns

id_columns = ['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code']
# The year columns are addressed by their string labels '2005' ... '2015'
year_columns = [str(year) for year in range(2005, 2016)]

health_data_decade_df = health_data_pd.loc[:, id_columns + year_columns]
health_data_decade_df


In [None]:
# Extracting USA's data

countries_sorted_df = health_data_decade_df.groupby('Country Name')
countries_sorted_df
usa_df = countries_sorted_df.get_group('United States') 
usa_df

In [None]:
# Dropping NaN values

dropped_usa_df = usa_df.dropna() 
dropped_usa_df

In [None]:
# Extracting health indicators of interest

decade_health_USA_T = dropped_usa_df.iloc[:, 2:].T
decade_health_USA_T.columns = decade_health_USA_T.iloc[1,:]
decade_health_USA_T = decade_health_USA_T.iloc[2:, :]

decade_health_USA_T_codes = decade_health_USA_T[['SH.IMM.HEPB', 'SH.IMM.MEAS', 'SH.STA.ACSN',
                                                 'SP.DYN.IMRT.IN','SH.H2O.SAFE.ZS','SP.POP.GROW','SP.POP.TOTL','NY.GNP.PCAP.CD']]
decade_health_USA_T_codes

In [None]:
# Plotting Measles Immunization vs Infant Mortality (USA)

# yr_list is defined once in the setup cell; it is reused here as the x-axis.
years = yr_list
infant_mortality = decade_health_USA_T_codes['SP.DYN.IMRT.IN'].astype(float).tolist()
measles_immunization = decade_health_USA_T_codes['SH.IMM.MEAS'].astype(float).tolist()

fig, ax1 = plt.subplots()
fig.suptitle('Measles Immunization Rate Vs. \n Infant Mortality Rate from 2005-2015 (USA)', fontsize=14, fontweight="bold")

ax1.set_xlim(min(years) - .5, max(years) + .5)

# Both series share one axis, so each line carries a legend label; without it
# the two lines can only be told apart by reading the code.
ax1.plot(years, infant_mortality, linewidth=1, marker="o",
         label="Infant mortality rate (SP.DYN.IMRT.IN)")
ax1.plot(years, measles_immunization, linewidth=1, marker="o", color="r",
         label="Measles immunization rate (SH.IMM.MEAS)")

# NOTE(review): the shared "per 100" label may not match the unit of
# SP.DYN.IMRT.IN - confirm against the indicator definition.
ax1.set_ylabel("Rate (per 100)")
ax1.set_xlabel("Year")
ax1.legend(loc="best")

plt.savefig("measles_usa.png")

In [None]:
# Correlation between infant mortality and measles immunization (USA)
# (pingouin is already imported as pg in the setup cell)

infant_mortality = decade_health_USA_T_codes['SP.DYN.IMRT.IN'].tolist()
measles_immunization = decade_health_USA_T_codes['SH.IMM.MEAS'].tolist()

usa_corr = pg.corr(infant_mortality, measles_immunization)
print(usa_corr)

In [None]:
# Calculating r value and plotting graphs for GNP vs Infant Mortality (USA)

sns.set(style='white', font_scale=1.2)

x = [ x[0] for x in decade_health_USA_T_codes[['NY.GNP.PCAP.CD']].values]
y = [ x[0] for x in decade_health_USA_T_codes[['SH.IMM.MEAS']].values]

print(pg.corr(x, y))

g = sns.JointGrid(x, y)
g = g.plot_joint(sns.regplot, color="xkcd:muted blue")
g = g.plot_marginals(sns.distplot, kde=False, bins=12, color="xkcd:bluey grey")
g.ax_joint.text(150, 95, 'r = 0.45, p < .001', fontstyle='italic')
plt.tight_layout()

plt.savefig("gnp_usa.png")

In [None]:

sns.set(style='white', font_scale=1.2)

x = [ x[0] for x in decade_health_germany_T_codes[['NY.GNP.PCAP.CD']].values]
y = [ x[0] for x in decade_health_germany_T_codes[['SH.IMM.MEAS']].values]
print(pg.corr(x, y))

g = sns.JointGrid(x, y)
g = g.plot_joint(sns.regplot, color="xkcd:muted blue")
g = g.plot_marginals(sns.distplot, kde=False, bins=12, color="xkcd:bluey grey")
g.ax_joint.text(150, 95, 'r = 0.45, p < .001', fontstyle='italic')
plt.tight_layout()

plt.savefig("gnp_germany.png")

In [None]:

sns.set(style='white', font_scale=1.2)

x = [ x[0] for x in decade_health_china_T_codes[['NY.GNP.PCAP.CD']].values]
y = [ x[0] for x in decade_health_china_T_codes[['SH.IMM.MEAS']].values]
print(pg.corr(x, y))

g = sns.JointGrid(x, y)
g = g.plot_joint(sns.regplot, color="xkcd:muted blue")
g = g.plot_marginals(sns.distplot, kde=False, bins=12, color="xkcd:bluey grey")
g.ax_joint.text(150, 95,'r = 0.45,  p < .001', fontstyle='italic')
plt.tight_layout()

plt.savefig("gnp_china.png")

In [None]:

sns.set(style='white', font_scale=1.2)

x = [ x[0] for x in decade_health_brazil_T_codes[['NY.GNP.PCAP.CD']].values]
y = [ x[0] for x in decade_health_brazil_T_codes[['SH.IMM.MEAS']].values]
print(pg.corr(x, y))

g = sns.JointGrid(x, y)
g = g.plot_joint(sns.regplot, color="xkcd:muted blue")
g = g.plot_marginals(sns.distplot, kde=False, bins=12, color="xkcd:bluey grey")
g.ax_joint.text(150, 95, 'r = 0.45, p < .001', fontstyle='italic')
plt.tight_layout()

plt.savefig("gnp_brazil.png")

In [None]:


# Plotting HepB and Measles immunization rates (USA)

# yr_list is defined once in the setup cell; it is reused here as the x-axis.
years = yr_list

# (legend label, indicator code, line colour); None means "next default colour"
immunization_series = [
    ("HepB (SH.IMM.HEPB)", 'SH.IMM.HEPB', None),
    ("Measles (SH.IMM.MEAS)", 'SH.IMM.MEAS', "r"),
]

fig, ax1 = plt.subplots()
fig.suptitle("Immunization Rates from 2005-2015 (USA)", fontsize=16, fontweight="bold")
ax1.set_xlim(min(years) - .5, max(years) + .5)

for label, code, color in immunization_series:
    rates = decade_health_USA_T_codes[code].astype(float).tolist()

    # Linear trend of the rate over the years; report how well a line fits it.
    trend = stats.linregress(years, rates)
    print(label, 'r-squared:', trend.rvalue ** 2)

    ax1.plot(years, rates, linewidth=1, marker="o", color=color, label=label)

# The axis carries both vaccines, so the label is not measles-specific.
ax1.set_ylabel("Immunization Rate")
ax1.set_xlabel("Year")
ax1.legend(loc="best")

In [None]:
germany_df = countries_sorted_df.get_group('Germany')
germany_df

In [None]:
dropped_germany_df = germany_df.dropna() 
dropped_germany_df

In [None]:
decade_health_germany_T = dropped_germany_df.iloc[:, 2:].T
decade_health_germany_T.columns = decade_health_germany_T.iloc[1,:]
decade_health_germany_T = decade_health_germany_T.iloc[2:, :]
decade_health_germany_T

In [None]:
decade_health_germany_T_codes = decade_health_germany_T[['SH.IMM.HEPB', 'SH.IMM.MEAS', 'SH.STA.ACSN','SP.DYN.IMRT.IN','SH.H2O.SAFE.ZS','SP.POP.GROW','SP.POP.TOTL','NY.GNP.PCAP.CD']]
decade_health_germany_T_codes

In [None]:
# Plotting Measles Immunization vs Infant Mortality (Germany)

# yr_list is defined once in the setup cell; it is reused here as the x-axis.
years = yr_list

# Germany's infant mortality series is drawn at 10x its value (the USA, China
# and Brazil cells plot it unscaled). The factor is kept so the saved figure
# is unchanged, but it is now stated in the legend instead of being silent.
# r-squared is unaffected by the scaling.
# NOTE(review): consider a secondary y-axis instead of rescaling.
IMR_PLOT_SCALE = 10

# (legend label, plotted values, line colour); None means "next default colour"
plotted_series = [
    ("Infant mortality rate x10 (SP.DYN.IMRT.IN)",
     (decade_health_germany_T_codes['SP.DYN.IMRT.IN'].astype(float) * IMR_PLOT_SCALE).tolist(),
     None),
    ("Measles immunization rate (SH.IMM.MEAS)",
     decade_health_germany_T_codes['SH.IMM.MEAS'].astype(float).tolist(),
     "r"),
]

fig, ax1 = plt.subplots()
fig.suptitle('Measles Immunization Rate Vs. \n Infant Mortality Rate from 2005-2015 (Germany)', fontsize=14, fontweight="bold")
ax1.set_xlim(min(years) - .5, max(years) + .5)

for label, rates, color in plotted_series:
    # Linear trend of the series over the years; report how well a line fits it.
    trend = stats.linregress(years, rates)
    print(label, 'r-squared:', trend.rvalue ** 2)

    ax1.plot(years, rates, linewidth=1, marker="o", color=color, label=label)

ax1.set_ylabel("Rate (per 100)")
ax1.set_xlabel("Year")
ax1.legend(loc="best")

plt.savefig("measles_germany.png")

In [None]:

# Correlation between infant mortality and measles immunization (Germany)
infant_mortality = decade_health_germany_T_codes['SP.DYN.IMRT.IN'].tolist()
measles_immunization = decade_health_germany_T_codes['SH.IMM.MEAS'].tolist()

germany_corr = pg.corr(infant_mortality, measles_immunization)
print(germany_corr)

In [None]:
china_df = countries_sorted_df.get_group('China') 
china_df

In [None]:
dropped_china_df = china_df.dropna() 
dropped_china_df

In [None]:
decade_health_china_T = dropped_china_df.iloc[:, 2:].T
decade_health_china_T.columns = decade_health_china_T.iloc[1,:]
decade_health_china_T = decade_health_china_T.iloc[2:, :]
decade_health_china_T

In [None]:
decade_health_china_T_codes = decade_health_china_T[['SH.IMM.HEPB', 'SH.IMM.MEAS', 'SH.STA.ACSN','SP.DYN.IMRT.IN','SH.H2O.SAFE.ZS','SP.POP.GROW','SP.POP.TOTL','NY.GNP.PCAP.CD']]
decade_health_china_T_codes

In [None]:
decade_health_china_T_codes.iloc[:,1]

In [None]:

# Plotting Measles Immunization vs Infant Mortality (China)

# yr_list is defined once in the setup cell; it is reused here as the x-axis.
years = yr_list

# (legend label, indicator code, line colour); None means "next default colour"
plotted_series = [
    ("Infant mortality rate (SP.DYN.IMRT.IN)", 'SP.DYN.IMRT.IN', None),
    ("Measles immunization rate (SH.IMM.MEAS)", 'SH.IMM.MEAS', "r"),
]

fig, ax1 = plt.subplots()
fig.suptitle('Measles Immunization Rate Vs. \n Infant Mortality Rate from 2005-2015 (China)', fontsize=14, fontweight="bold")
ax1.set_xlim(min(years) - .5, max(years) + .5)

for label, code, color in plotted_series:
    rates = decade_health_china_T_codes[code].astype(float).tolist()

    # Linear trend of the series over the years; report how well a line fits it.
    trend = stats.linregress(years, rates)
    print(label, 'r-squared:', trend.rvalue ** 2)

    # Each line is labelled so the two series sharing the axis can be told apart.
    ax1.plot(years, rates, linewidth=1, marker="o", color=color, label=label)

ax1.set_ylabel("Rate (per 100)")
ax1.set_xlabel("Year")
ax1.legend(loc="best")

plt.savefig("measles_china.png")

In [None]:

# Correlation between infant mortality and measles immunization (China)
infant_mortality = decade_health_china_T_codes['SP.DYN.IMRT.IN'].tolist()
measles_immunization = decade_health_china_T_codes['SH.IMM.MEAS'].tolist()

china_corr = pg.corr(infant_mortality, measles_immunization)
print(china_corr)

In [None]:
brazil_df = countries_sorted_df.get_group('Brazil') 
brazil_df

In [None]:
dropped_brazil_df = brazil_df.dropna() 
dropped_brazil_df

In [None]:
decade_health_brazil_T = dropped_brazil_df.iloc[:, 2:].T
decade_health_brazil_T.columns = decade_health_brazil_T.iloc[1,:]
decade_health_brazil_T = decade_health_brazil_T.iloc[2:, :]
decade_health_brazil_T

In [None]:
decade_health_brazil_T_codes = decade_health_brazil_T[['SH.IMM.HEPB', 'SH.IMM.MEAS', 'SH.STA.ACSN','SP.DYN.IMRT.IN','SH.H2O.SAFE.ZS','SP.POP.GROW','SP.POP.TOTL','NY.GNP.PCAP.CD']]
decade_health_brazil_T_codes

In [None]:

# Plotting Measles Immunization vs Infant Mortality (Brazil)

# yr_list is defined once in the setup cell; it is reused here as the x-axis.
years = yr_list

# (legend label, indicator code, line colour); None means "next default colour"
plotted_series = [
    ("Infant mortality rate (SP.DYN.IMRT.IN)", 'SP.DYN.IMRT.IN', None),
    ("Measles immunization rate (SH.IMM.MEAS)", 'SH.IMM.MEAS', "r"),
]

fig, ax1 = plt.subplots()
fig.suptitle('Measles Immunization Rate Vs. \n Infant Mortality Rate from 2005-2015 (Brazil)', fontsize=14, fontweight="bold")
ax1.set_xlim(min(years) - .5, max(years) + .5)

for label, code, color in plotted_series:
    rates = decade_health_brazil_T_codes[code].astype(float).tolist()

    # Linear trend of the series over the years; report how well a line fits it.
    trend = stats.linregress(years, rates)
    print(label, 'r-squared:', trend.rvalue ** 2)

    # Each line is labelled so the two series sharing the axis can be told apart.
    ax1.plot(years, rates, linewidth=1, marker="o", color=color, label=label)

ax1.set_ylabel("Rate (per 100)")
ax1.set_xlabel("Year")
ax1.legend(loc="best")

plt.savefig("measles_brazil.png")

In [None]:

# Correlation between infant mortality and measles immunization (Brazil)
infant_mortality = decade_health_brazil_T_codes['SP.DYN.IMRT.IN'].tolist()
measles_immunization = decade_health_brazil_T_codes['SH.IMM.MEAS'].tolist()

brazil_corr = pg.corr(infant_mortality, measles_immunization)
print(brazil_corr)