# Introduction

This project explores the relationship between a country's economic output and the life expectancy of its population.

We will prepare and analyze data, create visualizations, and interpret the results.

Key questions include:

+ Has life expectancy changed over time in the six countries studied?
+ Has GDP changed over time in these countries?
+ Is there a link between GDP and life expectancy?
+ What is the average life expectancy in these countries?
+ How is life expectancy distributed?

**Data sources**

- GDP: [World Bank](https://data.worldbank.org/indicator/NY.GDP.MKTP.CD) and OECD National Accounts.
- Life Expectancy: [World Health Organization](http://apps.who.int/gho/data/node.main.688).


In [None]:
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline

# Read the dataset and print a quick overview: first rows, dimensions,
# and the distinct values of the two key columns.
df = pd.read_csv("all_data.csv")
print(df.head())
print(f"Shape: {df.shape}")
print(f"Countries: {df['Country'].unique()}")
print(f"Years: {df['Year'].unique()}")

# The life-expectancy header is long; give it a short alias for plotting code
# and preview the frame again to confirm the new column name.
df = df.rename({"Life expectancy at birth (years)": "LEABY"}, axis="columns")
print(df.head())

# Distribution plots
def plot_distribution(data, column, xlabel):
    """Show a histogram of one column with a rug of the individual values.

    Args:
        data: Table indexable by column name (callers pass a pandas DataFrame).
        column: Name of the column whose distribution is plotted.
        xlabel: Text used as the x-axis label.
    """
    plt.figure(figsize=(8, 6))
    values = data[column]
    # `histplot` has no `rug` option (that belonged to the deprecated
    # `distplot`); passing it is forwarded to matplotlib and raises.
    # Draw the rug explicitly on the same axes instead.
    ax = sns.histplot(values, kde=False)
    sns.rugplot(x=values, ax=ax)
    plt.xlabel(xlabel)
    plt.show()

# One histogram per numeric column of interest
for dist_column, dist_label in (
    ("GDP", "GDP in Trillions of U.S. Dollars"),
    ("LEABY", "Life expectancy at birth (years)"),
):
    plot_distribution(df, dist_column, dist_label)

# Per-country averages; Year is dropped first so it is not averaged
df_means = df.drop(columns="Year").groupby("Country", as_index=False).mean()

def plot_bar(data, x, y, xlabel):
    """Show a seaborn bar plot of two columns with a custom x-axis label.

    Args:
        data: Table holding the columns to plot.
        x: Name of the column mapped to the x-axis.
        y: Name of the column mapped to the y-axis.
        xlabel: Text used as the x-axis label.
    """
    _, bar_ax = plt.subplots(figsize=(8, 6))
    sns.barplot(data=data, x=x, y=y, ax=bar_ax)
    bar_ax.set_xlabel(xlabel)
    plt.show()

# Horizontal bars of the per-country means, one chart per measure
for mean_column, mean_label in (
    ("LEABY", "Life expectancy at birth (years)"),
    ("GDP", "GDP in Trillions of U.S. Dollars"),
):
    plot_bar(df_means, mean_column, "Country", mean_label)

# Side-by-side violin plots per country, sharing the country axis
fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(15, 5))
violin_panels = (
    ("GDP", "GDP in Trillions of U.S. Dollars"),
    ("LEABY", "Life expectancy at birth (years)"),
)
for panel_ax, (panel_column, panel_label) in zip(axes, violin_panels):
    sns.violinplot(data=df, x=panel_column, y="Country", ax=panel_ax)
    panel_ax.set_xlabel(panel_label)
plt.show()

# Life expectancy over time, one line per country
_, line_ax = plt.subplots(figsize=(8, 6))
sns.lineplot(data=df, x="Year", y="LEABY", hue="Country", ax=line_ax)
# Park the legend just outside the right edge so it does not cover the lines
line_ax.legend(loc="center left", bbox_to_anchor=(1, 0.5), ncol=1)
line_ax.set_ylabel("Life expectancy at birth (years)")
plt.show()

# GDP against life expectancy, coloured by country
_, scatter_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x="LEABY", y="GDP", hue="Country", ax=scatter_ax)
# Legend sits outside the plotting area, vertically centred on the right
scatter_ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), ncol=1)
plt.show()

# One scatter panel per country (three per row); axes are independent so each
# country's own range is visible
graph = sns.FacetGrid(
    data=df,
    col="Country",
    col_wrap=3,
    hue="Country",
    sharex=False,
    sharey=False,
)
graph.map(sns.scatterplot, "LEABY", "GDP")
graph.add_legend()
graph.set_axis_labels(
    "Life expectancy at birth (years)",
    "GDP in Trillions of U.S. Dollars",
)
plt.show()
