# Step 1

## Open an empty Jupyter notebook and read in the DataFrame that contains your gapminder data

In [1]:
import os

import imageio
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [2]:
# Load the gapminder table; the CSV's first column becomes the row index.
df = pd.read_csv('../data/gapminder_total.csv', index_col=0)
# Preview the first rows only instead of rendering the whole frame.
df.head()

# Step 2
## Create a subset of the data for one year

In [3]:
# Keep only the rows whose 'year' is 1950; the single-year plots below use this slice.
df_subset = df.loc[df['year'] == 1950]
# Preview the first rows only instead of rendering the whole slice.
df_subset.head()

# Step 4
## Plot the life expectancy vs fertility rate

In [4]:
# First look at the 1950 slice: fertility on x, life expectancy on y.
sns.scatterplot(
    data=df_subset,
    x='fertility',
    y='life expectancy',
    color='red',
)

# Step 5
## Using matplotlib, add a title and axis labels. Increase the plot size to make it more readable.

In [5]:
# Create the figure first so the scatter plot is drawn at the larger size.
fig, ax = plt.subplots(figsize=(8, 4))

sns.scatterplot(x='fertility', y='life expectancy', data=df_subset, color='red', ax=ax)

# Labels follow the plotted columns: fertility is on x, life expectancy on y.
ax.set_title('Life Expectancy vs Fertility Rate')
ax.set_xlabel('Fertility')
ax.set_ylabel('Life Expectancy');


In [6]:
# Same scatter as before, now coloured by continent instead of a single colour.
plt.figure(figsize=(8, 4))
continent_ax = sns.scatterplot(
    data=df_subset,
    x='fertility',
    y='life expectancy',
    hue='continent',
)
continent_ax.set(title='Life Expectancy vs Fertility Rate');

# Step 6
## Scale the size of the scatter points by the population of each country. This can be done using the size parameter in the sns.scatterplot() function.

## Tip: the code above is correct but the magnitude of the population is astronomically larger than that of the life expectancy and fertility rate. Use some math to fix this.



In [7]:
# Apply the theme before the axes are created so this figure picks it up
# on the first run, not only on a re-run of the cell.
sns.set_style("darkgrid")

fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x='life expectancy', y='fertility', data=df_subset,
    hue='continent', style='continent', palette='magma_r', alpha=0.5,
    # sizes=(min, max) maps the population values onto that marker-size range.
    size='population', sizes=(15, 325),
    ax=ax,
)
ax.set(title='World\'s Life Expectancy vs Fertility Rate 1950')
ax.legend(loc='lower left', frameon=False, fontsize=8)

# Label a handful of countries slightly above their markers.
for country in ('Brazil', 'India', 'Zambia', 'New Zealand', 'Italy'):
    match = df_subset[df_subset['country'] == country]
    if match.empty:
        continue  # country not present in this slice: nothing to label
    # Pass plain scalars (not one-element Series) as the annotation point.
    ax.annotate(country, (match['life expectancy'].iloc[0],
                          match['fertility'].iloc[0] + 0.20))

plt.xticks(rotation=45);



In [8]:
# Folder that receives one PNG frame per year.
PATH = '../images'
# Create the folder if needed so savefig does not fail when it is missing.
os.makedirs(PATH, exist_ok=True)

# Apply the theme once, before any axes are created.
sns.set_style("darkgrid")

for year in range(1950, 2017):
    # Rows for the year being drawn in this frame.
    df_r = df[df['year'] == year]

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.scatterplot(
        x='life expectancy', y='fertility', data=df_r,
        hue='continent', style='continent', palette='magma_r', alpha=0.5,
        size='population', sizes=(15, 325),
        ax=ax,
    )
    # Name the year in the title so each frame of the animation is identifiable.
    ax.set(title=f"World's Life Expectancy vs Fertility Rate ({year})")
    ax.legend(loc='lower left', frameon=False, fontsize=8)

    # Annotate from this year's rows (df_r) so the labels follow the points;
    # reading the 1950 slice here would pin every label to its 1950 position.
    for country in ('Brazil', 'India', 'Zambia', 'New Zealand', 'Italy'):
        match = df_r[df_r['country'] == country]
        if match.empty:
            continue  # no row for this country in this year: skip the label
        ax.annotate(country, (match['life expectancy'].iloc[0],
                              match['fertility'].iloc[0] + 0.20))

    plt.xticks(rotation=45)

    filename = f'plot_{year}.png'
    fig.savefig(f'{PATH}/{filename}', dpi=90)
    # Close each figure so 67 open figures do not accumulate in memory.
    plt.close(fig)

    

In [9]:
# Read the saved frames in chronological order.
images = [imageio.imread(f'../images/plot_{year}.png') for year in range(1950, 2017)]

# Write the GIF once, after every frame has been loaded (not once per frame).
# NOTE(review): how `duration` is interpreted depends on the installed imageio
# version -- confirm the resulting frame timing.
imageio.mimsave('my_progression.gif', images, duration=8)

In [10]:
from IPython.display import Image

# Build the display object for the saved animation, then end the cell with it
# so the notebook renders the GIF as this cell's output.
scatter_gif = Image(filename='my_progression.gif')
scatter_gif

# Histogram

In [11]:
# Folder that receives one PNG frame per year.
PATH = '../images'
# Create the folder if needed so savefig does not fail when it is missing.
os.makedirs(PATH, exist_ok=True)

for year in range(1950, 2017):
    # Rows for the year being drawn in this frame.
    df_r = df[df['year'] == year]

    fig, ax = plt.subplots(figsize=(12, 10))
    sns.histplot(data=df_r, x="population", y="continent", hue='continent', bins=5, ax=ax)

    # Set the x and y labels.
    ax.set_xlabel("Population")
    ax.set_ylabel("Continent")

    plt.xticks(rotation=45)

    # NOTE: frames are saved as plot_<year>.png, so any existing files with
    # those names in PATH are overwritten by this cell.
    filename = f'plot_{year}.png'
    fig.savefig(f'{PATH}/{filename}', dpi=90)
    # Close each figure so open figures do not accumulate in memory.
    plt.close(fig)



In [12]:
# Read the saved frames in chronological order.
images = [imageio.imread(f'../images/plot_{year}.png') for year in range(1950, 2017)]

# Write the GIF once, after every frame has been loaded (not once per frame).
# NOTE(review): how `duration` is interpreted depends on the installed imageio
# version -- confirm the resulting frame timing.
imageio.mimsave('second_progression.gif', images, duration=8)

In [13]:
from IPython.display import Image

# Build the display object for the second animation, then end the cell with it
# so the notebook renders the GIF as this cell's output.
histogram_gif = Image(filename='second_progression.gif')
histogram_gif