# 06_04: Plotting with Pandas

In [None]:
import math
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

%matplotlib inline

In [None]:
# show at most 16 rows when pandas displays a table
pd.set_option('display.max_rows', 16)

In [None]:
gapminder = pd.read_csv('gapminder.csv')

In [None]:
gapminder.head()

In [None]:
gapminder.describe()

In [None]:
# create a new Series by doing numpy math on a DataFrame column;
# use dict-like syntax to assign the new Series to a new column in the DataFrame
gapminder['log_gdp_per_day'] = np.log10(gapminder['gdp_per_capita'] / 365.25)

In [None]:
gapminder.head()

In [None]:
gapminder_by_year = gapminder.set_index('year').sort_index()

In [None]:
gapminder_by_country = gapminder.set_index('country').sort_index()

In [None]:
# scatter plot of the 1960 rows: log daily GDP on x, life expectancy on y
gapminder_by_year.loc[1960].plot.scatter(x='log_gdp_per_day', y='life_expectancy')

In [None]:
# overlaying several pandas plots: keep the axes object that the first plot
# returns and hand it to the following plots through the "ax" argument

scatter_columns = dict(x='log_gdp_per_day', y='life_expectancy')

axes = gapminder_by_year.loc[1960].plot.scatter(label=1960, **scatter_columns)
gapminder_by_year.loc[2015].plot.scatter(label=2015, color='C1', ax=axes, **scatter_columns)

In [None]:
# same 1960-vs-2015 overlay, this time with age5_surviving on the y axis
scatter_columns = dict(x='log_gdp_per_day', y='age5_surviving')

axes = gapminder_by_year.loc[1960].plot.scatter(label=1960, **scatter_columns)
gapminder_by_year.loc[2015].plot.scatter(label=2015, color='C1', ax=axes, **scatter_columns)

In [None]:
gapminder_by_country.loc['Italy'].plot('year', 'life_expectancy')

In [None]:
gapminder_by_country.loc['Italy'].sort_values('year').plot('year', 'life_expectancy')

In [None]:
# one line per country on shared axes: the first plot creates the axes object,
# the remaining countries are drawn onto it through "ax"
axes = gapminder_by_country.loc['Italy'].sort_values(by='year').plot(
    x='year', y='life_expectancy', label='Italy')
for country, legend_label in [('China', 'China'), ('United States', 'USA')]:
    gapminder_by_country.loc[country].sort_values(by='year').plot(
        x='year', y='life_expectancy', label=legend_label, ax=axes)

# start the x axis at 1900 and name the y axis
axes.set_xlim(left=1900)
axes.set_ylabel('life expectancy')

In [None]:
gapminder.babies_per_woman.mean()

In [None]:
# compute all-countries mean of babies_per_woman after segmenting data by year;
# result is Series indexed by year
gapminder.groupby('year').babies_per_woman.mean()

In [None]:
# plot the per-year mean of babies_per_woman and label the axes object that
# pandas returns, instead of relying on whichever axes pyplot treats as current
left_axes = gapminder.groupby('year')['babies_per_woman'].mean().plot()
left_axes.set_ylabel('babies per woman')

# with secondary_y=True, the second plot generates a second set of axis labels;
# it is drawn onto the first plot's axes via "ax", and the axes object returned
# here is the secondary (right-hand) one, so its y label is set directly
right_axes = gapminder.groupby('year')['age5_surviving'].mean().plot(secondary_y=True, ax=left_axes)
right_axes.set_ylabel('age 5 survival [%]')

In [None]:
# pivot table: one row per year, one column per region, each cell holding the
# mean of babies_per_woman for that year/region pair
gapminder.pivot_table(values='babies_per_woman', index='year', columns='region', aggfunc='mean')

In [None]:
# plot the year-by-region pivot table (one line per region) and title the axes
babies_by_region = gapminder.pivot_table(values='babies_per_woman', index='year', columns='region')
babies_axes = babies_by_region.plot()
babies_axes.set_title('babies per woman')

In [None]:
# same year-by-region pivot for age5_surviving, plotted as one line per region
survival_by_region = gapminder.pivot_table(values='age5_surviving', index='year', columns='region')
survival_by_region.plot()