# 07_03: Name Popularity

In [None]:
import math
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

%matplotlib inline

In [None]:
# keep displayed tables short: show at most 6 rows of any DataFrame/Series
pd.set_option('display.max_rows', 6)

In [None]:
# read the names table from a gzip-compressed CSV file located in the
# working directory (read_csv infers the compression from the extension)
allyears = pd.read_csv('allyears.csv.gz')

In [None]:
# show the loaded table (a notebook displays the value of a cell's last expression)
allyears

In [None]:
# build a (sex, name, year) multiindex and sort it, so that records can be
# looked up with .loc by sex, by sex/name, or by sex/name/year
allyears_indexed = (
    allyears
    .set_index(['sex', 'name', 'year'])
    .sort_index()
)

In [None]:
# select every record for sex 'F' and name 'Mary'; the two matched index
# levels are consumed by the lookup, leaving a table indexed by year
allyears_indexed.loc[('F','Mary')]

In [None]:
# plot the F/Mary records against their remaining index level (year)
pp.plot(allyears_indexed.loc[('F','Mary')])

In [None]:
# normalize F/Mary time series by the total number of births each year;
# numeric_only=True keeps the string columns (sex, name) out of the per-year
# sum, so the division only pairs up the numeric column(s), aligned on year
pp.plot(allyears_indexed.loc[('F','Mary')] / allyears.groupby('year').sum(numeric_only=True))

In [None]:
# plot number of sex/name babies as a function of year

def plotname(sex, name):
    """Plot the yearly records stored for one (sex, name) pair.

    The curve is drawn on the current matplotlib axes and labelled with
    `name`; the x-axis is then pinned to the range 1880-2018.
    """
    records = allyears_indexed.loc[(sex, name)]
    years, counts = records.index, records.values

    pp.plot(years, counts, label=name)
    pp.axis(xmin=1880, xmax=2018)

In [None]:
# combine several "plotname()" plots for given sex and list of names

def comparenames(sex, names):
    """Overlay one plotname() curve per entry of `names` in a single figure.

    Opens a new 12 x 2.5 figure, draws every name for the given sex, and
    finishes with a legend (plotname() labels each curve with its name).
    """
    pp.figure(figsize=(12, 2.5))

    for candidate in names:
        plotname(sex=sex, name=candidate)

    pp.legend()

In [None]:
# overlay the yearly curves of four names recorded under sex 'M'
comparenames(sex='M', names=['Michael', 'John', 'David', 'Martin'])

In [None]:
# overlay the yearly curves of four names recorded under sex 'F'
comparenames(sex='F', names=['Emily', 'Anna', 'Claire', 'Elizabeth'])

In [None]:
# spelling variants to be compared with each other; this must stay a list,
# because it is used as a multi-value selector inside .loc[...] tuples
claires = [
    'Claire',
    'Clare',
    'Clara',
    'Chiara',
    'Ciara',
]

In [None]:
# overlay the yearly curves of all the spelling variants listed in `claires`
comparenames(sex='F', names=claires)

In [None]:
# a list in the second slot of the tuple selects several names at once for
# sex 'F'; the trailing ':' keeps all columns
allyears_indexed.loc[('F',claires),:]

In [None]:
# "pivot" the third level of the multiindex (years) to create a row of columns;
# result is names (rows) x years (columns)
allyears_indexed.loc[('F',claires),:].unstack(level=2)

In [None]:
# "pivot" the third level of the multiindex (names) to create a row of columns
allyears_indexed.loc[('F',claires),:].unstack(level=1)

In [None]:
# make a stacked (cumulative) area plot using names x years table;
# no fillna() is applied here, so the table still contains NaN wherever a
# name has no record for a year, and the x values are hard-coded as 1880..2018

pp.figure(figsize=(12,2.5))
pp.stackplot(range(1880,2019),
             allyears_indexed.loc[('F',claires),:].unstack(level=2));

In [None]:
# fix stacked plot by filling NaNs with zeros, adding labels, setting axis range

# names (rows) x years (columns) table; a missing name/year record counts as zero
claires_by_year = allyears_indexed.loc[('F',claires),:].unstack(level=2).fillna(0)

pp.figure(figsize=(12,2.5))
# take the x values and the labels from the table itself: every band is then
# labelled with the name of the row it was drawn from (the rows follow the
# index order, which need not be the order in which `claires` lists the names),
# and the number of x values always matches the number of year columns
pp.stackplot(claires_by_year.columns.get_level_values('year'),
             claires_by_year,
             labels=list(claires_by_year.index.get_level_values('name')));

pp.legend(loc='upper left')
pp.axis(xmin=1880, xmax=2018);