# Table of Contents

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd

# Birth years to load; each year is read from its own 'yobYYYY.txt' file.
years = range(2000, 2011)

pieces = []
columns = ['name', 'sex', 'births']

for year in years:
    path = 'yob{}.txt'.format(year)
    # Passing names= makes pandas treat the first line as data, not a header.
    df = pd.read_csv(path, names=columns)
    # Tag every row with its year so the combined frame can be grouped by it.
    df['year'] = year
    pieces.append(df)

# Concatenate everything into a single DataFrame. This runs once, after the
# loop: concatenating inside the loop rebuilt the whole frame on every
# iteration. ignore_index=True replaces the per-file row numbers with one
# continuous index.
names = pd.concat(pieces, ignore_index=True)

# Aggregate data at year and sex level

In [None]:
# Sum births for every (year, sex) pair: one row per year, one column per sex.
# The aggregation is named by string because passing the builtin `sum` is
# deprecated in recent pandas releases.
total_births = names.pivot_table(values='births', index='year', columns=['sex'], aggfunc='sum')
total_births.plot(title='Total births by sex and year')

# Add a prop column: each name's share of the births in its year/sex group

In [None]:
def add_prop(group):
    """Return a copy of ``group`` with a ``prop`` column added.

    ``prop`` is each row's ``births`` divided by the total ``births`` of
    ``group``, so the column sums to 1 over the rows of ``group``.

    The frame passed in is left untouched: functions handed to
    ``groupby(...).apply`` should not mutate the object they receive, so the
    result is built with ``assign`` instead of in-place column assignment.
    """
    # Work in float so the ratio is a floating-point fraction whatever the
    # stored dtype of the counts is.
    births = group['births'].astype(float)
    return group.assign(prop=births / births.sum())

# Run add_prop on every (year, sex) group and stitch the pieces back together.
# Iterating over the groups (rather than groupby(...).apply) keeps the 'year'
# and 'sex' columns in each piece and leaves the original row index in place.
# With apply's default group_keys=True on recent pandas, the group keys are
# also pushed into the index, which makes the groupby in the sanity check
# below ambiguous ('year' would be both an index level and a column).
names = pd.concat(
    [add_prop(group) for _, group in names.groupby(['year', 'sex'])]
).sort_index()  # groups come out in key order; restore the original row order

# sanity check to verify the prop column sums to 1 within all the groups
np.allclose(names.groupby(['year', 'sex']).prop.sum(), 1)


# Get top 1000 names for each sex/year combination

In [None]:
def get_top1000(group):
    """Return the 1000 rows of ``group`` with the highest ``births``.

    Rows come back ordered from most to fewest births; a group with fewer
    than 1000 rows is returned whole (still sorted).
    """
    ranked = group.sort_values('births', ascending=False)
    return ranked.head(1000)

# Split the table by year and sex, then keep each group's 1000 most common
# names via get_top1000.
grouped = names.groupby(by=['year', 'sex'])
top1000 = grouped.apply(get_top1000)

# Bare expression so the notebook displays the resulting frame.
top1000