Name: Travis Lamberte

Project: Animating a data visualization of the baby names data.

In [1]:
import pandas as pd
import os
import glob
import warnings
import bar_chart_race as bcr
import matplotlib
matplotlib.use('module://matplotlib_inline.backend_inline')

# ignore all UserWarnings
warnings.filterwarnings("ignore", category=UserWarning)

# Collect the per-year files. glob makes no ordering promise, so sort the
# paths to keep the concatenated row order the same on every run.
files = sorted(glob.glob('babynames/yob*.txt'))
if not files:
    # Fail with an explicit message instead of pd.concat's generic
    # "No objects to concatenate" error further down.
    raise FileNotFoundError("No files found matching 'babynames/yob*.txt'")

# read every file and tag its rows with the year taken from the file name
dfs = []
for file in files:
    year = int(os.path.basename(file)[3:7])  # 'yobYYYY.txt' -> YYYY
    df = pd.read_csv(file, names=['name', 'sex', 'count'])
    df['year'] = year
    dfs.append(df)

# combine into one DataFrame
baby_names = pd.concat(dfs, ignore_index=True)

# Keep only the rows with sex == 'M'. The .copy() makes the result an
# independent frame, so the column assignments below do not write into a
# slice of the concatenated frame (which raises SettingWithCopyWarning).
baby_names = baby_names[baby_names['sex'] == 'M'].copy()

# rank 1 = largest count within each (year, sex) group;
# method='first' breaks ties by order of appearance
baby_names['rank'] = (
    baby_names
    .groupby(['year', 'sex'])['count']
    .rank(method='first', ascending=False)
)

# drop any non-ASCII characters from the names
baby_names['name'] = baby_names['name'].str.encode('ascii', errors='ignore').str.decode('ascii')

print(baby_names[['year', 'name', 'count', 'rank']].head(10))

     year     name  count  rank
942  1880     John   9655   1.0
943  1880  William   9532   2.0
944  1880    James   5927   3.0
945  1880  Charles   5348   4.0
946  1880   George   5126   5.0
947  1880    Frank   3242   6.0
948  1880   Joseph   2632   7.0
949  1880   Thomas   2534   8.0
950  1880    Henry   2444   9.0
951  1880   Robert   2415  10.0


In [2]:
# rows whose per-year rank is 10 or better, as an independent copy
top10 = baby_names.loc[baby_names['rank'].le(10)].copy()

# restrict a wide table to the columns that reach the top n in at least one row
def top_n_per_year(df, n=10):
    """Select the columns that are among the ``n`` largest values of any row.

    Parameters
    ----------
    df : DataFrame
        Wide table; each row is examined independently.
    n : int, default 10
        How many of the largest entries to take from each row.

    Returns
    -------
    DataFrame
        ``df`` restricted to the selected columns, ordered by the number of
        rows in which each column made the top ``n`` (most frequent first).
    """
    def leaders(row):
        # labels of the n largest entries in this row
        return row.nlargest(n).index

    per_row_leaders = df.apply(leaders, axis=1)
    appearances = per_row_leaders.explode().value_counts()
    return df[appearances.index]

# Wide table passed to bcr below: index = year, columns = name,
# values = count (0 where a name has no row for that year), limited to the
# names that reach the top 10 in at least one year.
pivot_df = (
    baby_names
    .pivot(index='year', columns='name', values='count')
    .fillna(0)
    .pipe(top_n_per_year, n=10)
)

# turn the integer years into datetimes
pivot_df.index = pd.to_datetime(pivot_df.index, format='%Y')

In [3]:
# 16:9 landscape render, sized for a wide (laptop) screen

bcr.bar_chart_race(
    df=pivot_df,
    filename='top_baby_names_for_boys_16_9.mp4',
    # figure layout
    figsize=(16, 9),
    orientation='h',
    bar_size=0.70,
    cmap='dark12',
    # which bars are shown and how they are ordered
    n_bars=10,
    sort='desc',
    fixed_order=False,
    fixed_max=True,
    # animation pacing
    steps_per_period=15,
    period_length=1000,
    # year label
    period_fmt='%Y',
    period_label=dict(
        x=0.80, y=0.20,
        ha='right', va='bottom',
        size=36, color='black',
    ),
    # fonts
    tick_label_size=24,  # graph labels font size
    bar_label_size=24,   # animated ticker font size
    shared_fontdict=dict(family='Arial', weight='bold'),
)


In [4]:
# code for mobile device screen - portrait layout
# bcr.bar_chart_race(
#     df=pivot_df,
#     filename='top_baby_names_for_boys_shorts.mp4',
#     orientation='h',
#     sort='desc',
#     n_bars=10,
#     fixed_order=False,
#     fixed_max=True,
#     steps_per_period=15,
#     period_length=1000,
#     period_fmt='%Y',
#     # title='Top Male Names',
#     # title_size=28,
#     # shared_fontdict={'family':'Arial','weight':'bold'},
#     figsize=(5,10),
#     bar_size=.80,
#     cmap='dark12',
#     period_label={
#         'x': 0.80, 'y': 0.20,        # same year-label position as the wide layout
#         'ha': 'right', 'va': 'bottom',
#         'size': 34, 'color': 'black'
#     },
#     tick_label_size=20, # graph labels font size
#     bar_label_size=20, # animated ticker font size
#     shared_fontdict={'family':'Arial','weight':'bold'}
# )