Name: Travis Lamberte

Project: Animating a data visuization of the baby names data.

In [2]:
import pandas as pd
import os
import glob
import bar_chart_race as bcr
import matplotlib
import warnings
matplotlib.use('module://matplotlib_inline.backend_inline')
warnings.filterwarnings("ignore", category=UserWarning)

# load all files matching the pattern
files = glob.glob('baby_names/yob*.txt')

# read and combine all files
dfs = []
for file in files:
    year = int(os.path.basename(file)[3:7])  # Correct slice from filename only
    df = pd.read_csv(file, names=['name', 'sex', 'count'])
    df['year'] = year
    dfs.append(df)

# combine into one DataFrame
baby_names = pd.concat(dfs, ignore_index=True)

baby_names['rank'] = (
    baby_names
    .groupby(['year', 'sex'])['count']
    .rank(method='first', ascending=False)
)

baby_names = baby_names[baby_names['sex'] == 'F']

baby_names['name'] = baby_names['name'].str.encode('ascii', errors='ignore').str.decode('ascii')

# print(baby_names[['year', 'name', 'count', 'rank']].head(10))

ValueError: No objects to concatenate

In [None]:
# keep only top 7 names per year
top10 = baby_names[baby_names['rank'] <= 7].copy()

# keep only names that appear in the top 7 at least once
def top_n_per_year(df, n=7):
    top_names = (
        df
        .apply(lambda row: row.nlargest(n).index, axis=1)
        .explode()
        .value_counts()
        .index
    )
    return df[top_names]

pivot_df = baby_names.pivot(index='year', columns='name', values='count').fillna(0)

pivot_df = top_n_per_year(pivot_df, n=7)

pivot_df.index = pd.to_datetime(pivot_df.index, format='%Y')

In [None]:
# code for mobile device screen - portrait layout
bcr.bar_chart_race(
    df=pivot_df,
    filename='top_7_baby_names_for_girls_shorts.mp4',
    orientation='h',
    sort='desc',
    n_bars=7,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=30,
    period_length=2000,
    period_fmt='%Y',
    figsize=(5,9),
    bar_size=.45,
    cmap='dark12',
    period_label={
        'x': 0.95, 'y': 0.05,        # move year to top-left
        'ha': 'right', 'va': 'bottom',
        'size': 34, 'color': 'black'
    },
    tick_label_size=20, # graph labels font size
    bar_label_size=20, # animated ticker font size
    shared_fontdict={'family':'Arial','weight':'bold'}
)

In [None]:
# code for wide screen layout = laptop screen device

# bcr.bar_chart_race(
#     df=pivot_df,
#     filename='top_baby_names_for_girls_16_9.mp4',
#     orientation='h',
#     sort='desc',
#     n_bars=10,
#     fixed_order=False,
#     fixed_max=True,
#     steps_per_period=15,
#     period_length=1000,
#     period_fmt='%Y',
#     figsize=(16,9),
#     bar_size=.70,
#     cmap='dark12',
#     period_label={
#         'x': 0.80, 'y': 0.20,
#         'ha': 'right', 'va': 'bottom',
#         'size': 36, 'color': 'black'
#     },
#     tick_label_size=24, # graph labels font size
#     bar_label_size=24, # animated ticker font size
#     shared_fontdict={'family':'Arial','weight':'bold'}
# )