In [1]:
import pandas as pd
import bar_chart_race as bcr
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Load the raw CSV export into a DataFrame.
df = pd.read_csv("suicide_data.csv")

# Quick sanity checks: overall size and the distinct race categories present
# (missing values show up as nan in the unique list).
print(df.shape)
print(df["Race"].unique())

# Keep the preview as the cell's final expression: a bare expression is only
# rendered when it ends the cell, so in a mid-cell position it shows nothing.
df.head()

(177, 9)
['American Indian or Alaska Native' 'Asian or Pacific Islander'
 'Black or African American' 'White' nan]


In [2]:
# Short display labels for the race categories found in the CSV.
race_map = {
    "Black or African American": "Black",
    # NOTE(review): this label drops "Asian" from the combined category;
    # consider "Asian/Pacific Islander" so the chart is not misread.
    "Asian or Pacific Islander": "Pacific Islander",
    "American Indian or Alaska Native": "Native American",
    "White": "White",
    # Not among the categories printed when the CSV was loaded; kept as a
    # harmless no-op in case a later export includes it.
    "Hispanic or Latino": "Hispanic",
}

# Build the cleaned frame in a single chain so every step operates on a fresh
# object (assigning columns onto the result of dropna() can trigger
# SettingWithCopyWarning) and the cell stays safe to re-run.
df = (
    df.dropna(subset=["Year"])  # rows without a Year cannot be placed on the timeline
    .assign(
        # Apply the short labels, then bucket rows with no race as "Other".
        # NOTE(review): rows with a missing Race may be per-year totals rather
        # than a distinct "Other" group -- confirm against the CSV.
        Race=lambda d: d["Race"].replace(race_map).fillna("Other"),
        # Per 100k -> per 10M is a factor of 100. Coerce to numeric first so a
        # text entry becomes NaN instead of being string-repeated by `* 100`.
        Rate_per_10M=lambda d: pd.to_numeric(d["Age Adjusted Rate"], errors="coerce") * 100,
        # Whole-number years (no trailing ".0").
        Year=lambda d: d["Year"].astype(int),
    )
)

# One row per Year, one column per race label, averaging any duplicate
# (Year, Race) entries. The index inherits the integer Year values, so a single
# explicit sort is all that is needed to guarantee chronological order.
pivot = df.pivot_table(
    index="Year",
    columns="Race",
    values="Rate_per_10M",
    aggfunc="mean",
).sort_index()

print(pivot.head())
print(df["Year"].dtype)

Race  Black  Native American   Other  Pacific Islander   White
Year                                                          
1999  560.0           1010.0  1050.0             600.0  1130.0
2000  550.0            980.0  1040.0             550.0  1130.0
2001  550.0           1050.0  1070.0             540.0  1170.0
2002  530.0           1000.0  1090.0             530.0  1210.0
2003  520.0            950.0  1080.0             560.0  1190.0
int32


In [3]:
def format_period_label(values, ranks):
    """Describe where and how to draw the year caption for one animation frame.

    Parameters
    ----------
    values : object with a ``name`` attribute
        Only ``values.name`` is read. It is converted with ``int()``, so a
        fractional value such as 1999.95 is truncated to 1999.
    ranks : any
        Accepted for signature compatibility; not used.

    Returns
    -------
    dict
        Text settings with the keys ``x``, ``y``, ``s``, ``ha``, ``va`` and
        ``fontsize``.
    """
    caption = f"{int(values.name)}"  # whole-number year, no decimals
    return dict(
        x=0.95,        # horizontal position (axes fraction)
        y=0.05,        # vertical position (axes fraction)
        s=caption,     # the text to draw
        ha="right",
        va="bottom",
        fontsize=36,
    )

In [4]:
# Render the animated bar chart race to an MP4 file.
# `pivot` is expected to have Year as its index and one column per race label,
# each holding that year's rate.
bcr.bar_chart_race(
    df=pivot,
    filename="suicide_rates_by_race.mp4",
    orientation='h',
    sort='desc',
    n_bars=6,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=20,
    interpolate_period=True,
    period_length=2000,
    # The en dash is written as an escape so the title cannot be garbled by a
    # file re-encoding (it previously rendered as mojibake).
    title='Suicide Rates by Race/Ethnicity (1999\u20132020)',
    bar_size=.95,
    cmap='dark12',
    # The built-in period label is switched off; the year caption is supplied
    # by format_period_label through period_summary_func instead.
    period_label=False,
    period_summary_func=format_period_label
)
