In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# --- 1. Internal Setup (Recreating yearly_dfs for operation) ---
file_name = "../data sources/CDC - physical activity and obesity.csv"
df = pd.read_csv(file_name)
df['YearStart'] = df['YearStart'].astype(int)
unique_years = df['YearStart'].unique()
yearly_dfs = {year: df[df['YearStart'] == year].copy() for year in unique_years}
# --- End Internal Setup ---

# 2. Access the latest year's data (2023)
latest_year = 2023
df_2023 = yearly_dfs[latest_year]

# 3. Define the question and identify the overall rate rows
OBESITY_QUESTION = 'Percent of adults aged 18 years and older who have obesity'

# The overall rate is typically identified by a 'Total' or 'Overall' stratification.
# We'll prioritize 'Total' in StratificationCategory1, then 'Overall' in Stratification1 if necessary.
df_obesity = df_2023[
    (df_2023['Question'] == OBESITY_QUESTION) &
    (df_2023['StratificationCategory1'] == 'Total')
].copy()

# If the 'Total' stratification is not found, we fall back to a common alternative,
# which is often indicated by null values in the demographic stratification columns.
if df_obesity.empty:
    df_obesity = df_2023[
        (df_2023['Question'] == OBESITY_QUESTION) &
        (df_2023['Stratification1'] == 'Total')
    ].copy()


# Final check on data: we should have one row per state.
# Clean up columns and ensure data integrity
df_obesity = df_obesity[['LocationDesc', 'Data_Value']].dropna()
df_obesity.columns = ['State', 'Obesity_Rate']

# 4. Sort the data by Obesity Rate for better visualization
df_obesity_sorted = df_obesity.sort_values('Obesity_Rate', ascending=False)

# 5. Create the horizontal bar plot
plt.figure(figsize=(10, 15)) # Tall figure for all states
sns.barplot(x='Obesity_Rate', y='State', data=df_obesity_sorted, palette='viridis')

plt.title(f'Adult Obesity Rate by State ({latest_year})', fontsize=18)
plt.xlabel('Obesity Rate (%)', fontsize=12)
plt.ylabel('State', fontsize=12)
plt.tight_layout()
# plt.savefig('obesity_rate_by_state_2023.png')
plt.close()

print("Generated obesity_rate_by_state_2023.png")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Obesity_Rate', y='State', data=df_obesity_sorted, palette='viridis')


Generated obesity_rate_by_state_2023.png
