# Try 1.5.3: Stacked Bar Charts

This notebook demonstrates creating stacked bar charts using matplotlib.

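To create a stacked bar chart, call matplotlib's `plt.subplots()` to get a single set of axes, then draw every series on those axes with `ax.bar()`. Passing `bottom=` to the second (and any later) call places that series on top of the bars already drawn, which is what produces the stack.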

**Example**: The code below plots automobile collisions on one set of axes. Speeding-related collisions are stacked on top of collisions from all other causes, so the full height of each bar is the total number of collisions for that year.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Switch seaborn to its "whitegrid" style up front, before any figure is
# created, so the charts drawn after this call share the same look.
sns.set_style("whitegrid")

## Example 1: Automobile Collisions by Year

Let's create a stacked bar chart showing speeding-related vs. total collisions.

In [None]:
# Sample collision counts, one entry per year
years = [2014, 2015, 2016, 2017, 2018]
total_collisions = [35000, 36500, 37800, 38200, 39500]
speeding_collisions = [8500, 9100, 9500, 9800, 10200]

# Assemble the table; "Other" is whatever part of each yearly total is not
# speeding-related. The final column selection fixes the display order.
collision_df = (
    pd.DataFrame({
        'Year': years,
        'Speeding': speeding_collisions,
        'Total': total_collisions,
    })
    .assign(Other=lambda frame: frame['Total'] - frame['Speeding'])
    [['Year', 'Speeding', 'Other', 'Total']]
)

# Plain Python list of the non-speeding counts, used as the base of the stack
other_collisions = collision_df['Other'].tolist()

print("Collision Data:")
print(collision_df)

In [None]:
# Create stacked bar chart: "Other Causes" forms the base of every bar and
# "Speeding-Related" is stacked on top, so the bar height is the yearly total.
fig, ax = plt.subplots(figsize=(10, 6))

# Draw the base series first; `bottom=` lifts the second series onto it.
ax.bar(years, other_collisions, label='Other Causes', color='#1f77b4')
ax.bar(years, speeding_collisions, bottom=other_collisions, label='Speeding-Related', color='#ff7f0e')

# Customize the chart
ax.set_title('Automobile Collisions by Year', fontsize=14, fontweight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Number of Collisions', fontsize=12)
# The x values are numeric years, so tick placement would otherwise be left to
# the automatic locator. Pin one tick to each bar so only whole years appear.
ax.set_xticks(years)
ax.legend()
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

print("\nInterpretation: The stacked bars show both the total collisions and")
print("the breakdown between speeding-related and other causes.")

## Example 2: Preterm Births by Age Group

Let's recreate a chart similar to the CDC preterm births example from the textbook.

In [None]:
# Create preterm birth data (percentages)
age_groups = ['All ages', 'Under 20', '20-29', '30-39', '40 and over']
# Deliberately NOT called `years`: that name holds the collision years used by
# the Example 1 chart cell, and overwriting it here would break that cell if it
# were re-run after this one.
preterm_years = ['2014', '2015', '2016']

# Data for early and late preterm births (in percentages).
# Each list has one value per entry of `age_groups`, in the same order.
data_2014 = {
    'Early': [2.93, 3.15, 2.85, 2.98, 3.52],
    'Late': [6.63, 7.10, 6.45, 6.85, 7.82]
}

data_2015 = {
    'Early': [2.76, 3.05, 2.65, 2.85, 3.35],
    'Late': [6.87, 7.25, 6.70, 7.10, 8.15]
}

data_2016 = {
    'Early': [2.75, 3.20, 2.68, 2.82, 3.45],
    'Late': [7.10, 7.20, 6.68, 7.30, 10.37]
}

# Combine the three yearly dicts into one table (rows: age group, columns:
# year -> Early/Late) so the heading below is followed by the actual numbers.
preterm_df = pd.concat(
    {
        year: pd.DataFrame(rates, index=age_groups)
        for year, rates in zip(preterm_years, (data_2014, data_2015, data_2016))
    },
    axis=1,
)

print("Preterm Birth Percentages by Age Group and Year")
print(preterm_df)

In [None]:
# Grouped and stacked bars: every age group gets three side-by-side bars (one
# per year), and each bar stacks Early preterm births on top of Late ones.
fig, ax = plt.subplots(figsize=(14, 6))

x = np.arange(len(age_groups))
width = 0.25

# (year label, yearly data, Late colour, Early colour) in left-to-right order
yearly_series = [
    ('2014', data_2014, '#c7e9c0', '#74c476'),
    ('2015', data_2015, '#bdd7e7', '#6baed6'),
    ('2016', data_2016, '#fcbba1', '#fc9272'),
]

for slot, (year_label, rates, late_color, early_color) in enumerate(yearly_series):
    # slot 0/1/2 -> one bar-width left of, centred on, one bar-width right of the tick
    positions = x + (slot - 1) * width
    ax.bar(positions, rates['Late'], width,
           label=f'Late Preterm {year_label}', color=late_color)
    ax.bar(positions, rates['Early'], width, bottom=rates['Late'],
           label=f'Early Preterm {year_label}', color=early_color)

# Titles and axis labels
ax.set_title("Preterm Birth Rates by Mother's Age Group (2014-2016)", fontsize=14, fontweight='bold')
ax.set_xlabel('Age Group', fontsize=12)
ax.set_ylabel('Percentage (%)', fontsize=12)

# One tick per age group, centred under its cluster of three bars
ax.set_xticks(x)
ax.set_xticklabels(age_groups)

# Six legend entries: anchor the legend just outside the right edge of the axes
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

## Example 3: Relative Frequency Stacked Bar Chart

Let's create a relative frequency stacked bar chart showing Massachusetts state spending.

In [None]:
# Massachusetts spending shares (relative frequencies, in % of total spending)
years_ma = [2000, 2005, 2010, 2013]
healthcare_pct = [23, 28, 35, 41]
# Only two categories are tracked, so "other" is the complement to 100%
other_spending_pct = [100 - share for share in healthcare_pct]

# One row per year: (year, healthcare share, other share)
spending_df = pd.DataFrame(
    list(zip(years_ma, healthcare_pct, other_spending_pct)),
    columns=['Year', 'Healthcare', 'Other Spending'],
)

print("Massachusetts State Spending (% of Total):")
print(spending_df)

In [None]:
# Create relative frequency stacked bar chart: every bar sums to 100%, with
# "Other Spending" as the base and "Healthcare" stacked on top.
fig, ax = plt.subplots(figsize=(10, 6))

# The plotted years are 3-5 units apart on a numeric axis, so matplotlib's
# default bar width of 0.8 would draw thin slivers. Widen the bars so the
# segments (and the labels inside them) are easy to read; 2.0 still leaves a
# gap between the closest pair of years (2010 and 2013).
bar_width = 2.0

# Create the bars
ax.bar(years_ma, other_spending_pct, width=bar_width, label='Other Spending', color='#3182bd')
ax.bar(years_ma, healthcare_pct, width=bar_width, bottom=other_spending_pct,
       label='Healthcare', color='#e6550d')

# Customize the chart
ax.set_title('Massachusetts State Spending: Healthcare vs. All Other Spending', fontsize=14, fontweight='bold')
ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('% of Total Spending', fontsize=12)
# Put a tick under every bar instead of relying on automatically chosen ticks,
# which need not coincide with the plotted years.
ax.set_xticks(years_ma)
ax.set_ylim(0, 100)
# The bars fill the full 0-100 range, so a legend inside the axes would cover
# data; anchor it just outside the right edge instead.
ax.legend(loc='center left', bbox_to_anchor=(1.02, 0.5))
ax.grid(axis='y', alpha=0.3)

# Add percentage labels, each centred vertically inside its own segment
for year, other_pct, health_pct in zip(years_ma, other_spending_pct, healthcare_pct):
    # Healthcare segment spans [other_pct, other_pct + health_pct]
    ax.text(year, other_pct + health_pct / 2, f'{health_pct}%',
            ha='center', va='center', fontweight='bold', color='white')
    # Other-spending segment spans [0, other_pct]
    ax.text(year, other_pct / 2, f'{other_pct}%',
            ha='center', va='center', fontweight='bold', color='white')

plt.tight_layout()
plt.show()

# Build the summary from the data so the text cannot drift from the chart
print(f"\nKey Observation: Healthcare spending has risen from {healthcare_pct[0]}% to {healthcare_pct[-1]}% of total")
print(f"state spending between {years_ma[0]} and {years_ma[-1]}.")

## Example 4: Horizontal Stacked Bar Chart

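Stacked bar charts can also be drawn horizontally: use `ax.barh()` instead of `ax.bar()`, and pass `left=` (rather than `bottom=`) to place each series after the ones already drawn.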

In [None]:
# Sample sales figures per region for three products
regions = ['North', 'South', 'East', 'West']
product_a = [150, 180, 200, 170]
product_b = [120, 140, 160, 130]
product_c = [90, 110, 130, 100]

# Horizontal stacked bars: each product starts where the previous one ended
fig, ax = plt.subplots(figsize=(10, 6))

# (legend label, sales per region, colour) in stacking order, left to right
stacked_products = [
    ('Product A', product_a, '#8dd3c7'),
    ('Product B', product_b, '#ffffb3'),
    ('Product C', product_c, '#bebada'),
]

# Running left edge per region; starts at zero for the first product
left_edge = np.zeros(len(regions))
for product_label, sales, bar_color in stacked_products:
    ax.barh(regions, sales, left=left_edge, label=product_label, color=bar_color)
    left_edge = left_edge + np.asarray(sales)

# Titles, axis labels and legend
ax.set_title('Product Sales by Region', fontsize=14, fontweight='bold')
ax.set_xlabel('Sales', fontsize=12)
ax.set_ylabel('Region', fontsize=12)
ax.legend()
# Values run along x in a horizontal chart, so the grid lines follow x
ax.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.show()

## Exercise

Try creating your own stacked bar chart:
1. Create a dataset with multiple categories and subcategories
2. Create a stacked bar chart (vertical or horizontal)
3. Try creating a relative frequency stacked bar chart
4. Add appropriate titles, labels, and legends
5. Consider adding data labels to show exact values or percentages

In [None]:
# Your code here
