# HBV Vaccination and Infection Rates in Children

This notebook explores Hepatitis B virus (HBV) vaccination coverage (≥3 doses by 19 months of age) alongside acute HBV infection cases and rates among children aged 0–14 in the United States.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Notebook-wide plotting defaults: seaborn's white-grid theme and a wide
# default figure size (individual cells override the size where needed).
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

## Load and Explore Data

In [None]:
# --- Infection table: read it, then report its size and header ---
infections = pd.read_csv('hbv_infections.csv')
infection_columns = infections.columns.tolist()
print("Infection data shape:", infections.shape)
print("\nInfection data columns:", infection_columns)

# --- Childhood vaccination table: same treatment ---
vaccine_children = pd.read_csv('hbv_vaccine_children.csv')
vaccine_columns = vaccine_children.columns.tolist()
print("\nVaccine data shape:", vaccine_children.shape)
print("Vaccine data columns:", vaccine_columns)

## Filter Data for Children and Hepatitis B

In [None]:
# Keep only the rows for the 0-14 age group. The .copy() makes the result an
# independent frame so the column added below does not write into `infections`.
children_infections = infections[infections['Age Group'] == '0-14'].copy()

# Derive a numeric year from the first 4-digit run in 'Year' (the raw label can
# carry extra text such as a COVID-19 pandemic note).
# Casting to str first keeps the .str accessor usable even when pandas inferred
# the column as integers (i.e. no annotated labels in the CSV).
# The final astype(int) still raises if a label contains no 4-digit year.
children_infections['Year_Clean'] = (
    children_infections['Year']
    .astype(str)
    .str.extract(r'(\d{4})', expand=False)
    .astype(int)
)

# Chronological order for the time-series plots that follow
children_infections = children_infections.sort_values('Year_Clean')

print(f"Children infection records: {len(children_infections)}")
print(f"Year range: {children_infections['Year_Clean'].min()} - {children_infections['Year_Clean'].max()}")
children_infections[['Year', 'Cases', 'Rate per 100000']].head(10)

In [None]:
# National (United States) Hepatitis B rows only
hepb_vaccine = vaccine_children[
    (vaccine_children['Vaccine'] == 'Hep B') &
    (vaccine_children['Geography'] == 'United States')
].copy()

# Completion measure used in the rest of the notebook: ≥3 doses at 19 months
hepb_3doses = hepb_vaccine[
    (hepb_vaccine['Dose'] == '≥3 Doses') &
    (hepb_vaccine['Dimension'] == '19 Months')
].copy()

# Numeric year = first 4-digit run of the cohort label, so a range such as
# '2019-2020' maps to its starting year. astype(str) keeps the .str accessor
# usable even if the column was inferred as numeric.
hepb_3doses['Year'] = (
    hepb_3doses['Birth Year/Birth Cohort']
    .astype(str)
    .str.extract(r'(\d{4})', expand=False)
    .astype(int)
)

# Sort chronologically. A single-year label and a range starting in the same
# year share one 'Year' value; breaking such ties by the cohort label makes the
# row order (and every later .iloc[-1] "most recent" lookup) deterministic.
# NOTE(review): if both label kinds are present, the trend line gets two points
# at the same x position — confirm whether one kind should be dropped.
hepb_3doses = hepb_3doses.sort_values(['Year', 'Birth Year/Birth Cohort'])

print(f"HepB vaccine records (≥3 doses at 19 months): {len(hepb_3doses)}")
print(f"Year range: {hepb_3doses['Year'].min()} - {hepb_3doses['Year'].max()}")
hepb_3doses[['Birth Year/Birth Cohort', 'Year', 'Estimate (%)']].head(10)

## Visualization 1: HBV Infection Rates in Children Over Time

In [None]:
# Cast once and reuse for both the plots and the printed statistics
years = children_infections['Year_Clean']
case_counts = children_infections['Cases'].astype(int)
case_rates = children_infections['Rate per 100000'].astype(float)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: Number of cases
ax1.plot(years, case_counts,
         marker='o', linewidth=2, markersize=6, color='#d62728')
ax1.set_xlabel('Year', fontsize=12)
ax1.set_ylabel('Number of Cases', fontsize=12)
ax1.set_title('Acute Hepatitis B Cases in Children (Age 0-14)', fontsize=14, fontweight='bold')
ax1.grid(True, alpha=0.3)

# Plot 2: Rate per 100,000
ax2.plot(years, case_rates,
         marker='o', linewidth=2, markersize=6, color='#d62728')
ax2.set_xlabel('Year', fontsize=12)
ax2.set_ylabel('Rate per 100,000 Population', fontsize=12)
ax2.set_title('Acute Hepatitis B Rate in Children (Age 0-14)', fontsize=14, fontweight='bold')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nKey Statistics:")
if case_counts.empty:
    # idxmax()/iloc[0] would raise on an empty selection
    print("No infection records available for the 0-14 age group")
else:
    max_cases = case_counts.max()
    # The peak is reported with the raw 'Year' label, as stored in the CSV
    max_cases_label = children_infections.loc[case_counts.idxmax(), 'Year']

    # "Recent" is the latest year present in the data rather than a fixed
    # literal, so the cell neither fails when that year is missing nor goes
    # stale when newer years are added.
    latest_year = years.max()
    latest_cases = case_counts[years == latest_year].iloc[0]

    # Guard the ratio when every year reports zero cases
    reduction_pct = 100 * (1 - latest_cases / max_cases) if max_cases else 0.0

    print(f"Peak cases: {max_cases} in {max_cases_label}")
    print(f"Recent cases ({latest_year}): {latest_cases}")
    print(f"Reduction from peak: {reduction_pct:.1f}%")

## Visualization 2: HBV Vaccination Rates in Children Over Time

In [None]:
# Coverage as floats (cast once) and the cohort labels used in the printout
coverage_pct = hepb_3doses['Estimate (%)'].astype(float)
cohort_labels = hepb_3doses['Birth Year/Birth Cohort']

# Explicit figure/axes instead of the implicit pyplot "current axes"
cov_fig, cov_ax = plt.subplots(figsize=(14, 6))

cov_ax.plot(
    hepb_3doses['Year'],
    coverage_pct,
    marker='o',
    linewidth=2.5,
    markersize=8,
    color='#2ca02c',
)

cov_ax.set_xlabel('Birth Year', fontsize=12)
cov_ax.set_ylabel('Vaccination Coverage (%)', fontsize=12)
cov_ax.set_title(
    'Hepatitis B Vaccination Coverage in Children\n(≥3 Doses by 19 Months, United States)',
    fontsize=14,
    fontweight='bold',
)
cov_ax.grid(True, alpha=0.3)
cov_ax.set_ylim(85, 95)

# Dashed horizontal guide at 90% coverage, labelled so it appears in the legend
cov_ax.axhline(y=90, color='gray', linestyle='--', linewidth=1, alpha=0.7, label='90% Coverage')
cov_ax.legend()

cov_fig.tight_layout()
plt.show()

# Row labels of the extreme values, used to look up the matching cohort label
lowest_at = coverage_pct.idxmin()
highest_at = coverage_pct.idxmax()

print(f"\nVaccination Statistics:")
print(f"Lowest coverage: {coverage_pct.min():.1f}% in {cohort_labels.loc[lowest_at]}")
print(f"Highest coverage: {coverage_pct.max():.1f}% in {cohort_labels.loc[highest_at]}")
# The last row is reported with its raw (uncast) estimate value
print(f"Most recent coverage ({cohort_labels.iloc[-1]}): {hepb_3doses['Estimate (%)'].iloc[-1]}%")

## Visualization 3: Comparison of Vaccination Coverage and Infection Rates

In [None]:
# Create a combined visualization
fig, ax1 = plt.subplots(figsize=(16, 7))

# Plot infection cases on left axis
color1 = '#d62728'
ax1.set_xlabel('Year', fontsize=13)
ax1.set_ylabel('Acute Hepatitis B Cases in Children (Age 0-14)', fontsize=13, color=color1)
line1 = ax1.plot(children_infections['Year_Clean'], children_infections['Cases'].astype(int), 
         marker='o', linewidth=2.5, markersize=8, color=color1, label='Infection Cases')
ax1.tick_params(axis='y', labelcolor=color1)
ax1.grid(True, alpha=0.3)

# Create second y-axis for vaccination coverage
ax2 = ax1.twinx()
color2 = '#2ca02c'
ax2.set_ylabel('Hepatitis B Vaccination Coverage (%, ≥3 Doses by 19 Months)', fontsize=13, color=color2)
line2 = ax2.plot(hepb_3doses['Year'], hepb_3doses['Estimate (%)'].astype(float), 
         marker='s', linewidth=2.5, markersize=8, color=color2, label='Vaccination Coverage')
ax2.tick_params(axis='y', labelcolor=color2)
ax2.set_ylim(85, 95)

# Add title
plt.title('HBV Vaccination Coverage vs. Infection Cases in Children Over Time', 
          fontsize=15, fontweight='bold', pad=20)

# Combine legends
lines = line1 + line2
labels = [l.get_label() for l in lines]
ax1.legend(lines, labels, loc='upper right', fontsize=11)

plt.tight_layout()
plt.show()

## Additional Analysis: Vaccination Coverage by Different Metrics

In [None]:
# National Hep B rows for the selected birth cohorts, built from named masks
is_hepb = vaccine_children['Vaccine'] == 'Hep B'
is_national = vaccine_children['Geography'] == 'United States'
is_recent_cohort = vaccine_children['Birth Year/Birth Cohort'].isin(['2020', '2019-2020', '2020-2021'])
hepb_all_doses = vaccine_children[is_hepb & is_national & is_recent_cohort].copy()

# Age milestones to report, in display order
milestones = ['3 Months', '7 Months', '13 Months', '19 Months', '24 Months', '35 Months']

# Only the ≥3-dose rows feed the chart
three_dose_rows = hepb_all_doses[hepb_all_doses['Dose'] == '≥3 Doses']

# One record per milestone that has data; coverage is the mean of the
# estimates across the selected cohorts
milestone_data = []
for age_label in milestones:
    estimates = three_dose_rows.loc[three_dose_rows['Dimension'] == age_label, 'Estimate (%)']
    if estimates.empty:
        continue
    milestone_data.append({
        'Age': age_label,
        'Coverage': estimates.astype(float).mean(),
    })

milestone_df = pd.DataFrame(milestone_data)

if milestone_df.empty:
    print("No milestone data available for visualization")
else:
    ms_fig, ms_ax = plt.subplots(figsize=(12, 6))
    bars = ms_ax.bar(
        milestone_df['Age'],
        milestone_df['Coverage'],
        color='#2ca02c',
        alpha=0.7,
        edgecolor='black',
    )

    # Write each bar's value just above its top edge, centred horizontally
    for patch in bars:
        top = patch.get_height()
        centre_x = patch.get_x() + patch.get_width()/2.
        ms_ax.text(centre_x, top,
                   f'{top:.1f}%',
                   ha='center', va='bottom', fontsize=11, fontweight='bold')

    ms_ax.set_xlabel('Age at Vaccination', fontsize=12)
    ms_ax.set_ylabel('Vaccination Coverage (%)', fontsize=12)
    ms_ax.set_title('Hepatitis B Vaccination Coverage (≥3 Doses) by Age\n(Recent Birth Cohorts)',
                    fontsize=14, fontweight='bold')
    ms_ax.set_ylim(0, 100)
    ms_ax.grid(True, alpha=0.3, axis='y')
    ms_fig.tight_layout()
    plt.show()

## Summary Statistics

In [None]:
# Text summary of the infection and vaccination series prepared earlier in the notebook
print("="*70)
print("SUMMARY OF HBV VACCINATION AND INFECTION IN CHILDREN")
print("="*70)

print("\n📊 INFECTION TRENDS:")
print("-" * 70)
# Cast once; every infection statistic below reuses these two series
case_counts = children_infections['Cases'].astype(int)
case_years = children_infections['Year_Clean']

# The year span and the "most recent" year come from the data, not literals, so
# the labels stay truthful and the lookup cannot fail on a missing year.
first_year = case_years.min()
latest_year = case_years.max()

# The peak is reported with the raw 'Year' label, as stored in the CSV
peak_year = children_infections.loc[case_counts.idxmax(), 'Year']
peak_cases = case_counts.max()
recent_cases = case_counts[case_years == latest_year].iloc[0]
# Guard the ratio when every year reports zero cases
reduction = 100 * (1 - recent_cases / peak_cases) if peak_cases else 0.0

print(f"  • Peak infection year: {peak_year} with {peak_cases} cases")
print(f"  • Most recent ({latest_year}): {recent_cases} case(s)")
print(f"  • Reduction from peak: {reduction:.1f}%")
print(f"  • Average annual cases ({first_year}-{latest_year}): {case_counts.mean():.1f}")

print("\n💉 VACCINATION COVERAGE:")
print("-" * 70)
coverage_pct = hepb_3doses['Estimate (%)'].astype(float)
# The last row's estimate is reported with its raw (uncast) value
recent_coverage = hepb_3doses['Estimate (%)'].iloc[-1]
avg_coverage = coverage_pct.mean()

print(f"  • Most recent coverage ({hepb_3doses['Birth Year/Birth Cohort'].iloc[-1]}): {recent_coverage}%")
print(f"  • Average coverage (all years): {avg_coverage:.1f}%")
print(f"  • Highest coverage: {coverage_pct.max():.1f}%")
print(f"  • Lowest coverage: {coverage_pct.min():.1f}%")

print("\n🎯 KEY INSIGHTS:")
print("-" * 70)
# NOTE(review): the four statements below are fixed narrative text, not values
# computed from the frames (e.g. the ">88%" figure and the "correlates" claim);
# re-check them against the numbers printed above whenever the data change.
print("  • HBV vaccination coverage has remained consistently high (>88%)")
print("  • Infection cases in children have dramatically decreased over time")
print("  • High vaccination coverage correlates with very low infection rates")
print("  • Recent years show near-elimination of HBV in children (0-14 age group)")
print("="*70)