# Public Health Dashboard – South Africa
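This notebook performs basic exploratory data analysis (EDA) on cleaned public health data for South Africa and visualizes the results. It covers summary statistics, HIV prevalence by province, clinics per 100,000 people by province, and the distribution of TB incidence rates.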

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the cleaned dataset from disk into a DataFrame, then preview it.
DATA_PATH = '../data/cleaned/public_health_sa.csv'
df = pd.read_csv(DATA_PATH)
df.head(n=5)  # n=5 is the pandas default, spelled out for the reader

## Summary Statistics

In [None]:
# Descriptive statistics for the loaded frame; include='all' asks pandas to
# summarize every column rather than only the numeric ones.
summary_stats = df.describe(include='all')
summary_stats

## Top Provinces by HIV Prevalence

In [None]:
# Keep only the province label and its HIV prevalence, ordered from the
# highest prevalence value to the lowest.
hiv_columns = ['Province', 'HIV_Prevalence_%']
df.loc[:, hiv_columns].sort_values(by='HIV_Prevalence_%', ascending=False)

## Visualization: Clinics per 100k by Province

In [None]:
# Bar chart with clinics per 100k on the x-axis and province on the y-axis.
# Rows are sorted ascending by the metric before plotting so the bars appear
# in that order.
clinics_sorted = df.sort_values('Clinics_per_100k')

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=clinics_sorted, x='Clinics_per_100k', y='Province', ax=ax)
ax.set_title('Clinics per 100,000 People by Province')
ax.set_xlabel('Clinics per 100k')
ax.set_ylabel('Province')
fig.tight_layout()
plt.show()

## Visualization: TB Incidence Distribution

In [None]:
# Box plot summarizing the spread of the TB incidence column across all rows.
tb_rates = df['TB_Incidence_per_100k']

fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(x=tb_rates, ax=ax)
ax.set_title('Distribution of TB Incidence Rates')
ax.set_xlabel('TB Incidence per 100,000')  # replaces the default label taken from the series name
fig.tight_layout()
plt.show()