# In [None]:
"""
Import necessary libraries:
- pandas for data manipulation
- Matplotlib for creating plots
- Seaborn for advanced visualizations
"""

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Data sources

def read_data(filename):
    """Load a CSV file into a pandas DataFrame.

    Args:
        filename: Name (path) of the CSV file; passed straight to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that file.
    """
    return pd.read_csv(filename)

"""
Load the data from each CSV file into its own pandas DataFrame.
"""
# Load the pandemic data into a dataframe.
# The same file ("df_1.csv") also feeds the disease-burden pie chart, so it is
# parsed once and copied instead of being read from disk a second time.
pandemic_data = read_data("df_1.csv")
disease_burden_data = pandemic_data.copy()
# Only the first four rows are used for the pie chart and its total.
disease_burden_df = disease_burden_data.head(4)
total_deaths = disease_burden_df["lost"].sum()

# read_data already returns a DataFrame, so the *_df names below are bound to
# the loaded frames directly rather than re-wrapped in pd.DataFrame(...).

# Load the measles data into a dataframe
measles_data = read_data("measles-cases-and-death-rate.csv")
measles_df = measles_data

# Load the polio data into a dataframe
polio_data = read_data("prevalence-of-polio-rates-in-the-united-states.csv")
polio_df = polio_data

# Load the vaccine data into a dataframe
vaccine_data = read_data("deaths prevented by vaccines.csv")
vaccine_df = vaccine_data

"""
Computes summary statistics for the pandemic data, including:
- Mean
- Median
- Standard deviation
"""
pandemic_df = pd.DataFrame(pandemic_data)

# Look the death-toll column up once (its name ends with a trailing space)
# and derive the three summary statistics from it.
death_toll_column = pandemic_df["Death toll (in millions) "]
mean_mortality_rate = death_toll_column.mean()
median_mortality_rate = death_toll_column.median()
std_mortality_rate = death_toll_column.std()

# First five colours of seaborn's "pastel" palette
pastel_palette = sns.color_palette("pastel")
color = pastel_palette[:5]

"""
Create infographic plots 1 and 2.
"""

# Create the infographic plots

# Apply the seaborn style BEFORE any axes are created: set_style only updates
# the defaults picked up by axes created afterwards, so calling it after the
# plotting calls leaves the figure drawn by this run unstyled.
sns.set_style("whitegrid", {'grid.linestyle': '-.'})

plt.figure(figsize=(12, 7))

# Plot 1: Deadliest Pandemics by Estimated Mortality Rate
plt.subplot(2, 2, 1)
# The bars show the first five rows of pandemic_df.
sns.barplot(x="Pandemics", y="Death toll (in millions) ", data=pandemic_df[:5])
plt.xlabel("Pandemic")
plt.xticks(rotation=12)
plt.ylabel("Death toll (in millions)")
# The average/median are taken from the death-toll column, whose unit is
# millions of deaths (see the y-axis label), so they are suffixed with "M"
# rather than "%". They are computed over the whole pandemic table, not only
# over the five plotted rows.
plt.title(
    f"Top 5 Deadliest Pandemics (Avg: {mean_mortality_rate:.2f}M, "
    f"Median: {median_mortality_rate:.2f}M)",
    fontsize=10, fontweight="bold", color="#D2042D")

# Plot 2: Global Distribution of Disease Burden by Cause
ax_burden = plt.subplot(2, 2, 2)
plt.pie(disease_burden_df["lost"], labels=disease_burden_df["Pandemics"],
        autopct="%1.1f%%", colors=color)
plt.title(f"Global Disease % Burden by Cause (Total Deaths in Mlns: {total_deaths})",
          fontsize=12, fontweight="bold", color="#D2042D")

# Conclusions drawn from the plots.
# All text is positioned in the axes coordinates of plot 2 (ax_burden); the
# negative offsets place it to the left of / below that subplot.
ax_burden.text(-0.8, -0.05, 'Conclusions:', fontsize=12, fontweight="bold",
               color="#D2042D", transform=ax_burden.transAxes)
conclusion_lines = [
    (-0.15, 'Plot 1: Black death, Spanish flu caused most no. of death globally'),
    (-0.22, 'Plot 2: The Plague of Justinian was the deadliest pandemic in history, 48.6% mortality rate'),
    (-0.28, 'Plot 3: Measles and Polio Mortality Rate has declined since 1960s-70s'),
    (-0.35, 'Plot 4: Vaccines have prevented more than 2 million of deaths from Measles'),
]
for y_offset, conclusion in conclusion_lines:
    ax_burden.text(-0.75, y_offset, conclusion, fontsize=10,
                   transform=ax_burden.transAxes)

# Infographic plots 3 and 4

# Plot 3: Trends in US Measles and Polio Mortality Rate
plt.subplot(2, 2, 3)
plt.plot(polio_df["Year"], polio_df["Polio Death Rate"], label="Polio")
plt.plot(measles_df["Year"], measles_df["Reported Measles Death Rate (OWID, 2017)"],
         label="Measles")
plt.xlabel("Year")
plt.ylabel("Deaths Rate %")
plt.title("Trends in US Disease Death Rate (1950-2014)",
          fontsize=12, fontweight="bold", color="#D2042D")
plt.legend()

# Plot 4: Impact of Vaccines on Disease Control
ax_vaccine = plt.subplot(2, 2, 4)
# Uses vaccine_df, consistent with the *_df frames used by the other plots.
# NOTE(review): recent seaborn releases warn when `palette` is given without
# `hue` - confirm against the installed version before changing this call.
sns.barplot(y="disease", x="deaths_prevented", data=vaccine_df, orient='h',
            palette="hot")

plt.ylabel("Disease")
plt.xlabel("Deaths Prevented (Millions)")
plt.title("Estimated Deaths Prevented by Vaccines",
          fontsize=12, fontweight="bold", color="#D2042D")
# Student ID stamp, positioned below plot 4 in that subplot's axes coordinates.
ax_vaccine.text(0.7, -0.32, 'ID: 22071343', fontsize=14, color='green',
                fontweight="bold", transform=ax_vaccine.transAxes)

# Adjust layout and display infographic
plt.suptitle("Infographic: Diseases and Pandemics - Severity and Casualties",
             fontsize=22, fontweight="bold")
plt.tight_layout()

# Save the figure
plt.savefig('22071343.png', dpi=300)

# Show the plot
plt.show()

"""

Data References

#Links 1- Polio case and death rates in the US from ourworldindata.org -  https://ourworldindata.org/grapher/prevalence-of-polio-rates-in-the-united-states?tab=table
#Links 2- Rate of reported cases and deaths from measles in the USA from ourworldindata.org - https://ourworldindata.org/grapher/measles-cases-and-death-rate?tab=table 
#Links 3- Deaths Prevented by Vaccines by #naheedali via @KaggleDatasets - https://www.kaggle.com/datasets/naheedali/deaths-prevented-by-vaccines 
#Links 4- Timeline of Historical Pandemics by #thedevastator via @KaggleDatasets - https://www.kaggle.com/datasets/thedevastator/a-comprehensive-history-of-major-disease-outbrea?resource=download&select=df_1.csv 

"""