**Misc install**

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import math

In [2]:
df = pd.read_excel('Health_Science_Dataset.xlsx', header=1)

# String
for col_idx in [6, 7, 8, 9]:
    col_name = df.columns[col_idx]
    df[col_name] = df[col_name].astype(str)

# Dates
for col_idx in [0, 1, 2, 5]:
    col_name = df.columns[col_idx]
    df[col_name] = pd.to_datetime(df[col_name], format='%Y-%m-%d %H:%M:%S', errors='coerce')

# Year
df.iloc[:, 3] = pd.to_datetime(df.iloc[:, 3], format='%Y', errors='coerce')

# Integer
for col_idx in [4] + list(range(10, 16)):
    col_name = df.columns[col_idx]
    df[col_name] = df[col_name].apply(lambda x: int(x) if pd.notna(x) else x)

**Basic Summary Statistics**

In [3]:
North_Carolina= df[df['Jurisdiction'] == 'North Carolina']

In [4]:
def avg_deaths_jurisdiction(df):
    avg_death = df.groupby('Jurisdiction')["COVID-19 Deaths"].mean()

    for Jurisdiction, mean_weight in avg_death.items():
        print(f"The mean COVID-19 deaths for {Jurisdiction} per week is: {math.ceil(mean_weight)}.")
    
    return
avg_deaths_jurisdiction(North_Carolina)

The mean COVID-19 deaths for North Carolina per week is: 97.


In [17]:
def avg_deaths_age_group(df):
    avg_death = df.groupby('Age Group')["COVID-19 Deaths"].mean()

    for age_group, mean_weight in avg_death.items():
        print(f"The mean COVID-19 US deaths for {age_group} per week is: {math.ceil(mean_weight)}.")
    
    return
avg_deaths_age_group(df)

The mean COVID-19 US deaths for 0-17 years per week is: 1.
The mean COVID-19 US deaths for 18-64 years per week is: 103.
The mean COVID-19 US deaths for 65 years and over per week is: 250.
The mean COVID-19 US deaths for All Ages per week is: 322.


In [16]:
def avg_deaths_age_group(df):
    avg_death = df.groupby('MMWRweek')["COVID-19 Deaths"].mean()

    for age_group, mean_weight in avg_death.items():
        print(f"The mean United States COVID-19 deaths for week {age_group} is: {math.ceil(mean_weight)}.")
    
    return
avg_deaths_age_group(df)

The mean United States COVID-19 deaths for week 1 is: 308.
The mean United States COVID-19 deaths for week 2 is: 331.
The mean United States COVID-19 deaths for week 3 is: 335.
The mean United States COVID-19 deaths for week 4 is: 306.
The mean United States COVID-19 deaths for week 5 is: 259.
The mean United States COVID-19 deaths for week 6 is: 217.
The mean United States COVID-19 deaths for week 7 is: 176.
The mean United States COVID-19 deaths for week 8 is: 138.
The mean United States COVID-19 deaths for week 9 is: 108.
The mean United States COVID-19 deaths for week 10 is: 89.
The mean United States COVID-19 deaths for week 11 is: 77.
The mean United States COVID-19 deaths for week 12 is: 73.
The mean United States COVID-19 deaths for week 13 is: 88.
The mean United States COVID-19 deaths for week 14 is: 136.
The mean United States COVID-19 deaths for week 15 is: 182.
The mean United States COVID-19 deaths for week 16 is: 190.
The mean United States COVID-19 deaths for week 17 is

**By year death change**

In [14]:
def death_count_per_year(df):
    # Take the death count for each year.
    year_column = pd.to_datetime(df['MMWRyear'], errors='coerce').dt.year

    temp_df = df.copy()
    temp_df['MMWRyear'] = year_column

    deaths_per_year = temp_df.groupby('MMWRyear')['COVID-19 Deaths'].count()

    for year, death_count in deaths_per_year.items():
        print(f"The amount of United States COVID-19 deaths for {year} is: {math.ceil(death_count)}.")
    return deaths_per_year
death_count_per_year(df)

The amount of United States COVID-19 deaths for 2020 is: 11502.
The amount of United States COVID-19 deaths for 2021 is: 10993.
The amount of United States COVID-19 deaths for 2022 is: 9738.
The amount of United States COVID-19 deaths for 2023 is: 7307.


MMWRyear
2020    11502
2021    10993
2022     9738
2023     7307
Name: COVID-19 Deaths, dtype: int64

In [15]:
def death_change(df):
    # Take the death count from second year and then divide by first.
    deaths_per_year = death_count_per_year(df)

    percent_changes = deaths_per_year.pct_change()*100

    for year, percent_change in percent_changes.items():
        print(f"The percentage change in United States COVID-19 deaths for {int(year)} is: {percent_change:.2f}%.") 

death_change(df)

The amount of United States COVID-19 deaths for 2020 is: 11502.
The amount of United States COVID-19 deaths for 2021 is: 10993.
The amount of United States COVID-19 deaths for 2022 is: 9738.
The amount of United States COVID-19 deaths for 2023 is: 7307.
The percentage change in United States COVID-19 deaths for 2020 is: nan%.
The percentage change in United States COVID-19 deaths for 2021 is: -4.43%.
The percentage change in United States COVID-19 deaths for 2022 is: -11.42%.
The percentage change in United States COVID-19 deaths for 2023 is: -24.96%.
