In [1]:
#install the required packages
#pip install requests
#pip install ipywidgets

In [2]:
#import the required packages
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display

In [3]:
#website with data and api information
#https://data.cdc.gov/NCHS/Provisional-COVID-19-Death-Counts-by-Week-Ending-D/r8kw-7aab/about_data

#initial API url
api_url = 'https://data.cdc.gov/resource/r8kw-7aab.json'
api_response = requests.get(api_url)

#check that api is working
#'200' is good
print(api_response.status_code)

200


In [4]:
#view data in a json format
json_data = api_response.json()
print(json_data[:3])

[{'data_as_of': '2025-01-30T00:00:00.000', 'start_date': '2019-12-29T00:00:00.000', 'end_date': '2020-01-04T00:00:00.000', 'group': 'By Week', 'year': '2019/2020', 'mmwr_week': '1', 'week_ending_date': '2020-01-04T00:00:00.000', 'state': 'United States', 'covid_19_deaths': '0', 'total_deaths': '60170', 'percent_of_expected_deaths': '98.00', 'pneumonia_deaths': '4111', 'pneumonia_and_covid_19_deaths': '0', 'influenza_deaths': '434', 'pneumonia_influenza_or_covid_19_deaths': '4545'}, {'data_as_of': '2025-01-30T00:00:00.000', 'start_date': '2020-01-05T00:00:00.000', 'end_date': '2020-01-11T00:00:00.000', 'group': 'By Week', 'year': '2020', 'mmwr_week': '2', 'week_ending_date': '2020-01-11T00:00:00.000', 'state': 'United States', 'covid_19_deaths': '1', 'total_deaths': '60734', 'percent_of_expected_deaths': '97.00', 'pneumonia_deaths': '4153', 'pneumonia_and_covid_19_deaths': '1', 'influenza_deaths': '475', 'pneumonia_influenza_or_covid_19_deaths': '4628'}, {'data_as_of': '2025-01-30T00:00

In [5]:
#put json data in pandas datafram for better viewing and analyis
usa_df = pd.DataFrame(json_data)
usa_df[:5]

Unnamed: 0,data_as_of,start_date,end_date,group,year,mmwr_week,week_ending_date,state,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths,footnote
0,2025-01-30T00:00:00.000,2019-12-29T00:00:00.000,2020-01-04T00:00:00.000,By Week,2019/2020,1,2020-01-04T00:00:00.000,United States,0,60170,98.0,4111,0,434,4545,
1,2025-01-30T00:00:00.000,2020-01-05T00:00:00.000,2020-01-11T00:00:00.000,By Week,2020,2,2020-01-11T00:00:00.000,United States,1,60734,97.0,4153,1,475,4628,
2,2025-01-30T00:00:00.000,2020-01-12T00:00:00.000,2020-01-18T00:00:00.000,By Week,2020,3,2020-01-18T00:00:00.000,United States,2,59362,98.0,4066,2,468,4534,
3,2025-01-30T00:00:00.000,2020-01-19T00:00:00.000,2020-01-25T00:00:00.000,By Week,2020,4,2020-01-25T00:00:00.000,United States,3,59162,99.0,3915,0,500,4418,
4,2025-01-30T00:00:00.000,2020-01-26T00:00:00.000,2020-02-01T00:00:00.000,By Week,2020,5,2020-02-01T00:00:00.000,United States,0,58834,99.0,3818,0,481,4299,


In [6]:
#drop first column, 'data_as_of' not needed
usa_df.drop(columns=['data_as_of', 'mmwr_week', 'week_ending_date'], axis=1, inplace=True)
usa_df[:5]

Unnamed: 0,start_date,end_date,group,year,state,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths,footnote
0,2019-12-29T00:00:00.000,2020-01-04T00:00:00.000,By Week,2019/2020,United States,0,60170,98.0,4111,0,434,4545,
1,2020-01-05T00:00:00.000,2020-01-11T00:00:00.000,By Week,2020,United States,1,60734,97.0,4153,1,475,4628,
2,2020-01-12T00:00:00.000,2020-01-18T00:00:00.000,By Week,2020,United States,2,59362,98.0,4066,2,468,4534,
3,2020-01-19T00:00:00.000,2020-01-25T00:00:00.000,By Week,2020,United States,3,59162,99.0,3915,0,500,4418,
4,2020-01-26T00:00:00.000,2020-02-01T00:00:00.000,By Week,2020,United States,0,58834,99.0,3818,0,481,4299,


In [7]:
#create function to format the date data
def short_date(date):
    index = date.find('T')

    new_date = date[:index]

    return new_date

usa_df['start_date'] = usa_df['start_date'].apply(short_date)
usa_df['end_date'] = usa_df['end_date'].apply(short_date)

usa_df[:5]

Unnamed: 0,start_date,end_date,group,year,state,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths,footnote
0,2019-12-29,2020-01-04,By Week,2019/2020,United States,0,60170,98.0,4111,0,434,4545,
1,2020-01-05,2020-01-11,By Week,2020,United States,1,60734,97.0,4153,1,475,4628,
2,2020-01-12,2020-01-18,By Week,2020,United States,2,59362,98.0,4066,2,468,4534,
3,2020-01-19,2020-01-25,By Week,2020,United States,3,59162,99.0,3915,0,500,4418,
4,2020-01-26,2020-02-01,By Week,2020,United States,0,58834,99.0,3818,0,481,4299,


In [8]:
#remove unessential columns
usa_df = usa_df.drop(columns=['group', 'state', 'footnote'])
usa_df[:5]

Unnamed: 0,start_date,end_date,year,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths
0,2019-12-29,2020-01-04,2019/2020,0,60170,98.0,4111,0,434,4545
1,2020-01-05,2020-01-11,2020,1,60734,97.0,4153,1,475,4628
2,2020-01-12,2020-01-18,2020,2,59362,98.0,4066,2,468,4534
3,2020-01-19,2020-01-25,2020,3,59162,99.0,3915,0,500,4418
4,2020-01-26,2020-02-01,2020,0,58834,99.0,3818,0,481,4299


In [9]:
# 'usa_df' is the df I will work from to create other specific tables and queries

In [10]:
#extract month from 'start_date' column for easier grouping and quering
def find_month(date):

    date = str(date)

    month = date[5:7]

    return month

usa_df['month'] = usa_df['start_date'].apply(find_month)
usa_df[:5]

Unnamed: 0,start_date,end_date,year,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths,month
0,2019-12-29,2020-01-04,2019/2020,0,60170,98.0,4111,0,434,4545,12
1,2020-01-05,2020-01-11,2020,1,60734,97.0,4153,1,475,4628,1
2,2020-01-12,2020-01-18,2020,2,59362,98.0,4066,2,468,4534,1
3,2020-01-19,2020-01-25,2020,3,59162,99.0,3915,0,500,4418,1
4,2020-01-26,2020-02-01,2020,0,58834,99.0,3818,0,481,4299,1


In [11]:
#get data from specific Year
def get_year(year):

    year = str(year)

    year_df = usa_df[usa_df['year'] == year]

    return year_df

get_year(2023)

Unnamed: 0,start_date,end_date,year,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths,month
157,2023-01-01,2023-01-07,2023,3881,69192,112.00,5527,1439,891,8800,01
158,2023-01-08,2023-01-14,2023,3698,66548,107.00,5210,1406,629,8084,01
159,2023-01-15,2023-01-21,2023,3263,64369,106.00,5037,1276,388,7381,01
160,2023-01-22,2023-01-28,2023,2919,62609,105.00,4613,1086,274,6700,01
161,2023-01-29,2023-02-04,2023,2648,62942,106.00,4493,953,145,6310,01
...,...,...,...,...,...,...,...,...,...,...,...
995,2023-10-29,2023-11-04,2023,31,1291,120.0,63,13,,82,10
996,2023-11-05,2023-11-11,2023,30,1312,116.00,66,12,,85,11
997,2023-11-12,2023-11-18,2023,27,1326,116.00,79,11,,99,11
998,2023-11-19,2023-11-25,2023,37,1283,112.00,76,21,,96,11


In [12]:
#convert 'start_date' and 'end_date' to datetime data type
usa_df['start_date'] = pd.to_datetime(usa_df['start_date'])

usa_df['end_date'] = pd.to_datetime(usa_df['end_date'])

In [13]:
#combine month and year columns into one and delete individual columns
#combine month and year columns into one and delete individual columns

def month_year(month, year):

    combined_date = month + '-' + year

    return combined_date

usa_df['month_date'] = usa_df.apply(lambda row: month_year(row['month'], row['year']), axis=1)
usa_df

Unnamed: 0,start_date,end_date,year,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths,month,month_date
0,2019-12-29,2020-01-04,2019/2020,0,60170,98.00,4111,0,434,4545,12,12-2019/2020
1,2020-01-05,2020-01-11,2020,1,60734,97.00,4153,1,475,4628,01,01-2020
2,2020-01-12,2020-01-18,2020,2,59362,98.00,4066,2,468,4534,01,01-2020
3,2020-01-19,2020-01-25,2020,3,59162,99.00,3915,0,500,4418,01,01-2020
4,2020-01-26,2020-02-01,2020,0,58834,99.00,3818,0,481,4299,01,01-2020
...,...,...,...,...,...,...,...,...,...,...,...,...
995,2023-10-29,2023-11-04,2023,31,1291,120.0,63,13,,82,10,10-2023
996,2023-11-05,2023-11-11,2023,30,1312,116.00,66,12,,85,11,11-2023
997,2023-11-12,2023-11-18,2023,27,1326,116.00,79,11,,99,11,11-2023
998,2023-11-19,2023-11-25,2023,37,1283,112.00,76,21,,96,11,11-2023


In [14]:
unique_years = list(set(list(usa_df.get('year'))))
cause_of_death = list(usa_df.columns)[3:10]

In [49]:
import ipywidgets as widgets
from IPython.display import display

#create dropdown for yr
dropdown_yr = widgets.Dropdown(
    options=unique_years,
    value='2024',
    description='Choose Year:'
)

#create dropdown for cod
dropdown_cod = widgets.Dropdown(
    options=cause_of_death,
    value='covid_19_deaths',
    description='Cause of Death:'
)

#create submit button widget
submit_button = widgets.Button(description='Submit')

#output widget to display outcome
output = widgets.Output()

#function to print output based on dropdown value
def choose_yr_cod(value):
    '''
    with output:
        output.clear_output()
        selected_yr = dropdown_yr.value
        selected_cod = dropdown_cod.value
        display(f"The selected option is: {selected_yr} and {selected_cod}")
    '''
    with output:
        output.clear_output()
        selected_yr = dropdown_yr.value
        selected_cod = dropdown_cod.value
        
        #filter for year
        year = str(selected_yr)
        new_df = (usa_df[usa_df['year'] == year]
                  .drop(columns=['start_date', 'end_date', 'month_date', 'year'])
                  .fillna(0)
                  .astype(float)
                  .astype(int)
                 )

        #combine on month
        newer_df = new_df.groupby('month').sum()

        #create line graph 
        line_gr = newer_df.plot(kind='line', y=selected_cod, title='Deaths Per Month', marker='o')

        plt.show(line_gr)
    
#attach button to function above
submit_button.on_click(choose_yr_cod)

#create horizontal box to house widgets
select_menu = widgets.HBox([dropdown_yr, dropdown_cod, submit_button])

display(select_menu, output)

HBox(children=(Dropdown(description='Choose Year:', index=3, options=('2025', '2020/2021', '2019/2020', '2024'…

Output()

In [57]:
#notes for next session

#have the gui be able to have multiple lines in the graph