Authors: Koosha Jadbabaei, Aryaman Sinha

Since: 9/22/20

Sources: https://towardsdatascience.com/bring-your-jupyter-notebook-to-life-with-interactive-widgets-bc12e03f0916

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plots
import math
import ipywidgets as widgets
from IPython.display import display
from datetime import datetime

In [4]:
covid1 = pd.read_csv("owid-covid-data.csv")
covid1

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,ABW,North America,Aruba,2020-03-13,2.0,2.0,,0.0,0.0,,...,35973.781,,,11.62,,,,,76.29,
1,ABW,North America,Aruba,2020-03-19,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
2,ABW,North America,Aruba,2020-03-20,4.0,2.0,0.286,0.0,0.0,0.0,...,35973.781,,,11.62,,,,,76.29,
3,ABW,North America,Aruba,2020-03-21,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
4,ABW,North America,Aruba,2020-03-22,,,0.286,,,0.0,...,35973.781,,,11.62,,,,,76.29,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44147,,,International,2020-09-12,696.0,,,7.0,,,...,,,,,,,,,,
44148,,,International,2020-09-13,696.0,,,7.0,,,...,,,,,,,,,,
44149,,,International,2020-09-14,696.0,,,7.0,,,...,,,,,,,,,,
44150,,,International,2020-09-15,696.0,,,7.0,,,...,,,,,,,,,,


In [5]:
def death_rate_calculator(row):
    total_deaths = row["total_deaths"]
    pos_cases = row["total_cases"]
    if (np.isnan(pos_cases) or (np.isnan(total_deaths))):
        return None
    if ((np.isnan(pos_cases) == 0 )or (np.isnan(total_deaths)) == 0):
        return 0
    else:
        return total_deaths / pos_cases
def pos_rate_calculator(row):
    pos_cases = row["total_cases"]
    total_tests = row["total_tests"]
    if (np.isnan(pos_cases) or (np.isnan(total_tests))):
        return None
    if ((np.isnan(pos_cases) == 0 )or (np.isnan(total_tests)) == 0):
        return 0
    else:
        return pos_cases / total_tests
covid1["death_rate"] = covid1.apply(death_rate_calculator, axis = 1)
covid1["positive_rate"] = covid1.apply(pos_rate_calculator, axis = 1)
covid1 = covid1.loc[covid1.location != "International"]
covid1

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,death_rate
0,ABW,North America,Aruba,2020-03-13,2.0,2.0,,0.0,0.0,,...,,,11.62,,,,,76.29,,0.0
1,ABW,North America,Aruba,2020-03-19,,,0.286,,,0.000,...,,,11.62,,,,,76.29,,
2,ABW,North America,Aruba,2020-03-20,4.0,2.0,0.286,0.0,0.0,0.000,...,,,11.62,,,,,76.29,,0.0
3,ABW,North America,Aruba,2020-03-21,,,0.286,,,0.000,...,,,11.62,,,,,76.29,,
4,ABW,North America,Aruba,2020-03-22,,,0.286,,,0.000,...,,,11.62,,,,,76.29,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43886,OWID_WRL,,World,2020-09-12,28519191.0,305101.0,268166.000,916012.0,5881.0,5863.143,...,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,,0.0
43887,OWID_WRL,,World,2020-09-13,28806290.0,287099.0,269207.286,920939.0,4927.0,5800.857,...,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,,0.0
43888,OWID_WRL,,World,2020-09-14,29078936.0,272646.0,272621.143,924817.0,3878.0,5064.857,...,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,,0.0
43889,OWID_WRL,,World,2020-09-15,29320128.0,241192.0,277613.857,929050.0,4233.0,5123.571,...,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,,0.0


In [6]:
covid1.columns

Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'new_tests', 'total_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index',
       'population', 'population_density', 'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy', 'human_development_index', 'death_rate'],
      dtype='object')

In [7]:
column_names = list(covid1)

In [8]:
death_data = ["new_deaths", "new_deaths_per_million", "total_deaths", "total_deaths_per_million", "death_rate"]
cases_data = ["new_cases", "new_cases_per_million", "total_cases"]
test_data = ["new_tests", "total_tests", "total_tests_per_thousand", "new_tests_per_thousand", "tests_per_case", "positive_rate"]
population_data = ["population", "population_density", "median_age", "aged_65_older", "aged_70_older", "gdp_per_capita", "extreme_poverty"]
misc_data = ["diabetes_prevalence", "female_smokers", "male_smokers", "handwashing_facilities", "hospital_beds_per_thousand", "life_expectancy", "human_development_index", "stringency_index"]

In [9]:
ALL = 'World'
def unique_sorted_values_plus_ALL(array):
    unique = array.unique().tolist()
    unique.sort()
    unique.remove("World")
    unique.remove("United States")
    unique.insert(0, ALL)
    unique.insert(1, "United States")
    return unique

def unique_sorted_values_plus_today_norm(array):
    unique = array.unique().tolist()
    unique.sort(reverse = True)
    return unique

def unique_sorted_values_plus_today(array):
    unique = array.unique().tolist()
    unique.sort()
    return unique

In [24]:
d1 = widgets.Dropdown(description = "Choose One", options = ['death', 'cases', 'test', "population", "misc", "correlations"])

output_main = widgets.Output()

filtered1 = pd.DataFrame()

def first_ex(change):
    global filtered1
    output_main.clear_output()
    tempcovid = covid1.copy(deep = True)
    filtered1 = tempcovid
    cols = list(tempcovid)
    if (change.new == 'death'):
         data1 = death_data
    if (change.new == 'cases'):
        data1 = cases_data
    if (change.new == 'test'):
        data1 = test_data
    if (change.new == 'population'):
        data1 = population_data
    if (change.new == 'misc'):
        data1 = misc_data
    templist = ['date', 'location']
    for i in data1:
        templist.append(i)
    filtered1 = filtered1[templist]
    with output_main:
        display(filtered1)
d1.observe(first_ex, names = 'value')
display(d1)
display(output_main)

Dropdown(description='Choose One', options=('death', 'cases', 'test', 'population', 'misc', 'correlations'), v…

Output()

In [12]:
filtered1.head(5)

In [50]:
variables_dict = {"death": death_data, "cases": cases_data, "test": test_data, "population": population_data, "misc" : misc_data}


In [40]:
drp = widgets.Dropdown(description = "Choose One", options = ['Deaths', 'Cases', 'Tests', "Population", "Misc", "Correlations"])

output_values = widgets.Output()

def sec_ex(change):
    data1 = death_data
    output_values.clear_output()
    with output_values:
        data1 = variables_dict[change.new]
#     if (change.new == 'correlation'):
#         data1 = death_data
        d2 = widgets.Dropdown(description = change.new, options = data1)
        def inner(change1):
            output_values.clear_output()
            with output_values:
                display(d2)
                display(covid1[["date", "location", change1.new]])
        d2.observe(inner, names= 'value')
        display(d2)
drp.observe(sec_ex, names = 'value')
display(drp)
display(output_values)

Dropdown(description='Choose One', options=('Deaths', 'Cases', 'Tests', 'Population', 'Misc', 'Correlations'),…

Output()

In [105]:
large_output = widgets.Output()
large_output2 = widgets.Output()
output_value = widgets.Output()
final_output = widgets.Output()

dropdown_category = widgets.Dropdown(description = "Category", options = ['death', 'cases', 'test', "population", "misc", "correlations"])

filtered5 = pd.DataFrame()
filtered = pd.DataFrame()
filt_df = pd.DataFrame()
final_df = pd.DataFrame()
covid_stats_df = pd.DataFrame()

def on_change1(category):
    if category == "correlations":
        correlation()
    else:
        global filtered5
        tempcovid = covid1.copy(deep = True)
        filtered5 = tempcovid
        cols = list(tempcovid)
        large_output.clear_output()
        templist = ['date', 'location']

        data = variables_dict[category]
        for i in data:
            templist.append(i)
        filtered5 = filtered5[templist]

        with large_output:
            display(filtered5)
            data1 = death_data
            output_value.clear_output()
            with output_value:

                data1 = variables_dict[category]
                dropdown_location = widgets.Dropdown(description = "Location", options = unique_sorted_values_plus_ALL(covid1.location))
                def on_change1_5(change1_5):
                    global filtered
                    filtered = filtered5.loc[filtered5.location == change1_5.new]
                    d2 = widgets.Dropdown(description = category, options = data1)

                    def on_change2(change1):

                        global filt_df
                        output_value.clear_output()
                        with output_value:
                            final_output.clear_output()
                            with final_output:

                                if (change1.new == "new_deaths" or change1.new == "new_deaths_per_million" or change1.new == "new_cases" or change1.new == "new_cases_per_million" or change1.new == "new_tests" or change1.new == "new_tests_per_thousand"):
                                    double1 = widgets.Dropdown(description = "Start Date", options = unique_sorted_values_plus_today(covid1.date))
                                    double2 = widgets.Dropdown(description = "End Date", options = unique_sorted_values_plus_today_norm(covid1.date))
                                    def d1_eventhandler(change):
                                        on_change7(change.new, double2.value)    
                                    def d2_eventhandler(change):
                                        on_change7(double1.value, change.new)
                                    def on_change7(change8, change9):
                                        global final_df
                                        final_output.clear_output()
                                        with final_output:
                                            display(double1)
                                            display(double2)
                                            if (change8 > change9):
                                                print("Incorrect date order")
                                            else:
                                                final_df = filt_df.loc[filt_df.date >= change8]
                                                final_df = final_df.loc[final_df.date <= change9]
                                                var = final_df[change1.new].tolist()
                                                summer = 0
                                                counter = 0
                                                for i in var:
                                                    if np.isnan(i):
                                                        counter = counter + 1
                                                if counter > 0:
                                                    print("This value is not available")
                                                else:
                                                    summer = sum(var)
                                                    print("The amount of " + str(change1.new) + " in " + final_df["location"].tolist()[0] + " from " + str(change8) + " to " + str(change9) + " is " + str(summer))
                                                display(final_df)
                                    double1.observe(d1_eventhandler, names = 'value')
                                    double2.observe(d2_eventhandler, names = 'value')
                                    display(double1)
                                    display(double2)
                                else:
                                    single_dropdown = widgets.Dropdown(description = "Date", options = unique_sorted_values_plus_today_norm(covid1.date))
                                    def on_change5(change6):
                                        global final_df
                                        final_output.clear_output()
                                        with final_output:
                                            display(single_dropdown)
                                            final_df = filt_df.loc[filt_df.date == change6.new]
                                            var = final_df[change1.new].tolist()[0]
                                            if np.isnan(var):
                                                print("This value is not available (includes NaN values)")
                                            else:
                                                print("The amount of " + str(change1.new) + " in " + final_df["location"].tolist()[0] + " on " + str(change6.new) + " is " + str(final_df[change1.new].tolist()[0]))
                                            display(final_df)
                                    single_dropdown.observe(on_change5, names = 'value')
                                    display(single_dropdown)
                            display(d2)
            #                 filt_df = filtered.loc[filtered.location == locations]
                            filt_df = filtered[["date", "location", change1.new]]
                    #                 display(filt_df)

                    d2.observe(on_change2, names= 'value')
                    display(d2)
                dropdown_location.observe(on_change1_5, names = 'value')
                display(dropdown_location)

def dropdown_category_eventhandler(change):
    on_change1(change.new)
    

class color:
   PURPLE = '\033[95m'
   CYAN = '\033[96m'
   DARKCYAN = '\033[36m'
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   YELLOW = '\033[93m'
   RED = '\033[91m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

print(color.BOLD + 'Covid Statistic Generator!' + color.END)
    
dropdown_category.observe(dropdown_category_eventhandler, names = 'value')
# dropdown_location.observe(dropdown_location_eventhandler, names = 'value')

display(dropdown_category)
display(output_value)
display(final_output)

[1mCovid Statistic Generator![0m


Dropdown(description='Category', options=('death', 'cases', 'test', 'population', 'misc', 'correlations'), val…

Output()

Output()

Dropdown(description='Category 1', options=('death', 'cases', 'test', 'population', 'misc'), value='death')

Output()

Output()

Button(description='Confirm Variables', style=ButtonStyle())

In [18]:
final_df
# final_df.to_csv("covid_dataset.csv")

In [101]:
def correlation():
    var1_type_drop = widgets.Dropdown(options = ["death", "cases", "test", "population", "misc"], description = "Category 1")
    var1_type = ""
    var1 = ""
    var2_type = ""
    var2 = ""


    output = widgets.Output()
    output2 = widgets.Output()
    def var1type_drop_eventhandler(change):
        output.clear_output()
        with output:
            global var1_type
            var1_type = var1_type_drop.value
            options_list = variables_dict[var1_type]
            var1_drop = widgets.Dropdown(options = options_list, description = "Subcat 1")
            def var1_drop_eventhandler(change):
                with output2:
                    global var1
                    global var2
                    var1 = var1_drop.value
                    if (var1 == var2):
                        print("Invalid choice, please select different variables.")

            var1_drop.observe(var1_drop_eventhandler, names = "value")
            display(var1_drop)

            var2_type_drop = widgets.Dropdown(options = ["death", "cases", "test", "population", "misc", "time"], description = "Category 2")
            def var2type_drop_eventhandler(change):
                output2.clear_output()
                with output2:
                    global var2_type
                    var2_type = var2_type_drop.value
                    if var2_type == "time":
                        options_list2 = ["Custom Range", "All-Time"]
                    else:
                        options_list2 = [i for i in variables_dict[var2_type] if i is not var1]
                    var2_drop = widgets.Dropdown(options = options_list2, description = "Subcat 2")
                    def var2_drop_eventhandler(change):
                        with output2:
                            global var2
                            var2 = var2_drop.value
                            print(var2)
                    var2_drop.observe(var2_drop_eventhandler, names = "value")
                    display(var2_drop)
            var2_type_drop.observe(var2type_drop_eventhandler, names = "value")
            display(var2_type_drop)

    var1_type_drop.observe(var1type_drop_eventhandler, names = "value")
    display(var1_type_drop)

    display(output)
    display(output2)

    confirm = widgets.Button(description = "Confirm Variables")
    display(confirm)

    locations = covid1["location"].unique()
    dates = covid1["date"].unique()
    date1 = ""
    date2 = ""
    location = ""
    out3 = widgets.Output()
    def confirm_eventhandler(obj):
        out3.clear_output()
        with out3:
            if var1 == var2:
                print("Please choose different variables, variable 1 and 2 cannot be the same.")
            elif var1_type in ["Deaths", "Cases", "Tests"]:
                if var2_type == "Time":
                    if var2 == "Custom Range":
                        date1_drop = widgets.Dropdown(options = dates, description = "Start Date")
                        def date1_drop_eventhandler(change):
                            global date1
                            date1 = change.new
                            date2_list = covid1.loc[covid1["date"] > date1]["date"].unique()
                            date2_drop = widgets.Dropdown(options = date2_list, description = "End Date")
                            def date2_drop_eventhandler(change):
                                global date2
                                date2 = change.new
                            date2_drop.observe(date2_drop_eventhandler, names = "value")
                            display(date2_drop)
                        date1_drop.observe(date1_drop_eventhandler, names = "value")
                        display(date1_drop)
                        location_drop = widgets.Dropdown(options = locations, description = "Location")
                        def location_drop_eventhandler(change):
                            global location
                            location = change.new
                        location_drop.observe(location_drop_eventhandler, names = "value")
                        display(location_drop)
    confirm.on_click(confirm_eventhandler)