# Interactive DP Covid Tracker

Source: Sally Wang@Columbia Systems Lab

# Introduction: 

we build this interactive dashboard to demonstrate our differentially private system research on data optimization. This dashboard aims to illustrate two key benefits of our design: 1) we can analyze statistical data without revealing sensitive information of key stakeholders; 2) we optimize privacy budget utilization via data block substitutions to obtain similar results requested by clients. In this interactive dashboard, you'll see a comparative analysis of differentially private and non-private reports regarding queries on global covid datasets. 

This dashboard shows 5 different interactive plots of COVID-19. <br>
1. NP demo: List of Countries by the Number of Cases  <br>
2. NP demo: Check the Curve of Your Own Country <br>
3. DP demo: Summary Statistics <br>
4. DP demo: Worst Hit Countries <br>
5. DP demo: Total Cases on a World Map <br>

In [103]:
# importing libraries

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from IPython.display import display, HTML

import numpy as np
import pandas as pd
import diffprivlib.tools as dp
import diffprivlib.mechanisms as me
import statistics
import matplotlib.pyplot as plt
import plotly.express as px
import folium
import plotly.graph_objects as go
import seaborn as sns
import ipywidgets as widgets

In [104]:
# loading data 
death_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
confirmed_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
recovered_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
country_df = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv')

In [36]:
# data cleaning

# renaming the df column names to lowercase
country_df.columns = map(str.lower, country_df.columns)
confirmed_df.columns = map(str.lower, confirmed_df.columns)
death_df.columns = map(str.lower, death_df.columns)
recovered_df.columns = map(str.lower, recovered_df.columns)

# changing province/state to state and country/region to country
confirmed_df = confirmed_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
recovered_df = confirmed_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
death_df = death_df.rename(columns={'province/state': 'state', 'country/region': 'country'})
country_df = country_df.rename(columns={'country_region': 'country'})

In [37]:
def private_mean(column, privacy_budget: float) -> int:
    x = dp.mean(column, privacy_budget, bounds=(0,27746), dtype="float")
    return x

def mortality_mean(column, privacy_budget: float) -> int:
    x = dp.mean(column, privacy_budget, bounds=(0, 1.2), dtype="float")
    return x
def dp_sum(column, epsilon: float)-> int:
    x = dp.nansum(column, epsilon, bounds=(100, 91563979))
    return x

In [38]:
# total number of confirmed, death and recovered cases
country_df['confirmed'] = country_df['confirmed'].replace(np.nan, 0)
country_df['deaths'] = country_df['deaths'].replace(np.nan, 0)
country_df['incident_rate'] = country_df['incident_rate'].replace(np.nan, 0)
country_df['mortality_rate'] = country_df['mortality_rate'].replace(np.nan, 0)

confirmed_total = dp_sum(country_df['confirmed'].to_numpy(), 1)
deaths_total = dp_sum(country_df['deaths'], 1)
incident_rate = private_mean(country_df['incident_rate'], 1)
mortality_rate = mortality_mean(country_df['mortality_rate'], 1)


# 1. NP: List of Countries by the Number of Cases

As a start, you will view the raw statistics in non-private setting below. You can fill in the text box below to view countries with top number of cases. 

In [39]:
# sorting the values by confirmed descednding order
# country_df.sort_values('confirmed', ascending= False).head(10).style.background_gradient(cmap='copper')


# sorting the values by confirmed descednding order
# country_df.sort_values('confirmed', ascending= False).head(10).style.background_gradient(cmap='copper')
fig = go.FigureWidget( layout=go.Layout() )
def highlight_col(x):
    r = 'background-color: #f2493a'
    y = 'background-color: #5d73fc'
    g = 'background-color: #2bba0f'
    a = 'background-color: #9dba0f'
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    df1.iloc[:, 4] = y
    df1.iloc[:, 5] = r
    df1.iloc[:, 8] = g
    df1.iloc[:, 11] = a
    return df1

def show_latest_cases(TOP):
    TOP = int(TOP)
    return country_df.sort_values('confirmed', ascending= False).head(TOP).style.apply(highlight_col, axis=None)

interact(show_latest_cases, TOP='10')

ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none' # uncomment this, run cell again - then the graph/figure disappears
widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='10', description='TOP'), Output()), _dom_classes=('widget-interact',))

VBox(children=(FigureWidget({
    'data': [], 'layout': {'template': '...'}
}),), layout=Layout(border='solid …

# 2. NP: Check the Curve of Your Own Country

Enter the name of your country (with first letter capital e.g., Italy) or "World" for the total cases:

In [40]:


def plot_cases_of_a_country(country):
    labels = ['confirmed', 'deaths']
    colors = ['blue', 'red']
    mode_size = [6, 8]
    line_size = [4, 5]
    
    df_list = [confirmed_df, death_df]
    
    fig = go.Figure();
    
    for i, df in enumerate(df_list):

        if country == 'World' or country == 'world':
            x_data = np.array(list(df.iloc[:, 20:].columns))
            y_data = np.sum(np.asarray(df.iloc[:,4:]),axis = 0)
            
        else:    
            x_data = np.array(list(df.iloc[:, 20:].columns))
            y_data = np.sum(np.asarray(df[df['country'] == country].iloc[:,20:]),axis = 0)
            
        fig.add_trace(go.Scatter(x=x_data, y=y_data, mode='lines+markers',
        name=labels[i],
        line=dict(color=colors[i], width=line_size[i]),
        connectgaps=True,
        text = "Total " + str(labels[i]) +": "+ str(y_data[-1])
        ));
    
    fig.update_layout(
        title="Cases of " + country,
        xaxis_title='Date',
        yaxis_title='Number of Confirmed Cases',
        margin=dict(l=20, r=20, t=40, b=20),
        paper_bgcolor="lightgrey",
        width = 800,
        
    );
    
    fig.update_yaxes(type="linear")
    fig.show();
    



In [41]:
    interact(plot_cases_of_a_country, country='World')

    ipywLayout = widgets.Layout(border='solid 2px green')
    ipywLayout.display='none' # uncomment this, run cell again - then the graph/figure disappears
    widgets.VBox([fig], layout=ipywLayout)

interactive(children=(Text(value='World', description='country'), Output()), _dom_classes=('widget-interact',)…

VBox(children=(FigureWidget({
    'data': [], 'layout': {'autosize': True, 'template': '...'}
}),), layout=Lay…

# 2. DP Statistical Summary

The summary below is differentially prviate statistics 

In [42]:
# displaying the total stats

display(HTML("<div style = 'background-color: #d1cdcd; padding: 30px '>" +
             "<span style='color: #5d73fc; font-size:30px;'> Confirmed: "  + str(int(confirmed_total)) +"</span>" +
             "<span # style='color: #f2493a; font-size:30px;margin-left:20px;'> Deaths: " + str(int(deaths_total))+ "</span>"+
             "<span style='color: #2bba0f; font-size:30px; margin-left:20px;'> average incident_rate:" + str(int(incident_rate)) + "</span>"+         
             "<span style='color: #9dba0f; font-size:30px; margin-left:20px;'> average mortality_rate: " + str(round(mortality_rate, 2)) + "</span>"+
             "</div>"))

# 3. DP: Worst Hit Countries

To protect the identity of worst hit countries, we present the following data in differentiallty private setting. You can publicly pressent worst hit countries' covid statistics without revealing further information about those countries. We present worst hit countries by letters only.  

In [100]:
def perturb(column):
    # 0 and 150 are the upper and lower limits for the search bound.
    mu = dp.mean(column, epsilon=1, bounds=(1, 10))
    sigma = dp.var(column, epsilon=1, bounds=(1, 10))
    for i in range(len(column)):
        noise = np.random.normal(mu, sigma)
        column[i] = column[i]+noise
    return column


In [101]:
country_df['confirmed'] = perturb(country_df['confirmed'])
sorted_country_df = country_df.sort_values('confirmed', ascending= False)
sorted_country_df.loc['country'] = sorted_country_df['country'].astype(str).str[0]
 

0      5.038885e+13
1      1.029060e+14
2     -1.102558e+14
3      3.315502e+12
4      1.037222e+12
           ...     
194   -2.713017e+13
195   -7.592421e+12
196   -7.751575e+13
197   -3.533880e+12
198   -4.815771e+13
Name: confirmed, Length: 199, dtype: float64
0      5.038885e+13
1      1.029060e+14
2     -1.102558e+14
3      3.315502e+12
4      1.037222e+12
           ...     
194   -2.713017e+13
195   -7.592421e+12
196   -7.751575e+13
197   -3.533880e+12
198   -4.815771e+13
Name: confirmed, Length: 199, dtype: float64




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [102]:
# plotting the worst hit countries

def bubble_chart(TOP):
    #for i in range(TOP):
     #   dp_max = private_max(1, country_df['confirmed'])
       
     #   sorted_country_df['confirmed'].iloc[i] = perturb(sorted_country_df['confirmed'].iloc[i])
    #sorted_country_df.loc['confirmed']=perturb(sorted_country_df.loc['confirmed'])   
    fig = px.scatter(sorted_country_df.head(TOP), x="country", y="confirmed", size="confirmed", color="country",
            hover_name="country", size_max=60)
    fig.update_layout(
    title=str(TOP) +" Worst Hit Countries",
    xaxis_title="Countries",
    yaxis_title="Confirmed Cases",
    width = 700
    )
    fig.show();

interact(bubble_chart, TOP=10)

ipywLayout = widgets.Layout(border='solid 2px green')
ipywLayout.display='none'
widgets.VBox([fig], layout=ipywLayout)


interactive(children=(IntSlider(value=10, description='TOP', max=30, min=-10), Output()), _dom_classes=('widge…

VBox(children=(FigureWidget({
    'data': [], 'layout': {'autosize': True, 'template': '...'}
}),), layout=Lay…

# 5. DP: Total Cases on a World Map

We present an interactive DP global map on confirmed cases, deaths and death rates. Although the map reveals rough locations of each country, the statistics are differentially private and the name of each country is represented by a letter only. By doing so, this dashboard mamizes data privacy of each country while providing useful information for health organizationss and researchers. 

You can click on the circles to view DP statistics of each country. 

In [91]:
confirmed_df['lat'] = confirmed_df['lat'].replace(np.nan, 0)
confirmed_df['long'] = confirmed_df['long'].replace(np.nan, 0)
confirmed_df['country'] = confirmed_df['country'].astype(str).str[0]

world_map = folium.Map(location=[11,0], tiles="cartodbpositron", zoom_start=2, max_zoom = 6, min_zoom = 2)


for i in range(0,len(confirmed_df)):
    
    folium.Circle(
        location=[confirmed_df.iloc[i]['lat'], confirmed_df.iloc[i]['long']],
        fill=True,
        radius=(int((np.log(confirmed_df.iloc[i,-1]+1.00001)))+0.2)*5000,
        color='red',
        fill_color='indigo',
        tooltip = "<div style='margin: 0; background-color: black; color: white;'>"+
                    "<h4 style='text-align:center;font-weight: bold'>"+confirmed_df.iloc[i]['country'] + "</h4>"
                    "<hr style='margin:10px;color: white;'>"+
                    "<ul style='color: white;;list-style-type:circle;align-item:left;padding-left:20px;padding-right:20px'>"+
                        "<li>Confirmed: "+str(dp_sum(confirmed_df.iloc[:,-1], 1))+"</li>"+
                        "<li>Deaths:   "+str(dp_sum(confirmed_df.iloc[:,-1], 1))+"</li>"+
                        "<li>Death Rate: "+ str(np.round(death_df.iloc[i,-1]/(confirmed_df.iloc[i,-1]+1.00001)*100,2))+ "</li>"+
                    "</ul></div>",
        ).add_to(world_map)

world_map
