# Libraries

In [None]:
#sudo pip install --upgrade ipykernel
#ipython3 notebook
import pandas as pd 
import csv
from datetime import datetime
import requests
import cufflinks as cf
from plotly.offline import iplot
import numpy as np
import plotly.graph_objects as go 
from io import open
from plotly.subplots import make_subplots

import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px


# Loading datasets

## Preprocessing the Johns Hopkins (JHU CSSE) data

In [None]:
# Raw Johns Hopkins CSSE time-series files, one URL per kind of count.
CSV_FILES = {
    "confirmed": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    "deaths": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    "recovered": "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
}

# Maps "<isodate>_<country>" to a [confirmed, deaths, recovered] list.
data_dict = dict()

# Maps each header date exactly as written in the CSV (m/d/yy) to its
# ISO form (YYYY-MM-DD).
dates_dict = dict()

# Every country/region name found; replaced by a sorted list further down.
countries = set()

# The first CSV supplies both the date columns and the country names.
first_url = next(iter(CSV_FILES.values()))

with requests.get(first_url) as response:

    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    response.raise_for_status()

    reader = csv.DictReader(response.text.splitlines())

    # The header is taken from the fifth column onwards; those are the dates.
    fields = reader.fieldnames[4:]

    for field in fields:
        dates_dict[field] = "{:%Y-%m-%d}".format(datetime.strptime(field, "%m/%d/%y"))

    # Collect the distinct country/region names over all rows.
    for row in reader:
        countries.add(row["Country/Region"])

    countries = sorted(countries)

    # Pre-fill every (date, country) combination with zeroed counters so the
    # accumulation step can simply add to them.
    for date in dates_dict.values():
        for country in countries:
            data_dict["{}_{}".format(date, country)] = [0, 0, 0]

# Iterate over our 3 urls.
# Position of each kind of count inside a data_dict value.
KIND_COLUMN = {"confirmed": 0, "deaths": 1, "recovered": 2}

# Add the per-province rows of every CSV into the per-country counters.
for kind, url in CSV_FILES.items():

    column = KIND_COLUMN.get(kind)
    if column is None:
        # Unknown kinds have no slot in the counters; nothing to add.
        continue

    with requests.get(url) as response:

        # Fail loudly on an HTTP error instead of parsing an error page as CSV.
        response.raise_for_status()

        reader = csv.DictReader(response.text.splitlines())

        for row in reader:

            # k is the date as written in the header, v its ISO form.
            for k, v in dates_dict.items():

                temp_key = "{}_{}".format(v, row["Country/Region"])

                # The skeleton was built from the first file only, so create
                # the counters on demand for a country that is missing there.
                counters = data_dict.setdefault(temp_key, [0, 0, 0])

                # A date column that is absent or empty in this file counts
                # as zero instead of aborting the whole import.
                counters[column] += int(row.get(k) or 0)

# Save our data to a CSV file.
# Build all rows first, so the output file is only opened (and truncated)
# once the data is known to be complete.
data_list = [["isodate", "country", "confirmed", "deaths", "recovered"]]

for k, v in data_dict.items():
    # Keys are "<isodate>_<country>"; split on the first underscore only so a
    # country name that itself contains "_" stays intact.
    isodate, country = k.split("_", 1)
    data_list.append([isodate, country, v[0], v[1], v[2]])

# Save our data to a CSV file.
with open("datasets/global_data.csv", "w", encoding="utf-8", newline="") as other_file:
    csv.writer(other_file).writerows(data_list)


# Reading data and preprocessing data formats

In [None]:
# Load the per-country daily counts written by the preprocessing step.
df = pd.read_csv("datasets/global_data.csv")

# The file stores dates as YYYY-MM-DD, so the format must use dashes; a
# slash-separated format is rejected by strict parsers.
df['isodate'] = pd.to_datetime(df['isodate'], format='%Y-%m-%d')
df = df.sort_values(by=['country', 'isodate'])

# "day" is the 1-based position of each row inside its country's timeline.
# Computed in one vectorised step instead of a chained-assignment loop, which
# pandas warns about and which does not reliably write back to the frame.
df["day"] = df.groupby('country').cumcount() + 1

# Placeholder column, filled with the cumulative number of measures later on.
df["measures"] = ""

df.head()

In [None]:
# Load the containment-measures dataset. dfMoriginal keeps a handle on the
# frame as loaded; dfM is the working name used by the following cells.
dfMoriginal = pd.read_csv("datasets/dataset_COVID 19 Containment measures data.csv")
dfM = dfMoriginal
dfM.head()

In [None]:
# Keep only the start date and country of each measure, dropping rows where
# either is missing. The explicit copy makes the writes below unambiguous.
dfM = dfM[['Date Start', 'Country']].dropna().copy()

# Entries containing "United States" or "US:" are renamed to "US", the name
# the case data is later matched against. Done with a boolean mask and .loc
# because chained assignment (frame[col][row] = value) is unreliable.
is_us = dfM['Country'].str.contains("United States|US:", regex=True, na=False)
dfM.loc[is_us, 'Country'] = "US"

dfM['Date Start'] = pd.to_datetime(dfM['Date Start'], format='%b %d, %Y')
dfM = dfM.sort_values(by=['Country', 'Date Start'])

dfM.head(20)

In [None]:
# Fill df['measures'] with the running number of measures a country had
# started by each date. Only measures whose start date appears among that
# country's rows are counted, and countries without any measure get 0.
# This replaces a nested Python loop that wrote through chained assignment.

# Number of measures starting on each (country, date).
starts_per_day = dfM.groupby(['Country', 'Date Start']).size()

# Walk the case rows in date order so the cumulative sum runs forward in time.
by_date = df.sort_values('isodate')
row_keys = pd.MultiIndex.from_frame(by_date[['country', 'isodate']])

# Measures starting on each case row's own date (0 when there are none),
# re-labelled with the row's index so the result aligns back onto df.
new_measures = pd.Series(
    starts_per_day.reindex(row_keys, fill_value=0).to_numpy(),
    index=by_date.index,
)

df['measures'] = new_measures.groupby(by_date['country']).cumsum()

In [None]:
# Show the prepared rows for the US as a quick sanity check.
is_us_row = df['country'] == "US"
df.loc[is_us_row]

In [None]:
# Animated scatter: one frame per "day", one bubble per country, bubble size
# given by the number of measures in place.
axis_ranges = dict(
    range_x=[25000, 50000],
    range_y=[20000, 2000000],
)

figVDD = px.scatter(
    df,
    x="recovered",
    y="confirmed",
    animation_frame="day",
    animation_group="country",
    size="measures",
    color="country",
    hover_name="country",
    size_max=60,
    **axis_ranges
)
figVDD.update_layout(
    title_text='Press play to watch the relation between recovered and confirmed cases .'
)


# BAR

In [None]:
# One entry per keyword: drop rows without keywords, split each
# comma-separated list, stack the pieces into a single Series and renumber it.
keyword_column = dfMoriginal[['Keywords']].dropna()['Keywords']
list_measures = (
    keyword_column
    .str.split(', ', expand=True)
    .stack()
    .reset_index(drop=True)
)

# list_measures.unique()

In [None]:
# Keep only the keywords that occur more than 15 times and preview them.
keyword_groups = list_measures.groupby(list_measures[:])
data = keyword_groups.filter(lambda occurrences: len(occurrences) > 15)
lm = pd.DataFrame(data, columns=['keywords'])
lm

In [None]:
# Keywords that occur more than 15 times, one row per occurrence.
keyword_groups = list_measures.groupby(list_measures[:])
data = keyword_groups.filter(lambda occurrences: len(occurrences) > 15)
lm = pd.DataFrame(data, columns=['keywords'])

# Histogram of those keywords: one bar (and colour) per measure category.
figBar = px.histogram(
    lm,
    x='keywords',
    color='keywords',
    labels={'keywords': 'Measures Categories'},
)
figBar.update_layout(
    title_text='Mouse hover to see how much each measure category was implemented by countries.'
)


# Chart relating measures and cases (vertical bars and horizontal lines)

In [None]:
# Reload the case data with the country column named like the one in the
# measures dataset, then pair every case row with every measure of the same
# country (inner join on "Country").
# NOTE(review): dfMoriginal holds the country names as loaded, so US measures
# presumably do not match the "US" used in the case data - verify.
df = pd.read_csv("datasets/global_data.csv").rename(columns={'country': 'Country'})
dfinal = pd.merge(df, dfMoriginal, on="Country", how='inner')
dfinal.tail()

In [None]:
format_ = '%b %d, %Y'  # Format of the 'Date Start' strings in the measures data.
shapes_ = {}  # Per-country list, created empty here.
notes_ = {}   # Per-country ISO dates on which a measure started.

fig = go.Figure()
for country_ in dfinal['Country'].unique():
    # The merge repeats every measure once per case row, so the distinct
    # start dates are all that is needed; no sort of the merged rows and no
    # row-by-row iteration is required.
    raw_dates = dfinal.loc[dfinal['Country'] == country_, 'Date Start'].dropna().unique()

    shapes_[country_] = []

    # Convert to the same YYYY-MM-DD form as the case dates; the set removes
    # different spellings of one date and sorting makes the order chronological.
    notes_[country_] = sorted(
        {datetime.strptime(raw, format_).strftime('%Y-%m-%d') for raw in raw_dates}
    )


In [None]:
# Order the merged rows by country name.
dfinal = dfinal.sort_values("Country")

In [None]:
from plotly.subplots import make_subplots

# Single-cell subplot grid that receives three traces per country, always in
# the order: confirmed line, deaths line, measures bars.
fig = make_subplots(rows=1, cols=1, specs=[[{}]],
                    shared_xaxes=True, shared_yaxes=True,
                    vertical_spacing=0.001)

for country_label in dfinal['Country'].unique():
    # Sort into a new frame; sorting a filtered slice in place triggers
    # pandas' SettingWithCopy warning.
    country_rows = dfinal.loc[dfinal['Country'] == country_label].sort_values("isodate")

    # The merge repeats each day's counts once per measure; one row per day
    # is enough to draw the lines.
    daily = country_rows.drop_duplicates(subset="isodate")

    # Lines with the number of cases.
    trace1 = go.Scatter(
        x=daily['isodate'].tolist(),
        y=daily['confirmed'].tolist(),
        mode="lines",
        name=country_label + ' confirmed cases',
    )
    trace2 = go.Scatter(
        x=daily['isodate'].tolist(),
        y=daily['deaths'].tolist(),
        mode="lines",
        name=country_label + ' death cases',
    )

    # Hover text per measure date. The descriptions are grouped by the same
    # ISO date that labels the bars, so every bar shows the measures that
    # started on its own date; taking the text straight from the merged rows
    # does not line up with the bar dates.
    measures = (
        country_rows
        .dropna(subset=['Date Start'])
        .drop_duplicates(subset=['Date Start', 'Description of measure implemented'])
    )
    measure_dates = pd.to_datetime(
        measures['Date Start'], format='%b %d, %Y'
    ).dt.strftime('%Y-%m-%d')
    wrapped = (
        measures['Description of measure implemented']
        .astype(str)
        .str.wrap(30)
        .str.replace('\n', '<br>', regex=False)
    )
    hover_by_date = wrapped.groupby(measure_dates).agg('<br><br>'.join)

    # Vertical bars marking each country's mitigation measures, drawn as
    # tall as the country's highest confirmed count.
    bar_dates = notes_[country_label]
    trace3 = go.Bar(
        x=bar_dates,
        y=np.full(len(bar_dates), country_rows['confirmed'].max()),
        text=[hover_by_date.get(bar_date, '') for bar_date in bar_dates],
        name=country_label + ' measure implemented ',
        marker_color='lightsalmon'
    )

    fig.add_trace(trace1, row=1, col=1)
    fig.add_trace(trace2, row=1, col=1)
    fig.add_trace(trace3, row=1, col=1)

In [None]:
# One dropdown entry per country. Selecting it shows only that country's
# traces and sets the title.
country_labels = dfinal['Country'].unique()
traces_per_country = 3  # confirmed line, deaths line, measures bars

list_updatemenus = []
for position, country_label in enumerate(country_labels):
    visible = [False] * (traces_per_country * len(country_labels))
    first_trace = position * traces_per_country
    visible[first_trace:first_trace + traces_per_country] = [True] * traces_per_country

    # 'update' takes [trace changes, layout changes]; 'restyle' only changes
    # traces, so the title entry would not be applied with it.
    list_updatemenus.append(dict(
        label=str(country_label),
        method='update',
        args=[{'visible': visible},
              {'title': 'Country  %s' % country_label}],
    ))

# Add the dropdown menu.
fig.update_layout(title = "Select de country in dropdown menu "+
                  "and pass the cursor hover de bars and see datails of each measure implemented.",
    updatemenus=[
        dict(
            type = "dropdown",
            direction = "down",
            buttons= list_updatemenus)])


In [None]:
import pandas as pd

# Column (m/d/yy, as written in the CSV header) used for the world totals.
SNAPSHOT_DATE = '7/1/20'


def _load_time_series(url):
    """Download one JHU time-series CSV and prepare it for melting.

    Drops the coordinates, renames the two name columns to snake_case, blanks
    missing names and adds a "country_id" built from the lower-cased names
    with spaces turned into underscores.
    """
    frame = pd.read_csv(url).drop(columns=["Lat", "Long"])
    frame = frame.rename(columns={"Country/Region": "country_region",
                                  "Province/State": "province_state"})

    # Blank out missing names only; filling the whole frame with "" would put
    # strings into numeric date columns that happen to contain a gap.
    name_columns = ["country_region", "province_state"]
    frame[name_columns] = frame[name_columns].fillna("")

    frame["country_id"] = (
        frame["country_region"].str.lower().str.replace(' ', '_', regex=False)
        + '_'
        + frame["province_state"].str.lower().str.replace(' ', '_', regex=False)
    )
    return frame


def _melt_counts(frame, value_name):
    """Return one row per (country_id, date) with the count in `value_name`."""
    # Melt the date columns only, so the name columns are not turned into
    # bogus "date" rows carrying text in the value column.
    not_dates = {"country_id", "country_region", "province_state"}
    date_columns = [column for column in frame.columns if column not in not_dates]
    return frame.melt(id_vars=["country_id"], value_vars=date_columns,
                      var_name="date", value_name=value_name)


df_ts_confirmed = _load_time_series('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
df_ts_deaths = _load_time_series('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
df_ts_recovered = _load_time_series('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

df_confirmed = _melt_counts(df_ts_confirmed, "confirmed")
df_deaths = _melt_counts(df_ts_deaths, "deaths")
df_recovered = _melt_counts(df_ts_recovered, "recovered")

# World totals on the snapshot date (one-element Series keyed by that date).
df_total_confirmed = df_ts_confirmed[[SNAPSHOT_DATE]].sum()
print(df_total_confirmed)
df_total_deaths = df_ts_deaths[[SNAPSHOT_DATE]].sum()
print(df_total_deaths)
df_total_recovered = df_ts_recovered[[SNAPSHOT_DATE]].sum()
print(df_total_recovered)

total_cases_world = df_total_confirmed[SNAPSHOT_DATE]
print(total_cases_world)

# NOTE(review): computed as confirmed minus recovered; deaths are not
# subtracted - confirm that this is the intended definition of "active".
df_total_active = df_total_confirmed[SNAPSHOT_DATE] - df_total_recovered[SNAPSHOT_DATE]
print(df_total_active)

total_recovered_world = df_total_recovered[SNAPSHOT_DATE]
total_deaths_world = df_total_deaths[SNAPSHOT_DATE]

# Plot map with cases

In [None]:
# Put the merged rows in date order so the animation frames run forward.
dfinal.sort_values("isodate", inplace = True)

# The merged frame repeats each country's daily counts once per measure;
# one row per (country, date) draws the same bubbles without stacking
# identical markers on top of each other.
map_rows = dfinal.drop_duplicates(subset=["Country", "isodate"])

# Animated world map: one bubble per country sized by confirmed cases,
# one frame per date.
figMap = px.scatter_geo(map_rows, locations = "Country",
                    locationmode = 'country names',
                      hover_name="Country",
                     color="Country",
                     size="confirmed",
                     animation_frame="isodate",
                     projection="natural earth", size_max=100)
figMap.update_layout(title='Press play to see on the map the cases evolution since the starts cases in China.')

# Run dashboard

In [None]:
import dash_core_components as dcc
import dash_html_components as html
import pandas as pd
import numpy as np
import calendar
import datetime as dt
from dash.dependencies import Input, Output
import plotly.graph_objects as go

app = dash.Dash()


def _card_style(margin_left):
    """Return the box styling shared by the headline-number cards."""
    return {
        'width': '22%',
        'marginLeft': margin_left,
        'display': 'inline-block',
        'border': 'thin lightgrey solid',
        'box-shadow': '6px 6px 6px lightgrey',
        'background-color': '#E5ECF6',
    }


def _stat_card(number_text, caption, margin_left, number_style):
    """Return a card showing a bold headline number above its caption."""
    return html.Div(
        children=[
            html.Center([
                html.B(children=[html.H3(number_text, style=number_style)]),
                html.H5(caption),
            ])
        ],
        style=_card_style(margin_left),
        className='four columns',
    )


# Title block shown to the left of the number cards.
title_card = html.Div(
    children=[
        html.Center([
            html.H3("COVID-19 containment and mitigation measures and cases evolution in global scale"),
            html.H3("Dashboard"),
            html.H4("Developed by Gabriel Leal, Giovani Betonni and Marilia Silveira to the course Data Visualization PPGCC-PUCRS"),
        ])
    ],
    style={'width': '22%', 'display': 'inline-block', 'font-family': "Open Sans"},
    className='four columns',
)

# Header row: the title followed by the three world totals.
header_row = html.Div(
    children=[
        title_card,
        _stat_card(
            format(total_cases_world, ','),
            "Total cases in the world",
            '8%',
            {'color': '#FF3333', 'font-family': "Open Sans"},
        ),
        _stat_card(
            format(int(total_recovered_world), ','),
            "Total recoveries",
            '1%',
            {'color': '#FF3333'},
        ),
        _stat_card(
            format(total_deaths_world, ','),
            "Total deaths",
            '1%',
            {'color': '#FF3333'},
        ),
    ],
    style={'display': 'inline-block', 'width': '100%'},
    className='row',
)

header = html.Div(
    children=[header_row],
    style={'display': 'inline-block', 'width': '100%', 'textAlign': 'right'},
)

# The page: the header followed by the four figures built in earlier cells.
page_layout = html.Div(
    children=[
        header,
        dcc.Graph(id='vdd', figure=figVDD),
        dcc.Graph(id='bar', figure=figBar),
        dcc.Graph(id='subplot', figure=fig),
        dcc.Graph(id='map', figure=figMap),
    ],
    style={'width': '100%', 'backgroundColor': 'white'},
)


app.layout = page_layout

app.run_server(debug=False, use_reloader=False)  # Turn off reloader if inside Jupyter