In [25]:
#######################
# IMPORTING LIBRARIES #
#######################

import pandas as pd
import numpy as np
from datetime import datetime

import pycountry  # To convert country names to 3 letter code
import webbrowser

import plotly.graph_objs as go
import plotly.express as px
from plotly.subplots import make_subplots

import pickle

In [2]:
################
# DATA PARSING #
################

# Functions for gathering datasets from John Hopkins's repository
base_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"

def worldData(fileName, columnName):
    data = pd.read_csv(base_url + fileName) \
             .melt(id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], var_name='date', value_name=columnName) \
             .fillna('<all>')
    #data['date'] = data['date'].astype('datetime64[ns]')
    return data

def usData(fileName, columnName):
    data = pd.read_csv(base_url + fileName) \
             .melt(id_vars=['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key'], var_name='date', value_name=columnName) \
             .fillna('<all>')
    # Get names of indexes for which column Date has extra strings has value 30
    indexNames = data[data['date'] == 'Population'].index
    # Delete these row indexes from dataFrame
    data.drop(indexNames, inplace=True)
    #data['date'] = data['date'].astype('datetime64[ns]')
    return data

########################

# US data
us_data = usData("time_series_covid19_confirmed_US.csv", "Confirmed") \
    .merge(usData("time_series_covid19_deaths_US.csv", "Deaths"))
us_data.head()

# World data
world_data = worldData("time_series_covid19_confirmed_global.csv", "Confirmed") \
    .merge(worldData("time_series_covid19_deaths_global.csv", "Deaths"))
world_data.head()

########################

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,Confirmed,Deaths
0,<all>,Afghanistan,33.0,65.0,1/22/20,0,0
1,<all>,Albania,41.1533,20.1683,1/22/20,0,0
2,<all>,Algeria,28.0339,1.6596,1/22/20,0,0
3,<all>,Andorra,42.5063,1.5218,1/22/20,0,0
4,<all>,Angola,-11.2027,17.8739,1/22/20,0,0


In [3]:
us_data.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,date,Confirmed,Deaths
0,16,AS,ASM,16,60,<all>,American Samoa,US,-14.271,-170.132,"American Samoa, US",1/22/20,0,0
1,316,GU,GUM,316,66,<all>,Guam,US,13.4443,144.7937,"Guam, US",1/22/20,0,0
2,580,MP,MNP,580,69,<all>,Northern Mariana Islands,US,15.0979,145.6739,"Northern Mariana Islands, US",1/22/20,0,0
3,630,PR,PRI,630,72,<all>,Puerto Rico,US,18.2208,-66.5901,"Puerto Rico, US",1/22/20,0,0
4,850,VI,VIR,850,78,<all>,Virgin Islands,US,18.3358,-64.8963,"Virgin Islands, US",1/22/20,0,0


In [4]:
world_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,Confirmed,Deaths
0,<all>,Afghanistan,33.0,65.0,1/22/20,0,0
1,<all>,Albania,41.1533,20.1683,1/22/20,0,0
2,<all>,Algeria,28.0339,1.6596,1/22/20,0,0
3,<all>,Andorra,42.5063,1.5218,1/22/20,0,0
4,<all>,Angola,-11.2027,17.8739,1/22/20,0,0


In [5]:
world_data.max()

Province/State    Zhejiang
Country/Region    Zimbabwe
Lat                71.7069
Long               178.065
date                4/6/20
Confirmed           366614
Deaths               16523
dtype: object

In [6]:
# Transforming datasets
grouped_countries = world_data.groupby(['date','Country/Region', 'Province/State']).agg({'Confirmed': 'max', 'Deaths': 'max'}).reset_index()
grouped_states = us_data.groupby([ 'date', 'Province_State', 'FIPS', 'Lat', 'Long_']).agg({'Confirmed': 'max', 'Deaths': 'max'}).reset_index()

In [7]:
grouped_countries.head()

Unnamed: 0,date,Country/Region,Province/State,Confirmed,Deaths
0,1/22/20,Afghanistan,<all>,0,0
1,1/22/20,Albania,<all>,0,0
2,1/22/20,Algeria,<all>,0,0
3,1/22/20,Andorra,<all>,0,0
4,1/22/20,Angola,<all>,0,0


In [8]:
grouped_states.head()

Unnamed: 0,date,Province_State,FIPS,Lat,Long_,Confirmed,Deaths
0,1/22/20,Alabama,1001,32.539527,-86.644082,0,0
1,1/22/20,Alabama,1003,30.72775,-87.722071,0,0
2,1/22/20,Alabama,1005,31.868263,-85.387129,0,0
3,1/22/20,Alabama,1007,32.996421,-87.125115,0,0
4,1/22/20,Alabama,1009,33.982109,-86.567906,0,0


In [9]:
# Opening pickled dictionary with demographics data of percentage of population over 65
with open('./pickled_files/age_dict.pkl', 'rb') as f:
    over_65_dict = pickle.load(f)
#over_65_dict

In [10]:
# Opening pickled dictionary with country data
with open('./pickled_files/dev_index_dict.pkl',  'rb') as f:
    dev_index_dict = pickle.load(f)
#dev_index_dict

In [11]:
##############################################
# CODE TO ADD COUNTRY CODES TO ALL COUNTRIES #
##############################################

# import pycountry # To convert country names to 3 letter code

# # Finding 3 letter codes of countries in our dataset
# input_countries = grouped_countries['Country/Region']

# countries = {}
# for country in pycountry.countries:
#     countries[country.name] = country.alpha_3

# codes = [countries.get(country, 'Unknown code') for country in input_countries]

# print(codes)
# grouped_countries['code'] = codes
# grouped_countries.head()

# # Generating a dictionary with countries and codes
# # First finding all countries with codes
# only_countries = grouped_countries.groupby(['Country/Region', 'code']).agg({'Confirmed': 'max', 'Deaths': 'max'}).reset_index()
# with_codes = only_countries[only_countries.code != 'Unknown code'][['Country/Region', 'code']]
# country_code_dict = dict(zip(with_codes['Country/Region'], with_codes['code']))

# with open('pickled_files/country_code_dict.pkl', 'wb') as f:
#     pickle.dump(country_code_dict, f)
# #country_code_dict

# # Checking for missing countries with missing codes
# no_codes = grouped_countries[grouped_countries.code == 'Unknown code'][['Country/Region']]
# countries_to_augment = no_codes['Country/Region'].unique()
# countries_to_augment

# Manually adding missing codes
# codes_to_add = ['BOL', 'BWN', 'MMR', 'COG', 'COD', 'CIV', 'UNK', 'VAT', 'IRN', 'KOR', 'RKS', 'LAO', 'UNK', 'MDA', 'RUS', 'SYR', 'TWN', 'TZA', 'USA', 'VEN', 'VNM', 'PSE']
# augmentation_dict = dict(zip(countries_to_augment, codes_to_add))
# #augmentation_dict

# # Augmenting previous dictionary
# complete_country_code_dict = {**augmentation_dict, **country_code_dict} 
# with open('pickled_files/complete_country_code_dict.pkl', 'wb') as f:
#     pickle.dump(complete_country_code_dict, f)

# #complete_country_code_dict


In [12]:
# Opening pickled country code dictionary
complete_country_code_dict = pd.read_pickle('pickled_files/complete_country_code_dict.pkl')

# Mapping codes to countries in dataset
grouped_countries['code'] = grouped_countries['Country/Region'].map(complete_country_code_dict)
# Double checking missing values
missing_codes = len(grouped_countries[grouped_countries.code == 'Unknown code'][['Country/Region']])
print(f'There are {missing_codes} missing 3-letter codes in dataset')
print('-------')
grouped_countries.head()

There are 0 missing 3-letter codes in dataset
-------


Unnamed: 0,date,Country/Region,Province/State,Confirmed,Deaths,code
0,1/22/20,Afghanistan,<all>,0,0,AFG
1,1/22/20,Albania,<all>,0,0,ALB
2,1/22/20,Algeria,<all>,0,0,DZA
3,1/22/20,Andorra,<all>,0,0,AND
4,1/22/20,Angola,<all>,0,0,AGO


In [45]:
##############
# CHOROPLETH #
##############

global_data = grouped_countries.groupby(['Country/Region', 'code', 'date']).agg({'Confirmed': 'max', 'Deaths': 'max'}).reset_index()

def world_map():
    

    fig = px.choropleth(
                            global_data,
                            locations='code',
                            #title="Custom layout.hoverlabel formatting",
                            hover_name="Country/Region",
                            hover_data=["Confirmed", "Deaths"],
                            color=np.log10(global_data["Confirmed"]),
                            color_continuous_scale='Reds',
                            #range_color=(0, 100),
                            labels={
                                'Confirmed': 'Confirmed Cases <br> (x10) <br>',
                                'Deaths': 'Deaths'
                            },
                            #featureidkey="global_data.Deaths",
                            scope='world',
    #                    animation_frame='code'
                       )
    fig.update_layout(template="plotly_dark", margin={
            "r": 0,
            "t": 0,
            "l": 0,
            "b": 0
        }, # Set to True to show colorscale bar
                          coloraxis_colorbar=dict(
                              title="<b>Confirmed Cases</b> <br>" + "(Log Scale)",
                              tickvals=[1.5, 2.5, 3.5, 4.5],
                              ticktext=["100", "1k", "10k", "100k"],
                              thicknessmode="pixels",
                              thickness=10,
                              lenmode="pixels",
                              len=200,
                          ),
                          hovermode="x",
                          hoverlabel=dict(
                              bgcolor="#BF4025",
                              font_size=16,
                          ),
                          geo=dict(showframe=False,
                                   showcoastlines=False,
                                   projection_type='natural earth'))

    fig.update_traces(hovertemplate='<b>' + global_data['Country/Region'] +
                          '</b>' + '<br>' + 'Confirmed Cases: ' +
                          global_data['Confirmed'].astype(str) + '<br>' +
                          'Deaths: ' + global_data['Deaths'].astype(str))
    return fig

In [47]:
#world_map()

In [18]:
###############
# TOTAL CASES #
###############

total_confirmed = global_data.Confirmed.sum()
total_deaths = global_data.Deaths.sum()
print(f'Total Confirmed Cases: {total_confirmed}')
print(f'Total Deaths: {total_deaths}')

Total Confirmed Cases: 1317098
Total Deaths: 74200


In [44]:
# Last time data was updated
last_update = grouped_countries.date.max()
conv_date = datetime.strptime(last_update, '%d/%m/%y')
print(conv_date)

2020-06-04 00:00:00


In [None]:
#################
# RACING BAR CHARTS #

fig = px.bar(grouped_countries, x="Country/Region", y="Deaths",
  animation_frame="date", animation_group="Country/Region", range_y=[0,20000])
fig.show()

In [None]:
global_data = grouped_countries.groupby(['Country/Region', 'code']).agg({'Confirmed': 'max', 'Deaths': 'max'}).reset_index()
fig = px.scatter_geo(global_data, locations="code", color="Confirmed",
                     hover_name="Country/Region", size="Deaths",
                     projection="natural earth")

fig.update_layout(template="plotly_dark", margin={
            "r": 0,
            "t": 0,
            "l": 0,
            "b": 0
        },coloraxis_showscale=False,
                          coloraxis_colorbar=dict(
#                               title="<b>Confirmed Cases</b> <br>" + "(Log Scale)",
#                               tickvals=[1.5, 2.5, 3.5, 4.5],
#                               ticktext=["100", "1k", "10k", "100k"],
                              thicknessmode="pixels",
                              thickness=10,
                              lenmode="pixels",
                              len=200,
                          ),
                          hovermode="x",
                          hoverlabel=dict(
                              bgcolor="#BF4025",
                              font_size=16,
                          ),
                          geo=dict(showframe=False,
                                   showcoastlines=False,
                                   projection_type='natural earth'))

fig.update_traces(hovertemplate='<b>' + global_data['Country/Region'] +
                          '</b>' + '<br>' + 'Confirmed Cases: ' +
                          global_data['Confirmed'].astype(str) + '<br>' +
                          'Deaths: ' + global_data['Deaths'].astype(str))

fig.show()

In [None]:
global_data.head()

In [None]:
# days = list(grouped_countries.date)

# # make list of countries
# countries = []
# for c in grouped_countries["Country/Region"]:
#     if c not in countries:
#         countries.append(c)
# # make figure
# fig_dict = {
#     "data": [],
#     "layout": {},
#     "frames": []
# }

# # fill in most of layout
# fig_dict["layout"]["xaxis"] = {"range": [30, 85], "title": "Life Expectancy"}
# fig_dict["layout"]["yaxis"] = {"title": "GDP per Capita", "type": "log"}
# fig_dict["layout"]["hovermode"] = "closest"
# fig_dict["layout"]["sliders"] = {
#     "args": [
#         "transition", {
#             "duration": 400,
#             "easing": "cubic-in-out"
#         }
#     ],
#     "initialValue": "1952",
#     "plotlycommand": "animate",
#     "values": days,
#     "visible": True
# }
# fig_dict["layout"]["updatemenus"] = [
#     {
#         "buttons": [
#             {
#                 "args": [None, {"frame": {"duration": 500, "redraw": False},
#                                 "fromcurrent": True, "transition": {"duration": 300,
#                                                                     "easing": "quadratic-in-out"}}],
#                 "label": "Play",
#                 "method": "animate"
#             },
#             {
#                 "args": [[None], {"frame": {"duration": 0, "redraw": False},
#                                   "mode": "immediate",
#                                   "transition": {"duration": 0}}],
#                 "label": "Pause",
#                 "method": "animate"
#             }
#         ],
#         "direction": "left",
#         "pad": {"r": 10, "t": 87},
#         "showactive": False,
#         "type": "buttons",
#         "x": 0.1,
#         "xanchor": "right",
#         "y": 0,
#         "yanchor": "top"
#     }
# ]

# sliders_dict = {
#     "active": 0,
#     "yanchor": "top",
#     "xanchor": "left",
#     "currentvalue": {
#         "font": {"size": 20},
#         "prefix": "Day:",
#         "visible": True,
#         "xanchor": "right"
#     },
#     "transition": {"duration": 300, "easing": "cubic-in-out"},
#     "pad": {"b": 10, "t": 50},
#     "len": 0.9,
#     "x": 0.1,
#     "y": 0,
#     "steps": []
# }

# # make data
# day = '1/22/20'
# for c in countries:
#     dataset_by_day = grouped_countries[grouped_countries["date"] == day]
#     dataset_by_day_and_cont = dataset_by_day[
#         dataset_by_day["Country/Region"] == c]

#     data_dict = {
#         "x": list(dataset_by_day_and_cont["date"]),
#         "y": list(dataset_by_day_and_cont["Confirmed"]),
#         "mode": "markers",
# #         "text": list(dataset_by_day_and_cont["country"]),
# #         "marker": {
# #             "sizemode": "area",
# #             "sizeref": 200000,
# #             "size": list(dataset_by_day_and_cont["pop"])
# #         },
#         "name": c
#     }
#     fig_dict["data"].append(data_dict)

# # make frames
# for d in days:
#     frame = {"data": [], "name": str(d)}
#     for c in countries:
#         dataset_by_day = grouped_countries[grouped_countries["date"] == d]
#         dataset_by_day_and_cont = dataset_by_day[
#             dataset_by_day["Country/Region"] == c]

#         data_dict = {
#             "x": list(dataset_by_day_and_cont["date"]),
#             "y": list(dataset_by_day_and_cont["Confirmed"]),
#             "mode": "markers",
#             "text": list(dataset_by_day_and_cont["Country/Region"]),
#             "marker": {
#                 "sizemode": "area",
#                 "sizeref": 200000,
#                 "size": list(dataset_by_day_and_cont["Confirmed"])
#             },
#             "name": c
#         }
#         frame["data"].append(data_dict)

#     fig_dict["frames"].append(frame)
#     slider_step = {"args": [
#         [day],
#         {"frame": {"duration": 300, "redraw": False},
#          "mode": "immediate",
#          "transition": {"duration": 300}}
#     ],
#         "label": day,
#         "method": "animate"}
#     sliders_dict["steps"].append(slider_step)


# fig_dict["layout"]["sliders"] = [sliders_dict]

# fig = go.Figure(fig_dict)

# fig.show()

In [None]:
# fig = go.Figure(
#     data=[go.Scatter(x=[0, 1], y=[0, 1])],
#     layout=go.Layout(
#         xaxis=dict(range=[0, 5], autorange=False),
#         yaxis=dict(range=[0, 5], autorange=False),
#         title="Start Title",
#         updatemenus=[dict(
#             type="buttons",
#             buttons=[dict(label="Play",
#                           method="animate",
#                           args=[None])])]
#     ),
#     frames=[go.Frame(data=[go.Scatter(x=[1, 2], y=[1, 2])]),
#             go.Frame(data=[go.Scatter(x=[1, 4], y=[1, 4])]),
#             go.Frame(data=[go.Scatter(x=[3, 4], y=[3, 4])],
#                      layout=go.Layout(title_text="End Title"))]
# )

# fig.show()

In [None]:
# global_data = grouped_countries.groupby(['Country/Region', 'code']).agg({'Confirmed': 'max', 'Deaths': 'max'}).reset_index()
# global_data['deathRate'] = global_data['Deaths']/global_data['Confirmed']
# death_rates = global_data[(global_data['Deaths'] > 0) & (global_data['Confirmed'] >1000)]

# # Country
# # country = grouped_country['Country/Region']


# # Initialize figure with subplots
# fig = make_subplots(
#     rows=2, cols=2,
#     column_widths=[0.6, 0.4],
#     row_heights=[0.5, 0.5],
#     specs=[[{"type": "choropleth", "rowspan":2}, {"type": "bar"}],
#            [                 None               , {"type": "surface"}]])

# # fig.add_trace(
# #     px.choropleth(grouped_countries, locations='code',
# #                     hover_data=["Confirmed", "Deaths"],
# #                     color='Confirmed',
# #                     range_color=(0, 25000),
# #                     color_continuous_scale='orrd', 
# #                     labels={'Confirmed': 'Confirmed Cases (x10)', 'Deaths': 'Deaths'}, 
# #                     featureidkey="grouped_countries.Deaths",
# #                     scope='world'))

# # Add scattergeo globe map
# fig.add_trace(
#     go.Choropleth(
#         locations = global_data['code'],
#         z = global_data['Confirmed'],
# #         text = global_data['Deaths'],
#         colorscale = 'Reds',
#         autocolorscale=False,
#         reversescale=False,
#         marker_line_color='white',
#         marker_line_width=0.2,
#         colorbar_tickprefix = '',
#         colorbar_title = ''
#     ), row=1, col=1
# )

# #Add death rate bar chart
# fig.add_trace(
#     go.Bar(x=death_rates["Country/Region"],y=death_rates["deathRate"], marker=dict(color="crimson"), showlegend=False),
#     row=1, col=2
# )


# fig.update_layout(template="plotly_dark", showlegend=False, xaxis={'categoryorder':'total descending'},
#     annotations=[
#         dict(
# text="Data sources: WHO, CDC, ECDC, NHC, DXY, 1point3acres, Worldometers.info, BNO, state and national government health departments, and local media reports.",
#             showarrow=False,
#             xref="paper",
#             yref="paper",
#             x=0,
#             y=0)
#     ],
#         coloraxis_showscale=False, 
#         margin={
#         "r": 0,
#         "t": 20,
#         "l": 0,
#         "b": 0
#     },
#         coloraxis_colorbar=dict(
#             title="<b>Confirmed Cases</b> <br>" + "(Log Scale)",
#             tickvals=[1.5, 2.5, 3.5, 4.5],
#             ticktext=["100", "1k", "10k", "100k"],
#             thicknessmode="pixels",
#             thickness=10,
#             lenmode="pixels",
#             len=200,
#                       ),
#         hovermode="x",
#         hoverlabel=dict(
#             bgcolor="#BF4025",
#             font_size=16,
#                       )
#                  )

# # Update geo subplot properties
# fig.update_geos(
#     projection_type="natural earth",
#     landcolor="lightgrey",
#     oceancolor="MidnightBlue",
#     showocean=False,
#     lakecolor="white",
#     showframe=False,
#     showcoastlines=True,
#     coastlinewidth=0.2
# )


# # # Add 3d surface of volcano
# # fig.add_trace(
# #     go.Surface(z=df_v.values.tolist(), showscale=False),
# #     row=2, col=2
# # )


# # # Rotate x-axis labels
# fig.update_xaxes()
# fig.update_yaxes(title="Current Death Rates <br> (>1000 cases)}", tickfont=dict(family='Rockwell', color='crimson', size=10))

# #Set theme, margin, and annotation in layout
# # fig.update_layout(showlegend=False, xaxis={'categoryorder':'total descending'}, coloraxis_showscale=False,
# # #             coloraxis_colorbar=dict(
# # #     title="Number of Bills per Cell",
# # #     thicknessmode="pixels", thickness=10,
# # #     lenmode="pixels", len=50,
# # #     yanchor="top", y=1,
# # #     ticks="outside", ticksuffix=" bills",
# # #     dtick=5),      
# #     template="plotly_dark",
# #     margin=dict(r=0, t=0, b=0, l=0),
# #     annotations=[
# #         dict(
# # text="Source: mjkjk",
# #             showarrow=False,
# #             xref="paper",
# #             yref="paper",
# #             x=0,
# #             y=0)
# #     ]
# # )
# fig.update_traces(hovertemplate='<b>' + global_data['Country/Region'] +
#                       '</b>' + '<br>' + 'Confirmed Cases: ' +
#                       [str((f"{num:,d}")) for num in global_data['Confirmed']] + '<br>' +
#                       'Deaths: ' + [str((f"{num:,d}")) for num in global_data['Deaths']] + '<br>' + 
#                       'Current Death Rate: ' + [str(round(num*100, 2)) for num in global_data['deathRate']] + '%' + 
#                       '<extra></extra>')
