<img width="10%" alt="Naas" src="https://landen.imgix.net/jtci2pxwjczr/assets/5ice39g4.png?w=160"/>

# Covid-19 Active Cases World Map

# Input

## Import Libraries

In [16]:
import naas
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

## Variables

In [17]:
# URLs of the raw csv dataset
urls = [
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv',
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
]

confirmed_df, deaths_df, recovered_df = tuple(pd.read_csv(url) for url in urls)

In [18]:
title = "Worldmap"

# Output paths
output_image = f"{title}.png"
output_html = f"{title}.html"

### Get Data
Mostly adopted from this [COVID19 Data Processing Tutorial](https://towardsdatascience.com/covid-19-data-processing-58aaa3663f6)

Clean the dataset to show the cases by country

Steps:

1. Convert from Wide to Long Dataframe (Convert all datetimes to a single column)

2. Merge/Join the Confirmed, Deaths and Recovered tables into a single table

3. Converting Date from string to datetime

4. Replacing missing values/NaNs
5. Coronavirus cases reported from 3 cruise ships should be treated differently and adjustments need to be made for Canada (deciding to drop Canada due to missing recovery data)

6. Get Active Cases = Confirmed - Deaths - Recovered


In [19]:
#Wide to Long DataFrame conversion
dates = confirmed_df.columns[4:]
confirmed_df_long = confirmed_df.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Confirmed'
)
deaths_df_long = deaths_df.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Deaths'
)
recovered_df_long = recovered_df.melt(
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], 
    value_vars=dates, 
    var_name='Date', 
    value_name='Recovered'
)

# Adjust for Canada
recovered_df_long = recovered_df_long[(recovered_df_long['Country/Region']!='Canada')]

In [20]:
# Join into one single dataframe/table
# Merging confirmed_df_long and deaths_df_long
full_table = confirmed_df_long.merge(
  right=deaths_df_long, 
  how='left',
  on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
)
# Merging full_table and recovered_df_long
full_table = full_table.merge(
  right=recovered_df_long, 
  how='left',
  on=['Province/State', 'Country/Region', 'Date', 'Lat', 'Long']
)

# Convert date strings to actual dates
full_table['Date'] = pd.to_datetime(full_table['Date'])
# Handle some missing values / NaNs
full_table['Recovered'] = full_table['Recovered'].fillna(0).astype('int64')



In [21]:
full_table.isna().sum()
# full_table.dtypes

Province/State    137802
Country/Region         0
Lat                 1428
Long                1428
Date                   0
Confirmed              0
Deaths                 0
Recovered              0
dtype: int64

In [22]:
# Adjust for Canada and 3 cruise ships
ship_rows = full_table['Province/State'].str.contains('Grand Princess') | full_table['Province/State'].str.contains('Diamond Princess') | full_table['Country/Region'].str.contains('Diamond Princess') | full_table['Country/Region'].str.contains('MS Zaandam')
full_ship = full_table[ship_rows]
full_table = full_table[~(ship_rows)]

# Add one more entry for each day to get the entire world's counts/totals
world_dict = {"Country/Region": "World", "Confirmed": pd.Series(full_table.groupby(['Date'])['Confirmed'].sum()), "Deaths": pd.Series(full_table.groupby(['Date'])['Deaths'].sum()),"Recovered": pd.Series(full_table.groupby(['Date'])['Recovered'].sum())}
world_df = pd.DataFrame.from_dict(world_dict).reset_index()
print(world_df.columns)
full_table = pd.concat([full_table, world_df], ignore_index=True)

Index(['Date', 'Country/Region', 'Confirmed', 'Deaths', 'Recovered'], dtype='object')


In [23]:
# Active Cases = Confirmed - Deaths - Recovered
full_table['Active'] = full_table['Confirmed'] - full_table['Deaths'] - full_table['Recovered']

full_grouped = full_table.groupby(['Date', 'Country/Region'])['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [24]:
len(full_grouped["Country/Region"].unique())

195

In [33]:
# Go back from long to wide for viz purposes
df = full_grouped
df.rename(columns={"Country/Region": "Country"}, inplace=True)
df_confirmed = df[["Date", "Country", "Confirmed"]]
df_deaths = df[["Date", "Country", "Deaths"]]
df_active = df[["Date", "Country", "Active"]]
df_recovered = df[["Date", "Country", "Recovered"]]

df_confirmed = df_confirmed.pivot(index="Date", columns="Country", values="Confirmed")
df_deaths = df_deaths.pivot(index="Date", columns="Country", values="Deaths")
df_recovered = df_recovered.pivot(index="Date", columns="Country", values="Recovered")
df_active = df_active.pivot(index="Date", columns="Country", values="Active")

def create_layout_button(df, column):
    first, latest = df.index.values[0], df.index.values[-1]
    return dict(label = column,
                method = 'update',
                args = [{'visible': df.columns.isin([column]),
                         'title': column,
                         'xaxis.range': [first, latest],
                         'showlegend': True
                        }])

def multi_plot(df, title, addAll = True):
    first, latest = df.index.values[0], df.index.values[-1]
    fig = go.Figure()

    for column in df.columns.to_list():
        fig.add_trace(
            go.Scatter(
                x = df.index,
                y = df[column],
                name = column
            )
        )

    button_all = dict(label = 'All',
                  method = 'update',
                  args = [{'visible': df.columns.isin(df.columns),
                           'title': 'All',
                           'xaxis.range': [first, latest],
                           'showlegend':True}])
    
    # Need "World" to be the default choice if "All" is not shown
    button_world = create_layout_button(df, "World")

    fig.update_layout(
        updatemenus=[{
            "active": 0,
            "buttons": ([button_all] * addAll) + [button_world] + [create_layout_button(df, column) for column in df.columns if column != "World"],
            "showactive": True
            }
        ],
        yaxis_type="log"
    )
    
    # Update remaining layout properties
    fig.update_layout(
        title_text=title,
#         annotations=[dict(
#             text="Country:",
#             x=0, y=0
#         )]
    )
   
    fig.show()

# test_df_active = df_active.swapaxes("index", "columns")
test_df_active = df_active
latest = test_df_active.index.values[-1]
print(latest)
test_df_active


2022-01-04T00:00:00.000000000


Country,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,West Bank and Gaza,World,Yemen,Zambia,Zimbabwe
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-22,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,510,0,0,0
2020-01-23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,605,0,0,0
2020-01-24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,876,0,0,0
2020-01-25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,2,0,1350,0,0,0
2020-01-26,0,0,0,0,0,0,0,0,4,0,...,0,0,0,0,2,0,2006,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-31,150728,207007,212156,23600,79823,4164,5537239,336958,400701,1264886,...,407213,197418,6,439307,1698863,464829,282756994,8142,250540,208254
2022-01-01,150751,207007,212534,23600,80626,4164,5557247,337005,460698,1268478,...,408122,197545,6,439497,1713482,464829,283868449,8143,254208,209197
2022-01-02,150828,207665,212868,23600,81148,4164,5577726,337030,490297,1271758,...,409074,197633,6,439639,1730209,464829,284743002,8146,255934,209197
2022-01-03,150819,207665,213234,24362,81989,4164,5622081,337053,535038,1275070,...,410793,197697,6,439747,1745955,464829,287172958,8154,257468,211040


In [34]:
# Getting the world map data for plotting
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,212,213,214,215,216,217,218,219,220,221
COUNTRY,Afghanistan,Albania,Algeria,American Samoa,Andorra,Angola,Anguilla,Antigua and Barbuda,Argentina,Armenia,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Virgin Islands,West Bank,Yemen,Zambia,Zimbabwe
GDP (BILLIONS),21.71,13.4,227.8,0.75,4.8,131.4,0.18,1.24,536.2,10.88,...,55.6,63.08,0.82,209.2,187.8,5.08,6.64,45.45,25.61,13.74
CODE,AFG,ALB,DZA,ASM,AND,AGO,AIA,ATG,ARG,ARM,...,URY,UZB,VUT,VEN,VNM,VGB,WBG,YEM,ZMB,ZWE


In [39]:
result = pd.concat([test_df_active, df.T], axis=0)
result

Unnamed: 0,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,212,213,214,215,216,217,218,219,220,221
2020-01-22 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2020-01-23 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2020-01-24 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2020-01-25 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2020-01-26 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-01-03 00:00:00,150819.0,207665.0,213234.0,24362.0,81989.0,4164.0,5622081.0,337053.0,535038.0,1275070.0,...,,,,,,,,,,
2022-01-04 00:00:00,150840.0,208797.0,213650.0,24661.0,82888.0,4367.0,5703242.0,337140.0,606513.0,1280541.0,...,,,,,,,,,,
COUNTRY,,,,,,,,,,,...,Uruguay,Uzbekistan,Vanuatu,Venezuela,Vietnam,Virgin Islands,West Bank,Yemen,Zambia,Zimbabwe
GDP (BILLIONS),,,,,,,,,,,...,55.6,63.08,0.82,209.2,187.8,5.08,6.64,45.45,25.61,13.74


# Output

In [41]:
fig = go.Figure()

config = {'displayModeBar': False}

fig = go.Figure(data=go.Choropleth(
    locations = result['CODE'],
    z = result['Date'],
    text = result['COUNTRY'],
    colorscale = 'Blues',
    autocolorscale=False,
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_tickprefix = '',
    colorbar_title = 'Active<br>Cases',
))

fig.update_layout(
    title=title ,
    plot_bgcolor="#ffffff",
    legend_x=1,
    geo=dict(
        showframe=False,
        showcoastlines=False,
        #projection_type='equirectangular'
    ),
    dragmode= False,
    #width=800,
    height=600,

)
fig.show(config=config)

KeyError: 'CODE'