# Notebook to map Covid-19 cases
This notebook can be used to map the confirmed cases, deaths and recoveries that have been reported globally

In [12]:
import folium
import pandas as pd
from folium import plugins
from datetime import timedelta, date
import numpy as np
import urllib
import os

## We create a generator and fetch data from the open Covid-19 data on github
This fetches the underlying data

In [13]:
map_hooray = folium.Map(zoom_start=12) 

def daterange(start_date, end_date):
    for n in range(int ((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = date(2020, 1, 22)
end_date = date.today()

DIR = "./csse_covid_19_daily_reports"
if not os.path.isdir(DIR):
        os.makedirs(DIR)
for single_date in daterange(start_date, end_date):
    urllib.request.urlretrieve(f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{single_date.strftime('%m-%d-%Y')}.csv", 
                      f"{DIR}/{single_date.strftime('%m-%d-%Y')}.csv")
    print(f"{single_date.strftime('%m-%d-%Y')}.csv")


01-22-2020.csv
01-23-2020.csv
01-24-2020.csv
01-25-2020.csv
01-26-2020.csv
01-27-2020.csv
01-28-2020.csv
01-29-2020.csv
01-30-2020.csv
01-31-2020.csv
02-01-2020.csv
02-02-2020.csv
02-03-2020.csv
02-04-2020.csv
02-05-2020.csv
02-06-2020.csv
02-07-2020.csv
02-08-2020.csv
02-09-2020.csv
02-10-2020.csv
02-11-2020.csv
02-12-2020.csv
02-13-2020.csv
02-14-2020.csv
02-15-2020.csv
02-16-2020.csv
02-17-2020.csv
02-18-2020.csv
02-19-2020.csv
02-20-2020.csv
02-21-2020.csv
02-22-2020.csv
02-23-2020.csv
02-24-2020.csv
02-25-2020.csv
02-26-2020.csv
02-27-2020.csv
02-28-2020.csv
02-29-2020.csv
03-01-2020.csv
03-02-2020.csv
03-03-2020.csv
03-04-2020.csv
03-05-2020.csv
03-06-2020.csv
03-07-2020.csv
03-08-2020.csv
03-09-2020.csv
03-10-2020.csv
03-11-2020.csv
03-12-2020.csv
03-13-2020.csv
03-14-2020.csv
03-15-2020.csv
03-16-2020.csv
03-17-2020.csv
03-18-2020.csv
03-19-2020.csv
03-20-2020.csv
03-21-2020.csv
03-22-2020.csv
03-23-2020.csv
03-24-2020.csv
03-25-2020.csv
03-26-2020.csv
03-27-2020.csv
03-28-2020

In [33]:
map_hooray = folium.Map(zoom_start=12) 

def daterange(start_date, end_date):
    for n in range(int ((end_date - start_date).days)):
        yield start_date + timedelta(n)

start_date = date(2020, 1, 22)
end_date = date.today()
df = pd.DataFrame()
for single_date in daterange(start_date, end_date):
    fp = f"{DIR}/{single_date.strftime('%m-%d-%Y')}.csv"
    df = df.append(pd.read_csv(fp).rename(columns={"Province_State":"Province/State","Country_Region":"Country/Region","Last_Update":"Last Update","Lat":"Latitude","Long_":"Longitude"}))


In [34]:
df.head

<bound method NDFrame.head of       Province/State  Country/Region          Last Update  Confirmed  Deaths  \
0              Anhui  Mainland China      1/22/2020 17:00        1.0     NaN   
1            Beijing  Mainland China      1/22/2020 17:00       14.0     NaN   
2          Chongqing  Mainland China      1/22/2020 17:00        6.0     NaN   
3             Fujian  Mainland China      1/22/2020 17:00        1.0     NaN   
4              Gansu  Mainland China      1/22/2020 17:00        NaN     NaN   
...              ...             ...                  ...        ...     ...   
3930             NaN        Zimbabwe  2020-08-04 04:41:59     4075.0    80.0   
3931         Unknown        Colombia  2020-07-17 22:34:48        0.0     0.0   
3932  Grand Princess          Canada  2020-07-13 12:34:33       13.0     0.0   
3933           Idaho              US  2020-07-10 02:34:22        1.0     0.0   
3934          Oregon              US  2020-07-06 19:33:59        0.0     0.0   

      Rec

## Map in the lon/lats that have come into the datasets
Some of the initial data points missed longitude and latitude, so we map that back based on the provice and country

In [18]:
# We need to map in the coordintes for some of the first dates
df["Province/State"] = df["Province/State"].fillna("N/A")
df2 = df[["Province/State","Country/Region","Latitude","Longitude"]].dropna(axis=0).groupby(["Province/State","Country/Region"]).first()

df_acc = df[["Province/State","Country/Region","Last Update","Confirmed","Deaths","Recovered"]].set_index(["Province/State","Country/Region"]).join(df2)


## We need to add an initial 0 so that we can fill in missing dates
All missing reports for each country will be filled with the previous value

In [19]:
# Add 0 initial value to all locations
df3=df2.copy()
df3["Last Update"]=start_date-timedelta(days=1)
df3["Confirmed"]=0
df3["Deaths"]=0
df3["Recovered"]=0
df_acc = df_acc.append(df3)

In [20]:
# enforce datetime column
df_acc["Last Update"] = pd.to_datetime(df_acc["Last Update"]).dt.date
df_acc = df_acc.reset_index()
df_acc["Location"] = df_acc["Province/State"]+", "+df_acc["Country/Region"]
df_acc = df_acc.groupby(["Location","Last Update"]).agg({"Latitude":np.max, "Longitude":np.max, "Confirmed":np.max,"Deaths":np.max,"Recovered":np.max})
df_acc = df_acc.sort_index()
new_index = pd.MultiIndex.from_product(df_acc.index.levels)
df_acc = df_acc.reindex(new_index,method="ffill").reset_index()

In [21]:
# Ensure you're handing the map floats
df_acc['Latitude'] = df_acc['Latitude'].astype(float)
df_acc['Longitude'] = df_acc['Longitude'].astype(float)

In [22]:
# Select only relevant columns
heat_df = df_acc[["Latitude", "Longitude", "Last Update","Confirmed","Deaths","Recovered"]].copy()

# Enforce float
heat_df["Confirmed"] = heat_df["Confirmed"].astype(float)
heat_df["Deaths"] = heat_df["Deaths"].astype(float)
heat_df["Recovered"] = heat_df["Recovered"].astype(float)

heat_df = heat_df.dropna(axis=0, subset=['Latitude','Longitude', 'Confirmed'])
heat_df = heat_df.sort_values("Last Update")
heat_df["Last Update"] = pd.to_datetime(heat_df["Last Update"]).dt.strftime('%Y-%m-%d')
heat_df = heat_df[heat_df["Confirmed"]>0]

heat_df["local_growth"] = heat_df.groupby(["Latitude", "Longitude"]).Confirmed.pct_change()
heat_df["local_growth"] = heat_df["local_growth"].clip(0).fillna(0)


In [23]:
# Create weight column, using date
heat_df['Confirmed'] = df_acc['Confirmed'].astype(float)
heat_df['Deaths'] = df_acc['Deaths'].astype(float)
heat_df['Recovered'] = df_acc['Recovered'].astype(float)

heat_df = heat_df.dropna(axis=0, subset=['Latitude','Longitude', 'Confirmed'])
heat_df = heat_df.sort_values("Last Update")
heat_df["Last Update"] = pd.to_datetime(heat_df["Last Update"]).dt.strftime('%Y-%m-%d')
heat_df = heat_df[heat_df["Confirmed"]>0]

heat_df["local_growth"] = heat_df.groupby(["Latitude", "Longitude"]).Confirmed.pct_change()
heat_df["local_growth"] = heat_df["local_growth"].clip(0).fillna(0)


In [24]:
# for visualizing it assymtotically - scaling so that china won't overshadow everything else
limit_value = heat_df["Confirmed"].max()
alpha = 1000

# List comprehension to make out list of lists
confirmed_data = [[[row['Latitude'],row['Longitude'],alpha*np.log(1+row["Confirmed"]/limit_value)/(1+alpha*np.log(1+row["Confirmed"]/limit_value))] for index, row in heat_df[heat_df['Last Update'] == i].iterrows()] for i in heat_df["Last Update"].unique()]

# Plot it on the map
hm = plugins.HeatMapWithTime(confirmed_data,index=list(heat_df["Last Update"].unique()),name="Confirmed cases",auto_play=True,max_opacity=0.8)
hm.add_to(map_hooray)

ctrl = folium.LayerControl()
ctrl.add_to(map_hooray)

<folium.map.LayerControl at 0x7f5d322caa20>

In [31]:

# Display the map
map_hooray