In [1]:
import pandas as pd
from datetime import date
from glob import glob
import seaborn as sns
import plotly.offline as pyo
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px

# with this renderer, the plots are also shown in the html file.
pio.renderers.default = "iframe_connected"

In [2]:
today = date.today()
dates = [date.strftime('%m-%d-%Y') for date in pd.date_range(start='2020-01-22', end=today)][:-1]
dates_as_date = pd.date_range(start='2020-01-22', end=today)[:-1]


# standardize column names for all entries
def rename_columns(column):
    column_map = {
        'Lat': 'Latitude',
        'Long_': 'Longitude',
        'Incidence_Rate': 'Incident_Rate'
    }
    if column in column_map:
        return column_map[column]
    return column.replace('/', '_').replace('-', '_').replace(' ', '_')

all_cases = []
for d in range(len(dates)):
    path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv".format(dates[d])
    data = pd.read_csv(path)
    data.rename(columns=rename_columns, inplace=True)
    data['Date'] = dates_as_date[d]
    all_cases.append(data)

df = pd.concat(all_cases)

In [3]:
# all_cases.Country_Region.unique()

# standardize country names for all entries
country_mapping = {
    'MS Zaandam|Diamond Princess|Cruise Ship': 'Others', # move cruise ships to others
    'Hong Kong.+': 'Hong Kong',
    'Iran.+': 'Iran',
    '.*Congo.*': 'Congo',
    'Mainland China': 'China',
    '.*Bahamas.*': 'The Bahamas',
    '.*Gambia.*': 'The Gambia',
    'Viet Nam': 'Vietnam',
    'Taiwan\*': 'Taiwan',
    'Cote d\'Ivoire': 'Ivory Coast',
    'Cabo Verde': 'Cape Verde',
    'Russian Federation': 'Russia',
    ' Azerbaijan': 'Azerbaijan',
    'Holy See': 'Vatican City',
    'Republic of Ireland': 'Ireland',
    'Republic of Moldova': 'Moldova',
    'Czechia': 'Czech Republic',
    'Republic of Korea|Korea, South': 'South Korea',
    'Timor-Leste': 'East Timor',
    'Macao SAR|Macau': 'Macao',
    'UK': 'United Kingdom',
    'Jersey|Guernsey': 'Channel Islands',
    'Dominican Republicn Republic|Dominica': 'Dominican Republic'
}

df['Country_Region'].replace(
    to_replace=country_mapping.keys(),
    value=country_mapping.values(),
    regex=True,
    inplace=True
)

In [8]:
df.to_csv('worldwide.csv')

In [2]:
df = pd.read_csv('worldwide.csv')


Columns (15) have mixed types.Specify dtype option on import or set low_memory=False.



In [3]:
# group data by country
daily_updates = df.groupby(['Country_Region', 'Date']).agg(
     Confirmed = ('Confirmed','sum'),
     Deaths = ('Deaths','sum'),
 ).reset_index()

In [4]:
# get changes in data
updates_per_country = daily_updates.groupby('Country_Region')
# diff takes difference to point in group before it
daily_updates['New_Confirmed'] = updates_per_country['Confirmed'].diff().fillna(0)
daily_updates['New_Deaths'] = updates_per_country['Deaths'].diff().fillna(0)

In [5]:
# reorder columns
worldwide_pretty = daily_updates.loc[:, ['Date', 'Country_Region', 'New_Confirmed', 'Confirmed', 'New_Deaths', 'Deaths']]

In [6]:
worldwide_pretty.head(3)

Unnamed: 0,Date,Country_Region,New_Confirmed,Confirmed,New_Deaths,Deaths
0,2020-02-24,Afghanistan,0.0,1.0,0.0,0.0
1,2020-02-25,Afghanistan,0.0,1.0,0.0,0.0
2,2020-02-26,Afghanistan,0.0,1.0,0.0,0.0


In [7]:
# create df with one column per date
worldwide_all = worldwide_pretty.groupby('Date').agg(
    Confirmed = ('Confirmed', 'sum'),
    New_Confirmed = ('New_Confirmed', 'sum')
)
# create a moving average
worldwide_all['SMA_14'] = round(worldwide_all.loc[:,'New_Confirmed'].rolling(window=14).mean())
worldwide_all = worldwide_all.reset_index()

In [8]:
worldwide_all.head(3)

Unnamed: 0,Date,Confirmed,New_Confirmed,SMA_14
0,2020-01-22,557.0,0.0,
1,2020-01-23,1097.0,537.0,
2,2020-01-24,941.0,-158.0,


In [23]:
plot = go.Figure()

plot.add_trace(
    go.Bar(
        name='Daily Meas',
        x=worldwide_all["Date"], 
        y=worldwide_all["New_Confirmed"],
        marker={
            # size': 10,
            'color': 'red',
            'opacity': 1
        },
    )
)

plot.add_trace(
    go.Scatter(
        name='14 Day MA',
        x=worldwide_all["Date"], 
        y=worldwide_all["SMA_14"],
        mode='lines',
        marker={
            'size': 1,
            'color': 'black',
            'opacity': 0.7
        },
        line_width=1.5
    )
)


plot.update_layout(
    title_x=0,
    title='New Corona Cases Worldwide',
    margin={
        'l': 25,
        'b': 25,
        't': 50,
        'r': 20
    },
    height=400,
    yaxis={
        'color': '#748B99',
        'showgrid': True,
        'showline': True,
        'gridwidth': 1,
        'gridcolor': '#B6C3CC',
        # 'rangemode': "tozero",
        'range':
            [0,
             worldwide_all.New_Confirmed.max() * 1.05],
        # 'ticksuffix': ' '
        # 'tickprefix': 'id: '
    },
    xaxis={
        'showgrid': False,
        'color': '#748B99',
        'showline': True,
        'linecolor': '#B6C3CC',
    },
    paper_bgcolor='#FFFFFF',
    plot_bgcolor='rgba(0,0,0,0)',
    showlegend=False,
    legend={
        'yanchor': 'top',
        'y': 0.99,
        'xanchor': 'right',
        'x': 0.99
    })

plot.show()

In [99]:
path = "https://raw.githubusercontent.com/openZH/covid_19/master/COVID19_Fallzahlen_CH_total_v2.csv"
chData = pd.read_csv(path)

In [100]:
chGrouped = chData.groupby('abbreviation_canton_and_fl')

In [101]:
chData['ncumul_conf'] = chGrouped['ncumul_conf'].apply(lambda x: x.ffill())
chData['ncumul_deceased'] = chGrouped['ncumul_deceased'].apply(lambda x: x.ffill())
chData['ncumul_conf'] = chData['ncumul_conf'].fillna(0)
chData['ncumul_deceased'] = chData['ncumul_deceased'].fillna(0)

In [102]:
chData = chData[chData.date >= '2020-05-31']

In [112]:
chGrouped = chData.groupby('abbreviation_canton_and_fl')['ncumul_conf']
chData = chData.assign(min_conf=chGrouped.transform(min))
chData['total_cases'] = chData['ncumul_conf'] - chData['min_conf']

chGrouped = chData.groupby('abbreviation_canton_and_fl')['ncumul_deceased']
chData = chData.assign(min_deceased=chGrouped.transform(min))
chData['total_deaths'] = chData['ncumul_deceased'] - chData['min_deceased']

chData = chData[chData.date >= '2020-06-01']

chData = chData[['date','abbreviation_canton_and_fl','total_cases','total_deaths']]

In [113]:
chData

Unnamed: 0,date,abbreviation_canton_and_fl,total_cases,total_deaths
2359,2020-06-01,BL,0.0,0.0
2360,2020-06-01,FR,0.0,0.0
2361,2020-06-01,GR,0.0,0.0
2362,2020-06-01,JU,0.0,0.0
2363,2020-06-01,NE,0.0,0.0
...,...,...,...,...
11803,2021-06-14,SZ,11896.0,213.0
11804,2021-06-14,TG,18498.0,434.0
11805,2021-06-14,BS,11945.0,155.0
11806,2021-06-14,AI,1199.0,15.0
