In [1]:
covid_url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/"
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import json
import urllib
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [2]:
covid_json_unformated = urllib.request.urlopen(covid_url).read().decode("utf-8")
covid_json = json.loads(covid_json_unformated)
cdf = pd.DataFrame(covid_json['records'])

In [50]:
for country, grp in cdf.groupby("countriesAndTerritories"):
    if country == "Holy_See":
        print(grp["delta14d-incidence"])

4387       0.00
4388       0.00
4389       0.00
4390       0.00
4391       0.00
4392       0.00
4393       0.00
4394       0.00
4395       0.00
4396       0.00
4397       0.00
4398       0.00
4399       0.00
4400       0.00
4401       0.00
4402    1717.79
4403       0.00
4404   -1717.79
4405       0.00
4406       0.00
4407       0.00
4408       0.00
4409       0.00
4410       0.00
4411       0.00
4412       0.00
4413       0.00
4414       0.00
4415       0.00
4416       0.00
4417       0.00
4418       0.00
4419       0.00
4420       0.00
4421       0.00
4422       0.00
4423       0.00
4424       0.00
4425     122.70
4426     245.40
4427       0.00
4428    -245.40
4429       0.00
4430     122.70
4431     490.80
4432    -122.70
4433    -490.80
4434    -122.70
Name: delta14d-incidence, dtype: float64


Rename columns to something more Pythonian. If you think they look already great, then at least rename notification_rate_per_100000_population_14-days to 14d-incidence

Identify which columns have not been casted to an appropriate type during loading!

We did not cover datetime objects in pandas, however they are quite powerful!

Try:

In [3]:
cdf.rename(
    columns={"notification_rate_per_100000_population_14-days": "14d-incidence"},
    inplace=True
)

In [4]:
cdf['date_reported'] = pd.to_datetime(cdf['dateRep'], format='%d/%m/%Y', errors='raise')
cdf.drop(columns = "dateRep", inplace = True)

Now you can treat the column as a datetime objects using df[col].dt , e.g. https://docs.python.org/3/library/datetime.html#datetime.date.year

In [20]:
cdf['date_reported'].dt.day.head()

0     8
1     1
2    25
3    18
4    11
Name: date_reported, dtype: int64

Create a new column deltaTime_since_start_of_recording

Create histograms for different columns or describe the df. Can you spot the inconsistency in the data? Fix it! :)

Identify those countries (grouped by continent) which showed the most drastic increase most drastic and decrease of the 14d-incidence within the different years since recording. Visualize intuitively!

Which country showed the highest/lowest fluctuation in 14d-incidence within a year?

In [5]:
cdf["deltaTime_since_start_of_recording"] =cdf["date_reported"] - cdf["date_reported"].min()

In [51]:
for variable in list(cdf.describe().columns):
    fig = px.histogram(cdf, x=variable)
    fig.show()

In [53]:
cdf[cdf["deaths_weekly"]<0] = 0
cdf[cdf["cases_weekly"]<0] = 0
cdf["14d-incidence"] = cdf["14d-incidence"].fillna(0)
cdf["14d-incidence"] = cdf["14d-incidence"].replace("", 0)
cdf["14d-incidence"] = cdf["14d-incidence"].astype(float)
cdf[cdf["14d-incidence"]<0] = 0
cdf = cdf[cdf["popData2019"]!=0]
cdf = cdf[cdf["countriesAndTerritories"]!="Holy_See"]
cdf = cdf[cdf["countriesAndTerritories"]!="Gibraltar"]


In [58]:
inc = []
dec = []

for cont, cont_grp in cdf.groupby("continentExp"):
    inc_country = []
    dec_country = []
    
    for country, count_grp in cont_grp.groupby("countriesAndTerritories"):
        delta14d = count_grp["14d-incidence"].diff().fillna(0)
        inc_country.append((max(delta14d), country))
        dec_country.append((abs(min(delta14d)), country))

        
    inc.append(max(inc_country))
    dec.append(max(dec_country))
print([y[1] for y in dec])

fig = go.Figure()
fig.add_trace(go.Bar(
    x=list(cdf.groupby("continentExp").groups.keys()),
    y=[y[0] for y in inc],
    name='Highest Increase',
    text = [y[1] for y in inc],
    marker_color='indianred'
))
fig.add_trace(go.Bar(
    x=list(cdf.groupby("continentExp").groups.keys()),
    y=[y[0] for y in dec],
    name='Highest Decrease',
    text = [y[1] for y in dec],
    marker_color='lightblue'
))

fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide',barmode='group', title = "Change in 14 day incidence", yaxis_title = "delta 14d-incidence")
fig.show()

['Seychelles', 'Aruba', 'Israel', 'Ireland', 'French_Polynesia', 'Cases_on_an_international_conveyance_Japan']


Create a line plot showing the 14-incidence for all European countries. Use groupby operation to generate the data list for the plotly plot.

Create a smoothed version of the 14d-incidence by averaging 3 months.

Create a radial plot of death rate / 100000 people (see popData2019), where one year completes a circle, i.e. 360˚. Visualize the recored years for Italy, Germany, Sweden and Greece. Hint you might need to turn the dateTime into day within the year (%j) and adjust 365 to 360 degrees.

Optional: Find "regular" mortality rates for those countries and visualize it in the plot as well.

In [26]:
df = cdf[["continentExp", "countriesAndTerritories", "14d-incidence"]].groupby("continentExp")

fig = px.line(cdf.loc[df.groups["Europe"]].sort_values("date_reported"), x="date_reported", y="14d-incidence", color='countriesAndTerritories')
fig.show()

In [27]:
df = cdf[["continentExp", "countriesAndTerritories", "14d-incidence"]].groupby("continentExp")

df_toplot = cdf.loc[df.groups["Europe"]].sort_values("date_reported")
df_toplot["14d-incidence"] = df_toplot["14d-incidence"].rolling(window = 12).mean()

fig = px.line(df_toplot, x="date_reported", y="14d-incidence", color='countriesAndTerritories')
fig.show()

In [53]:
import plotly.express as px

cdf["death_rate"] = cdf["cases_weekly"]*100000/cdf["popData2019"]
cdf["yearprog"] = cdf["deltaTime_since_start_of_recording"]/365

fig = px.line_polar(cdf.query('countriesAndTerritories == "Italy" or countriesAndTerritories == "Germany" or countriesAndTerritories == "Sweden" or countriesAndTerritories == "Greece"'), r="death_rate", theta = "yearprog", color = "countriesAndTerritories",
                    title = "Death Rate of COVID")
fig.show()

Create a radial plot of death rate / 100000 people (see popData2019), where one year completes a circle, i.e. 360˚. Visualize the recored years for Italy, Germany, Sweden and Greece. Hint you might need to turn the dateTime into day within the year (%j) and adjust 365 to 360 degrees.