In [181]:
covid_url = "https://opendata.ecdc.europa.eu/covid19/casedistribution/json/"
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import json
import urllib
import pandas as pd
import numpy as np
import plotly.express as px

In [182]:
covid_json_unformated = urllib.request.urlopen(covid_url).read().decode("utf-8")
covid_json = json.loads(covid_json_unformated)
cdf = pd.DataFrame(covid_json['records'])

In [86]:
cdf.head(10)

Unnamed: 0,year_week,cases_weekly,deaths_weekly,countriesAndTerritories,geoId,countryterritoryCode,popData2019,continentExp,14d-incidence,date_reported,deltaTime_since_start_of_recording,delta14d-incidence
0,2021-05,238,8,Afghanistan,AF,AFG,38041757.0,Asia,1.33,2021-02-08 00:00:00,399 days 00:00:00,
1,2021-04,267,16,Afghanistan,AF,AFG,38041757.0,Asia,2.58,2021-02-01 00:00:00,392 days 00:00:00,1.25
2,2021-03,713,43,Afghanistan,AF,AFG,38041757.0,Asia,3.34,2021-01-25 00:00:00,385 days 00:00:00,0.76
3,2021-02,557,45,Afghanistan,AF,AFG,38041757.0,Asia,3.24,2021-01-18 00:00:00,378 days 00:00:00,-0.1
4,2021-01,675,71,Afghanistan,AF,AFG,38041757.0,Asia,4.15,2021-01-11 00:00:00,371 days 00:00:00,0.91
5,2020-53,902,60,Afghanistan,AF,AFG,38041757.0,Asia,7.61,2021-01-04 00:00:00,364 days 00:00:00,3.46
6,2020-52,1994,88,Afghanistan,AF,AFG,38041757.0,Asia,7.19,2020-12-28 00:00:00,357 days 00:00:00,-0.42
7,2020-51,740,111,Afghanistan,AF,AFG,38041757.0,Asia,6.56,2020-12-21 00:00:00,350 days 00:00:00,-0.63
8,2020-50,1757,71,Afghanistan,AF,AFG,38041757.0,Asia,9.01,2020-12-14 00:00:00,343 days 00:00:00,2.45
9,2020-49,1672,137,Afghanistan,AF,AFG,38041757.0,Asia,7.22,2020-12-07 00:00:00,336 days 00:00:00,-1.79


Rename columns to something more Pythonian. If you think they look already great, then at least rename notification_rate_per_100000_population_14-days to 14d-incidence

Identify which columns have not been casted to an appropriate type during loading!

We did not cover datetime objects in pandas, however they are quite powerful!

Try:

In [183]:
cdf.rename(
    columns={"notification_rate_per_100000_population_14-days": "14d-incidence"},
    inplace=True
)

In [184]:
cdf['date_reported'] = pd.to_datetime(cdf['dateRep'], format='%d/%m/%Y', errors='raise')
cdf.drop(columns = "dateRep", inplace = True)

Now you can treat the column as a datetime objects using df[col].dt , e.g. https://docs.python.org/3/library/datetime.html#datetime.date.year

In [185]:
cdf['date_reported'].dt.day.head()

0     8
1     1
2    25
3    18
4    11
Name: date_reported, dtype: int64

Create a new column deltaTime_since_start_of_recording

Create histograms for different columns or describe the df. Can you spot the inconsistency in the data? Fix it! :)

Identify those countries (grouped by continent) which showed the most drastic increase most drastic and decrease of the 14d-incidence within the different years since recording. Visualize intuitively!

Which country showed the highest/lowest fluctuation in 14d-incidence within a year?

In [186]:
cdf["deltaTime_since_start_of_recording"] =cdf["date_reported"] - cdf["date_reported"].min()

In [59]:
for variable in list(cdf.describe().columns):
    fig = px.histogram(cdf, x=variable)
    fig.show()

In [187]:
cdf[cdf["deaths_weekly"]<0] = 0
cdf[cdf["cases_weekly"]<0] = 0
cdf["14d-incidence"] = cdf["14d-incidence"].fillna(0)
cdf["14d-incidence"] = cdf["14d-incidence"].replace("", 0)
cdf["14d-incidence"] = cdf["14d-incidence"].astype(float)
cdf[cdf["14d-incidence"]<0] = 0
cdf = cdf[cdf["popData2019"]!=0]



In [188]:
cdf["delta14d-incidence"] = cdf["14d-incidence"].diff()

In [87]:
cdf.pivot_table(
    index=[
        "continentExp",
        "countriesAndTerritories", 

    ], 
    aggfunc={
        'delta14d-incidence' : [np.max, np.min]
    }
)

Unnamed: 0_level_0,Unnamed: 1_level_0,delta14d-incidence,delta14d-incidence
Unnamed: 0_level_1,Unnamed: 1_level_1,amax,amin
continentExp,countriesAndTerritories,Unnamed: 2_level_2,Unnamed: 3_level_2
Africa,Algeria,8.15,-8.13
Africa,Angola,2.33,-2.17
Africa,Benin,4.66,-2.19
Africa,Botswana,207.54,-43.58
Africa,Burkina_Faso,5.82,-3.81
...,...,...,...
Oceania,Papua_New_Guinea,0.58,-0.70
Oceania,Solomon_Islands,0.74,-0.60
Oceania,Vanuatu,0.33,-0.33
Oceania,Wallis_and_Futuna,0.00,0.00


Create a line plot showing the 14-incidence for all European countries. Use groupby operation to generate the data list for the plotly plot.

Create a smoothed version of the 14d-incidence by averaging 3 months.

Create a radial plot of death rate / 100000 people (see popData2019), where one year completes a circle, i.e. 360˚. Visualize the recored years for Italy, Germany, Sweden and Greece. Hint you might need to turn the dateTime into day within the year (%j) and adjust 365 to 360 degrees.

Optional: Find "regular" mortality rates for those countries and visualize it in the plot as well.

In [134]:
df = cdf[["continentExp", "countriesAndTerritories", "14d-incidence"]].groupby("continentExp")

fig = px.line(cdf.loc[df.groups["Europe"]].sort_values("date_reported"), x="date_reported", y="14d-incidence", color='countriesAndTerritories')
fig.show()

In [152]:
df = cdf[["continentExp", "countriesAndTerritories", "14d-incidence"]].groupby("continentExp")

df_toplot = cdf.loc[df.groups["Europe"]].sort_values("date_reported")
df_toplot["14d-incidence"] = df_toplot["14d-incidence"].rolling(window = 12).mean()

fig = px.line(df_toplot, x="date_reported", y="14d-incidence", color='countriesAndTerritories')
+üpo fdy<fig.show()

In [201]:
import plotly.express as px

cdf["death_rate"] = cdf["cases_weekly"]/(cdf["popData2019"]/100000)
cdf["yearprog"] = cdf["deltaTime_since_start_of_recording"]/365

fig = px.line_polar(cdf.query('countriesAndTerritories == "Italy" or countriesAndTerritories == "Germany" or countriesAndTerritories == "Sweden" or countriesAndTerritories == "Greece"'), r="death_rate", color = "countriesAndTerritories", theta="yearprog", line_close=True,
                    color_discrete_sequence=px.colors.sequential.Plasma_r,
                    template="plotly_dark",)
fig.show()

Create a radial plot of death rate / 100000 people (see popData2019), where one year completes a circle, i.e. 360˚. Visualize the recored years for Italy, Germany, Sweden and Greece. Hint you might need to turn the dateTime into day within the year (%j) and adjust 365 to 360 degrees.