In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

# Per-country daily COVID-19 CSV hosted in the ec-jrc/COVID-19 GitHub repository.
JRC_COVID_CSV_URL = (
    'https://raw.githubusercontent.com/ec-jrc/COVID-19/master/'
    'data-by-country/jrc-covid-19-all-days-by-country.csv'
)

# Load the whole file into a DataFrame; no columns are parsed or renamed here.
data = pd.read_csv(JRC_COVID_CSV_URL)

In [None]:
# Preview the first ten rows to check that the download parsed as expected.
data.head(n=10)

Unnamed: 0,Date,iso3,CountryName,lat,lon,CumulativePositive,CumulativeDeceased,CumulativeRecovered,CurrentlyPositive,Hospitalized,IntensiveCare,EUcountry,EUCPMcountry,NUTS
0,2020-01-03,AIA,Anguilla,18.225119,-63.07213,0,0.0,,0,,,False,False,AI
1,2020-01-03,DMA,Dominica,15.416749,-61.36413,0,0.0,,0,,,False,False,DM
2,2020-01-03,DOM,Dominican Republic,18.736509,-70.161744,0,0.0,,0,,,False,False,DO
3,2020-01-03,FLK,Falkland Islands (Malvinas),-51.800581,-59.528839,0,0.0,,0,,,False,False,FK
4,2020-01-03,IDN,Indonesia,-2.518722,118.015568,0,0.0,,0,,,False,False,ID
5,2020-01-03,RUS,Russian Fed.,61.523111,-74.9,0,0.0,,0,,,False,False,RU
6,2020-01-03,SVN,Slovenia,46.145528,14.974541,0,0.0,,0,,,True,True,SI
14,2020-01-04,SVN,Slovenia,46.145528,14.974541,0,0.0,,0,,,True,True,SI
13,2020-01-04,RUS,Russian Fed.,61.523111,-74.9,0,0.0,,0,,,False,False,RU
12,2020-01-04,IDN,Indonesia,-2.518722,118.015568,0,0.0,,0,,,False,False,ID


In [None]:
# Inspect the dtype pandas inferred for every column
# (the "Date" column is still a plain object column at this point).
data.dtypes

Date                    object
iso3                    object
CountryName             object
lat                    float64
lon                    float64
CumulativePositive       int64
CumulativeDeceased     float64
CumulativeRecovered    float64
CurrentlyPositive        int64
Hospitalized           float64
IntensiveCare          float64
EUcountry                 bool
EUCPMcountry              bool
NUTS                    object
dtype: object

In [None]:
# Create a line chart of currently positive cases for all EU countries.

# Clean the frame in one chained expression instead of mutating it in place:
#   * drop rows that have no CurrentlyPositive value,
#   * parse "Date" into datetime so it sorts and plots chronologically,
#   * order the rows by date (mergesort is stable, so re-running the cell
#     always yields the same row order for rows sharing a date).
# The result is bound back to `data` because later cells read `data` and
# rely on "Date" being a datetime column.
data = (
    data
    .dropna(subset=["CurrentlyPositive"])
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .sort_values(by="Date", kind="mergesort")
)

# "EUcountry" is already boolean, so it can be used directly as a row mask.
eu_rows = data[data["EUcountry"]]

# Names of the EU countries present in the dataset.
eu_countries = eu_rows["CountryName"].unique()

# One line per country. `eu_rows` is exactly the set of rows the original
# `isin(eu_countries) & EUcountry` mask selected, because `eu_countries`
# is derived from the same flag.
fig = px.line(data_frame=eu_rows,
              x="Date", y="CurrentlyPositive", color="CountryName",
              labels={"CurrentlyPositive": "Currently Positive Cases", "Date": "Date"},
              title="COVID-19 Currently Positive Cases over Time in EU Countries")

fig.show()

In [None]:
# Create a Daily New Cases chart for France.
#
# Daily new cases are the day-over-day increase of the *cumulative* positive
# count. Differencing "CurrentlyPositive" (active cases) would instead give
# the net change in active cases, which also moves when people recover or die.
france_rows = (data['CountryName'] == "France") & (data['CumulativePositive'] > 0)

france = (
    data.loc[france_rows, ['Date', 'CumulativePositive']]
    # diff() is order-dependent, so sort explicitly rather than relying on
    # an earlier cell having sorted `data`.
    .sort_values('Date')
    # The first row has no predecessor, so its DailyNewCases is NaN.
    .assign(DailyNewCases=lambda frame: frame['CumulativePositive'].diff())
)

fig1 = px.line(france, x='Date', y='DailyNewCases',
               labels={'DailyNewCases': 'Daily New Cases'},
               title='Daily New Cases for France')
fig1.show()

In [None]:
# Spain data checkup: plot daily new cases for Spain.
#
# Daily new cases are the day-over-day increase of the *cumulative* positive
# count. Differencing "CurrentlyPositive" (active cases) would instead give
# the net change in active cases, which also moves when people recover or die.
spain_rows = (data['CountryName'] == "Spain") & (data['CumulativePositive'] > 0)

spain = (
    data.loc[spain_rows, ['Date', 'CumulativePositive']]
    # diff() is order-dependent, so sort explicitly rather than relying on
    # an earlier cell having sorted `data`.
    .sort_values('Date')
    # The first row has no predecessor, so its DailyNewCases is NaN.
    .assign(DailyNewCases=lambda frame: frame['CumulativePositive'].diff())
)

fig8 = px.line(spain, x='Date', y='DailyNewCases',
               labels={'DailyNewCases': 'Daily New Cases'},
               title='Daily New Cases for Spain')
fig8.show()
# Author's original observation, made on the CurrentlyPositive-based plot:
# the data for Spain is erroneous and cannot be fixed.
# NOTE(review): re-check this conclusion against the series plotted here.

In [None]:
# Filter data for EU countries.
# .copy() makes this an independent frame rather than a slice of `data`, so
# adding columns to it in later cells does not raise SettingWithCopyWarning.
european_countries = data[data['EUcountry']].copy()

# Keep only the rows reported for the last day of 2020.
end_of_2020 = european_countries[european_countries['Date'] == '2020-12-31']

# Choropleth of cumulative positive cases, restricted to the European view
# so the map matches its title (the default scope draws the whole world).
fig2 = px.choropleth(end_of_2020,
                    locations='iso3',
                    color='CumulativePositive',
                    hover_name='CountryName',
                    scope='europe',
                    projection='mercator',
                    title='COVID-19 Cases in European Countries at the End of 2020',
                    labels={'CumulativePositive': 'Cumulative Positive Cases', 'iso3': 'Country Code'},
                    color_continuous_scale='Viridis',
                    range_color=(0, end_of_2020['CumulativePositive'].max())
                   )

# Map styling: black country borders, land and (transparent) ocean layers on,
# coastlines, lakes and rivers off. Country names are not drawn on the map;
# they appear on hover via `hover_name`.
fig2.update_geos(showcountries=True, countrycolor="Black",
                 showcoastlines=False, showland=True, showocean=True,
                 showlakes=False, showrivers=False, showsubunits=True,
                 oceancolor="rgba(0, 0, 0, 0)")
fig2.show()


In [None]:
# Create a chart for the progression of fatality rate in European countries:

# Fatality rate (%) = cumulative deaths / cumulative positive cases * 100,
# computed per row (i.e. per country and date).
# Rows with no positive cases get NaN instead of a division-by-zero inf,
# which would otherwise distort the chart and any later averages.
confirmed_cases = european_countries['CumulativePositive']

# assign() returns a new frame, so nothing is written into a slice of `data`
# and pandas raises no SettingWithCopyWarning.
european_countries = european_countries.assign(
    FatalityRate=(european_countries['CumulativeDeceased']
                  / confirmed_cases.where(confirmed_cases > 0)) * 100
)

# Plot one line per country.
fig3 = px.line(european_countries, x = 'Date', y = 'FatalityRate', color = 'CountryName',
               title = 'Progression of Fatality Rates for European Countries (2020-2022)',
               labels = {'Date': 'Year', 'FatalityRate': 'Fatality Rate (%)'}
              )

fig3.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
# Rank EU countries by their mean fatality rate within each calendar year.

# Mean FatalityRate per (year, country). The year key is taken from the
# "Date" column, so the resulting index level is also called "Date".
average_fatality_rates = (
    european_countries
    .groupby([european_countries['Date'].dt.year, 'CountryName'])['FatalityRate']
    .mean()
)

# Flatten the (year, country) index back into ordinary columns.
average_fatality_rates_df = average_fatality_rates.reset_index()

# Order by year first, then by ascending fatality rate within the year.
sorted_countries = average_fatality_rates_df.sort_values(by=['Date', 'FatalityRate'])

# One ranking table per year; "Date" holds the integer year here.
sort_2020, sort_2021, sort_2022 = [
    sorted_countries[sorted_countries['Date'] == year]
    for year in (2020, 2021, 2022)
]

fig4 = px.bar(
    sort_2020,
    x='CountryName',
    y='FatalityRate',
    title='Top EU countries by lowest Fatality Rate in 2020',
    labels={'CountryName': 'Country', 'FatalityRate': 'Fatality Rate (%)'},
)
fig4.show()

In [None]:
# Bar chart of the 2021 ranking table (one bar per country).
fig5 = px.bar(
    sort_2021,
    x='CountryName',
    y='FatalityRate',
    title='Top EU countries by lowest Fatality Rate in 2021',
    labels={'CountryName': 'Country', 'FatalityRate': 'Fatality Rate (%)'},
)
fig5.show()

In [None]:
# Bar chart of the 2022 ranking table (one bar per country).
fig6 = px.bar(
    sort_2022,
    x='CountryName',
    y='FatalityRate',
    title='Top EU countries by lowest Fatality Rate in 2022',
    labels={'CountryName': 'Country', 'FatalityRate': 'Fatality Rate (%)'},
)
fig6.show()