# COVID-19 and Influenza - comparison of cases and deaths

**Sources:**
- https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data
- https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset
- https://flunewseurope.org/VirusCharacteristics



**Terms of Use:**
1.	This data set is licensed under the Creative Commons Attribution 4.0 International (CC BY 4.0) by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. Copyright Johns Hopkins University 2020.
2.	Attribute the data as the "COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University" or "JHU CSSE COVID-19 Data" for short, and the url: https://github.com/CSSEGISandData/COVID-19.
3.	For publications that use the data, please cite the following publication: "Dong E, Du H, Gardner L. An interactive web-based dashboard to track COVID-19 in real time. Lancet Inf Dis. 20(5):533-534. doi: 10.1016/S1473-3099(20)30120-1"

### Notes:

**Influenza:**
- Add a description - where I found flu reports
- “Influenza virus detections by type in Europe.xlsx”

**Covid:** Use csv files from csse_covid_19_time_series folder
- refresh this report: (https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data) and add README - description

In [2]:
import pandas as pd
import os
from pathlib import Path


from bs4 import BeautifulSoup
import requests
import re 
import io
import numpy as np

In [3]:
# The notebook has no __file__; the previous expression used the *string* "__file__",
# which only resolved to the current working directory by accident. State that explicitly.
BASE_DIR = os.getcwd()

In [4]:
directory_flue = os.path.join(BASE_DIR, 'influenza')

# 1. Influenza

----------
Influenza virus detections by type in Europe:

In [None]:
ws = 'Influenza virus detections by type in Europe.xlsx'
flu_detected = pd.read_excel(os.path.join(directory_flue, ws).replace('\\','/'))

In [None]:
flu_detected.head()

In [None]:
flu_detected['Region'].unique()

In [None]:
flu_detected[(flu_detected.Country=='Poland')&(flu_detected.Week=='2015-W40')]

In [None]:
len(flu_detected[(flu_detected.Country=='Poland')&(flu_detected.Region=='EU/EEA')])

In [None]:
len(flu_detected[(flu_detected.Country=='Poland')&(flu_detected.Region=='WHO Europe')])

In [None]:
flu_detected[(flu_detected.Country=='Poland')&(flu_detected.Region=='WHO Europe')].iloc[:,5:].values.sum()

In [None]:
flu_detected[(flu_detected.Country=='Poland')&(flu_detected.Region=='EU/EEA')].iloc[:,5:].values.sum()

In [None]:
len(flu_detected[flu_detected.Region=='EU/EEA'])

In [None]:
print(f"In EEA we have {len(flu_detected[flu_detected.Region=='EU/EEA'])} records while in WHO Europe we have"
      f" {len(flu_detected[flu_detected.Region=='WHO Europe'])} records")

In [None]:
flu_detected = flu_detected[flu_detected.Region == 'WHO Europe']

In [None]:
flu_detected.info()

## First conclusion:
- Both tables look similar
- Rename column: Week → YearWeek
- What is "Surveillance System Type" and do we need it?
- Do we need the "Season" and "Region" columns? If not, remove them
- Create total cases for flu
- Unpivot the table
- Split the YearWeek column into 2 separate columns (Year and Week)

In [None]:
flu_detected.rename(columns={'Week':'YearWeek'},inplace=True)

In [None]:
flu_detected.columns

In [None]:
flu_detected['Surveillance System Type'].unique()

In [None]:
flu_detected['Season'].unique()

In [None]:
flu_detected['YearWeek'].unique()[:5]

So, I do not need columns "Surveillance System Type" (_one unique value: 'Non-sentinel'_), "Season" (_Week is more precise_) and "Region" (_only 'WHO Europe'_)

-----------
Now I will create 2 functions:
1. Calculate total influenza cases
2. Unpivot a table to keep it simple

In [None]:
# Remove the three columns that are not used further: Season, Region and Surveillance System Type
unused_columns = ['Season', 'Region', 'Surveillance System Type']
flu_detected = flu_detected.drop(columns=unused_columns)

In [None]:
# Total = sum of every flu-type column in the row.
# The columns are selected by name instead of by position: after dropping Season/Region/
# Surveillance System Type only 'Country' and 'YearWeek' are identifiers, so a positional
# slice starting at 4 would leave out the first two flu-type columns.
# 'Total Detected Cases' is excluded as well so that re-running the cell does not add the
# previous total to itself.
not_flu_types = ['Country', 'YearWeek', 'Total Detected Cases']
flu_type_values = (flu_detected
                   .drop(columns=not_flu_types, errors='ignore')
                   .select_dtypes(include='number'))
flu_detected['Total Detected Cases'] = flu_type_values.sum(axis=1)
flu_detected.head()

In [None]:
flu_detected[(flu_detected['Total Detected Cases']>0)&(flu_detected.YearWeek=='2021-W01')]

In [None]:
# Unpivot table 
def FluPivot(df, colname):
    """Unpivot a wide flu table into one row per country, week and flu type.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with the identifier columns 'Country' and 'YearWeek'; every
        other column is treated as a flu type.
    colname : str
        Name given to the value column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country', 'Flu Type', ``colname``, 'Year' and 'Week', ordered by
        'YearWeek' and then 'Country'. 'YearWeek' is split on its first '-'.
    """
    long_table = df.melt(id_vars=['Country', 'YearWeek'],
                         var_name='Flu Type',
                         value_name='Cases')
    long_table = long_table.sort_values(['YearWeek', 'Country'])

    # "2015-W40" -> "2015" and "W40"
    year_and_week = long_table['YearWeek'].str.split('-', n=1, expand=True)
    long_table[['Year', 'Week']] = year_and_week

    long_table = long_table.drop(columns=['YearWeek'])
    return long_table.rename(columns={'Cases': colname})

In [None]:
flu_detected2 = FluPivot(flu_detected,'Detected_Cases')
flu_detected2.head()

In [None]:
flu_detected2['Flu Type'].unique()

In [None]:
flu_detected2.info()

In [None]:
# Replace the verbose flu type labels with short ones
original_type_names = ['A not subtyped', 'A(H1)pdm09', 'A(H3)',
       'B lineage not determined', 'B/Vic', 'B/Yam']

new_type_names = ['A', 'A(H1)', 'A(H3)','B', 'B/Vic', 'B/Yam']

# One old -> new lookup applied in a single pass; labels that are not listed stay untouched
short_labels = dict(zip(original_type_names, new_type_names))
flu_detected2['Flu Type'] = flu_detected2['Flu Type'].replace(short_labels)

In [None]:
flu_detected2.head()

In [None]:
import matplotlib.pyplot as plt

In [None]:
for year in (flu_detected2.Year.unique()):
    print(year)

In [None]:
for year in (flu_detected2.Year.unique()):
    fd2 = flu_detected2[(flu_detected2.Year==year)&(flu_detected2['Flu Type']!='Total Detected Cases')].groupby(['Flu Type'])['Detected_Cases'].sum().sort_values().reset_index()

    fig, ax = plt.subplots()
    types = fd2['Flu Type']
    cases = fd2.Detected_Cases
    result = ax.bar(types,cases)

    ax.set_ylabel('Total Cases')
    ax.set_xlabel('Flu Types')
    ax.set_title(f'Detected Influenza Cases per type - {year}')

    plt.show()

In [None]:
# Amount od total cases
flu_detected2[(flu_detected2['Flu Type']=='Total Detected Cases')].sort_values(['Year']).groupby(['Flu Type','Year'])\
['Detected_Cases'].sum().reset_index()

In [None]:
# Amount of records
flu_detected2[(flu_detected2['Flu Type']=='Total Detected Cases')].groupby('Year')['Flu Type'].count()

In [None]:
years = flu_detected2.Year.unique()
for year in years:
    print(f'In {year} we have {len(flu_detected2[flu_detected2.Year==year].Week.unique())} weeks')

In [None]:
# remove unnecesary weeks
# flu_detected2 = flu_detected2[flu_detected2.Year!='2015']
years_list = ['2016','2017','2018','2019','2020', '2021'] # we keep 2021 as covid data are available for this year
flu_detected2 = flu_detected2[flu_detected2['Year'].isin(years_list)]

In [None]:
fd2 = flu_detected2[(flu_detected2['Flu Type']=='Total Detected Cases')].sort_values(['Year'])\
.groupby(['Flu Type','Year'])['Detected_Cases'].sum().reset_index()

xs = fd2['Year']
ys = fd2.Detected_Cases.values

plt.rcParams["figure.figsize"] = (15,10)
plt.plot(xs,ys,'bo-')

for x,y in zip(xs,ys):

    #label = "{:.0f}".format(y)
    label = f'{y:,}'
    plt.annotate(label, # this is the text
                 (x,y), # this is the point to label
                 textcoords="offset points", # how to position the text
                 xytext=(0,10), # distance from text to points (x,y)
                 ha='center') # horizontal alignment can be left, right or center
plt.grid(color = 'green', linestyle = '--', linewidth = 0.5)

plt.show()

In [None]:
flu_detected2.shape

In [None]:
flu_detected2.head()

In [None]:
# flu_detected2 holds one row per flu type AND one 'Total Detected Cases' row for every
# country/week. Summing all of them would count each detection twice, so the total rows
# are left out and the weekly figure is rebuilt from the individual types.
flu_by_type = flu_detected2[flu_detected2['Flu Type'] != 'Total Detected Cases']
df_flu = flu_by_type.groupby(['Country','Year','Week'])['Detected_Cases'].sum().reset_index()

In [None]:
df_flu.info()

In [None]:
df_flu.sort_values(['Year','Week']).head(10)

-----
## Import Covid tables down below

## [Daily reports (csse_covid_19_daily_reports)](https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports)

This folder contains daily case reports. All timestamps are in UTC (GMT+0).

### File naming convention
MM-DD-YYYY.csv in UTC.

### Field description
* <b>FIPS</b>: US only. Federal Information Processing Standards code that uniquely identifies counties within the USA.
* <b>Admin2</b>: County name. US only.
* <b>Province_State</b>: Province, state or dependency name.
* <b>Country_Region</b>: Country, region or sovereignty name. The names of locations included on the Website correspond with the official designations used by the U.S. Department of State.
* <b>Last Update</b>: MM/DD/YYYY HH:mm:ss  (24 hour format, in UTC).
* <b>Lat</b> and <b>Long_</b>: Dot locations on the dashboard. All points (except for Australia) shown on the map are based on geographic centroids, and are not representative of a specific address, building or any location at a spatial scale finer than a province/state. Australian dots are located at the centroid of the largest city in each state.
* <b>Confirmed</b>: Counts include confirmed and probable (where reported).
* <b>Deaths</b>: Counts include confirmed and probable (where reported).
* <b>Recovered</b>: Recovered cases are estimates based on local media reports, and state and local reporting when available, and therefore may be substantially lower than the true number. US state-level recovered cases are from [COVID Tracking Project](https://covidtracking.com/).
* <b>Active:</b> Active cases = total cases - total recovered - total deaths.
* <b>Incident_Rate</b>: Incidence Rate = cases per 100,000 persons.
* <b>Case_Fatality_Ratio (%)</b>: Case-Fatality Ratio (%) = Number recorded deaths / Number cases.
* All cases, deaths, and recoveries reported are based on the date of initial report. Exceptions to this are noted in the "Data Modification" and "Retrospective reporting of (probable) cases and deaths" subsections below.  


What do we need:

* <b>Country_Region</b>: Country, region or sovereignty name. The names of locations included on the Website correspond with the official designations used by the U.S. Department of State.
* <b>Last Update</b>: MM/DD/YYYY HH:mm:ss  (24 hour format, in UTC).
* <b>Lat</b> and <b>Long_</b>: Dot locations on the dashboard. All points (except for Australia) shown on the map are based on geographic centroids, and are not representative of a specific address, building or any location at a spatial scale finer than a province/state. Australian dots are located at the centroid of the largest city in each state.
* <b>Confirmed</b>: Counts include confirmed and probable (where reported).
* <b>Deaths</b>: Counts include confirmed and probable (where reported).
* <b>Recovered</b>: Recovered cases are estimates based on local media reports, and state and local reporting when available, and therefore may be substantially lower than the true number. US state-level recovered cases are from [COVID Tracking Project](https://covidtracking.com/).
* <b>Active:</b> Active cases = total cases - total recovered - total deaths.
* <b>Incident_Rate</b>: Incidence Rate = cases per 100,000 persons.
* <b>Case_Fatality_Ratio (%)</b>: Case-Fatality Ratio (%) = Number recorded deaths / Number cases.
* All cases, deaths, and recoveries reported are based on the date of initial report. Exceptions to this are noted in the "Data Modification" and "Retrospective reporting of (probable) cases and deaths" subsections below.  


In [5]:
url = "https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports"
r = requests.get(url)

In [6]:
html_doc = r.text
# Name the parser explicitly: without it bs4 picks whichever parser is installed
# (and warns), so results could differ between machines.
soup = BeautifulSoup(html_doc, 'html.parser')
# href=True skips anchors without an href, for which link.get('href') would be None
a_tags = soup.find_all('a', href=True)

# Store a list of urls ending in .csv: urls => list
# The page links to ".../blob/master/...csv"; the raw file lives at the same path without "/blob".
urls = ['https://raw.githubusercontent.com' + re.sub('/blob', '', link['href'])
        for link in a_tags if link['href'].endswith('.csv')]

# Store a list of Data Frame names to be assigned to the list: df_list_names => list
# The name is the last path component without the ".csv" suffix, e.g. "01-01-2021".
df_list_names = [csv_url.rsplit('/', 1)[-1][:-len('.csv')] for csv_url in urls]

In [7]:
urls[:5]

['https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-01-2021.csv',
 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-02-2021.csv',
 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-03-2021.csv',
 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-04-2021.csv',
 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-05-2021.csv']

In [8]:
df_list_names[:5]

['01-01-2021', '01-02-2021', '01-03-2021', '01-04-2021', '01-05-2021']

In [9]:
comments = pd.DataFrame(columns=['File_Name','Added','Not_Added'])
comments.head()

Unnamed: 0,File_Name,Added,Not_Added


In [10]:
len(urls)

489

In [11]:
cols = ['Country_Region','Last_Update','Lat','Long_','Confirmed','Deaths','Recovered','File_Name']

covid_table = pd.DataFrame(columns = cols)
covid_table.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,File_Name


In [12]:
def Add_Comment(url_name, is_ok, is_not_ok):
    """Build a one-row log entry describing whether a file was loaded.

    Parameters
    ----------
    url_name : str
        Name of the processed file.
    is_ok, is_not_ok : int
        Flags stored in the 'Added' and 'Not_Added' columns.

    Returns
    -------
    pandas.DataFrame
        Single row with the columns 'File_Name', 'Added' and 'Not_Added'.
    """
    log_columns = ['File_Name', 'Added', 'Not_Added']
    record = {
        'File_Name': [url_name],
        'Added': [is_ok],
        'Not_Added': [is_not_ok],
    }
    return pd.DataFrame(record, columns=log_columns)

In [13]:
# some tables have a bit different column names, if so then I keep only the below names
col_names1 = ['Country_Region','Last_Update','Lat','Long_','Confirmed','Deaths','Recovered']
col_names2 = ['Country/Region','Last Update','Latitude','Longitude','Confirmed','Deaths','Recovered']
col_names3 = ['Country/Region','Last Update','Confirmed','Deaths','Recovered']

In [14]:
url

'https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports'

In [15]:
# Download every daily report, reduce it to the common set of columns and log per file
# whether it could be added. Frames are collected in lists and concatenated once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole table on each call.
loaded_frames = []
log_entries = []

for file_name, csv_url in zip(df_list_names, urls):
    try:
        response = requests.get(csv_url, timeout=60)
        response.raise_for_status()  # do not try to parse an HTTP error page as CSV
        # Reading the downloaded content and turning it into a pandas dataframe
        df = pd.read_csv(io.StringIO(response.content.decode('utf-8')))

        # The report layout changed over time; the column count tells the layouts apart.
        n_columns = df.shape[1]
        if n_columns in (12, 14):
            df = df[col_names1].copy()
        elif n_columns == 8:
            df = df[col_names2].copy()
        elif n_columns == 6:
            # This layout has no coordinates: add zero placeholders in the expected position
            df = df[col_names3].assign(Lat=0, Long_=0)
            df = df[['Country/Region','Last Update','Lat','Long_','Confirmed','Deaths','Recovered']]
        else:
            # Unknown layout: renaming by position would silently mislabel the columns
            raise ValueError(f'We have {n_columns} columns in {csv_url} file')

        df['File_Name'] = file_name
        df.columns = cols  # renaming the columns
    except (requests.RequestException, UnicodeDecodeError,
            pd.errors.ParserError, KeyError, ValueError) as err:
        print(f'{file_name} was not added: {err!r}')
        log_entries.append(Add_Comment(file_name, 0, 1))
    else:
        loaded_frames.append(df)
        log_entries.append(Add_Comment(file_name, 1, 0))

# Rebuilding both tables from scratch keeps the cell safe to re-run (no duplicated rows)
if loaded_frames:
    covid_table = pd.concat(loaded_frames, ignore_index=True)
if log_entries:
    comments = pd.concat(log_entries, ignore_index=True)

In [16]:
df.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,File_Name
0,Afghanistan,2021-01-01 05:23:07,33.93911,67.709953,51526,2191,41727,12-31-2020
1,Albania,2021-01-01 05:23:07,41.1533,20.1683,58316,1181,33634,12-31-2020
2,Algeria,2021-01-01 05:23:07,28.0339,1.6596,99610,2756,67127,12-31-2020
3,Andorra,2021-01-01 05:23:07,42.5063,1.5218,8049,84,7432,12-31-2020
4,Angola,2021-01-01 05:23:07,-11.2027,17.8739,17553,405,11044,12-31-2020


In [17]:
covid_table.head()

Unnamed: 0,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,File_Name
0,Afghanistan,2021-01-02 05:22:33,33.93911,67.709953,51526,2191,41727,01-01-2021
1,Albania,2021-01-02 05:22:33,41.1533,20.1683,58316,1181,33634,01-01-2021
2,Algeria,2021-01-02 05:22:33,28.0339,1.6596,99897,2762,67395,01-01-2021
3,Andorra,2021-01-02 05:22:33,42.5063,1.5218,8117,84,7463,01-01-2021
4,Angola,2021-01-02 05:22:33,-11.2027,17.8739,17568,405,11146,01-01-2021


In [18]:
covid_table.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1645514 entries, 0 to 1645513
Data columns (total 8 columns):
 #   Column          Non-Null Count    Dtype  
---  ------          --------------    -----  
 0   Country_Region  1645514 non-null  object 
 1   Last_Update     1645514 non-null  object 
 2   Lat             1611454 non-null  float64
 3   Long_           1611454 non-null  float64
 4   Confirmed       1645485 non-null  object 
 5   Deaths          1645072 non-null  object 
 6   Recovered       1386287 non-null  object 
 7   File_Name       1645514 non-null  object 
dtypes: float64(2), object(6)
memory usage: 100.4+ MB


In [None]:
comments[['Added','Not_Added']].sum().plot(kind='bar')

## Save the file
to save time: the data does not have to be downloaded from the internet again

In [19]:
covid_table.to_csv('covid_summary.csv',index=False)

In [None]:
df_covid = pd.read_csv('covid_summary.csv')
# df_covid = covid_table.copy()

## Covid Table Analyzing

In [None]:
# File names follow the MM-DD-YYYY convention; passing the format explicitly avoids
# any day/month guessing and is faster than format inference.
df_covid['File_Name'] = pd.to_datetime(df_covid['File_Name'], format='%m-%d-%Y')

In [None]:
df_covid['Last_Update'] = pd.to_datetime(df_covid['Last_Update']).dt.date

In [None]:
df_covid.head()

In [None]:
df_covid.info()

In [None]:
# df_covid['Recovered'] = df_covid['Recovered'].replace('', np.nan)
# df_covid['Deaths'] = df_covid['Deaths'].replace('', np.nan)
# df_covid['Confirmed'] = df_covid['Confirmed'].replace('', np.nan)

In [None]:
df_covid.fillna({'Deaths':0,'Confirmed':0,'Recovered':0},inplace=True)

In [None]:
df_covid['Confirmed'] = df_covid['Confirmed'].astype(float)

In [None]:
df_covid['Recovered'] = df_covid['Recovered'].astype(float)

In [None]:
df_covid['Deaths'] = df_covid['Deaths'].astype(float)

In [None]:
df_covid.info()

In [None]:
# Last_Update holds the date the data comes from, but to compare it with the flu data
# we need the week number. isocalendar() returns (ISO year, ISO week number, ISO weekday).
iso_parts = [update_date.isocalendar() for update_date in df_covid['Last_Update']]

df_covid['Week'] = [parts[1] for parts in iso_parts]
df_covid['Year'] = [parts[0] for parts in iso_parts]

In [None]:
# The daily reports contain CUMULATIVE totals (one row per province/county), so summing
# all rows of a week would add the same cases up to seven times. Instead:
#   1. build one cumulative total per country and report day,
#   2. keep the value of the last report day of each ISO week,
#   3. subtract the previous week to obtain the new counts of that week.
value_cols = ['Confirmed', 'Deaths', 'Recovered']

daily_totals = df_covid.groupby(['Country_Region', 'File_Name'], as_index=False)[value_cols].sum()

# Week keys are taken from the report date (File_Name), so all rows of one daily file
# fall into the same week; this replaces the Week/Year derived from Last_Update.
report_iso = daily_totals['File_Name'].dt.isocalendar()
daily_totals['Year'] = report_iso['year'].astype(int)
daily_totals['Week'] = report_iso['week'].astype(int)

weekly = (daily_totals.sort_values('File_Name')
          .groupby(['Country_Region', 'Year', 'Week'], as_index=False)[value_cols]
          .last()
          .sort_values(['Country_Region', 'Year', 'Week']))

previous_week = weekly.groupby('Country_Region')[value_cols].shift(1).fillna(0)
# NOTE(review): negative differences (downward corrections in the source data) are
# clipped to 0 - confirm this is the wanted treatment.
weekly[value_cols] = (weekly[value_cols] - previous_week).clip(lower=0)

df_covid = (weekly[['Country_Region', 'Week', 'Year'] + value_cols]
            .sort_values(['Country_Region', 'Week'])
            .reset_index(drop=True))

In [None]:
df_covid[(df_covid.Country_Region=='Poland')&(df_covid.Year==2020)].head()

In [None]:
df_covid.Year.unique()

In [None]:
# covid_table_sum = covid_table[['Country_Region','Last_Update','Confirmed', 'Deaths','Recovered']]
# covid_table_sum = covid_table_sum.groupby(['Country_Region','Last_Update']).agg({'Confirmed':'sum','Deaths':'sum','Recovered':'sum'}).reset_index()

## Comparison

In [None]:
# `covid_df` is not assigned anywhere in the visible notebook (NameError on a fresh run).
# The weekly table built above is `df_covid`; it has the columns the comparison uses.
# NOTE(review): confirm that df_covid is the intended source.
covid_df = df_covid.copy()
covid_df.info()

In [None]:
covid_df.rename(columns={'Country_Region':'Country'},inplace=True)

In [None]:
covid_df.head()

In [None]:
# `flu_df2` is not assigned anywhere in the visible notebook (NameError on a fresh run).
# The weekly flu table built above is `df_flu` (Country, Year, Week, Detected_Cases).
# NOTE(review): confirm that df_flu is the intended source.
flu_df2 = df_flu.copy()
flu_df2.info()

In [None]:
flu_df2.head()

In [None]:
flu_df2['Week'] = flu_df2['Week'].map(lambda x: x.lstrip('W'))
flu_df2['Week'] = flu_df2['Week'].astype('int')
flu_df2['Year'] = flu_df2['Year'].astype('int')

In [None]:
flu_df2.head()

In [None]:
flu_countries = flu_df2[['Country']].drop_duplicates()
flu_countries['flu'] = 1
cov_countries = covid_df[['Country']].drop_duplicates()
cov_countries['cov'] = 1

In [None]:
countries = pd.merge(flu_countries,cov_countries,on='Country',how='outer')

In [None]:
len(countries)

In [None]:
countries[countries['cov'].isnull()]

----
Now we can see the country names that differ between the two tables. I am going to unify them.

In [None]:
missing_countries = ['Herzego','Koso','Mold','Mace','Turkm','Kingdom']
print('Covid:')
for c in missing_countries:
    print(covid_df[covid_df.Country.str.contains(c)].Country.unique()) #flu_df2
    
print('\nInfluenza:')
for c in missing_countries:
    print(flu_df2[flu_df2.Country.str.contains(c)].Country.unique()) #flu_df2

In [None]:
# Name fragments that identify a country in either table, and the common name to use
missing_countries = ['Herzego','Koso','Mold','Mace','Kingdom', 'Rus']
new_countries = ['Bosnia and Herzegovina','Kosovo','Moldova','Macedonia','United Kingdom', 'Russia'] # in covid_df we have Russian Federation

for old,new in zip(missing_countries,new_countries):
    # regex=False: the fragments are plain text, not patterns.
    # na=False: a missing country name counts as "no match"; without it the mask
    # contains NaN and .loc refuses to use it.
    covid_df.loc[covid_df.Country.str.contains(old, regex=False, na=False), 'Country'] = new
    flu_df2.loc[flu_df2.Country.str.contains(old, regex=False, na=False), 'Country'] = new

In [None]:
print('Covid:')
for c in missing_countries:
    print(covid_df[covid_df.Country.str.contains(c)].Country.unique()) #flu_df2
    
print('\nInfluenza:')
for c in missing_countries:
    print(flu_df2[flu_df2.Country.str.contains(c)].Country.unique()) #flu_df2

Now the country names are the same (Turkmenistan is missing in the Covid table)

In [None]:
covid_df.head()

In [None]:
covid_df.shape

In [None]:
flu_df2.rename(columns={'Detected_Cases':'Detected_FluCases', 'Hospitalized_Cases':'Hospitalized_FluCases'},inplace=True)

In [None]:
flu_df2.head()

In [None]:
flu_df2.shape

In [None]:
final_df = pd.merge(covid_df,flu_df2,on=['Country','Year','Week'],how='right').sort_values(['Year','Week','Country'])

In [None]:
final_df.shape

In [None]:
final_df.head()

In [None]:
final_df[(final_df.Year==2021) & (final_df.Confirmed.isnull())]

In [None]:
final_df[(final_df.Year==2020) & (final_df.Confirmed.isnull())]['Week'].unique()

Seems everything is ok so we can change NaN into 0 for covid

In [None]:
final_df.fillna({'Deaths':0,'Confirmed':0,'Recovered':0},inplace=True)

In [None]:
final_df.info()

In [None]:
final_df.describe()

In [None]:
final_df.Week.unique()

In [None]:
# Lookup table ISO week -> quarter: weeks 1-13 -> 1, 14-26 -> 2, 27-39 -> 3, 40-53 -> 4.
# The mapping is computed instead of typed by hand (the hand-written list still held
# placeholder values such as 6, 7, ... and 452).
weeks = list(range(1, 54))
data = {
  "Week": weeks,
  "Quarter": [min((week - 1) // 13 + 1, 4) for week in weeks]
}

quarters = pd.DataFrame(data)

In [None]:
# Column labels must be an ordered collection: a set has no defined order and is
# rejected by recent pandas versions.
quarters = pd.DataFrame(columns=["Week", "Quarter"])

In [None]:
def quarter(x):
    """Return the quarter (1-4) a week number belongs to.

    Weeks up to 13 map to 1, up to 26 to 2, up to 39 to 3 and everything
    above 39 to 4.
    """
    last_weeks = (13, 26, 39)  # last week of quarters 1, 2 and 3
    for quarter_no, last_week in enumerate(last_weeks, start=1):
        if x <= last_week:
            return quarter_no
    return 4

quarters['Week'] = final_df.Week.unique()
quarters['Quarter'] = quarters['Week'].apply(quarter)
quarters

In [None]:
final_df = final_df.merge(quarters,on='Week',how='inner')

In [None]:
final_df.shape

In [None]:
final_df.columns

In [None]:
final_df[(final_df.Detected_FluCases>0)&(final_df.Year==2021)].sort_values('Detected_FluCases',ascending=False)

In [None]:
# Quarterly totals for Poland.
# Only numeric measure columns are summed: 'Country' is a string column and must not be
# part of a sum. Columns missing from final_df (for example 'Hospitalized_FluCases' when
# no hospitalisation data was merged in) are skipped instead of raising a KeyError.
wanted_measures = ['Confirmed', 'Deaths', 'Recovered', 'Detected_FluCases', 'Hospitalized_FluCases']
available_measures = [col for col in wanted_measures if col in final_df.columns]

poland = final_df[final_df.Country == 'Poland']
poland.groupby(['Year', 'Quarter'])[available_measures].sum().reset_index().sort_values(['Year', 'Quarter'])

Next Steps:

- Remove Quarter2 2021
- check why we do not have any Flu in 2021 (download it if we do not have)

### in the covid_df table we need to rename countries from this list: countries[countries['cov'].isnull()] 

### Add the comparison below

In [None]:
# Line chart of confirmed Covid cases aggregated per year and quarter.
chart = final_df.copy()
chart['YearQuarter'] = chart.Year.astype(str)+'-'+chart.Quarter.astype(str)

# Aggregate only the measure columns that exist in final_df, so a column that was never
# merged in (e.g. 'Hospitalized_FluCases') does not raise a KeyError.
wanted_measures = ['Confirmed', 'Deaths', 'Recovered', 'Detected_FluCases', 'Hospitalized_FluCases']
available_measures = [col for col in wanted_measures if col in chart.columns]
chart = chart.groupby('YearQuarter')[available_measures].sum().reset_index()

# The figure size has to be configured BEFORE the figure is created to take effect
plt.rcParams["figure.figsize"] = (20,5)
fig, ax = plt.subplots()

y = chart.Confirmed
x = chart.YearQuarter.unique()

result = ax.plot(x,y)


# Add some text for labels and title
ax.set_ylabel('Confirmed Cases')
ax.set_xlabel('Periods')
# The data is grouped by year and quarter, not by week
ax.set_title('Confirmed Covid Cases per quarter')

plt.show()