In [1]:
import numpy as np
import pandas as pd
import seaborn as sns3
import matplotlib.pyplot as plt

# Load the COVID-19 table (columns: region, name, date, dead, infected, population).
# source: https://github.com/casus/covid-19-Infektion_Tod_in_Deutschland/blob/main/Germany_States_COVID19_Death_Infections.csv
COVID_CSV = "covid19_de.csv"
df = pd.read_csv(COVID_CSV)

In [2]:
# dimensions of the table as (rows, columns)
n_rows, n_cols = df.shape
print((n_rows, n_cols))

(339434, 6)


In [3]:
# total number of cells in the table (rows x columns) - not the number of records
n_cells = df.size
print(n_cells)

2036604


In [4]:
# preview the first five rows of the table
first_rows = df.head(n=5)
print(first_rows)

   region       name        date  dead  infected  population
0    1001  Flensburg  2020-03-14     0         4       89504
1    1001  Flensburg  2020-03-18     0         2       89504
2    1001  Flensburg  2020-03-19     0         4       89504
3    1001  Flensburg  2020-03-20     0         2       89504
4    1001  Flensburg  2020-03-21     0         1       89504


In [5]:
# preview the last five rows of the table
last_rows = df.tail(n=5)
print(last_rows)

        region              name        date  dead  infected  population
339429   16077  Altenburger Land  2022-12-04     0        12       90118
339430   16077  Altenburger Land  2022-12-05     0        29       90118
339431   16077  Altenburger Land  2022-12-06     0        48       90118
339432   16077  Altenburger Land  2022-12-07     0        40       90118
339433   16077  Altenburger Land  2022-12-08     0        24       90118


In [6]:
# sum of the 'infected' column over every row of the table
total_infect = df.loc[:, 'infected'].sum()
print(total_infect)

36757178


In [7]:
# sum of the 'dead' column over every row of the table
total_deaths = df.loc[:, 'dead'].sum()
print(total_deaths)

158944


In [8]:
# Date range covered by the data. The CSV is read without date parsing, so the
# column holds YYYY-MM-DD strings; for that format string order equals
# chronological order, which is why min()/max() give the first/last day.
dates = df['date']
# earliest entry
print(dates.min())
# latest entry
print(dates.max())

2020-01-02
2022-12-09


In [9]:
# A German city we all know: select every row for Würzburg.
# The already-loaded frame is indexed by 'name' on the fly instead of re-reading
# the CSV into `df`; this avoids a second disk read and keeps `df` itself
# unchanged, so other cells do not depend on which cell last rebound it.
df_wue = df.set_index('name').loc['Würzburg']
print(df_wue)

          region        date  dead  infected  population
name                                                    
Würzburg    9663  2020-03-05     0         1      127880
Würzburg    9663  2020-03-09     0         2      127880
Würzburg    9663  2020-03-10     2         5      127880
Würzburg    9663  2020-03-11     0         1      127880
Würzburg    9663  2020-03-12     2         4      127880
...          ...         ...   ...       ...         ...
Würzburg    9663  2022-12-05     0        76      127880
Würzburg    9663  2022-12-06     0        54      127880
Würzburg    9663  2022-12-07     0        38      127880
Würzburg    9663  2022-12-08     0        28      127880
Würzburg    9663  2022-12-09     0        29      127880

[846 rows x 5 columns]


In [10]:
# Summary statistics of the per-row infection counts for Würzburg.
# df_wue already holds only the Würzburg rows, so the column is selected
# directly rather than through a second .loc['Würzburg'] lookup.
wue_infect = df_wue['infected'].describe()
print(wue_infect)

count    846.000000
mean      78.560284
std      128.440184
min        1.000000
25%        7.000000
50%       21.000000
75%       79.000000
max      692.000000
Name: infected, dtype: float64


In [11]:
# Find the day(s) on which the most infections were reported in Würzburg.
# The peak value is computed from the data instead of being copied by hand,
# so the cell stays correct if the CSV is updated.
peak_infected = df_wue['infected'].max()
df_wue[df_wue['infected'] == peak_infected]

Unnamed: 0_level_0,region,date,dead,infected,population
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Würzburg,9663,2022-03-25,0,692,127880


In [12]:
# Summary statistics of the per-row death counts for Würzburg.
# df_wue already holds only the Würzburg rows, so the column is selected
# directly rather than through a second .loc['Würzburg'] lookup.
wue_deaths = df_wue['dead'].describe()
print(wue_deaths)

count    846.000000
mean       0.264775
std        0.720528
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max        7.000000
Name: dead, dtype: float64


In [13]:
# Find the day(s) on which the most deaths were reported in Würzburg.
# The peak value is computed from the data instead of being copied by hand,
# so the cell stays correct if the CSV is updated.
peak_dead = df_wue['dead'].max()
df_wue[df_wue['dead'] == peak_dead]

Unnamed: 0_level_0,region,date,dead,infected,population
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Würzburg,9663,2020-03-28,7,23,127880


In [14]:
# Totals used by the ratios below; each sum is taken once and reused.
wue_infected_sum = df_wue['infected'].sum()
wue_dead_sum = df_wue['dead'].sum()
de_infected_sum = df['infected'].sum()
de_dead_sum = df['dead'].sum()

# Würzburg's share of all infections, in percent
print('infections wue to germany:', wue_infected_sum/de_infected_sum*100)
# Würzburg's share of all deaths, in percent
print('deaths wue to germny:', wue_dead_sum/de_dead_sum*100)

# deaths per infection in Würzburg, in percent
print('wue deaths to wue infections:', wue_dead_sum/wue_infected_sum*100)
# deaths per infection over the whole table, in percent
print('DE deaths to DE infections:', de_dead_sum/de_infected_sum*100)

infections wue to germany: 0.18081366311635785
deaths wue to germny: 0.14093013891685122
wue deaths to wue infections: 0.3370346965183112
DE deaths to DE infections: 0.432416220853516


In [15]:
# Compare deaths and infections before and after a cut-off date.
# Reload the table so this cell starts from a frame with the default integer
# index, independent of what earlier cells did to `df`.
df = pd.read_csv("covid19_de.csv")

# Cut-off this analysis uses as the start of vaccinations. The 'date' column
# holds YYYY-MM-DD strings, so plain string comparison orders rows chronologically.
VACCINATION_START = '2020-12-26'

# Split once; every figure below reuses these two frames.
query_date1 = df[df['date'] < VACCINATION_START]   # rows before the cut-off
query_date2 = df[df['date'] >= VACCINATION_START]  # rows on/after the cut-off

dead_before = query_date1['dead'].sum()
dead_after = query_date2['dead'].sum()
infected_before = query_date1['infected'].sum()
infected_after = query_date2['infected'].sum()

print('cases of death')
print(dead_before)
print(dead_after)

print('\ninfections:')
print(infected_before)
print(infected_after)

# deaths per infection, in percent
print('\ncases of death/infections before the vac:')
print(dead_before/infected_before*100)

print('\ncases of death / infections after the vac:')
print(dead_after/infected_after*100)

print('\nnot considered: weaker covid variants - as we all know it started with the deadliest one and got weaker from time to time - this would be quite important but would exceed the amount of time and used data for this little presentation')

cases of death
45843
113101

infections:
1638388
35118790

cases of death/infections before the vac:
2.7980551615368277

cases of death / infections after the vac:
0.3220526675321103

not considered: weaker covid variants - as we all know it started with the deadliest one and got weaker from time to time - this would be quite important but would exceed the amount of time and used data for this little presentation
