In [22]:
import pandas as pd

# Let pandas display up to 500 rows before it truncates a DataFrame's output.
pd.set_option('display.max_rows', 500)

   ![Stages EDS](../reports/figures/stages_EDS.png)

# Data Understanding

* RKI (web scraping): https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Fallzahlen.html
* Johns Hopkins (GitHub): https://github.com/CSSEGISandData/COVID-19.git
* REST API services to retrieve data: https://npgeo-corona-npgeo-de.hub.arcgis.com

## GitHub CSV data

In [16]:
# Relative path to the confirmed-cases global time-series CSV stored under ../data/raw/COVID-19.
data_path='../data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
# Load the CSV; the resulting DataFrame is not assigned, it is only shown as the cell output.
pd.read_csv(data_path)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,5/30/22,5/31/22,6/1/22,6/2/22,6/3/22,6/4/22,6/5/22,6/6/22,6/7/22,6/8/22
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,180259,180347,180419,180520,180584,180615,180615,180688,180741,180784
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,276101,276101,276221,276221,276310,276342,276401,276415,276468,276518
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,265877,265884,265887,265889,265889,265889,265897,265900,265904,265909
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,42894,42894,42894,42894,43067,43067,43067,43067,43067,43224
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,99761,99761,99761,99761,99761,99761,99761,99761,99761,99761
5,,Antarctica,-71.9499,23.347,0,0,0,0,0,0,...,11,11,11,11,11,11,11,11,11,11
6,,Antigua and Barbuda,17.0608,-61.7964,0,0,0,0,0,0,...,8163,8253,8253,8295,8295,8378,8378,8378,8378,8378
7,,Argentina,-38.4161,-63.6167,0,0,0,0,0,0,...,9230573,9230573,9230573,9230573,9230573,9230573,9276618,9276618,9276618,9276618
8,,Armenia,40.0691,45.0382,0,0,0,0,0,0,...,422963,422963,422963,422963,422963,422963,422963,423006,423006,423006
9,Australian Capital Territory,Australia,-35.4735,149.0124,0,0,0,0,0,0,...,131938,132735,133582,134286,134286,134286,134286,136860,137664,138407


## Web Scraping

In [19]:
import requests
from bs4 import BeautifulSoup

In [21]:
# Download the RKI case-number page.
# The timeout keeps the notebook from hanging indefinitely on an unresponsive
# server, and raise_for_status() fails fast on an HTTP error instead of handing
# an error page to the HTML parser in the following cells.
page = requests.get("https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Fallzahlen.html", timeout=30)
page.raise_for_status()

In [23]:
# Parse the downloaded response body using the 'html.parser' backend.
soup = BeautifulSoup(page.content, 'html.parser')

In [25]:
# Take the first <table> element of the document; find() returns None if the page has no table.
html_table = soup.find('table')

In [27]:
# Collect every <tr> element found inside the selected table.
all_rows=html_table.find_all('tr')

In [37]:
# Empty list meant to hold one list of cell texts per table row.
final_data_list=[]

In [51]:
# Build the row list from scratch on every run so that re-executing this cell
# does not append a second copy of the table to final_data_list.
# Each <tr> becomes a list holding the whitespace-stripped text of its <td>
# cells; a row without any <td> yields an empty list.
final_data_list = [
    [each_col.get_text(strip=True) for each_col in row.find_all('td')]
    for row in all_rows
]
 

In [52]:
# Turn the scraped rows into a DataFrame, drop every row that contains a missing
# value (dropna's default), and give the positional columns 0-5 readable names.
# NOTE(review): the sample output suggests the last three labels are shifted —
# the values shown under 'cases_per_100k', 'fatal' and 'comment' match the API
# fields cases7_bl, cases7_bl_per_100k_txt and Death respectively. Confirm
# against the RKI table header before relying on these names.
pd_daily_status = (
    pd.DataFrame(final_data_list)
    .dropna()
    .rename(
        columns={
            0: 'state',
            1: 'cases',
            2: 'changes',
            3: 'cases_per_100k',
            4: 'fatal',
            5: 'comment',
        }
    )
)

In [53]:
# Preview the first rows (head() defaults to five) of the scraped table.
pd_daily_status.head()

Unnamed: 0,state,cases,changes,cases_per_100k,fatal,comment
2,Baden-Württem­berg,3.714.108,7.826,26.526,2389,16.182
3,Bayern,4.956.999,10.994,40.54,3085,24.167
4,Berlin,1.061.611,2.237,7.585,2070,4.628
5,Branden­burg,797.080,1.29,4.432,1751,5.688
6,Bremen,204.310,752.0,2.545,3742,780.0


## API calls

In [54]:
# Query the ArcGIS feature service for all records (where=1=1, all fields) as JSON.
# The timeout prevents the cell from blocking forever on an unresponsive server,
# and raise_for_status() surfaces HTTP errors here rather than as a confusing
# JSON decoding failure in a later cell.
data = requests.get('https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/Coronafälle_in_den_Bundesländern/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json', timeout=30)
data.raise_for_status()

In [56]:
import json

In [57]:
# Decode the raw response body of the API call into Python objects.
json_object=json.loads(data.content)

In [58]:
# Inspect the top-level type of the decoded payload.
type(json_object)

dict

In [59]:
# List the top-level keys of the decoded payload to locate the data records.
json_object.keys()

dict_keys(['objectIdFieldName', 'uniqueIdField', 'globalIdFieldName', 'geometryProperties', 'geometryType', 'spatialReference', 'fields', 'features'])

In [62]:
# Collect the 'attributes' dict of every entry under 'features'; each dict
# becomes one row when the list is turned into a DataFrame.
# The unused enumerate() index and the redundant [:] copy have been dropped.
full_list = [feature['attributes'] for feature in json_object['features']]
                                                    
                                                        

In [63]:
# Build a DataFrame with one row per collected attributes dict; shown as the cell output.
pd.DataFrame(full_list)

Unnamed: 0,OBJECTID_1,LAN_ew_AGS,LAN_ew_GEN,LAN_ew_BEZ,LAN_ew_EWZ,OBJECTID,Fallzahl,Aktualisierung,AGS_TXT,GlobalID,faelle_100000_EW,Shape__Area,Shape__Length,Death,cases7_bl_per_100k,cases7_bl,death7_bl,cases7_bl_per_100k_txt,AdmUnitId
0,1,1,Schleswig-Holstein,Land,2910875,15,760562,1654812000000,1,fc5ba936-c95c-432c-8a33-9eb2f30b660f,26128.294757,45737310000.0,2881496.0,2563,504.55619,14687,3,5046,1
1,2,2,Hamburg,Freie und Hansestadt,1852478,6,594566,1654812000000,2,0f3e860c-5181-4d3f-a421-1d51f50315ea,32095.711798,2089396000.0,418800.2,2679,331.178022,6135,3,3312,2
2,3,3,Niedersachsen,Land,8003421,9,2423749,1654812000000,3,3fd77024-c29b-4843-9be8-682ad48e60c9,30283.912342,129983600000.0,4008988.0,9429,454.268244,36357,3,4543,3
3,4,4,Bremen,Freie Hansestadt,680130,5,204310,1654812000000,4,4132268b-54de-4327-ac1e-760e915112f1,30039.845324,1119157000.0,335717.7,780,374.193169,2545,0,3742,4
4,5,5,Nordrhein-Westfalen,Land,17925570,10,5384556,1654812000000,5,561d658f-3ee5-46e3-bc95-3528c6558ab9,30038.40882,87829360000.0,2648673.0,25504,377.142819,67605,4,3771,5
5,6,6,Hessen,Land,6293154,7,1900217,1654812000000,6,93277ac4-e8fc-48c7-8940-028dc2ed66af,30194.986488,52359130000.0,2148244.0,10222,419.026771,26370,4,4190,6
6,7,7,Rheinland-Pfalz,Land,4098391,11,1173348,1654812000000,7,e9b4296f-9be2-4e53-9a58-ccf1396cb03d,28629.479227,47838770000.0,1774430.0,5698,325.639989,13346,0,3256,7
7,8,8,Baden-Württemberg,Land,11103043,1,3714108,1654812000000,8,80394ddf-c6a4-4a6e-be8e-0259a81b22a9,33451.261965,81517320000.0,2544320.0,16182,238.907478,26526,2,2389,8
8,9,9,Bayern,Freistaat,13140183,2,4956999,1654812000000,9,1ff920f4-62cd-4a4f-b8c9-f042f2a3e00a,37723.972337,163485500000.0,3898618.0,24167,308.519295,40540,8,3085,9
9,10,10,Saarland,Land,983991,12,319857,1654812000000,10,e3396a6f-8a30-4fdf-8df7-def77dd38bea,32506.089995,6060692000.0,562678.9,1701,322.665553,3175,0,3227,10
