# Coronavirus Data Analysis (COVID-19)

In [19]:
import subprocess
import os

import pandas as pd

import requests
from bs4 import BeautifulSoup

import json


# Allow up to 500 rows to be shown when a DataFrame is displayed.
pd.set_option('display.max_rows', 500)

## CRISP DM

![CRISP_DM](../reports/figures/CRISP_DM.png)

## Data Understanding

* RKI, via web scraping: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Fallzahlen.html
* Johns Hopkins University (GitHub): https://github.com/CSSEGISandData/COVID-19.git
* REST API services to retrieve data: https://npgeo-corona-npgeo-de.hub.arcgis.com/

## GITHUB csv data

git clone/pull https://github.com/CSSEGISandData/COVID-19.git

In [20]:
# Update the local repository clone under ../data/raw/COVID-19/ with `git pull`.
# The command is given as an argument list, so no intermediate shell is
# spawned (shell=True is not needed for a fixed command like this).
git_pull = subprocess.Popen( ["git", "pull"],
                           cwd = os.path.dirname( '../data/raw/COVID-19/' ),
                           stdout = subprocess.PIPE,
                           stderr = subprocess.PIPE )
# communicate() waits for git to finish and returns the captured
# (stdout, stderr) as bytes.
(out, error) = git_pull.communicate()

print("Error : "+ str(error))
print("Out : "+ str(out))

# Report a failed pull explicitly; otherwise the following cells would
# silently read whatever (possibly outdated) files are already on disk.
if git_pull.returncode != 0:
    print("git pull failed with exit code " + str(git_pull.returncode))

Error : b''
Out : b'Already up to date.\n'


In [21]:
# Path of the confirmed-cases (global) time-series CSV inside the local repository clone.
data_path = '../data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

# Read the CSV as-is into a DataFrame.
pd_raw = pd.read_csv(filepath_or_buffer=data_path)

In [22]:
# Preview the first rows of the raw time-series table.
pd_raw.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,9/5/20,9/6/20,9/7/20,9/8/20,9/9/20,9/10/20,9/11/20,9/12/20,9/13/20,9/14/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,38324,38398,38494,38520,38544,38572,38606,38641,38716,38772
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,10102,10255,10406,10553,10704,10860,11021,11185,11353,11520
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,46071,46364,46653,46938,47216,47488,47752,48007,48254,48496
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,1215,1215,1261,1261,1301,1301,1344,1344,1344,1438
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2935,2965,2981,3033,3092,3217,3279,3335,3388,3439


## Data gathering via web scraping from the RKI case-number page

Data from: https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Fallzahlen.html

In [23]:
# Page that is scraped for the case-number table.
data_from = "https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Fallzahlen.html"
# Without a timeout, requests may wait indefinitely on an unresponsive server.
page = requests.get(data_from, timeout=30)
# Stop here on an HTTP error status instead of parsing an error page as data.
page.raise_for_status()
# Parse the raw response body with Python's built-in HTML parser.
soup = BeautifulSoup(page.content,'html.parser')

In [24]:
# find() returns the first <table> element of the parsed page, or None when
# the page contains no table at all.
html_table = soup.find('table')
# Fail with a clear message here rather than with an AttributeError on None
# in a later cell.
if html_table is None:
    raise ValueError("No <table> element found on the scraped page; the page layout may have changed.")

In [25]:
# Collect all <tr> (table row) elements contained in the table.
all_rows = html_table.find_all('tr')

In [26]:
# Start with an empty container for the scraped table rows.
final_data_list = list()

In [27]:
# Build the whole list in a single assignment so that re-running this cell
# does not append the same rows a second time.
# Each entry is the list of whitespace-stripped texts of the <td> cells of
# one table row; a row without any <td> cell yields an empty list.
# Soft hyphens (U+00AD) are removed from the cell text: they are invisible
# hyphenation hints and would otherwise stay embedded in the scraped values.
final_data_list = [
    [each_col.get_text(strip=True).replace('\u00ad', '') for each_col in rows.find_all('td')]
    for rows in all_rows
]

In [28]:
# Turn the scraped rows into a DataFrame.
# dropna() discards every row that has a missing value (for example rows
# that were scraped as empty lists), and rename() replaces the positional
# column labels 0..5 with readable names.
# The cell values stay text exactly as scraped; no numeric conversion is done here.
# 'Past Week Count' is written without the accidental trailing space so the
# column can be selected by its visible name.
pd_daily_status=pd.DataFrame(final_data_list).dropna().rename(columns={0:'state',
                                                                      1: 'No. of Cases',
                                                                      2: 'New Cases found',
                                                                      3: 'Past Week Count',
                                                                      4: '7 day Incidence',
                                                                      5: 'Deaths'})

In [29]:
# Preview the first rows of the scraped table.
pd_daily_status.head()

Unnamed: 0,state,No. of Cases,New Cases found,Past Week Count,7 day Incidence,Deaths
2,Baden-Württem­berg,45.323,69,1.269,115,1.868
3,Bayern,62.401,427,2.324,178,2.647
4,Berlin,12.354,85,486.0,130,226.0
5,Branden­burg,4.016,10,63.0,25,169.0
6,Bremen,2.137,2,64.0,94,59.0


## REST API METHOD

In [30]:
# Query the ArcGIS feature service; the query string asks for all records
# (where=1=1), all fields (outFields=*) and a JSON response (f=json).
# Without a timeout, requests may wait indefinitely on an unresponsive server.
data=requests.get('https://services7.arcgis.com/mOBPykOjAyBO2ZKk/arcgis/rest/services/Coronaf%C3%A4lle_in_den_Bundesl%C3%A4ndern/FeatureServer/0/query?where=1%3D1&outFields=*&outSR=4326&f=json', timeout=30)
# Stop here on an HTTP error status instead of handing an error body to the JSON parser.
data.raise_for_status()

In [31]:
# Parse the raw response body as JSON.
json_object = json.loads(data.content)

In [32]:
# Check which Python type the parsed JSON document has.
type(json_object)

dict

In [33]:
# List the top-level keys of the parsed response.
json_object.keys()

dict_keys(['objectIdFieldName', 'uniqueIdField', 'globalIdFieldName', 'geometryProperties', 'serverGens', 'geometryType', 'spatialReference', 'fields', 'features'])

In [34]:
# Collect the 'attributes' value of every entry under 'features'.
# A comprehension replaces the loop: the enumerate index was never used and
# the [:] slice only made a needless copy of the list.
full_list = [each_dict['attributes'] for each_dict in json_object['features']]

In [35]:
# Build a DataFrame from the collected attribute records and display it
# (the result is not assigned to a name).
pd.DataFrame(full_list)

Unnamed: 0,OBJECTID_1,LAN_ew_AGS,LAN_ew_GEN,LAN_ew_BEZ,LAN_ew_EWZ,OBJECTID,Fallzahl,Aktualisierung,AGS_TXT,GlobalID,faelle_100000_EW,Shape__Area,Shape__Length,Death
0,1,1,Schleswig-Holstein,Land,2896712,15,4255,1600120800000,1,fc5ba936-c95c-432c-8a33-9eb2f30b660f,146.890682,45737310000.0,2881496.0,161
1,2,2,Hamburg,Freie und Hansestadt,1841179,6,6846,1600120800000,2,0f3e860c-5181-4d3f-a421-1d51f50315ea,371.826965,2089396000.0,418800.2,268
2,3,3,Niedersachsen,Land,7982448,9,18023,1600120800000,3,3fd77024-c29b-4843-9be8-682ad48e60c9,225.782868,129983600000.0,4008988.0,667
3,4,4,Bremen,Freie Hansestadt,682986,5,2137,1600120800000,4,4132268b-54de-4327-ac1e-760e915112f1,312.890747,1119157000.0,335717.7,59
4,5,5,Nordrhein-Westfalen,Land,17932651,10,62725,1600120800000,5,561d658f-3ee5-46e3-bc95-3528c6558ab9,349.780967,87829360000.0,2648673.0,1834
5,6,6,Hessen,Land,6265809,7,16985,1600120800000,6,93277ac4-e8fc-48c7-8940-028dc2ed66af,271.074334,52359130000.0,2148244.0,540
6,7,7,Rheinland-Pfalz,Land,4084844,11,9795,1600120800000,7,e9b4296f-9be2-4e53-9a58-ccf1396cb03d,239.788839,47838770000.0,1774430.0,247
7,8,8,Baden-Württemberg,Land,11069533,1,45323,1600120800000,8,80394ddf-c6a4-4a6e-be8e-0259a81b22a9,409.439134,81517320000.0,2544320.0,1868
8,9,9,Bayern,Freistaat,13076721,2,62401,1600120800000,9,1ff920f4-62cd-4a4f-b8c9-f042f2a3e00a,477.191492,163485500000.0,3898618.0,2647
9,10,10,Saarland,Land,990509,12,3234,1600120800000,10,e3396a6f-8a30-4fdf-8df7-def77dd38bea,326.4988,6060692000.0,562678.9,175
