# Description

Get COVID-19 case information for EU countries.

# Environment

## Library Imports

In [1]:
import os
import gc
gc.enable()

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('ggplot')

## Local Imports

In [2]:
from phuc import data_process, file
from phuc import visualization as vs

## Reload data path

In [3]:
from phuc.file import standard_template

# Project root = the working directory up to (not including) its first '/src'.
_project_root = os.getcwd().partition('/src')[0]

# Hand the root to the project-local helper; judging by its log output it
# reloads and re-saves data_path.pkl (exact behaviour lives in phuc.file).
standard_template.save_data_path(_project_root)

Loaded file data_path.pkl
Directory  /home/phuc/Desktop/Work/Data Sience/Epidemic Modeling/MathModel  already exists
Saved file data_path.pkl


##  Setup Pandas

In [4]:
# Raise pandas' display limits so that frames of up to 200 rows and
# 100 columns are rendered in full instead of being truncated.
pd.options.display.max_rows = 200
pd.options.display.max_columns = 100

## Set up data path

In [5]:
# Project root: the working directory cut at its first '/src'
# (the whole working directory when it contains no '/src').
CURR_DIR = os.getcwd().partition('/src')[0]

In [6]:
# Read the cached path registry once and reuse it for both lookups
# (the same pickle used to be loaded from disk twice).
_data_path = file.load_pickle(CURR_DIR + '/data_path.pkl')
file_path = _data_path['FILES']  # lookup used below for individual data files
dir_path = _data_path['DIRS']    # lookup used below for project directories


Loaded file data_path.pkl
Loaded file data_path.pkl


In [7]:
# Debug helper: uncomment to list the keys available in each path lookup.
#for key in file_path: print(key)
#for key in dir_path: print(key)

# Code

# Johns Hopkins Coronavirus Repository

We use the data in COVID-19/csse_covid_19_data/csse_covid_19_time_series/,
cloned from https://github.com/CSSEGISandData/COVID-19 on 08/07/2020 (DD/MM/YYYY).

This folder contains daily time series summary tables, including confirmed, deaths and recovered. All data is read in from the daily case report. The time series tables are subject to be updated if inaccuracies are identified in our historical data. The daily reports will not be adjusted in these instances to maintain a record of raw data.

Two time series tables are for the US confirmed cases and deaths, reported at the county level. They are named time_series_covid19_confirmed_US.csv, time_series_covid19_deaths_US.csv, respectively.

Three time series tables are for the global confirmed cases, recovered cases and deaths. Australia, Canada and China are reported at the province/state level. Dependencies of the Netherlands, the UK, France and Denmark are listed under the province/state level. The US and other countries are at the country level. The tables are renamed time_series_covid19_confirmed_global.csv and time_series_covid19_deaths_global.csv, and time_series_covid19_recovered_global.csv, respectively.

## General Information

We use three data files:

+ TIME_SERIES_COVID19_CONFIRMED_GLOBAL_CSV_PATH
+ TIME_SERIES_COVID19_RECOVERED_GLOBAL_CSV_PATH
+ TIME_SERIES_COVID19_DEATHS_GLOBAL_CSV_PATH

These files share some similarities:

The Johns Hopkins coronavirus data covers more than 173 countries/regions, recorded daily from 1/22/20 to 7/8/20 (MM/DD/YY) as of the time we cloned the repository.

Null values occur only in the Province/State column, which is expected because most countries are reported at the country level.

In [13]:
# Global confirmed-case time series, read through the project's CSV helper.
dat_confirmed = data_process.read_csv(
    file_path['TIME_SERIES_COVID19_CONFIRMED_GLOBAL_CSV_PATH']
)

time_series_covid19_confirmed_global.csv 's information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Columns: 173 entries, Province/State to 7/8/20
dtypes: float64(2), int64(169), object(2)
memory usage: 359.6+ KB
None



In [71]:
# Global recovered-case time series, read through the project's CSV helper.
dat_recovered = data_process.read_csv(
    file_path['TIME_SERIES_COVID19_RECOVERED_GLOBAL_CSV_PATH']
)

time_series_covid19_recovered_global.csv 's information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 253 entries, 0 to 252
Columns: 173 entries, Province/State to 7/8/20
dtypes: float64(2), int64(169), object(2)
memory usage: 342.1+ KB
None



In [72]:
# Global deaths time series, read through the project's CSV helper.
dat_death = data_process.read_csv(
    file_path['TIME_SERIES_COVID19_DEATHS_GLOBAL_CSV_PATH']
)

time_series_covid19_deaths_global.csv 's information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Columns: 173 entries, Province/State to 7/8/20
dtypes: float64(2), int64(169), object(2)
memory usage: 359.6+ KB
None



# Working Data

Given our time constraints and the simplicity of the model used for the analysis, we decided to use only the data from EU countries, which share many similarities in how the outbreak developed and in how their governments dealt with it.

In [64]:
# Countries kept for the analysis; these names are matched against the
# 'Country/Region' column of the JHU tables.
EU_nation = [
    'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus',
    'Czechia', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany',
    'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania',
    'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania',
    'Slovakia', 'Slovenia', 'Spain', 'Sweden',
    # The United Kingdom is still treated as part of the EU in this analysis.
    'United Kingdom',
]

Dependent territories are not considered in our analysis because of their geographical distance from the mainland.

In [86]:
# For each metric, keep only the mainland rows of the selected countries and
# store the result in the working directory as both a pickle and a CSV.
for name_df, df in (('EU_confirmed', dat_confirmed),
                    ('EU_death', dat_death),
                    ('EU_recovered', dat_recovered)):

    # Rows whose country is in the EU list ...
    is_eu_member = df['Country/Region'].isin(EU_nation)
    # ... and that have no Province/State, i.e. are not a dependent state.
    is_mainland = df['Province/State'].isnull()

    df_eu = df[is_eu_member & is_mainland].reset_index(drop=True)

    # Only the country name and the daily counts are needed downstream.
    df_eu = df_eu.drop(columns=['Province/State', 'Lat', 'Long'])

    # Common output path without extension; pickle is written first, then CSV.
    out_stem = dir_path['WORKING_DIR'] + '/' + name_df
    file.save_pickle(out_stem + '.pkl', df_eu)
    df_eu.to_csv(out_stem + '.csv', index=False)

Directory  /home/phuc/Desktop/Work/Data Sience/Epidemic Modeling/MathModel/data/working  already exists
Saved file EU_confirmed.pkl
Directory  /home/phuc/Desktop/Work/Data Sience/Epidemic Modeling/MathModel/data/working  already exists
Saved file EU_death.pkl
Directory  /home/phuc/Desktop/Work/Data Sience/Epidemic Modeling/MathModel/data/working  already exists
Saved file EU_recovered.pkl


# Colab Settings

## Connect Drive

In [None]:
from google.colab import drive

# Connect with the account phuc.phan1012000@hcmut.edu.vn.
# ROOT is the default location at which the drive is mounted.
ROOT = "/content/drive"
drive.mount(ROOT)

## Load Library

In [None]:
import sys
import os

# Make phuc's helper library, stored on Google Drive, importable on Colab.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
_LIBRARY_DIR = "/content/drive/My Drive/Work/Data_Science_Library"
if _LIBRARY_DIR not in sys.path:
    sys.path.append(_LIBRARY_DIR)

## Setup Project

In [None]:
# NOTE(review): this repository name does not obviously match the COVID
# analysis in this notebook; confirm it is the intended project.
GIT_REPOSITORY = "kalapa-s-CreditScore"
# Path to the projects folder inside the mounted Google Drive.
MY_GOOGLE_DRIVE_PATH = 'My Drive/Work/Projects'
PROJECT_PATH = os.path.join(ROOT, MY_GOOGLE_DRIVE_PATH, GIT_REPOSITORY)

# Work from the project's src directory.
os.chdir(os.path.join(PROJECT_PATH, 'src'))

# Drop the cached path registry (presumably so it gets rebuilt with Colab
# paths; confirm). Guarded because the file is absent on a fresh checkout
# and an unconditional os.remove would raise FileNotFoundError.
_cached_data_path = os.path.join(PROJECT_PATH, 'data_path.pkl')
if os.path.exists(_cached_data_path):
    os.remove(_cached_data_path)