# COVID-19 West Africa Visualization

In [1]:
#Imports
import numpy as np
import pandas as pd
from datetime import datetime,timedelta
import matplotlib.pyplot as plt
import seaborn as sns
import time
import altair as alt
from vega_datasets import data
import os


## I. Load Base csv files

Source files were obtained from the Johns Hopkins Center for Systems Science and Engineering (CSSE): https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data
          
The source consists of three main files gathering the statistics for each country starting from January 22nd, 2020: one file for the confirmed cases, another for the deaths, and the last one for the recovered cases.

In [2]:
# Global time-series CSVs from the JHU CSSE COVID-19 repository; all three files share the same folder.
JHU_TIME_SERIES_URL = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
)
confirmed = pd.read_csv(JHU_TIME_SERIES_URL + 'time_series_covid19_confirmed_global.csv')
death = pd.read_csv(JHU_TIME_SERIES_URL + 'time_series_covid19_deaths_global.csv')
recovered = pd.read_csv(JHU_TIME_SERIES_URL + 'time_series_covid19_recovered_global.csv')

#### Confirmed Cases data

In [3]:
# Preview the first rows of the confirmed-cases table (one column per reporting date)
confirmed.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,521,555,607,665,714,784,840,906,933,996
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,416,433,446,467,475,494,518,539,548,562
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1761,1825,1914,1983,2070,2160,2268,2418,2534,2629
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,601,601,638,646,659,673,673,696,704,713
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,19,19,19,19,19,19,19,19,24,24


#### Dead Cases data

In [4]:
# Preview the first rows of the deaths table (same layout as the confirmed-cases table)
death.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,15,18,18,21,23,25,30,30,30,33
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,23,23,23,23,24,25,26,26,26,26
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,256,275,293,313,326,336,348,364,367,375
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,26,26,29,29,31,33,33,35,35,36
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2


#### Recovered Cases data

In [5]:
# Preview the first rows of the recovered-cases table (same layout as the confirmed-cases table)
recovered.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,32,32,32,32,40,43,54,99,112,131
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,182,197,217,232,248,251,277,283,302,314
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,405,460,591,601,691,708,783,846,894,1047
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,71,71,128,128,128,169,169,191,205,235
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,2,4,4,4,5,5,5,5,6,6


##  II. Build Dataset for West-Africa countries

We would like to zoom in on the evolution of COVID-19 in West African countries. There are 15 countries in West Africa. By merging the information from the three previous datasets, we obtain a new dataframe displaying the number of cases for each day and each category.
Kindly note that the data is updated on a daily basis.

In [6]:
# Read the reference table of selected countries (name + coordinates)
# and keep the country names as an array for filtering.
countries_oa = pd.read_csv('countries_OA.csv')
countries = countries_oa.loc[:, 'countries'].values

In [7]:
# Keep only the rows of the selected countries and drop the province column,
# applying the same filter to each of the three source tables.
confirmed_oa, death_oa, recovered_oa = (
    table.loc[table['Country/Region'].isin(countries)].drop(columns=['Province/State'])
    for table in (confirmed, death, recovered)
)

In [8]:
# Reshape from wide (one column per date) to long (one row per country and date).
id_columns = ["Country/Region", "Lat", "Long"]
df_conf = confirmed_oa.melt(id_vars=id_columns, var_name="Date", value_name="Confirmed_cases")
df_death = death_oa.melt(id_vars=id_columns, var_name="Date", value_name="Dead")
df_recovered = recovered_oa.melt(id_vars=id_columns, var_name="Date", value_name="Recovered")

In [9]:
# Convert the Date column of each long table from text to datetime
for long_table in (df_conf, df_death, df_recovered):
    long_table['Date'] = pd.to_datetime(long_table['Date'])

In [10]:
# Use the shorter column name 'Country' instead of 'Country/Region' in all three tables
country_column = {'Country/Region': 'Country'}
df_conf = df_conf.rename(columns=country_column)
df_death = df_death.rename(columns=country_column)
df_recovered = df_recovered.rename(columns=country_column)

In [11]:
## List all the days from the beginning (2020-01-22) up to and including the previous day.
# `pd.datetime` was only an alias of the standard-library `datetime` class; it was
# deprecated in pandas 1.0 and removed in pandas 2.0, so use the `datetime` class
# imported at the top of the notebook instead.
date_list = pd.date_range(start='2020-01-22', end=datetime.today() - timedelta(days=1)).tolist()

  


In [12]:
## Add a row for every (date, country) pair that is missing from each table.
# First reporting date: missing values on that day are set to 0 so that the
# later forward-fill has a starting value; every other missing value is NaN.
FIRST_DATE = pd.Timestamp('2020-01-22')


def add_missing_country_rows(cases, value_col, dates, country_coords):
    """Return ``cases`` completed with one row per missing (date, country) pair.

    Parameters
    ----------
    cases : DataFrame
        Long table with 'Country', 'Lat', 'Long', 'Date' and ``value_col`` columns.
    value_col : str
        Name of the measure column ('Confirmed_cases', 'Dead' or 'Recovered').
    dates : iterable of Timestamp
        Dates that must be present for every country.
    country_coords : DataFrame
        Reference table with 'countries', 'latitude' and 'longitude' columns.

    Returns
    -------
    DataFrame
        ``cases`` itself when nothing is missing, otherwise a new frame with the
        extra rows appended and a fresh 0..n-1 index.
    """
    # First occurrence wins for a duplicated country name, as with `.values[0]`.
    coords = country_coords.drop_duplicates('countries').set_index('countries')
    present = set(zip(cases['Date'], cases['Country']))

    # Collect all missing rows first and concatenate once: `DataFrame.append`
    # was removed in pandas 2.0 and growing a frame row by row is quadratic.
    missing_rows = [
        {
            'Country': country,
            'Lat': coords.at[country, 'latitude'],
            'Long': coords.at[country, 'longitude'],
            'Date': day,
            # Compare against a Timestamp: the previous comparison of a Timestamp
            # with the string '2020-01-22' never matched, so the 0 was never used.
            value_col: 0 if day == FIRST_DATE else np.nan,
        }
        for day in dates
        for country in coords.index
        if (day, country) not in present
    ]
    if not missing_rows:
        return cases
    return pd.concat([cases, pd.DataFrame(missing_rows)], ignore_index=True)


df_conf = add_missing_country_rows(df_conf, 'Confirmed_cases', date_list, countries_oa)
df_death = add_missing_country_rows(df_death, 'Dead', date_list, countries_oa)
df_recovered = add_missing_country_rows(df_recovered, 'Recovered', date_list, countries_oa)
        

In [13]:
# Order every table by date, then country, and renumber the rows from 0
# so that the three tables share the same index.
sort_keys = ['Date', 'Country']
df_conf = df_conf.sort_values(by=sort_keys, ignore_index=True)
df_death = df_death.sort_values(by=sort_keys, ignore_index=True)
df_recovered = df_recovered.sort_values(by=sort_keys, ignore_index=True)

In [14]:
# Build our dataframe, which is a merge of the three tables.
# Join on the explicit (Country, Date) key instead of relying on
#   * the natural join, which also matches on the float Lat/Long columns and
#     drops rows whenever the coordinates differ between tables, and
#   * index-aligned assignment of 'Recovered', which silently misaligns values
#     if the merged frame has a different row count or order.
# Lat/Long are taken from the confirmed-cases table; left joins keep every
# confirmed-cases row.
merge_keys = ['Country', 'Date']
df = (
    df_conf
    .merge(df_death[merge_keys + ['Dead']], on=merge_keys, how='left')
    .merge(df_recovered[merge_keys + ['Recovered']], on=merge_keys, how='left')
)

In [15]:
# Clean df: on the first date every missing value becomes 0, which gives the
# forward-fill of the following dates a starting point.
first_day = df['Date'] == '2020-01-22'
df.loc[first_day] = df.loc[first_day].fillna(0)

In [16]:
# Country names as a plain Python list (replaces the array defined earlier)
countries = countries_oa['countries'].tolist()

In [17]:
# For the remaining dates, take each country's rows (already ordered by date)
# and forward-fill the gaps with that country's previous value, then stack
# the per-country frames back together with a fresh index.
df_list = [df[df['Country'] == ct].ffill() for ct in countries]

covid_oa = pd.concat(df_list, ignore_index=True)

In [18]:
# Display the merged, forward-filled dataset (one row per country and date)
covid_oa

Unnamed: 0,Country,Lat,Long,Date,Confirmed_cases,Dead,Recovered
0,Burkina Faso,12.2383,-1.5616,2020-01-22,0,0,0
1,Burkina Faso,12.2383,-1.5616,2020-01-23,0,0,0
2,Burkina Faso,12.2383,-1.5616,2020-01-24,0,0,0
3,Burkina Faso,12.2383,-1.5616,2020-01-25,0,0,0
4,Burkina Faso,12.2383,-1.5616,2020-01-26,0,0,0
...,...,...,...,...,...,...,...
1330,Togo,8.6195,0.8248,2020-04-15,81,3,35
1331,Togo,8.6195,0.8248,2020-04-16,81,5,45
1332,Togo,8.6195,0.8248,2020-04-17,83,5,48
1333,Togo,8.6195,0.8248,2020-04-18,84,5,49


In [19]:
# Check column dtypes and non-null counts of the merged dataset
covid_oa.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1335 entries, 0 to 1334
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Country          1335 non-null   object        
 1   Lat              1335 non-null   float64       
 2   Long             1335 non-null   float64       
 3   Date             1335 non-null   datetime64[ns]
 4   Confirmed_cases  1335 non-null   int64         
 5   Dead             1335 non-null   int64         
 6   Recovered        1335 non-null   int64         
dtypes: datetime64[ns](1), float64(2), int64(3), object(1)
memory usage: 73.1+ KB


In [20]:
# Create the daily cases column.
# Daily_case is the cumulative count minus the country's previous non-zero
# cumulative count (0 before the first case), and 0 on days where the
# cumulative count itself is 0. Computed group-wise instead of filtering the
# whole frame twice per (country, date) pair, which was slow and raised an
# IndexError whenever a pair had no row. Every row is covered, not only the
# dates present in date_list.
ordered = covid_oa.sort_values('Date')  # chronological order inside each country
cumulative = ordered['Confirmed_cases']
by_country = ordered['Country']

# Hide zero counts, carry the last non-zero count forward, then shift by one
# row to obtain, for each day, the last non-zero count seen before that day.
reported = cumulative.mask(cumulative == 0)
previous = (
    reported.groupby(by_country).ffill()
    .groupby(by_country).shift()
    .fillna(0)
)

daily = (cumulative - previous).where(cumulative != 0, 0)
# Assignment aligns on the index, so the row order of covid_oa is unchanged.
covid_oa['Daily_case'] = daily.astype(cumulative.dtype)

In [21]:
# Save the dataset, including the Daily_case column, to CSV without the row index
covid_oa.to_csv('dailycasenumber_oa.csv', index=False)

In [22]:
# Display the dataset again, now with the Daily_case column
covid_oa

Unnamed: 0,Country,Lat,Long,Date,Confirmed_cases,Dead,Recovered,Daily_case
0,Burkina Faso,12.2383,-1.5616,2020-01-22,0,0,0,0
1,Burkina Faso,12.2383,-1.5616,2020-01-23,0,0,0,0
2,Burkina Faso,12.2383,-1.5616,2020-01-24,0,0,0,0
3,Burkina Faso,12.2383,-1.5616,2020-01-25,0,0,0,0
4,Burkina Faso,12.2383,-1.5616,2020-01-26,0,0,0,0
...,...,...,...,...,...,...,...,...
1330,Togo,8.6195,0.8248,2020-04-15,81,3,35,4
1331,Togo,8.6195,0.8248,2020-04-16,81,5,45,0
1332,Togo,8.6195,0.8248,2020-04-17,83,5,48,2
1333,Togo,8.6195,0.8248,2020-04-18,84,5,49,1
