# Africa vs Rest of the World Covid 19

The objective of this notebook is to show how Covid 19 is spreading differently across the continents of the world. To do so, we proceed as follows:

1. load data and python package library
2. Exploratory data analysis

    2.1 descriptive analysis
    
    2.2 visualization
    
    2.3 epidemiology terms: lethality, incidence, ...
    
3. geospatial analysis
4. forecasting
5. conclusion

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _subfolders, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Load data and python package library

In [None]:
# import package
import matplotlib.pyplot as plt
import seaborn as sns 
import statsmodels as sm
import folium as fl
#import vincent 
from pathlib import Path
from sklearn.impute import SimpleImputer
import geopandas as gpd
import mapclassify as mpc
import warnings
import plotly.offline as py
import plotly.express as px
import cufflinks as cf

In [None]:
%matplotlib inline
pd.options.plotting.backend
#pd.plotting.register_matplotlib_converters()
gpd.plotting.plot_linestring_collection
py.init_notebook_mode(connected=False)
cf.set_config_file(offline=True)
sns.set()
warnings.filterwarnings('ignore')

In [None]:
# All five CSV files live in the same Kaggle dataset folder.
DATA_DIR = Path('/kaggle/input/covid19-data-from-john-hopkins-university')

# "RAW" files: one row per location, one column per date.
raw_conf = pd.read_csv(DATA_DIR / 'RAW_global_confirmed_cases.csv')
raw_deaths = pd.read_csv(DATA_DIR / 'RAW_global_deaths.csv')

# "CONVENIENT" files and their metadata, loaded for inspection.
conv_conf = pd.read_csv(DATA_DIR / 'CONVENIENT_global_confirmed_cases.csv')
conv_deaths = pd.read_csv(DATA_DIR / 'CONVENIENT_global_deaths.csv')
meta_conf = pd.read_csv(DATA_DIR / 'CONVENIENT_global_metadata.csv')

In [None]:
# Preview the first rows of the raw confirmed-cases table.
raw_conf.head()

In [None]:
# Preview the last rows of the raw deaths table.
raw_deaths.tail()

In [None]:
# Column dtypes, non-null counts and memory usage of the raw confirmed-cases table.
raw_conf.info()

In [None]:
# Column dtypes, non-null counts and memory usage of the raw deaths table.
raw_deaths.info()

In [None]:
# Preview the last rows of the "convenient" confirmed-cases table.
conv_conf.tail()

In [None]:
# Column dtypes, non-null counts and memory usage of the "convenient" confirmed-cases table.
conv_conf.info()

# Exploratory data analysis

## Descriptive analysis

In [None]:
# Summary statistics of the last column of the confirmed-cases table
# (one value per row of raw_conf).
latest_confirmed = raw_conf.iloc[:, -1]
latest_confirmed.describe()

In [None]:
# Summary statistics of the last column of the deaths table
# (one value per row of raw_deaths).
latest_deaths = raw_deaths.iloc[:, -1]
latest_deaths.describe()

In [None]:
# Label of the last column of raw_conf; the notebook treats it as the latest date.
date = raw_conf.columns[-1]

# Add up the rows that share a Country/Region so each country gets a single
# total for that date.
country_confirmed = (
    raw_conf
    .groupby('Country/Region')[date]
    .sum()
    .reset_index()
)

In [None]:
# Top 10 countries by cumulative confirmed cases on the latest date.
# `.head(10)` limits the styled table to the ten rows the heading promises
# instead of rendering every country in the dataset.
(
    country_confirmed
    .sort_values(by=date, ascending=False)
    .head(10)
    .style.background_gradient('viridis')
)

The table above ranks countries by confirmed Covid 19 cases; its first five rows are the five most affected countries.

In [None]:
# Per-country death totals on the latest date: rows sharing a Country/Region
# are summed into one value.
country_deaths = (
    raw_deaths
    .groupby('Country/Region')[date]
    .sum()
    .reset_index()
)

In [None]:
# Top 10 countries by cumulative deaths on the latest date.
# `.head(10)` limits the styled table to the ten rows the heading promises
# instead of rendering every country in the dataset.
(
    country_deaths
    .sort_values(by=date, ascending=False)
    .head(10)
    .style.background_gradient('viridis')
)

## Visualization

In [None]:
# Histogram of per-country confirmed-case totals on the latest date.
# `sns.distplot` is deprecated (since seaborn 0.11) and slated for removal;
# `sns.histplot` draws the same 10-bin count histogram when no KDE is requested.
fig, ax = plt.subplots(figsize=(15, 5))
sns.histplot(country_confirmed[date], bins=10, kde=False, ax=ax)
ax.set_title(f'Covid19 confirmed case distribution for date {date}')
ax.set_ylabel('Number of countries')  # each observation is one country
plt.show()

Many countries have between 0 and 2.5 million (25e5) confirmed cases.

In [None]:
# Histogram of per-country death totals on the latest date.
# `sns.distplot` is deprecated (since seaborn 0.11) and slated for removal;
# `sns.histplot` draws the same 10-bin count histogram when no KDE is requested.
fig, ax = plt.subplots(figsize=(15, 5))
sns.histplot(country_deaths[date], bins=10, kde=False, ax=ax)
ax.set_title(f'Covid19 deaths case distribution for date {date}')
ax.set_ylabel('Number of countries')  # each observation is one country
plt.show()

Many countries have between 0 and 50,000 deaths.

In [None]:
# Joint kernel-density plot of the (Long, Lat) coordinates of the raw_conf rows.
# NOTE(review): each row counts once regardless of its case numbers, so the
# density shows where the table has entries rather than where cases are
# concentrated — confirm this is the intended reading.
sns.jointplot(data=raw_conf, x='Long', y='Lat', kind='kde')
plt.show()

North America and Europe are the two main coronavirus hotspots of this second wave.

In [None]:
# Per-country time series of confirmed cases: every column from position 4
# onwards is summed over the rows that share a Country/Region.
conf_value_columns = list(raw_conf.columns)[4:]
confirmed = raw_conf.groupby('Country/Region')[conf_value_columns].sum()

In [None]:
# Per-country time series of deaths: every column from position 4 onwards is
# summed over the rows that share a Country/Region.
# The column list is taken from raw_deaths itself (it used to come from
# raw_conf), so the selection stays valid even if the two files differ.
death = raw_deaths.groupby('Country/Region')[list(raw_deaths.columns)[4:]].agg('sum')

In [None]:
# Transpose so that rows are dates and columns are countries (ready for line plots).
case_conf = confirmed.transpose()

In [None]:
# Line plot of cumulative confirmed cases for the five most affected countries.
# The countries are read from the last row of `case_conf` (the most recent
# date) instead of a hard-coded list, so the chart keeps matching its title
# when the data is refreshed.
top5_confirmed = case_conf.iloc[-1].nlargest(5).index
case_conf[top5_confirmed].iplot(
    title='5 countries having huge covid 19 confirmed case in the world.',
    legend=True,
)

In [None]:
# Transpose so that rows are dates and columns are countries (ready for line plots).
fatalities = death.transpose()

In [None]:
# Line plot of cumulative deaths for the five countries with the most deaths.
# The countries are read from the last row of `fatalities` (the most recent
# date) instead of a hard-coded list, so the chart keeps matching its title
# when the data is refreshed.
top5_deaths = fatalities.iloc[-1].nlargest(5).index
fatalities[top5_deaths].iplot(
    title='5 countries having huge covid 19 deaths in the world.',
    legend=True,
)

# Geospatial analysis

Combine `raw_conf` and `raw_deaths` into a single long-format dataframe, `global_covid19`.

In [None]:
# Build, for every value column of raw_conf (position 4 onwards), one list per
# output field. Each outer list has one entry per column and each inner list
# has one entry per raw_conf row; the next cells concatenate them.
# NOTE(review): death counts are taken row-by-row from raw_deaths, which
# assumes raw_deaths has the same rows in the same order as raw_conf — confirm.
value_columns = list(raw_conf.columns)[4:]
row_count = raw_conf.shape[0]

time = [[column] * row_count for column in value_columns]
region = [list(raw_conf['Country/Region']) for _ in value_columns]
cases = [list(raw_conf[column]) for column in value_columns]
latitude = [list(raw_conf.Lat) for _ in value_columns]
longitude = [list(raw_conf.Long) for _ in value_columns]
fat = [list(raw_deaths[column]) for column in value_columns]

In [None]:
# Start from an empty frame; its columns are filled in by the next cell.
global_covid19 = pd.DataFrame()

In [None]:
# Flatten each list-of-lists into one long column of the combined frame.
# Insertion order of the mapping fixes the column order.
column_sources = {
    'date': time,
    'country': region,
    'Lat': latitude,
    'Long': longitude,
    'cases': cases,
    'fatalities': fat,
}
for column_name, chunks in column_sources.items():
    global_covid19[column_name] = np.concatenate(chunks)

In [None]:
# Preview the first rows of the combined long-format table.
global_covid19.head()

### Interactive map.

In [None]:
# Animated density map of cumulative confirmed cases, one frame per date.
center_point = dict(lon=0, lat=0)

# One colour scale shared by every animation frame. The previous
# range_color=[20, 20] had zero width, which leaves no usable colour gradient.
cases_color_max = float(global_covid19['cases'].max())

figx = px.density_mapbox(
    global_covid19,
    lat='Lat', lon='Long', z='cases',
    center=center_point, hover_name='country',
    zoom=1,  # whole-world view; zoom=5 around (0, 0) showed only open ocean
    range_color=[0, cases_color_max], radius=20,
    mapbox_style='open-street-map', title='Novel Covid19 cases in the world',
    animation_frame='date',
)
figx.update(layout_coloraxis_showscale=True)
figx.show()

What does this interactive map tell you about the situation? Share your thoughts in the comments.

In [None]:
# Animated density map of cumulative fatalities, one frame per date.
center = dict(lon=0, lat=0)

# One colour scale shared by every animation frame. The previous
# range_color=[20, 20] had zero width, which leaves no usable colour gradient.
fatalities_color_max = float(global_covid19['fatalities'].max())

figy = px.density_mapbox(
    global_covid19,
    lat='Lat', lon='Long', z='fatalities',
    center=center, hover_name='country',
    zoom=1,  # whole-world view; zoom=5 around (0, 0) showed only open ocean
    range_color=[0, fatalities_color_max], radius=20,
    mapbox_style='open-street-map', title='Novel Covid19 fatalities in the world.',
    animation_frame='date',
)
figy.update(layout_coloraxis_showscale=True)
figy.show()

What does this interactive map tell you about the situation? Share your thoughts in the comments.

**Rename**

In [None]:
# Country-name harmonisation table: keys are the names as they appear in the
# covid data, values are the names used by the world map layer that the data
# is merged with later on.
# Fixes relative to the previous lists:
#   * 'Dominica' (a different country) was mapped to 'Dominican Rep.'; the
#     source name for that map entry is 'Dominican Republic'.
#   * "Côte d'Ivoire" was mapped to itself; the covid data spells it without
#     the circumflex ("Cote d'Ivoire"), so the entry never matched anything.
country_name_map = {
    'Congo (Kinshasa)': 'Dem. Rep. Congo',
    'Congo (Brazzaville)': 'Congo',
    'Central African Republic': 'Central African Rep.',
    'Equatorial Guinea': 'Eq. Guinea',
    'Eswatini': 'eSwatini',
    'Bosnia and Herzegovina': 'Bosnia and Herz.',
    'South Sudan': 'S. Sudan',
    'Dominican Republic': 'Dominican Rep.',
    'US': 'United States of America',
    'Korea, South': 'South Korea',
    "Cote d'Ivoire": "Côte d'Ivoire",
}

# Parallel lists consumed by the replace cell below:
# `name` = values to look for, `replace` = values to substitute.
name = list(country_name_map)
replace = list(country_name_map.values())

In [None]:
# Rename countries so they match the names of the world map layer.
# `name` holds the spellings to look for and `replace` the substitutes.
# The replacement is applied to the `country` column only; it used to scan
# every column of this large frame although only country names can match.
global_covid19 = global_covid19.assign(
    country=global_covid19['country'].replace(to_replace=name, value=replace)
)

In [None]:
# Distinct dates in order of first appearance, computed once.
observed_dates = global_covid19['date'].unique()
end_date = observed_dates[-1]   # today: last date present in the data
yesterday = observed_dates[-2]  # the date just before it

### Covid 19 worldwide map

In [None]:
# Turn every (Long, Lat) pair into a point geometry and attach it to the table.
covid_points = gpd.points_from_xy(x=global_covid19['Long'], y=global_covid19['Lat'])
geoCovid = gpd.GeoDataFrame(global_covid19, geometry=covid_points)

In [None]:
# Declare the coordinate reference system of the point geometries (EPSG:4326).
geoCovid.crs = 'epsg:4326'

In [None]:
# Preview the geo-enabled table, including the new geometry column.
geoCovid.head()

In [None]:
# Load the 'naturalearth_lowres' country layer shipped with geopandas.
# NOTE(review): `gpd.datasets` is deprecated in recent geopandas releases and
# no longer available from geopandas 1.0 — confirm the installed version
# before upgrading the environment.
world_path_file = gpd.datasets.get_path('naturalearth_lowres') # path to the bundled Natural Earth map file
world = gpd.read_file(world_path_file)

In [None]:
# Rows of the latest date. `.copy()` makes this an independent frame, because
# new columns are assigned to it further down; without the copy those
# assignments write into a slice of global_covid19 (SettingWithCopyWarning).
need_data = global_covid19[global_covid19.date == end_date].copy()

In [None]:
# Rows of the previous date, used as the baseline for the daily differences.
is_yesterday = global_covid19['date'] == yesterday
before_data = global_covid19.loc[is_yesterday]

In [None]:
# Preview the rows of the previous date.
before_data.head()

In [None]:
# Daily increments: latest cumulative value minus the previous day's value.
# The subtraction is positional (`.values`), so both frames must list the same
# locations in the same order; verify that instead of silently mixing rows.
assert need_data['country'].tolist() == before_data['country'].tolist(), \
    'need_data and before_data rows are not aligned'

# `assign` returns a new frame, so nothing is written into a slice of
# global_covid19 (the previous column assignment triggered
# SettingWithCopyWarning, hidden by the global warnings filter).
need_data = need_data.assign(
    new_cases=need_data['cases'].values - before_data['cases'].values,
    new_fatalities=need_data['fatalities'].values - before_data['fatalities'].values,
)

In [None]:
# Preview the latest-date rows with the new daily-increment columns.
need_data.head()

In [None]:
# Collapse the latest-date rows to one row per country by summing the counts.
count_columns = ['cases', 'fatalities', 'new_cases', 'new_fatalities']
need_dat = (
    need_data
    .groupby('country')[count_columns]
    .sum()
    .reset_index()
)

In [None]:
# Attach the per-country counts to the world polygons by country name.
# The merge uses pandas' default join type, so only names present on both
# sides end up in geo_merged.
country_counts = need_dat[['country', 'cases', 'fatalities', 'new_cases', 'new_fatalities']]
geo_merged = world.merge(country_counts, left_on='name', right_on='country')

In [None]:
# Preview the last rows of the merged map + counts table.
geo_merged.tail()

In [None]:
# Epidemiological indicators derived from the merged table.
per_100k = 100000

# Cumulative cases per 100,000 inhabitants (pop_est comes from the map layer).
geo_merged['prevalence'] = geo_merged['cases'].div(geo_merged['pop_est']).mul(per_100k)
# New cases of the latest day per 100,000 inhabitants.
geo_merged['incidence'] = geo_merged['new_cases'].div(geo_merged['pop_est']).mul(per_100k)
# Deaths as a percentage of confirmed cases.
geo_merged['lethality'] = geo_merged['fatalities'].div(geo_merged['cases']).mul(100)

In [None]:
# Preview the last rows again, now including prevalence, incidence and lethality.
geo_merged.tail()

In [None]:
# Choropleth of cumulative cases, countries binned into 4 quantile classes.
cases_ax = geo_merged.plot(
    column='cases',
    scheme='quantiles', k=4,
    cmap='cividis_r',
    legend=True,
    figsize=(15, 9),
)
cases_ax.set_title(f'SARS-Cov 2 cases in the worldwide for date {end_date}.')
plt.show()

In [None]:
# Choropleth of cumulative fatalities, countries binned into 3 quantile classes.
fatalities_ax = geo_merged.plot(
    column='fatalities',
    scheme='quantiles', k=3,
    cmap='OrRd',
    legend=True,
    figsize=(15, 9),
)
fatalities_ax.set_title(f'SARS-Cov 2 fatalities in the worldwide for date {end_date}.')
plt.show()

Do you agree with this map? Give your answers in the comments.

### Geomap for the 20 countries most affected by Covid 19.

In [None]:
# Rows (latest date) of the 20 countries with the most cumulative cases.
# The ranking is computed from the data instead of a hard-coded list of names,
# so it cannot go stale when the dataset is refreshed or a country is renamed.
N_MOST_AFFECTED = 20
most_affected_names = (
    need_data
    .groupby('country')['cases']
    .sum()
    .nlargest(N_MOST_AFFECTED)
    .index
)
# All rows of a selected country are kept (a country can span several rows).
most_aff_country = need_data[need_data.country.isin(most_affected_names)]

In [None]:
# Bubble map of the most affected countries over a plain world outline.
# Marker area and colour both encode the cumulative number of cases.
CASES_PER_AREA_UNIT = 10000  # one unit of marker size represents this many cases

ax = world.plot(figsize=(20, 20), edgecolor='black', linestyle=':', color='whitesmoke')
ax.scatter(most_aff_country.Long, most_aff_country.Lat, cmap='cividis',
           s=most_aff_country.cases / CASES_PER_AREA_UNIT,
           c=most_aff_country.cases)

# Empty scatters create the size legend. Labels show the number of cases each
# reference bubble stands for (they used to show the raw marker size), and the
# loop variable no longer shadows the builtin `id`.
for marker_area in [10, 100, 150, 200]:
    ax.scatter([], [], c='k', alpha=0.5, s=marker_area,
               label=f'{marker_area * CASES_PER_AREA_UNIT:,}')
ax.legend(scatterpoints=1, frameon=True, labelspacing=1, title='Confirmed cases')
ax.set_title('The spreading of COVID-19 cases in the 20 countries most affected ')
plt.show()

Covid 19 appears to be spreading in a north-westerly direction. Do you agree? Give your answers in the comments.

### SARS-CoV-2 toll (bilan) by continent

In [None]:
# Container for the per-continent totals; populated by the next cell.
sars_bilan = []

In [None]:
# Total cases and fatalities per continent, as a list of one-row frames in
# order of first appearance in geo_merged.
# The list is rebuilt from scratch on every run: the previous loop appended to
# a list created in another cell, so re-running it duplicated every continent.
continent_totals = (
    geo_merged
    .groupby('continent', sort=False)[['cases', 'fatalities']]
    .agg('sum')
)
sars_bilan = [continent_totals.loc[[continent]] for continent in continent_totals.index]

In [None]:
# Bar chart of total confirmed cases per continent.
continent_cases = pd.concat(sars_bilan)['cases']
continent_cases.iplot(
    kind='bar',
    title='SARS-Cov 2 cases in the Earth planet',
    legend=True,
    logy=False,
)

Do you agree? Leave a comment.

In [None]:
# Bar chart of total fatalities per continent.
continent_fatalities = pd.concat(sars_bilan)['fatalities']
continent_fatalities.iplot(
    kind='bar',
    title='SARS-Cov 2 fatalities in the Earth planet',
    legend=True,
    logy=False,
)

Do you agree? Leave a comment.

You can continue exploring the visualizations here:
https://www.kaggle.com/lumierebatalong/africa-covid-19-forecasting

# Epidemiology terms: prevalence, incidence, new cases, new fatalities

## Prevalence

In [None]:
# Countries ranked by prevalence (highest first), shaded with a colour gradient.
prevalence_ranking = (
    geo_merged[['country', 'prevalence']]
    .sort_values(by='prevalence', ascending=False)
)
prevalence_ranking.style.background_gradient('viridis')

In [None]:
# Choropleth of prevalence, countries binned into 3 quantile classes.
prevalence_ax = geo_merged.plot(
    column='prevalence',
    scheme='quantiles', k=3,
    cmap='OrRd',
    legend=True,
    figsize=(15, 9),
)
prevalence_ax.set_title(
    f'SARS-Cov 2 prevalence per 100000 inhabitants in the worldwide for date {end_date}.'
)
plt.show()

## Incidence

In [None]:
# Countries ranked by incidence (highest first), shaded with a colour gradient.
incidence_ranking = (
    geo_merged[['country', 'incidence']]
    .sort_values(by='incidence', ascending=False)
)
incidence_ranking.style.background_gradient('viridis')

In [None]:
# Choropleth of incidence, countries binned into 3 quantile classes.
incidence_ax = geo_merged.plot(
    column='incidence',
    scheme='quantiles', k=3,
    cmap='rainbow',
    legend=True,
    figsize=(15, 9),
)
incidence_ax.set_title(
    f'SARS-Cov 2 incidence per 100000 inhabitants in the worldwide for date {end_date}.'
)
plt.show()

## New cases

In [None]:
# Countries ranked by new cases on the latest date (highest first).
new_cases_ranking = (
    geo_merged[['country', 'new_cases']]
    .sort_values(by='new_cases', ascending=False)
)
new_cases_ranking.style.background_gradient('viridis')

In [None]:
# Choropleth of new cases, countries binned into 3 quantile classes.
new_cases_ax = geo_merged.plot(
    column='new_cases',
    scheme='quantiles', k=3,
    cmap='rainbow',
    legend=True,
    figsize=(15, 9),
)
new_cases_ax.set_title(f'SARS-Cov 2 new cases in the worldwide for date {end_date}.')
plt.show()

## New fatalities

In [None]:
# Countries ranked by new fatalities on the latest date (highest first).
new_fatalities_ranking = (
    geo_merged[['country', 'new_fatalities']]
    .sort_values(by='new_fatalities', ascending=False)
)
new_fatalities_ranking.style.background_gradient('viridis')

In [None]:
# Choropleth of new fatalities, countries binned into 2 quantile classes.
new_fatalities_ax = geo_merged.plot(
    column='new_fatalities',
    scheme='quantiles', k=2,
    cmap='OrRd',
    legend=True,
    figsize=(15, 9),
)
new_fatalities_ax.set_title(
    f'SARS-Cov 2 new fatalities in the worldwide for date {end_date}.'
)
plt.show()

## Case fatality rate

**The case fatality rate** is the proportion of deaths related to a particular disease or condition, out of the total number of cases affected by the disease or affected by the particular condition.

In [None]:
# Countries ranked by case fatality rate (highest first).
lethality_ranking = (
    geo_merged[['country', 'lethality']]
    .sort_values(by='lethality', ascending=False)
)
lethality_ranking.style.background_gradient('viridis')

In [None]:
# Choropleth of the case fatality rate, countries binned into 3 quantile classes.
lethality_ax = geo_merged.plot(
    column='lethality',
    scheme='quantiles', k=3,
    cmap='OrRd',
    legend=True,
    figsize=(15, 9),
)
lethality_ax.set_title(f'SARS-Cov 2 lethality in the worldwide for date {end_date}.')
plt.show()

Do you agree? Leave a comment.

### Feel free to download, share and comment

# Up next