In [1]:
import os
import glob

import matplotlib.pyplot as plt

import pandas as pd
import geopandas as gpd

from keplergl import KeplerGl

# I will do Data Pre-processing

### Here I have got Government Data


A dataset with all the information provided by the government about the covid19 (departement, sex, day, number of people currently hospitalized, number of people currently in resuscitation or critical care, the total amount of patient that returned home, the total amount of deaths at the hospital.

* **sexe : Sex**
* **jour: Day**
* **hosp: Number of people currently hospitalized**
* **rea: number of people currently in resuscitation or critical care**
* **rad: the total amount of patient that returned home**
* **dc: the total amount of deaths at the hospital**

In [2]:
covid_gouv = 'data/donnees-hospitalieres-covid19-2020-04-13-19h00.csv'
df_gouv    = pd.read_csv(covid_gouv, delimiter = ';')

df_gouv

Unnamed: 0,dep,sexe,jour,hosp,rea,rad,dc
0,01,0,2020-03-18,2,0,1,0
1,01,1,2020-03-18,1,0,1,0
2,01,2,2020-03-18,1,0,0,0
3,02,0,2020-03-18,41,10,18,11
4,02,1,2020-03-18,19,4,11,6
...,...,...,...,...,...,...,...
8179,974,1,2020-04-13,19,3,32,0
8180,974,2,2020-04-13,9,0,40,0
8181,976,0,2020-04-13,22,3,25,3
8182,976,1,2020-04-13,13,3,17,3


### I have only taken the maximum one, because it updates multiple times a day so, took the last updaed one for every province in France 

In [3]:
df_jour = df_gouv['jour'].drop_duplicates()
max = df_jour.max()

df_gouv['code'] = df_gouv['dep']

temp_df = dict()
for i in df_jour :
    temp_df[i] = df_gouv.loc[ df_gouv['jour'] == i]
dict_df = dict()
for key, df in temp_df.items() :
    dict_df[key] = df.groupby('code').max().drop(['sexe','dep'], axis=1)
    
dict_df[max]

Unnamed: 0_level_0,jour,hosp,rea,rad,dc
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
01,2020-04-13,120,30,139,44
02,2020-04-13,286,44,288,147
03,2020-04-13,58,20,82,13
04,2020-04-13,33,4,63,5
05,2020-04-13,49,13,70,3
...,...,...,...,...,...
971,2020-04-13,41,18,51,10
972,2020-04-13,39,16,52,6
973,2020-04-13,15,1,13,0
974,2020-04-13,28,3,72,0


### Demographic data

#### A dataset with useful demographic statistics. Source : https://www.regions-et-departements.fr/departements-francais

In [4]:
demographic = 'data/demographie (3).csv'
df_de       = pd.read_csv(demographic, delimiter = ',')
df_de       = df_de.set_index('code')

df_de

Unnamed: 0_level_0,nom,Superficie,Population,Densite
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
01,Ain,5762,631877,109.7
02,Aisne,7369,538659,73.1
03,Allier,7340,341613,46.5
04,Alpes-de-Haute-Provence,6925,161799,23.4
05,Hautes-Alpes,5549,140916,25.4
...,...,...,...,...
971,Guadeloupe,1628,397990,244.4
972,Martinique,1128,380877,337.7
973,Guyane,83534,259865,3.1
974,LaReunion,2504,850727,339.8


## Data for Different Affected Age group
Source : https://www.insee.fr/fr/statistiques/2012692#graphique-TCRD_021_tab1_departements

In [5]:
demographic_1  = 'data/TCRD_021.csv'
df_de1         = pd.read_csv(demographic_1, delimiter = ',')
df_de1['code'] = df_de1['Unnamed: 0']
df_de1         = df_de1.set_index('code').drop(['Unnamed: 0','Unnamed: 1'], axis =1 )
df_de1         = df_de1.dropna()

df_de1

Unnamed: 0_level_0,Ensemble,Part des femmes (en %),Part des hommes (en %),Part des 0 à 24 ans (en %),Part des 25 à 59 ans (en %),Part des 60 ans ou plus (en %),dont part des 75 ans ou plus (en %)
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01,656955.0,50.8,49.2,30.2,45.6,24.2,8.3
02,526050.0,51.1,48.9,29.5,42.6,27.9,9.3
03,331315.0,52.2,47.8,24.3,40.2,35.6,13.8
04,165197.0,51.2,48.8,24.5,41.2,34.3,12.7
05,141756.0,51.2,48.8,25.2,42.4,32.4,11.9
...,...,...,...,...,...,...,...
971,376879.0,54.6,45.4,28.8,43.4,27.9,9.1
972,358749.0,54.2,45.8,25.9,43.3,30.8,10.6
973,290691.0,51.4,48.6,48.6,41.7,9.7,2.1
974,859959.0,52.5,47.5,35.5,45.6,18.8,5.2


## Geospatial data

### A geospatial dataset with all the departement in France and their geometry (geojson format) Thanks to https://github.com/gregoiredavid

In [6]:
geospatial_data  = 'data/departements-avec-outre-mer.geojson'
f = gpd.read_file(geospatial_data)
f = f.set_index('code')
f = f.sort_index(axis=0)

#f.head()
f

Unnamed: 0_level_0,nom,geometry
code,Unnamed: 1_level_1,Unnamed: 2_level_1
01,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ..."
02,Aisne,"POLYGON ((3.17296 50.01131, 3.17382 50.01186, ..."
03,Allier,"POLYGON ((3.03207 46.79491, 3.03424 46.79080, ..."
04,Alpes-de-Haute-Provence,"POLYGON ((5.67604 44.19143, 5.67817 44.19051, ..."
05,Hautes-Alpes,"POLYGON ((6.26057 45.12685, 6.26417 45.12641, ..."
...,...,...
971,Guadeloupe,"MULTIPOLYGON (((-61.79038 16.16682, -61.79164 ..."
972,Martinique,"MULTIPOLYGON (((-60.86733 14.38867, -60.86802 ..."
973,Guyane,"MULTIPOLYGON (((-53.87049 5.74494, -53.86464 5..."
974,La Réunion,"MULTIPOLYGON (((55.56891 -21.37670, 55.56843 -..."


# DataFrame Coronavirus

The first step was to decide which index will be used to concatenate all the data.

The goal is to retrieve a geospatial data so we need to concatenate df_geo and covid19 data to have the geometry column.


I decided to pick the departement number (‘code’). Indeed all this data contain the departement number. However, df_gouv is a time series so to be able to use the departement number I split the data in multiple DataFrame. Each DataFrame is one specific date.

* Effected Population Density
* Number of Effected People
* people older than 75 year (%) (It can be changed to any age group based on previous data)
* Number of Cumulative Hospital
* Population Hospitalised 

In [7]:
dict_coronavirus = dict()
for i in df_jour :
    dict_coronavirus[i] = pd.concat([f,dict_df[i]], axis = 1)
    dict_coronavirus[i]['dc/densite']    = dict_coronavirus[i]['dc'] / df_de['Densite']
    dict_coronavirus[i]['people older than 75 year (%)'] = df_de1['dont part des 75 ans ou plus (en %)']
    dict_coronavirus[i]['dc/population'] = dict_coronavirus[i]['dc'] / df_de['Population']
    dict_coronavirus[i]['dc/cumul hospitalise'] = dict_coronavirus[i]['dc'] / (dict_coronavirus[i]['hosp']+dict_coronavirus[i]['rad'])
    dict_coronavirus[i]['cumul hopitalise / population']     = (dict_coronavirus[i]['hosp']+dict_coronavirus[i]['rad']) / df_de['Population']

dict_coronavirus[max]

Unnamed: 0_level_0,nom,geometry,jour,hosp,rea,rad,dc,dc/densite,people older than 75 year (%),dc/population,dc/cumul hospitalise,cumul hopitalise / population
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
01,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",2020-04-13,120,30,139,44,0.401094,8.3,0.000070,0.169884,0.000410
02,Aisne,"POLYGON ((3.17296 50.01131, 3.17382 50.01186, ...",2020-04-13,286,44,288,147,2.010944,9.3,0.000273,0.256098,0.001066
03,Allier,"POLYGON ((3.03207 46.79491, 3.03424 46.79080, ...",2020-04-13,58,20,82,13,0.279570,13.8,0.000038,0.092857,0.000410
04,Alpes-de-Haute-Provence,"POLYGON ((5.67604 44.19143, 5.67817 44.19051, ...",2020-04-13,33,4,63,5,0.213675,12.7,0.000031,0.052083,0.000593
05,Hautes-Alpes,"POLYGON ((6.26057 45.12685, 6.26417 45.12641, ...",2020-04-13,49,13,70,3,0.118110,11.9,0.000021,0.025210,0.000844
...,...,...,...,...,...,...,...,...,...,...,...,...
971,Guadeloupe,"MULTIPOLYGON (((-61.79038 16.16682, -61.79164 ...",2020-04-13,41,18,51,10,0.040917,9.1,0.000025,0.108696,0.000231
972,Martinique,"MULTIPOLYGON (((-60.86733 14.38867, -60.86802 ...",2020-04-13,39,16,52,6,0.017767,10.6,0.000016,0.065934,0.000239
973,Guyane,"MULTIPOLYGON (((-53.87049 5.74494, -53.86464 5...",2020-04-13,15,1,13,0,0.000000,2.1,0.000000,0.000000,0.000108
974,La Réunion,"MULTIPOLYGON (((55.56891 -21.37670, 55.56843 -...",2020-04-13,28,3,72,0,0.000000,5.2,0.000000,0.000000,0.000118


## Statistics

In [8]:
df_stats = dict_coronavirus[max]
threshold = 400

df_stats['décés/cumul hospitalisé'] = df_stats.loc[(df_stats.hosp + df_stats.rad) > threshold]['dc/cumul hospitalise']
df_stats['décés/cumul hospitalisé'] = df_stats['décés/cumul hospitalisé'].fillna(0)

df_stats['décés/densite'] = df_stats.loc[(df_stats.hosp + df_stats.rad) > threshold]['dc/densite']
df_stats['décés/densite'] = df_stats['décés/densite'].fillna(0)

df_stats['décés/population'] = df_stats.loc[(df_stats.hosp + df_stats.rad) > threshold]['dc/population']
df_stats['décés/population'] = df_stats['décés/population'].fillna(0)

df_stats = df_stats.drop(['cumul hopitalise / population','dc/cumul hospitalise','dc/population','dc/densite'], axis=1)
df_stats.keys()

Index(['nom', 'geometry', 'jour', 'hosp', 'rea', 'rad', 'dc',
       'people older than 75 year (%)', 'décés/cumul hospitalisé',
       'décés/densite', 'décés/population'],
      dtype='object')

# Date

In [9]:
print('Choose Date :')

print(df_jour.min())
print(df_jour.max())

Choose Date :
2020-03-18
2020-04-13


In [10]:
jour = '2020-04-13'

## Analyse

In [11]:
daily_coronavirus = dict_coronavirus[jour]

daily_coronavirus

Unnamed: 0_level_0,nom,geometry,jour,hosp,rea,rad,dc,dc/densite,people older than 75 year (%),dc/population,dc/cumul hospitalise,cumul hopitalise / population,décés/cumul hospitalisé,décés/densite,décés/population
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
01,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",2020-04-13,120,30,139,44,0.401094,8.3,0.000070,0.169884,0.000410,0.000000,0.000000,0.000000
02,Aisne,"POLYGON ((3.17296 50.01131, 3.17382 50.01186, ...",2020-04-13,286,44,288,147,2.010944,9.3,0.000273,0.256098,0.001066,0.256098,2.010944,0.000273
03,Allier,"POLYGON ((3.03207 46.79491, 3.03424 46.79080, ...",2020-04-13,58,20,82,13,0.279570,13.8,0.000038,0.092857,0.000410,0.000000,0.000000,0.000000
04,Alpes-de-Haute-Provence,"POLYGON ((5.67604 44.19143, 5.67817 44.19051, ...",2020-04-13,33,4,63,5,0.213675,12.7,0.000031,0.052083,0.000593,0.000000,0.000000,0.000000
05,Hautes-Alpes,"POLYGON ((6.26057 45.12685, 6.26417 45.12641, ...",2020-04-13,49,13,70,3,0.118110,11.9,0.000021,0.025210,0.000844,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
971,Guadeloupe,"MULTIPOLYGON (((-61.79038 16.16682, -61.79164 ...",2020-04-13,41,18,51,10,0.040917,9.1,0.000025,0.108696,0.000231,0.000000,0.000000,0.000000
972,Martinique,"MULTIPOLYGON (((-60.86733 14.38867, -60.86802 ...",2020-04-13,39,16,52,6,0.017767,10.6,0.000016,0.065934,0.000239,0.000000,0.000000,0.000000
973,Guyane,"MULTIPOLYGON (((-53.87049 5.74494, -53.86464 5...",2020-04-13,15,1,13,0,0.000000,2.1,0.000000,0.000000,0.000108,0.000000,0.000000,0.000000
974,La Réunion,"MULTIPOLYGON (((55.56891 -21.37670, 55.56843 -...",2020-04-13,28,3,72,0,0.000000,5.2,0.000000,0.000000,0.000118,0.000000,0.000000,0.000000


## Number of People are at risk

### Demographic Data

#### For above 75 years of age

In [12]:
cat_0 = 0
#Risque = 1
cat_1 = 9.000000 
#Risque = 2
cat_2 = 10.600000
#Risque = 3
cat_3 = 12.500000
#Risque = 4  

df_temp = df_de1.drop(['Ensemble','Part des femmes (en %)', 'Part des hommes (en %)','Part des 0 à 24 ans (en %)', 'Part des 25 à 59 ans (en %)', 'Part des 60 ans ou plus (en %)'], axis = 1)
x = df_temp['dont part des 75 ans ou plus (en %)']
df_temp['age_risque'] = df_temp['dont part des 75 ans ou plus (en %)'].apply(lambda x : 1 if x < cat_1 else (2 if x > cat_1 and x < cat_2 else (3 if x > cat_2 and x < cat_3 else 4)))
df_temp = df_temp.drop(['dont part des 75 ans ou plus (en %)'], axis = 1)

df_temp

Unnamed: 0_level_0,age_risque
code,Unnamed: 1_level_1
01,1
02,2
03,4
04,4
05,3
...,...
971,2
972,4
973,1
974,1


## Contamination

### Hospital for effected population

In [13]:
cat_0 = 0
#Risque = 1
cat_1 = 0.000284
#Risque = 2
cat_2 = 0.000494
#Risque =  3
cat_3 = 0.000898
#Risque = 3

x1 = dict_coronavirus[max]['cumul hopitalise / population']
df_temp['contamination_risque'] = dict_coronavirus[max]['cumul hopitalise / population'].apply(lambda x : 4 if x < cat_1 else (3 if x > cat_1 and x < cat_2 else (2 if x > cat_2 and x < cat_3 else 1)))

df_temp

Unnamed: 0_level_0,age_risque,contamination_risque
code,Unnamed: 1_level_1,Unnamed: 2_level_1
01,1,3
02,2,1
03,4,3
04,4,2
05,3,2
...,...,...
971,2,4
972,4,4
973,1,4
974,1,4


In [14]:
df_temp['score'] = df_temp['age_risque'] * df_temp['contamination_risque']
    
#df_temp.head()
df_temp

Unnamed: 0_level_0,age_risque,contamination_risque,score
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01,1,3,3
02,2,1,2
03,4,3,12
04,4,2,8
05,3,2,6
...,...,...,...
971,2,4,8
972,4,4,16
973,1,4,4
974,1,4,4


In [15]:
df_risque = pd.concat([f,df_temp], axis = 1)

df_risque.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,nom,geometry,age_risque,contamination_risque,score
1,Ain,"POLYGON ((4.78021 46.17668, 4.78024 46.18905, ...",1,3,3
2,Aisne,"POLYGON ((3.17296 50.01131, 3.17382 50.01186, ...",2,1,2
3,Allier,"POLYGON ((3.03207 46.79491, 3.03424 46.79080, ...",4,3,12
4,Alpes-de-Haute-Provence,"POLYGON ((5.67604 44.19143, 5.67817 44.19051, ...",4,2,8
5,Hautes-Alpes,"POLYGON ((6.26057 45.12685, 6.26417 45.12641, ...",3,2,6


In [16]:
map_stats = KeplerGl(height=650)
map_stats.add_data(df_stats, name ='coronavirus')
map_stats

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'coronavirus': {'index': ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12…

In [None]:
config_stats = map_stats.config
# Save map_1 config to a file
with open('config_stats.py', 'w') as f:
    f.write('config = {}'.format(config_stats))

In [None]:
# this will save current map
map_stats.save_to_html(file_name='map_stats.html')

### Visualisation
### Deceased

In [None]:
%run config_stats.py
config_stats = config

In [None]:
map_coronavirus_2D = KeplerGl(height=800, config = config)
map_coronavirus_2D.add_data(dict_coronavirus['2020-04-13'], name ='coronavirus')
map_coronavirus_2D

In [None]:
%run config_2D.py
config_2D = config

In [None]:
config_2D = map_coronavirus_2D.config

In [None]:
# Save map_1 config to a file
with open('config_2D.py', 'w') as f:
    f.write('config = {}'.format(config_2D))

In [None]:
map_coronavirus_2D = KeplerGl(height=800, config = config_2D)
map_coronavirus_2D.add_data(dict_coronavirus['2020-04-13'], name ='coronavirus')
map_coronavirus_2D

In [None]:
# # this will save current map
map_coronavirus_2D.save_to_html(file_name='map_coronavirus_2D.html')

### Critical

In [None]:
map_risque = KeplerGl(height=500, config = config)
map_risque.add_data(df_risque, name = 'risque')
map_risque

In [None]:
# Save map_1 config to a file
with open('config_risque.py', 'w') as f:
    f.write('config = {}'.format(config_risque))

In [None]:
%run config_risque.py
config_risque = config

In [None]:
map_risque = KeplerGl(height=650, config = config_risque)
map_risque.add_data(df_risque, name = 'risque')
map_risque

In [None]:
# this will save current map
map_risque.save_to_html(file_name='map_risque.html')

### Daily Cases

In [None]:
map_coronavirus_3D = KeplerGl(height=500, config = config)
map_coronavirus_3D.add_data(daily_coronavirus, name ='coronavirus')
map_coronavirus_3D

In [None]:
config_3D = map_coronavirus_3D.config

In [None]:
# Save map_1 config to a file
with open('config_3D.py', 'w') as f:
    f.write('config = {}'.format(config_3D))

In [None]:
%run config_3D.py
config_3D = config

In [None]:
map_coronavirus_3D = KeplerGl(height=500, config = config_3D)
map_coronavirus_3D.add_data(daily_coronavirus, name ='coronavirus')
map_coronavirus_3D

In [None]:
# this will save current map
map_coronavirus_3D.save_to_html(file_name='map_coronavirus_3D.html')