In [130]:
from covid_analytics import factors
import pandas as pd

## Données INSEE
sources :
- départements insee
- régions insee
- estimation de la population https://www.insee.fr/fr/statistiques/1893198

In [131]:
# Analysis window as ISO date strings; the trailing tuple is only echoed as cell output.
date_debut, date_fin = '2021-04-06', '2021-05-06'
(date_debut, date_fin)

('2021-04-06', '2021-05-06')

In [132]:
# Département reference table, reduced to the département code, the region
# code and the label.
departement = pd.read_csv('data/departement2020.csv').loc[:, ['dep', 'reg', 'libelle']]
departement

Unnamed: 0,dep,reg,libelle
0,01,84,Ain
1,02,32,Aisne
2,03,84,Allier
3,04,93,Alpes-de-Haute-Provence
4,05,93,Hautes-Alpes
...,...,...,...
96,971,1,Guadeloupe
97,972,2,Martinique
98,973,3,Guyane
99,974,4,La Réunion


In [133]:
# Region reference table, reduced to the region code and its label.
region = pd.read_csv('data/region2020.csv').loc[:, ['reg', 'libelle']]
region

Unnamed: 0,reg,libelle
0,1,Guadeloupe
1,2,Martinique
2,3,Guyane
3,4,La Réunion
4,6,Mayotte
5,11,Île-de-France
6,24,Centre-Val-de-Loire
7,27,Bourgogne-Franche-Comté
8,28,Normandie
9,32,Hauts-de-France


In [134]:
# Population file exposing a `Total` column and 5-year age-band columns.
pop_insee = pd.read_csv('data/population-region-2021-insee.csv', sep=',')

# Each 10-year class is keyed by the integer code used in `cl_age90`
# (0 = all ages, 9 = 0-9, 19 = 10-19, ..., 90 = 90 and over) and is the sum
# of two 5-year source columns.
tranches_10_ans = {
    9: ('0 à 4 ans', '5 à 9 ans'),
    19: ('10 à 14 ans', '15 à 19 ans'),
    29: ('20 à 24 ans', '25 à 29 ans'),
    39: ('30 à 34 ans', '35 à 39 ans'),
    49: ('40 à 44 ans', '45 à 49 ans'),
    59: ('50 à 54 ans', '55 à 59 ans'),
    69: ('60 à 64 ans', '65 à 69 ans'),
    79: ('70 à 74 ans', '75 à 79 ans'),
    89: ('80 à 84 ans', '85 à 89 ans'),
    90: ('90 à 94 ans', '95 ans et plus'),
}
pop_insee[0] = pop_insee['Total']
for code_classe, (tranche_a, tranche_b) in tranches_10_ans.items():
    pop_insee[code_classe] = pop_insee[tranche_a] + pop_insee[tranche_b]

# Wide -> long: one row per (region label, age class), ordered by both keys.
codes_classes = [0, *tranches_10_ans]
population_reg = (
    pop_insee[['libelle', *codes_classes]]
    .melt(id_vars=['libelle'], value_vars=codes_classes, var_name='cl_age90', value_name='population')
    .sort_values(by=['libelle', 'cl_age90'])
)
population_reg

Unnamed: 0,libelle,cl_age90,population
0,Auvergne-Rhône-Alpes,0,8090442
19,Auvergne-Rhône-Alpes,9,935839
38,Auvergne-Rhône-Alpes,19,1018289
57,Auvergne-Rhône-Alpes,29,902190
76,Auvergne-Rhône-Alpes,39,1009027
...,...,...,...
121,Île-de-France,59,1537772
140,Île-de-France,69,1178800
159,Île-de-France,79,815126
178,Île-de-France,89,417574


## Taux d'incidence
Le taux d'incidence correspond au nombre de tests positifs pour 100 000 habitants. Il est calculé de la manière suivante :
(100 000 × nombre de cas positifs) / population

- incidence (https://www.data.gouv.fr/fr/datasets/taux-dincidence-de-lepidemie-de-covid-19/)

In [135]:
# Positive cases per region, sex and age class; the region label is attached
# through a join on the region code `reg`.
incidence = pd.read_csv('data/sp-pe-tb-heb-reg-2021-05-04-19h05.csv', sep=';').merge(region, on="reg")
incidence

Unnamed: 0,reg,week,P_f,P_h,P,pop_f,pop_h,cl_age90,pop,libelle
0,1,2020-S21,0,0,0,19979.0,20155.0,9,40134.0,Guadeloupe
1,1,2020-S21,1,0,1,24994.0,25489.0,19,50483.0,Guadeloupe
2,1,2020-S21,1,0,1,17868.0,16690.0,29,34558.0,Guadeloupe
3,1,2020-S21,1,0,1,22328.0,14821.0,39,37149.0,Guadeloupe
4,1,2020-S21,1,0,1,28652.0,20276.0,49,48928.0,Guadeloupe
...,...,...,...,...,...,...,...,...,...,...
9697,94,2021-S16,25,19,44,23212.0,21883.0,69,45095.0,Corse
9698,94,2021-S16,12,17,29,19544.0,17141.0,79,36685.0,Corse
9699,94,2021-S16,8,2,10,11227.0,8178.0,89,19405.0,Corse
9700,94,2021-S16,11,4,15,3332.0,1447.0,90,4779.0,Corse


In [136]:
# Average, per region label and age class, of every numeric column of `incidence`
# over all the rows (weeks) it contains.
# numeric_only=True is spelled out because `incidence` still carries the text
# column `week`: pandas >= 2.0 raises a TypeError when asked to average it,
# whereas older versions silently dropped it.
cumuls_incidence = incidence.groupby(['libelle', 'cl_age90']).mean(numeric_only=True)

# Incidence per 100,000 inhabitants = 100000 * average positives / average population,
# for women (_f), men (_h) and both sexes.
cumuls_incidence['inc_f'] = 100000 * cumuls_incidence['P_f'] / cumuls_incidence['pop_f']
cumuls_incidence['inc_h'] = 100000 * cumuls_incidence['P_h'] / cumuls_incidence['pop_h']
cumuls_incidence['inc'] = 100000 * cumuls_incidence['P'] / cumuls_incidence['pop']

# Flatten the group keys back into columns and keep only the rates, rounded for display.
incidence_reg_cl_age = cumuls_incidence.reset_index()[['libelle','cl_age90','inc_f','inc_h','inc']].round(2)

incidence_reg_cl_age

Unnamed: 0,libelle,cl_age90,inc_f,inc_h,inc
0,Auvergne-Rhône-Alpes,0,206.38,190.37,198.95
1,Auvergne-Rhône-Alpes,9,60.19,59.98,60.61
2,Auvergne-Rhône-Alpes,19,216.00,209.60,213.92
3,Auvergne-Rhône-Alpes,29,307.35,265.84,286.72
4,Auvergne-Rhône-Alpes,39,255.84,220.54,238.73
...,...,...,...,...,...
193,Île-de-France,59,227.90,226.95,228.35
194,Île-de-France,69,179.71,197.76,190.14
195,Île-de-France,79,121.44,146.19,132.95
196,Île-de-France,89,142.52,158.13,148.88


## Hospitalisations

sources : https://www.data.gouv.fr/en/datasets/donnees-hospitalieres-relatives-a-lepidemie-de-covid-19/

Les données hospitalières relatives à l'épidémie du COVID-19 par département et sexe du patient : nombre de patients hospitalisés, nombre de personnes actuellement en réanimation ou soins intensifs, nombre de personnes actuellement en Soins de Suite et de Réadaptation (SSR) ou Unités de Soins de Longue Durée (USLD), nombre de personnes actuellement en hospitalisation conventionnelle, nombre de personnes actuellement hospitalisées dans un autre type de service, nombre cumulé de personnes retournées à domicile, nombre cumulé de personnes décédées.

Les données hospitalières relatives à l'épidémie du COVID-19 par région, et classe d'âge du patient : nombre de patients hospitalisés, nombre de personnes actuellement en réanimation ou soins intensifs, nombre de personnes actuellement en Soins de Suite et de Réadaptation (SSR) ou Unités de Soins de Longue Durée(USLD), nombre de personnes actuellement en hospitalisation conventionnelle, nombre actuellement de personnes hospitalisées dans un autre type de service, nombre cumulé de personnes retournées à domicile, nombre cumulé de personnes décédées.

In [137]:
# Hospital data per département (`dep`) and sex (`sexe`), one row per day (`jour`).
hospitalisation = pd.read_csv('data/donnees-hospitalieres-covid19-2021-05-06-19h05.csv', delimiter=';', header='infer')
# `infer_datetime_format=True` was removed: it is deprecated since pandas 2.0
# (format inference is now the default) and only emits a warning.
hospitalisation['jour'] = pd.to_datetime(hospitalisation['jour'])
# Keep the rows from date_debut onwards.
# NOTE(review): date_fin is defined in the notebook but never applied; the upper
# bound of the window is whatever the file contains — confirm this is intended.
hospitalisation = hospitalisation[hospitalisation['jour'] >= date_debut]
hospitalisation.head()

Unnamed: 0,dep,sexe,jour,hosp,rea,HospConv,SSR_USLD,autres,rad,dc
116352,1,0,2021-04-06,171,21,114.0,36.0,0.0,2387,565
116353,1,1,2021-04-06,83,17,53.0,13.0,0.0,1156,334
116354,1,2,2021-04-06,87,4,60.0,23.0,0.0,1209,228
116355,2,0,2021-04-06,323,56,185.0,81.0,1.0,3348,912
116356,2,1,2021-04-06,176,34,100.0,42.0,0.0,1593,506


In [138]:
# Hospital data per region (`reg`) and age class (`cl_age90`), one row per day (`jour`).
hospitalisation2 = pd.read_csv('data/donnees-hospitalieres-classe-age-covid19-2021-05-06-19h05.csv', delimiter=';', header='infer')
# `infer_datetime_format=True` was removed: it is deprecated since pandas 2.0
# (format inference is now the default) and only emits a warning.
hospitalisation2['jour'] = pd.to_datetime(hospitalisation2['jour'])
# Keep the rows from date_debut onwards.
# NOTE(review): date_fin is defined in the notebook but never applied; the upper
# bound of the window is whatever the file contains — confirm this is intended.
hospitalisation2 = hospitalisation2[hospitalisation2['jour'] >= date_debut]
hospitalisation2

Unnamed: 0,reg,cl_age90,jour,hosp,rea,HospConv,SSR_USLD,autres,rad,dc
76032,1,0,2021-04-06,79,16,53.0,9.0,1.0,942,202
76033,1,9,2021-04-06,1,0,1.0,0.0,0.0,21,0
76034,1,19,2021-04-06,1,0,1.0,0.0,0.0,10,0
76035,1,29,2021-04-06,0,0,0.0,0.0,0.0,24,0
76036,1,39,2021-04-06,2,0,2.0,0.0,0.0,70,2
...,...,...,...,...,...,...,...,...,...,...
82165,94,59,2021-05-06,5,2,2.0,1.0,0.0,129,8
82166,94,69,2021-05-06,14,5,2.0,5.0,2.0,166,21
82167,94,79,2021-05-06,13,1,5.0,7.0,0.0,200,48
82168,94,89,2021-05-06,16,0,8.0,8.0,0.0,179,77


In [139]:
# Daily new intensive-care admissions (`incid_rea`) per region, one row per day (`jour`).
hospitalisation3 = pd.read_csv('data/covid-hospit-incid-reg-2021-05-06-19h05.csv', delimiter=';', header='infer')
# `infer_datetime_format=True` was removed: it is deprecated since pandas 2.0
# (format inference is now the default) and only emits a warning.
hospitalisation3['jour'] = pd.to_datetime(hospitalisation3['jour'])
# Keep the rows from date_debut onwards.
# NOTE(review): date_fin is defined in the notebook but never applied; the upper
# bound of the window is whatever the file contains — confirm this is intended.
hospitalisation3 = hospitalisation3[hospitalisation3['jour'] >= date_debut]
# Spot check on region code 84.
hospitalisation3[hospitalisation3.reg==84]

Unnamed: 0,jour,libelle,reg,incid_rea
6894,2021-04-06,Auvergne-Rhône-Alpes,84,96
6912,2021-04-07,Auvergne-Rhône-Alpes,84,81
6930,2021-04-08,Auvergne-Rhône-Alpes,84,63
6948,2021-04-09,Auvergne-Rhône-Alpes,84,68
6966,2021-04-10,Auvergne-Rhône-Alpes,84,45
6984,2021-04-11,Auvergne-Rhône-Alpes,84,23
7002,2021-04-12,Auvergne-Rhône-Alpes,84,87
7020,2021-04-13,Auvergne-Rhône-Alpes,84,70
7038,2021-04-14,Auvergne-Rhône-Alpes,84,68
7056,2021-04-15,Auvergne-Rhône-Alpes,84,71


In [140]:
# Sum (not mean), per region and age class, of the daily `rea` counts over the
# rows kept in hospitalisation2.
# Only `rea` is aggregated: summing the whole frame would also try to sum the
# datetime column `jour`, which raises a TypeError on pandas >= 2.0 (older
# versions silently dropped it).
hospitalisation_cl_age90 = (
    hospitalisation2
    .groupby(['reg', 'cl_age90'])[['rea']]
    .sum()
    .reset_index()
    .round(1)
)
# Spot check on region code 84.
hospitalisation_cl_age90[hospitalisation_cl_age90.reg==84]

Unnamed: 0,reg,cl_age90,rea
165,84,0,20507
166,84,9,51
167,84,19,46
168,84,29,179
169,84,39,517
170,84,49,1578
171,84,59,3842
172,84,69,6769
173,84,79,6361
174,84,89,882


In [141]:
# Sum (not mean), per region and sex, of the daily `rea` counts. The region
# code comes from joining each département row to the `departement` table.
# Only `rea` is aggregated: summing the whole merged frame would also try to
# sum the datetime column `jour` and the text column `libelle`, which fails or
# concatenates strings on pandas >= 2.0 (older versions silently dropped them).
hospitalisation_sexe = (
    pd.merge(hospitalisation, departement, on="dep")
    .groupby(['reg', 'sexe'])[['rea']]
    .sum()
    .reset_index()
    .round(1)
)
# Spot check on region code 24.
hospitalisation_sexe[hospitalisation_sexe.reg==24]

Unnamed: 0,reg,sexe,rea
18,24,0,6361
19,24,1,4251
20,24,2,2068


In [142]:
# Cumulative new intensive-care admissions per region over the rows kept in
# hospitalisation3 (i.e. from date_debut onwards).
# Only `incid_rea` is aggregated: summing the whole frame would also try to
# sum the datetime column `jour` and the text column `libelle`, which fails or
# concatenates strings on pandas >= 2.0 (older versions silently dropped them).
cumul_rea = hospitalisation3.groupby('reg')[['incid_rea']].sum().reset_index()
# Spot check on region code 24.
cumul_rea[cumul_rea.reg==24]

Unnamed: 0,reg,incid_rea
6,24,508


In [143]:
# Manual check: 1598 admissions expressed per 100,000 inhabitants of
# Auvergne-Rhône-Alpes, all ages (cl_age90 == 0).
# NOTE(review): 1598 is hard-coded; it matches the incid_rea value displayed
# for region 84 further down the notebook.
100000 * 1598 / population_reg.loc[
    (population_reg['libelle'] == 'Auvergne-Rhône-Alpes') & (population_reg['cl_age90'] == 0),
    'population',
]

0    19.751702
Name: population, dtype: float64

In [144]:
# Pivot the per-sex sums into one row per region with columns rea_t / rea_h / rea_f.
# NOTE(review): the suffixes imply sexe 0 = both sexes (t), 1 = men (h),
# 2 = women (f) — confirm against the dataset documentation.
hospitalisation_sexe_t = hospitalisation_sexe.loc[hospitalisation_sexe['sexe'] == 0, ['reg', 'rea']].rename(columns={'rea': 'rea_t'})
hospitalisation_sexe_h = hospitalisation_sexe.loc[hospitalisation_sexe['sexe'] == 1, ['reg', 'rea']].rename(columns={'rea': 'rea_h'})
hospitalisation_sexe_f = hospitalisation_sexe.loc[hospitalisation_sexe['sexe'] == 2, ['reg', 'rea']].rename(columns={'rea': 'rea_f'})

# Same join nesting as before: total joined to (men joined to women), on the region code.
hospitalisation_sexe2 = hospitalisation_sexe_t.merge(
    hospitalisation_sexe_h.merge(hospitalisation_sexe_f, on='reg'),
    on='reg',
)
# Spot check on region code 24.
hospitalisation_sexe2[hospitalisation_sexe2.reg==24]

Unnamed: 0,reg,rea_t,rea_h,rea_f
6,24,6361,4251,2068


In [145]:
# Regional share of men (rea_h / rea_t) and its complement for women, then
# repeated on every age-class row of the region through the join on `reg`.
# NOTE(review): this cell rebinds hospitalisation_sexe2 to a differently shaped
# frame, so it is not meant to be run twice without re-running the previous cell.
hospitalisation_sexe2 = hospitalisation_cl_age90.merge(
    hospitalisation_sexe2.assign(
        pct_h=lambda df: df['rea_h'] / df['rea_t'],
        pct_f=lambda df: 1 - df['pct_h'],
    ),
    on='reg',
)
# Spot check on region code 24.
hospitalisation_sexe2[hospitalisation_sexe2.reg==24]

Unnamed: 0,reg,cl_age90,rea,rea_t,rea_h,rea_f,pct_h,pct_f
66,24,0,6361,6361,4251,2068,0.668291,0.331709
67,24,9,2,6361,4251,2068,0.668291,0.331709
68,24,19,11,6361,4251,2068,0.668291,0.331709
69,24,29,13,6361,4251,2068,0.668291,0.331709
70,24,39,171,6361,4251,2068,0.668291,0.331709
71,24,49,390,6361,4251,2068,0.668291,0.331709
72,24,59,1142,6361,4251,2068,0.668291,0.331709
73,24,69,2194,6361,4251,2068,0.668291,0.331709
74,24,79,2071,6361,4251,2068,0.668291,0.331709
75,24,89,327,6361,4251,2068,0.668291,0.331709


In [146]:
# Share of the region's intensive-care load attributed to each (age class, sex):
#   (rea of the age class / regional total) * (regional share of the sex).
# The sex share is recomputed from rea_h / rea_t instead of being read back from
# pct_h / pct_f, so re-running this cell no longer compounds the previous
# result; the values on a first run are unchanged.
part_age = hospitalisation_sexe2['rea'] / hospitalisation_sexe2['rea_t']
part_h = hospitalisation_sexe2['rea_h'] / hospitalisation_sexe2['rea_t']
hospitalisation_sexe2['pct_h'] = part_age * part_h
hospitalisation_sexe2['pct_f'] = part_age * (1 - part_h)

# Attach the region label and keep the columns used downstream.
# NOTE(review): round(3) is applied to values that are reused in later
# computations, not only displayed — small shares lose most of their precision.
hospitalisation_reg_cl_age = pd.merge(hospitalisation_sexe2, region, on="reg")[['reg','libelle','cl_age90','rea_f','rea_h','rea','pct_h','pct_f']].round(3)

# Spot check on region code 24. The mask is now built from the frame being
# filtered; it used to come from hospitalisation_sexe2, which only lines up
# when the join above drops no row.
hospitalisation_reg_cl_age[hospitalisation_reg_cl_age.reg==24]

Unnamed: 0,reg,libelle,cl_age90,rea_f,rea_h,rea,pct_h,pct_f
66,24,Centre-Val-de-Loire,0,2068,4251,6361,0.668,0.332
67,24,Centre-Val-de-Loire,9,2068,4251,2,0.0,0.0
68,24,Centre-Val-de-Loire,19,2068,4251,11,0.001,0.001
69,24,Centre-Val-de-Loire,29,2068,4251,13,0.001,0.001
70,24,Centre-Val-de-Loire,39,2068,4251,171,0.018,0.009
71,24,Centre-Val-de-Loire,49,2068,4251,390,0.041,0.02
72,24,Centre-Val-de-Loire,59,2068,4251,1142,0.12,0.06
73,24,Centre-Val-de-Loire,69,2068,4251,2194,0.231,0.114
74,24,Centre-Val-de-Loire,79,2068,4251,2071,0.218,0.108
75,24,Centre-Val-de-Loire,89,2068,4251,327,0.034,0.017


In [147]:
# Attach the regional cumulative admissions (incid_rea) to every
# (region, age class) row.
incidence_rea = hospitalisation_reg_cl_age.merge(cumul_rea, on=['reg'])
# Spot check on region code 24.
incidence_rea.loc[incidence_rea['reg'] == 24]

Unnamed: 0,reg,libelle,cl_age90,rea_f,rea_h,rea,pct_h,pct_f,incid_rea
66,24,Centre-Val-de-Loire,0,2068,4251,6361,0.668,0.332,508
67,24,Centre-Val-de-Loire,9,2068,4251,2,0.0,0.0,508
68,24,Centre-Val-de-Loire,19,2068,4251,11,0.001,0.001,508
69,24,Centre-Val-de-Loire,29,2068,4251,13,0.001,0.001,508
70,24,Centre-Val-de-Loire,39,2068,4251,171,0.018,0.009,508
71,24,Centre-Val-de-Loire,49,2068,4251,390,0.041,0.02,508
72,24,Centre-Val-de-Loire,59,2068,4251,1142,0.12,0.06,508
73,24,Centre-Val-de-Loire,69,2068,4251,2194,0.231,0.114,508
74,24,Centre-Val-de-Loire,79,2068,4251,2071,0.218,0.108,508
75,24,Centre-Val-de-Loire,89,2068,4251,327,0.034,0.017,508


## Incidence en soins intensifs
Cumul des nouveaux cas quotidiens sur les 30 derniers jours pour chaque région,
ramené par tranche d'âge et par sexe à 100 000 habitants.

In [149]:
# Join the population of each (region label, age class) and turn the regional
# admissions into a per-100,000 rate for men (inc_h) and women (inc_f).
# NOTE(review): the trailing `/ 2` is not explained anywhere in the notebook —
# confirm what it stands for before relying on these figures.
incidence_rea_reg_cl_age = incidence_rea.merge(population_reg, on=['libelle', 'cl_age90'])
for sexe in ('h', 'f'):
    incidence_rea_reg_cl_age[f'inc_{sexe}'] = (
        incidence_rea_reg_cl_age['incid_rea']
        * incidence_rea_reg_cl_age[f'pct_{sexe}']
        * 100000
        / incidence_rea_reg_cl_age['population']
        / 2
    )
incidence_rea_reg_cl_age = incidence_rea_reg_cl_age.round(2)
# Spot check on region code 24.
incidence_rea_reg_cl_age.loc[incidence_rea_reg_cl_age['reg'] == 24]

Unnamed: 0,reg,libelle,cl_age90,rea_f,rea_h,rea,pct_h,pct_f,incid_rea,population,inc_h,inc_f
66,24,Centre-Val-de-Loire,0,2068,4251,6361,0.67,0.33,508,2561451,6.62,3.29
67,24,Centre-Val-de-Loire,9,2068,4251,2,0.0,0.0,508,281149,0.0,0.0
68,24,Centre-Val-de-Loire,19,2068,4251,11,0.0,0.0,508,316467,0.08,0.08
69,24,Centre-Val-de-Loire,29,2068,4251,13,0.0,0.0,508,249663,0.1,0.1
70,24,Centre-Val-de-Loire,39,2068,4251,171,0.02,0.01,508,291604,1.57,0.78
71,24,Centre-Val-de-Loire,49,2068,4251,390,0.04,0.02,508,317096,3.28,1.6
72,24,Centre-Val-de-Loire,59,2068,4251,1142,0.12,0.06,508,341260,8.93,4.47
73,24,Centre-Val-de-Loire,69,2068,4251,2194,0.23,0.11,508,327697,17.9,8.84
74,24,Centre-Val-de-Loire,79,2068,4251,2071,0.22,0.11,508,252688,21.91,10.86
75,24,Centre-Val-de-Loire,89,2068,4251,327,0.03,0.02,508,141338,6.11,3.06


## Balance bénéfices/risques

In [150]:
# Spot check: every age-class row for the region labelled 'Corse'.
incidence_rea_reg_cl_age.loc[incidence_rea_reg_cl_age['libelle'].eq('Corse')]

Unnamed: 0,reg,libelle,cl_age90,rea_f,rea_h,rea,pct_h,pct_f,incid_rea,population,inc_h,inc_f
187,94,Corse,0,102,327,443,0.74,0.26,30,349269,3.17,1.13
188,94,Corse,9,102,327,0,0.0,0.0,30,34584,0.0,0.0
189,94,Corse,19,102,327,0,0.0,0.0,30,36310,0.0,0.0
190,94,Corse,29,102,327,0,0.0,0.0,30,32754,0.0,0.0
191,94,Corse,39,102,327,10,0.02,0.01,30,43975,0.58,0.2
192,94,Corse,49,102,327,14,0.02,0.01,30,46093,0.75,0.26
193,94,Corse,59,102,327,98,0.16,0.06,30,47732,5.12,1.82
194,94,Corse,69,102,327,150,0.25,0.09,30,45158,8.3,2.96
195,94,Corse,79,102,327,138,0.23,0.08,30,37520,9.2,3.28
196,94,Corse,89,102,327,33,0.06,0.02,30,20002,4.12,1.5


In [151]:
# Benefit columns = 4 * incidence * boost factor, for men (benef_h) and women (benef_f).
# NOTE(review): `boost` is not defined in any visible cell (the execution count
# jumps from 147 to 149), so this cell fails on a fresh kernel unless it is
# defined elsewhere. The meaning of the constant 4 is not documented either.
facteur_boost = factors.incidence_boost[boost]
for sexe in ('h', 'f'):
    incidence_rea_reg_cl_age[f'benef_{sexe}'] = 4 * incidence_rea_reg_cl_age[f'inc_{sexe}'] * facteur_boost
# Spot check on region code 84.
incidence_rea_reg_cl_age.loc[incidence_rea_reg_cl_age['reg'] == 84]

Unnamed: 0,reg,libelle,cl_age90,rea_f,rea_h,rea,pct_h,pct_f,incid_rea,population,inc_h,inc_f,benef_h,benef_f
165,84,Auvergne-Rhône-Alpes,0,7306,13110,20507,0.64,0.36,1598,8090442,6.31,3.57,25.24,14.28
166,84,Auvergne-Rhône-Alpes,9,7306,13110,51,0.0,0.0,1598,935839,0.17,0.09,0.68,0.36
167,84,Auvergne-Rhône-Alpes,19,7306,13110,46,0.0,0.0,1598,1018289,0.08,0.08,0.32,0.32
168,84,Auvergne-Rhône-Alpes,29,7306,13110,179,0.01,0.0,1598,902190,0.53,0.27,2.12,1.08
169,84,Auvergne-Rhône-Alpes,39,7306,13110,517,0.02,0.01,1598,1009027,1.27,0.71,5.08,2.84
170,84,Auvergne-Rhône-Alpes,49,7306,13110,1578,0.05,0.03,1598,1039289,3.77,2.15,15.08,8.6
171,84,Auvergne-Rhône-Alpes,59,7306,13110,3842,0.12,0.07,1598,1050457,9.13,5.17,36.52,20.68
172,84,Auvergne-Rhône-Alpes,69,7306,13110,6769,0.21,0.12,1598,923640,18.25,10.29,73.0,41.16
173,84,Auvergne-Rhône-Alpes,79,7306,13110,6361,0.2,0.11,1598,715981,22.1,12.5,88.4,50.0
174,84,Auvergne-Rhône-Alpes,89,7306,13110,882,0.03,0.02,1598,383435,5.63,3.33,22.52,13.32


In [152]:
# Risk figure per age class (keyed by the same `cl_age90` codes as the other tables).
# A bare `factors.astrazemeca_risk` expression used to precede this assignment;
# its value was discarded (only the last expression of a cell is displayed),
# so it has been removed.
# NOTE(review): the figures below are hard-coded — confirm whether they should
# be read from `factors` instead, and what unit they are expressed in.
risks = pd.DataFrame(
    data={
        'cl_age90': [29, 39, 49, 59, 69, 79, 89],
        'astrazeneca_risk': [5.8, 4.6, 5.8, 3.2, 3, 2.2, 1.2],
    }
)
risks

Unnamed: 0,cl_age90,astrazeneca_risk
0,29,5.8
1,39,4.6
2,49,5.8
3,59,3.2
4,69,3.0
5,79,2.2
6,89,1.2


In [153]:
# Export the full table; the row index is written as the first column
# (pandas default, made explicit here).
incidence_rea_reg_cl_age.to_csv(path_or_buf='data/incidence_rea_reg_cl_age-2021-05-14.csv', index=True)

In [154]:
# Benefit/risk balance per (region, age class): benefit minus the risk figure
# of the age class. Age classes 0, 9, 19 and 90 are excluded up front; `risks`
# has no entry for them.
balance = (
    incidence_rea_reg_cl_age
    .loc[
        ~incidence_rea_reg_cl_age['cl_age90'].isin([0, 9, 19, 90]),
        ['reg', 'cl_age90', 'benef_h', 'benef_f'],
    ]
    .merge(risks, on=['cl_age90'])
    .assign(
        balance_astr_h=lambda df: df['benef_h'] - df['astrazeneca_risk'],
        balance_astr_f=lambda df: df['benef_f'] - df['astrazeneca_risk'],
    )
)
# Spot check on region code 84.
balance.loc[balance['reg'] == 84]

Unnamed: 0,reg,cl_age90,benef_h,benef_f,astrazeneca_risk,balance_astr_h,balance_astr_f
15,84,29,2.12,1.08,5.8,-3.68,-4.72
33,84,39,5.08,2.84,4.6,0.48,-1.76
51,84,49,15.08,8.6,5.8,9.28,2.8
69,84,59,36.52,20.68,3.2,33.32,17.48
87,84,69,73.0,41.16,3.0,70.0,38.16
105,84,79,88.4,50.0,2.2,86.2,47.8
123,84,89,22.52,13.32,1.2,21.32,12.12


In [155]:
# Export the balance table without the row index.
balance.to_csv(path_or_buf='data/balance_astrazeneca-2021-05-14.csv', index=False)

In [156]:
# Largest male benefit value, scaled by 4.55.
# NOTE(review): the meaning of the 4.55 factor is not documented in the notebook.
balance['benef_h'].max() * 4.55


641.732