# Génération des informations pour les visites médicales (et actes médicaux)

In [1]:
import pandas as pd
import numpy as np

## Open DAMIR (fichier R): génération des données pour les statistiques de visites médicales (incluant les spécialistes)
Source: https://www.data.gouv.fr/fr/datasets/depenses-d-assurance-maladie-hors-prestations-hospitalieres-par-caisse-primaire-departement/

In [2]:
# Load only the four columns used below from the Open DAMIR "R" monthly extract.
damir = pd.read_csv(
    "../data/R201901.CSV",
    sep=";",
    encoding="latin-1",
    usecols=["dpt", "prs_nat", "exe_spe", "act_dnb"],
)

In [3]:
# Preview the first rows of the loaded extract.
damir.head()

Unnamed: 0,dpt,prs_nat,exe_spe,act_dnb
0,78,1098,1,0
1,75,1098,1,2
2,77,1098,1,1
3,78,1098,1,18
4,78,1098,1,1


### un peu d'extraction d'information ...

* on retire les codes PRS à partir de 1400 (inclus) : ce sont des codes pour des prestations "non-médicales"
* nombre moyen de visites chez un médecin (par spécialité)
* probabilité de la nature de la prestation, sachant la spécialité du médecin et le département

In [4]:
# Total act count for every (department, nature of service, practitioner specialty) triple,
# then keep only PRS codes below 1400 (codes from 1400 upwards are outside the scope of
# interest) whose summed count is strictly positive.
prs_spe = (
    damir.groupby(["dpt", "prs_nat", "exe_spe"])["act_dnb"]
    .sum()
    .reset_index()
    .loc[lambda totals: (totals["prs_nat"] < 1400) & (totals["act_dnb"] > 0)]
)
prs_spe.head()

Unnamed: 0,dpt,prs_nat,exe_spe,act_dnb
0,1,1098,1,337
1,1,1099,1,68
2,1,1101,3,33
3,1,1101,4,64
4,1,1101,5,9


In [5]:
# Population table, summed per department.
# NOTE(review): the saved output of this cell contains a warning fragment; presumably a
# pandas DtypeWarning about mixed column types in pop.csv -- confirm, and if so consider
# passing an explicit dtype to read_csv.
pop = pd.read_csv("pop.csv")
pop_dpt = pop.groupby("dpt")["pop"].sum().reset_index()
pop_dpt.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,dpt,pop
0,41,559.357862
1,42,1630.18711
2,43,514.890537
3,44,2472.319674
4,45,1425.241687


On estime le nombre de prestations par personne pour chaque département (les comptes ne couvrent qu'un seul mois : on multiplie donc par 12 pour obtenir une estimation annuelle) !

In [6]:
## Estimated yearly number of acts per inhabitant, by department and practitioner specialty
prs_spedpt = prs_spe.groupby(["dpt", "exe_spe"])["act_dnb"].sum().reset_index()
nb_prs_spedpt = (
    # Default inner join: departments missing from pop_dpt are dropped.
    prs_spedpt.merge(pop_dpt, on="dpt")
    # The extract covers one month only, hence the factor 12 for a yearly figure.
    .assign(nb=lambda merged: merged["act_dnb"].div(merged["pop"]).mul(12))
    .loc[:, ["dpt", "exe_spe", "nb"]]
)
nb_prs_spedpt.head()

Unnamed: 0,dpt,exe_spe,nb
0,1,1,3.579285
1,1,2,0.238643
2,1,3,0.164345
3,1,4,0.303223
4,1,5,0.121549


In [7]:
# Save the per-department/specialty rates. No `index` argument is given, so to_csv also
# writes the row index as an unnamed first column.
nb_prs_spedpt.to_csv("nb_prs_dptspe.csv")

In [8]:
# Total act count per (department, specialty); used as the denominator of the
# nature-of-service shares computed in the next step.
prs = prs_spe.groupby(["dpt", "exe_spe"])["act_dnb"].sum().reset_index()
prs.head()

Unnamed: 0,dpt,exe_spe,act_dnb
0,1,1,190436
1,1,2,12697
2,1,3,8744
3,1,4,16133
4,1,5,6467


In [9]:
# Share of each nature of service (prs_nat) among all acts recorded for a given
# (department, specialty) pair, i.e. P(prs_nat | dpt, exe_spe).
p_nat_spedpt = prs_spe.merge(prs, on=["dpt", "exe_spe"], suffixes=("", "_dpt"))
# "act_dnb" is the count for one nature of service, "act_dnb_dpt" the pair's total.
p_nat_spedpt["p"] = p_nat_spedpt["act_dnb"].div(p_nat_spedpt["act_dnb_dpt"])
p_nat_spedpt = p_nat_spedpt.loc[:, ["dpt", "exe_spe", "prs_nat", "p"]]
p_nat_spedpt.head()

Unnamed: 0,dpt,exe_spe,prs_nat,p
0,1,1,1098,0.00177
1,1,1,1099,0.000357
2,1,1,1103,0.000987
3,1,1,1104,0.00157
4,1,1,1105,3.2e-05


In [10]:
# Save the per-department/specialty shares. No `index` argument is given, so to_csv also
# writes the row index as an unnamed first column.
p_nat_spedpt.to_csv("p_prsnat_dptspe.csv")

In [11]:
# List the distinct specialty codes that remain in the filtered table.
prs_spe['exe_spe'].unique()

array([ 1,  3,  4,  5,  6,  7,  8,  9, 11, 14, 15, 17, 18, 32, 35, 42,  2,
       12, 13, 31, 34, 19, 21, 36, 38, 37, 24, 99, 80, 29, 30])

## Open DAMIR (Fichier A)
Source: https://www.data.gouv.fr/fr/datasets/open-damir-base-complete-sur-les-depenses-dassurance-maladie-inter-regimes/

### Remarques préliminaires 

Source: commentaires de la page ci-dessus
#### Quantité et dénombrement des actes
La quantité sert à tarifer et à calculer le remboursement selon la formule suivante : Montant remboursé=Base de remboursement * quantité des actes * taux de remboursement
* Le dénombrement d’actes n’est renseigné que pour certains régimes :
    * 01 Régime Général
    * 06 ENIM
    * 08 CNMSS
    * 10 CRPCEN
    * 90 CAVIMAC
* il est à 0 pour les autres régimes

#### Dénombrement des actes
Pour compter les actes, utiliser la variable « Quantité d’actes » sauf dans les cas suivants :
* Les transports : la quantité contient le nombre de factures, tandis que le dénombrement compte le nombre de courses (si 1 facture aller-retour : quantité=1 ; dénombrement=2)
* Les indemnités kilométriques : la quantité contient le nombre de kilomètres facturés
* Les indemnités journalières : la quantité contient le nombre de jours indemnisés
* Les frais de séjour : la quantité contient le nombre de jours hospitalisés



In [12]:
# Load only the columns used below from the Open DAMIR "A" monthly file.
damir = pd.read_csv(
    "../data/A201604.csv",
    sep=";",
    usecols=[
        "BEN_SEX_COD",
        "AGE_BEN_SNDS",
        "BEN_RES_REG",
        "PSE_ACT_CAT",
        "PSE_SPE_SNDS",
        "PRS_NAT",
        "PRS_ACT_QTE",
    ],
)

In [13]:
# Preview the first rows of the loaded file.
damir.head()

Unnamed: 0,AGE_BEN_SNDS,BEN_RES_REG,BEN_SEX_COD,PRS_ACT_QTE,PRS_NAT,PSE_ACT_CAT,PSE_SPE_SNDS
0,40,28,2,6,3313,2,0
1,70,32,1,5,3386,2,0
2,60,32,1,5,3313,2,0
3,50,28,1,5,3312,2,0
4,60,28,2,14,1972,2,0


In [14]:
# Short column names for the file-A data; prs_nat, exe_spe and act_dnb reuse the names
# of the file-R extract so the same processing pattern can be applied.
DAMIR_A_COLUMN_NAMES = {
    "BEN_RES_REG": "RR",
    "BEN_SEX_COD": "sex",
    "AGE_BEN_SNDS": "age",
    "PRS_NAT": "prs_nat",
    "PSE_ACT_CAT": "pse_cat",
    "PSE_SPE_SNDS": "exe_spe",
    "PRS_ACT_QTE": "act_dnb",
}
# Renamed in place so the frame is not duplicated.
damir.rename(columns=DAMIR_A_COLUMN_NAMES, inplace=True)
damir.head()

Unnamed: 0,age,RR,sex,act_dnb,prs_nat,pse_cat,exe_spe
0,40,28,2,6,3313,2,0
1,70,32,1,5,3386,2,0
2,60,32,1,5,3313,2,0
3,50,28,1,5,3312,2,0
4,60,28,2,14,1972,2,0


In [15]:
# Collapse the raw rows: one line per (region, sex, age class, nature of service,
# practitioner category, specialty) with the summed act quantity.
damir_dnb = (
    damir.groupby(["RR", "sex", "age", "prs_nat", "pse_cat", "exe_spe"])["act_dnb"]
    .sum()
    .reset_index()
)
damir_dnb.head()

Unnamed: 0,RR,sex,age,prs_nat,pse_cat,exe_spe,act_dnb
0,5,0,0,2211,0,0,40
1,5,0,0,2212,0,0,8
2,5,0,0,2222,0,0,42
3,5,0,0,2234,0,0,2
4,5,0,0,2252,0,0,2


In [16]:
# Largest aggregated counts (more than 1.5 million acts)
damir_dnb.loc[damir_dnb["act_dnb"].gt(1500000)]

Unnamed: 0,RR,sex,age,prs_nat,pse_cat,exe_spe,act_dnb
16487,11,1,50,1972,2,0,1624474
16769,11,1,50,3386,2,0,1703705
17705,11,1,60,1972,2,0,1995595
17993,11,1,60,3386,2,0,1999004
18880,11,1,70,1972,2,0,1572774
19118,11,1,70,3386,2,0,1511332
23825,11,2,30,3386,2,0,1540582
24813,11,2,40,1972,2,0,1785155
25079,11,2,40,3386,2,0,1882060
26039,11,2,50,1972,2,0,2314720


Commentaires sur les codes les plus fréquents:
* 1111: visite médicale standard
* 3313: Pharmacie 65%
* 3386: HONO DISP 7 (??)
* 3211: Acte de biologie
* 1972: FRANCHISE HORS TIERS PAYANT SUR MEDICAMENT
* 6111: IJ NORMALES -3 MOIS
* 1811: IK plaine (indemnité déplacements)

### un peu d'extraction d'information ...

* on retire les codes PRS à partir de 1300 (inclus) : on ne conserve que les consultations (11XX) et les visites (12XX) ; les codes suivants correspondent à des prestations "non-médicales" ambulatoires (ambulances, infirmier, matériels, médicament, dentistes)
* nombre moyen de visites chez un médecin (par spécialité)
* probabilité de la nature de la prestation, sachant la spécialité du médecin, la région, la classe d'âge et le sexe

In [17]:
# Keep only PRS codes below 1300 (anything above is outside the scope of interest)
# whose aggregated count is non-negative:
#  -> 11XX codes are consultations
#  -> 12XX codes are visits
prs = damir_dnb.loc[damir_dnb["prs_nat"].lt(1300) & damir_dnb["act_dnb"].ge(0)]

In [18]:
# Distinct practitioner categories present in the filtered table.
prs["pse_cat"].unique()
# Legend of the pse_cat codes:
#0: establishment
#1: physician
#2: supplier
#3: laboratory
#4: dentist
#5: midwives
#99: not applicable

array([ 0,  1,  4,  5, 99])

In [19]:
# Keep only the services carried out by physicians (pse_cat code 1)
prs = prs.loc[prs["pse_cat"].eq(1)]

In [20]:
# Display the filtered table.
prs

Unnamed: 0,RR,sex,age,prs_nat,pse_cat,exe_spe,act_dnb
14,5,1,0,1111,1,1,106755
15,5,1,0,1111,1,3,8
16,5,1,0,1111,1,4,28
17,5,1,0,1111,1,5,16
18,5,1,0,1111,1,6,2
...,...,...,...,...,...,...,...
216122,99,2,99,1155,1,8,3
216123,99,2,99,1155,1,9,6
216124,99,2,99,1155,1,15,7
216125,99,2,99,1155,1,31,3


In [21]:
# Read the population table unless a non-None `pop` is already defined, which lets this
# section run without the file-R section.
if globals().get("pop") is None:
    pop = pd.read_csv("pop.csv")

# Population summed per (region, sex, age).
pop_rr = pop.groupby(["RR", "sex", "age"])["pop"].sum().reset_index()
pop_rr.head(10)

Unnamed: 0,RR,sex,age,pop
0,1,1,0,10960.896988
1,1,1,5,13307.588357
2,1,1,10,14694.197617
3,1,1,15,14223.408864
4,1,1,20,10040.116261
5,1,1,25,8185.081731
6,1,1,30,7702.09869
7,1,1,35,8710.070308
8,1,1,40,11952.321274
9,1,1,45,13864.137616


In [22]:
## Define new Region (gather outre-mer lands and departements)
def new_rr(x):
    """Return the regrouped region code for ``x``.

    Every code lower than or equal to 6 is replaced by the single code 5; any other
    code is returned unchanged.
    """
    return 5 if x <= 6 else x
# Recode the region column element by element with new_rr.
pop_rr['RR']=pop_rr['RR'].apply(new_rr)

##Define new age classes (compliant with DAMIR)
def new_age(x):
    """Return the age class of ``x``.

    The age is rounded down to its decade; the 10-19 decade is merged into class 0 and
    every decade above 80 is capped at 80.
    """
    decade = x - x % 10
    if decade == 10:
        return 0
    return 80 if decade > 80 else decade
pop_rr["age"] = pop_rr["age"].apply(new_age)

# After recoding, several rows can share the same (RR, sex, age) key:
# sum their populations to get one row per key again.
pop_rr = pop_rr.groupby(["RR", "sex", "age"])["pop"].sum().reset_index()
pop_rr.head()

Unnamed: 0,RR,sex,age,pop
0,5,1,0,294137.420421
1,5,1,20,104651.13876
2,5,1,30,99458.005758
3,5,1,40,125572.485213
4,5,1,50,123875.368341


On estime le nombre total de prestations par personne pour chaque région, sexe et âge (les comptes ne couvrent qu'un seul mois : on multiplie donc par 12 pour obtenir une estimation annuelle) !

In [23]:
## Estimated yearly number of acts per inhabitant, by region, age class, sex and specialty
prs_spedpt = prs.groupby(["RR", "age", "sex", "exe_spe"])["act_dnb"].sum().reset_index()
nb_prs_spedpt = (
    # Default inner join: (RR, age, sex) keys missing from pop_rr are dropped.
    prs_spedpt.merge(pop_rr, on=["RR", "age", "sex"])
    # The file covers one month only, hence the factor 12 for a yearly figure.
    .assign(nb=lambda merged: merged["act_dnb"].div(merged["pop"]).mul(12))
    .loc[:, ["RR", "age", "sex", "exe_spe", "nb"]]
)
nb_prs_spedpt.head()

Unnamed: 0,RR,age,sex,exe_spe,nb
0,5,0,1,1,7.830911
1,5,0,1,2,0.030884
2,5,0,1,3,0.011709
3,5,0,1,4,0.037697
4,5,0,1,5,0.211167


In [24]:
# Highest yearly rate in the table (sanity check on the order of magnitude).
nb_prs_spedpt["nb"].max()

21.842222944164313

In [25]:
# Groups with more than 10 acts per inhabitant per year.
nb_prs_spedpt.loc[nb_prs_spedpt["nb"].gt(10)]

Unnamed: 0,RR,age,sex,exe_spe,nb
241,5,80,1,1,10.781984
256,5,80,2,1,11.442432
537,11,80,2,1,10.667988
793,24,80,1,1,13.645756
810,24,80,2,1,13.693156
1062,27,80,1,1,14.155556
1078,27,80,2,1,14.450642
1333,28,80,1,1,15.882462
1349,28,80,2,1,16.684926
1591,32,70,2,1,10.661241


In [26]:
# Save the per-region/age/sex/specialty rates. No `index` argument is given, so to_csv
# also writes the row index as an unnamed first column.
nb_prs_spedpt.to_csv("nb_prs_rragesex.csv")

In [27]:
# Total act count per (region, age class, sex, specialty); used as the denominator of
# the nature-of-service shares computed below.
prs_spe = prs.groupby(["RR", "age", "sex", "exe_spe"])["act_dnb"].sum().reset_index()
prs_spe.head()

Unnamed: 0,RR,age,sex,exe_spe,act_dnb
0,5,0,1,1,191947
1,5,0,1,2,757
2,5,0,1,3,287
3,5,0,1,4,924
4,5,0,1,5,5176


In [28]:
# Same totals, broken down one level further by nature of service (prs_nat).
prs_spe2 = (
    prs.groupby(["RR", "age", "sex", "exe_spe", "prs_nat"])["act_dnb"]
    .sum()
    .reset_index()
)
prs_spe2.head()

Unnamed: 0,RR,age,sex,exe_spe,prs_nat,act_dnb
0,5,0,1,1,1111,106755
1,5,0,1,1,1112,48562
2,5,0,1,1,1115,13
3,5,0,1,1,1117,222
4,5,0,1,1,1121,2


In [29]:
# Share of each nature of service (prs_nat) among all acts recorded for a given
# (region, age class, sex, specialty) group, i.e. P(prs_nat | RR, age, sex, exe_spe).
group_keys = ["RR", "age", "sex", "exe_spe"]
# The "_dpt" suffix marks the group total coming from prs_spe (denominator).
p_nat_spedpt = prs_spe2.merge(prs_spe, on=group_keys, suffixes=("", "_dpt"))
p_nat_spedpt["p"] = p_nat_spedpt["act_dnb"].div(p_nat_spedpt["act_dnb_dpt"])
p_nat_spedpt = p_nat_spedpt.loc[:, group_keys + ["prs_nat", "p"]]
p_nat_spedpt.head()

Unnamed: 0,RR,age,sex,exe_spe,prs_nat,p
0,5,0,1,1,1111,0.556169
1,5,0,1,1,1112,0.252997
2,5,0,1,1,1115,6.8e-05
3,5,0,1,1,1117,0.001157
4,5,0,1,1,1121,1e-05


In [30]:
# Save the per-region/age/sex/specialty shares. No `index` argument is given, so to_csv
# also writes the row index as an unnamed first column.
p_nat_spedpt.to_csv("p_prsnat_rragesex.csv")

In [31]:
# Distinct region codes present in the aggregated file-A data.
damir_dnb['RR'].unique()

array([ 5, 11, 24, 27, 28, 32, 44, 52, 53, 75, 76, 84, 93, 99])

In [32]:
# Index the share table by (RR, age, sex, exe_spe) so that one group can be looked up
# with p_nat_spedpt.loc[RR, age, sex, exe_spe].
# The guard keeps the cell safe to re-run: once the key columns have been moved into the
# index, an unconditional set_index on the same names would raise a KeyError.
_p_nat_index_keys = ["RR", "age", "sex", "exe_spe"]
if list(p_nat_spedpt.index.names) != _p_nat_index_keys:
    p_nat_spedpt = p_nat_spedpt.set_index(_p_nat_index_keys)

In [33]:
# Display the indexed share table.
p_nat_spedpt

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,prs_nat,p
RR,age,sex,exe_spe,Unnamed: 4_level_1,Unnamed: 5_level_1
5,0,1,1,1111,0.556169
5,0,1,1,1112,0.252997
5,0,1,1,1115,0.000068
5,0,1,1,1117,0.001157
5,0,1,1,1121,0.000010
...,...,...,...,...,...
99,99,2,35,1148,0.189655
99,99,2,35,1155,0.034483
99,99,2,37,1145,1.000000
99,99,2,38,1145,1.000000


In [34]:
# Lookup by index key (RR=5, age=40, sex=1, exe_spe=4).
p_nat_spedpt.loc[5,40,1,4] # specialty: surgery

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,prs_nat,p
RR,age,sex,exe_spe,Unnamed: 4_level_1,Unnamed: 5_level_1
5,40,1,4,1111,0.049121
5,40,1,4,1112,0.341779
5,40,1,4,1116,0.144778
5,40,1,4,1117,0.281799
5,40,1,4,1121,0.018614
5,40,1,4,1126,0.149948
5,40,1,4,1129,0.013961


In [35]:
# Lookup by index key (RR=5, age=40, sex=1, exe_spe=1).
p_nat_spedpt.loc[5,40,1,1] # general practitioner

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,prs_nat,p
RR,age,sex,exe_spe,Unnamed: 4_level_1,Unnamed: 5_level_1
5,40,1,1,1111,0.616148
5,40,1,1,1112,0.280056
5,40,1,1,1115,0.001468
5,40,1,1,1117,0.002183
5,40,1,1,1121,0.003155
5,40,1,1,1122,4e-05
5,40,1,1,1123,2e-05
5,40,1,1,1124,0.005715
5,40,1,1,1125,0.004068
5,40,1,1,1130,0.068278


In [36]:
# Lookup by index key (RR=5, age=40, sex=1, exe_spe=6).
p_nat_spedpt.loc[5,40,1,6] # specialty: radiology

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,prs_nat,p
RR,age,sex,exe_spe,Unnamed: 4_level_1,Unnamed: 5_level_1
5,40,1,6,1112,0.353933
5,40,1,6,1116,0.140449
5,40,1,6,1117,0.11236
5,40,1,6,1121,0.247191
5,40,1,6,1126,0.140449
5,40,1,6,1130,0.005618
