# Génération des informations pour les visites médicales (et actes médicaux)

In [1]:
import pandas as pd
import numpy as np

## Open DAMIR: génération des données pour les statistiques de visites médicales (incluant les spécialistes)
Source: https://www.data.gouv.fr/fr/datasets/depenses-d-assurance-maladie-hors-prestations-hospitalieres-par-caisse-primaire-departement/


In [2]:
# Read the monthly Open DAMIR extract, keeping only the four columns that the
# rest of this notebook uses.
damir = pd.read_csv(
    "../data/R201901.CSV",
    sep=';',
    encoding="latin-1",
    usecols=['dpt', 'prs_nat', 'exe_spe', 'act_dnb'],
)

In [3]:
# Preview the first five rows of the loaded extract.
damir.head(n=5)

Unnamed: 0,dpt,prs_nat,exe_spe,act_dnb
0,78,1098,1,0
1,75,1098,1,2
2,77,1098,1,1
3,78,1098,1,18
4,78,1098,1,1


### un peu d'extraction d'information ...

* on vire les codes PRS qui sont après 1400 (inclus) : ce sont des codes pour des prestations "non-médicales"
* nombre moyen de visites chez un médecin (par spécialité)
* probabilité de la nature de la prestation, sachant la spécialité du médecin et le département

In [5]:
# Total act count for every (department, PRS nature code, specialty code) triple.
prs_spe = damir.groupby(["dpt", "prs_nat", "exe_spe"], as_index=False)["act_dnb"].sum()

# Keep only PRS codes strictly below 1400 (higher codes are outside the scope
# of interest here) and only strictly positive act counts.
prs_spe = prs_spe.loc[prs_spe["prs_nat"].lt(1400) & prs_spe["act_dnb"].gt(0)]
prs_spe.head()

Unnamed: 0,dpt,prs_nat,exe_spe,act_dnb
0,1,1098,1,337
1,1,1099,1,68
2,1,1101,3,33
3,1,1101,4,64
4,1,1101,5,9


In [14]:
# Distinct specialty codes left after filtering, in order of first appearance.
pd.unique(prs_spe['exe_spe'])

array([ 1,  3,  4,  5,  6,  7,  8,  9, 11, 14, 15, 17, 18, 32, 35, 42,  2,
       12, 13, 31, 34, 19, 21, 36, 38, 37, 24, 99, 80, 29, 30])

In [6]:
# Load the population table and sum the 'pop' column per department.
# NOTE(review): the recorded output shows a warning raised by this cell and the
# first groups listed are 41..45 rather than the lowest department codes; this
# may indicate mixed dtypes in the 'dpt' column -- confirm against pop.csv.
pop = pd.read_csv("pop.csv")
pop_dpt = pop.groupby('dpt', as_index=False)["pop"].sum()
pop_dpt.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,dpt,pop
0,41,559.357862
1,42,1630.18711
2,43,514.890537
3,44,2472.319674
4,45,1425.241687


On estime le nombre de prestations par personne pour chaque département (et par mois ... puisqu'on n'a les comptes que pour 1 mois ; le code multiplie ensuite par 12 pour obtenir un nombre annuel) !

In [7]:
# Yearly number of acts per inhabitant for each (department, specialty) pair.
# Act totals per (department, specialty), summed over all PRS nature codes.
prs_spedpt = prs_spe.groupby(["dpt", "exe_spe"], as_index=False)["act_dnb"].sum()

# Join the department population (default inner join: departments missing from
# either table are dropped), then scale the one-month ratio by 12 to obtain a
# yearly figure.
nb_prs_spedpt = (
    prs_spedpt
    .merge(pop_dpt, on="dpt")
    .assign(nb=lambda merged: merged['act_dnb'] / merged["pop"] * 12)
    .loc[:, ['dpt', "exe_spe", 'nb']]
)
nb_prs_spedpt.head()

Unnamed: 0,dpt,exe_spe,nb
0,1,1,3.579285
1,1,2,0.238643
2,1,3,0.164345
3,1,4,0.303223
4,1,5,0.121549


In [8]:
# Persist the per-(department, specialty) yearly rates; with pandas defaults the
# DataFrame index is written as the first CSV column.
nb_prs_spedpt.to_csv(path_or_buf="nb_prs_dptspe.csv")

In [9]:
# Act totals per (department, specialty), used below as the denominator of the
# conditional probabilities. This is the same aggregation as `prs_spedpt`,
# computed in an earlier cell.
prs = prs_spe.groupby(["dpt", "exe_spe"], as_index=False)["act_dnb"].sum()
prs.head()

Unnamed: 0,dpt,exe_spe,act_dnb
0,1,1,190436
1,1,2,12697
2,1,3,8744
3,1,4,16133
4,1,5,6467


In [10]:
# Probability that a PRS has nature `prs_nat`, given the department and the
# specialty consulted: each row's act count divided by the act total of its
# (dpt, exe_spe) group.
p_nat_spedpt = (
    prs_spe
    .merge(prs, on=["dpt", "exe_spe"], suffixes=('', '_dpt'))
    .assign(p=lambda joined: joined['act_dnb'] / joined["act_dnb_dpt"])
    .loc[:, ['dpt', "exe_spe", 'prs_nat', 'p']]
)
p_nat_spedpt.head()

Unnamed: 0,dpt,exe_spe,prs_nat,p
0,1,1,1098,0.00177
1,1,1,1099,0.000357
2,1,1,1103,0.000987
3,1,1,1104,0.00157
4,1,1,1105,3.2e-05


In [12]:
# Persist the conditional probability table; with pandas defaults the DataFrame
# index is written as the first CSV column.
p_nat_spedpt.to_csv(path_or_buf="p_prsnat_dptspe.csv")

In [11]:
# Index the yearly rates by (department, specialty) so that rows can be looked
# up with .loc[dpt, exe_spe].
# The call is guarded and not in-place: re-running this cell is then a no-op
# instead of raising KeyError once 'dpt'/'exe_spe' already live in the index.
if {'dpt', 'exe_spe'}.issubset(nb_prs_spedpt.columns):
    nb_prs_spedpt = nb_prs_spedpt.set_index(['dpt', 'exe_spe'])


In [None]:
# Look up the yearly rate row for one (department, specialty) key.
# NOTE(review): the department is passed as a string here while the displayed
# 'dpt' values look numeric -- confirm the index dtype before relying on this.
nb_prs_spedpt.loc[('35', 1)]


In [None]:
# Index the probability table by (department, specialty) so that all PRS nature
# rows of one pair can be selected with .loc[dpt, exe_spe].
# The call is guarded and not in-place: re-running this cell is then a no-op
# instead of raising KeyError once 'dpt'/'exe_spe' already live in the index.
if {'dpt', 'exe_spe'}.issubset(p_nat_spedpt.columns):
    p_nat_spedpt = p_nat_spedpt.set_index(['dpt', 'exe_spe'])

In [None]:
# Draw one PRS nature code for the key ('22', 15), weighting rows by their 'p'
# column. No random_state is passed, so the drawn value differs between runs.
(
    p_nat_spedpt
    .loc[('22', 15)]
    .sample(n=1, weights='p')
    ['prs_nat']
    .iloc[0]
)

In [None]:
# Show every PRS nature row (and its probability) stored under the key ('22', 15).
p_nat_spedpt.loc[('22', 15)]

In [None]:
# Read the 'nb' value (yearly rate) stored under the key ('22', 15).
nb_prs_spedpt.loc[('22', 15)]['nb']

In [13]:
# Distinct specialty codes present in the filtered table, in order of first appearance.
pd.unique(prs_spe['exe_spe'])

array([ 1,  3,  4,  5,  6,  7,  8,  9, 11, 14, 15, 17, 18, 32, 35, 42,  2,
       12, 13, 31, 34, 19, 21, 36, 38, 37, 24, 99, 80, 29, 30])