# DATA TRANSFORMATION - 16_medicamentos
## 0. Previos

Se cargan las bases y las librerías a utilizar.

In [2]:
# Librerías
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import generic_funcions as gf

# Folder with the input files, kept with a trailing path separator.
# Built with os.path.join so the separator matches the host OS: the value is
# still 'Data\\' on Windows and becomes 'Data/' elsewhere.
# NOTE(review): presumably the gf helpers append file names to this string — confirm.
ruta_archivos = os.path.join('Data', '')

## 1. Load

In [3]:
# Name of the module processed in this notebook; passed to gf.carga_datos below.
modulo = 'Medicamentos'

In [4]:
# Dictionary returned by the shared helper; it is handed to gf.carga_datos as
# its `diccionario` argument. NOTE(review): contents are not visible here.
dcc = gf.diccionario_llaves()

In [5]:
# Load the table for the selected module through the shared loader and
# preview the first rows.
base = gf.carga_datos(
    modulo=modulo,
    diccionario=dcc,
    ruta=ruta_archivos,
)
base.head()

Unnamed: 0,id,fecha,Regional_EPS_Desc,Codigo_Prestacion_Op,Descripcion_Prestacion,Codigo_Diagnostico_EPS_Op,Diagnostico_EPS_Desc,Numero_Cantidad_Prestaciones,year,month,year_month
0,940545,2020-02-25,Medellin,M025848,TACROLIMUS MONOHIDRATADO -INDICACIONES DIFEREN...,L309,"DERMATITIS, NO ESPECIFICADA",1.0,2020,2,202002
1,725649,2020-02-26,Medellin,M029434,MONTELUKAST/LEVOCETIRIZINA DIHIDROCLORURO,J304,"RINITIS ALÉRGICA, NO ESPECIFICADA",28.0,2020,2,202002
2,535255,2020-02-27,Medellin,M026578,FLUTICASONA PROPIONATO/SALMETEROL,D848,OTRAS INMUNODEFICIENCIAS ESPECIFICADAS,1.0,2020,2,202002
3,1127795,2020-02-28,Medellin,M004049,DOXICICLINA,N760,VAGINITIS AGUDA,28.0,2020,2,202002
4,1673566,2020-02-25,Medellin,M280151,BECLOMETASONA NASAL,Z929,HISTORIA PERSONAL DE TRATAMIENTO MÉDICO NO ESP...,1.0,2020,2,202002


## 2. Transform

In [6]:
# Run the shared helper on the diagnosis-code column. In the preview below the
# result carries an extra 'Codigo_Diagnostico_EPS_Op_cod' column holding the
# lower-cased first letter of each code.
diag_code_col = 'Codigo_Diagnostico_EPS_Op'
base = gf.letra_codigo(base, diag_code_col)
base.head()

Unnamed: 0,id,fecha,Regional_EPS_Desc,Codigo_Prestacion_Op,Descripcion_Prestacion,Codigo_Diagnostico_EPS_Op,Diagnostico_EPS_Desc,Numero_Cantidad_Prestaciones,year,month,year_month,Codigo_Diagnostico_EPS_Op_cod
0,940545,2020-02-25,Medellin,M025848,TACROLIMUS MONOHIDRATADO -INDICACIONES DIFEREN...,L309,"DERMATITIS, NO ESPECIFICADA",1.0,2020,2,202002,l
1,725649,2020-02-26,Medellin,M029434,MONTELUKAST/LEVOCETIRIZINA DIHIDROCLORURO,J304,"RINITIS ALÉRGICA, NO ESPECIFICADA",28.0,2020,2,202002,j
2,535255,2020-02-27,Medellin,M026578,FLUTICASONA PROPIONATO/SALMETEROL,D848,OTRAS INMUNODEFICIENCIAS ESPECIFICADAS,1.0,2020,2,202002,d
3,1127795,2020-02-28,Medellin,M004049,DOXICICLINA,N760,VAGINITIS AGUDA,28.0,2020,2,202002,n
4,1673566,2020-02-25,Medellin,M280151,BECLOMETASONA NASAL,Z929,HISTORIA PERSONAL DE TRATAMIENTO MÉDICO NO ESP...,1.0,2020,2,202002,z


* Number of delivered doses per patient (`id`) and month
* Number of distinct diagnosis groups (first letter of the diagnosis code) per patient (`id`) and month

In [7]:
# Total delivered quantity per id, year, month and diagnosis-letter group.
group_keys = ['id', 'year', 'month', 'Codigo_Diagnostico_EPS_Op_cod']
doses_by_group = base.groupby(group_keys)['Numero_Cantidad_Prestaciones'].sum()
base_ = doses_by_group.reset_index(name='num_doses')

# Every row is one diagnosis-letter group for that id/year/month, so a constant
# 1 lets a later sum count how many groups were present.
base_['num_dis'] = 1
base_.head()

Unnamed: 0,id,year,month,Codigo_Diagnostico_EPS_Op_cod,num_doses,num_dis
0,417594,2017,6,l,1.0,1
1,423128,2017,6,j,307.0,1
2,500547,2016,12,b,480.0,1
3,500547,2016,12,e,590.0,1
4,500547,2016,12,i,180.0,1


In [8]:
# All diagnosis letters different from 'j' are collapsed into one category,
# 'otra'; 'j' keeps its own category.
base_['diag'] = [
    'j' if letter == 'j' else 'otra'
    for letter in base_['Codigo_Diagnostico_EPS_Op_cod']
]

In [9]:
# Pivot to one row per (id, year, month) with a separate column for every
# value/diagnosis-category pair. The aggregation is given as the string 'sum'
# rather than np.sum because recent pandas versions emit a FutureWarning for
# NumPy callables passed as `aggfunc`.
base_p = base_.pivot_table(
    index=['id', 'year', 'month'],
    columns='diag',
    values=['num_doses', 'num_dis'],
    aggfunc='sum',
).reset_index()

# Flatten the two-level column labels into '<value>_<diag>'. The index columns
# have an empty second level, so they become 'id_', 'year_' and 'month_';
# later cells rely on exactly these names.
base_p.columns = ['_'.join(col).strip() for col in base_p.columns.values]

# A category that does not occur for a given id/year/month is NaN after the
# pivot; replace it with zero (reassigned instead of mutated in place).
base_p = base_p.fillna(0)
base_p.head()

Unnamed: 0,id_,year_,month_,num_dis_j,num_dis_otra,num_doses_j,num_doses_otra
0,417594,2017,6,0.0,1.0,0.0,1.0
1,423128,2017,6,1.0,0.0,307.0,0.0
2,500547,2016,12,0.0,5.0,0.0,1670.0
3,500547,2017,1,0.0,4.0,0.0,890.0
4,500547,2017,2,0.0,3.0,0.0,530.0


* Flag variables

In [10]:
# Total count of diagnosis-letter groups: 'j' plus everything else.
base_p['num_dis'] = base_p['num_dis_j'] + base_p['num_dis_otra']

# 1 when the 'otra' count is non-zero, otherwise 0.
has_other = base_p['num_dis_otra'] != 0
base_p['flag_otra'] = has_other.astype('int64')

# The 'j' count column is kept under the flag name; its values are unchanged.
base_p = base_p.rename(columns={'num_dis_j': 'flag_j'})
base_p.head()

Unnamed: 0,id_,year_,month_,flag_j,num_dis_otra,num_doses_j,num_doses_otra,num_dis,flag_otra
0,417594,2017,6,0.0,1.0,0.0,1.0,1.0,1
1,423128,2017,6,1.0,0.0,307.0,0.0,1.0,0
2,500547,2016,12,0.0,5.0,0.0,1670.0,5.0,1
3,500547,2017,1,0.0,4.0,0.0,890.0,4.0,1
4,500547,2017,2,0.0,3.0,0.0,530.0,3.0,1


In [11]:
# Prefix every column name with 'med_' (the key columns are renamed back in
# the following cell).
prefixed_names = [f'med_{name}' for name in base_p.columns]
base_p.columns = prefixed_names

In [12]:
# Give the three key columns their plain names again.
key_names = {
    'med_id_': 'id',
    'med_year_': 'year',
    'med_month_': 'month',
}
base_p = base_p.rename(columns=key_names)
base_p.head()

Unnamed: 0,id,year,month,med_flag_j,med_num_dis_otra,med_num_doses_j,med_num_doses_otra,med_num_dis,med_flag_otra
0,417594,2017,6,0.0,1.0,0.0,1.0,1.0,1
1,423128,2017,6,1.0,0.0,307.0,0.0,1.0,0
2,500547,2016,12,0.0,5.0,0.0,1670.0,5.0,1
3,500547,2017,1,0.0,4.0,0.0,890.0,4.0,1
4,500547,2017,2,0.0,3.0,0.0,530.0,3.0,1


## 3. Merge with ids table

In [13]:
# Table of ids per month, produced by the shared helper from the data folder.
ids_mensual = gf.base_ids_mensual(ruta_archivos)

# Left join so every row of the ids table is kept; rows without a match in
# base_p get NaN in the med_* columns (see the preview below).
# The join keys are spelled out instead of relying on "all common columns",
# and `validate` makes pandas raise if base_p ever holds more than one row per
# key, which would silently duplicate rows of the ids table.
base_final_med = ids_mensual.merge(
    base_p,
    how='left',
    on=['id', 'year', 'month'],
    validate='many_to_one',
)
base_final_med.head()

Unnamed: 0,id,year,month,med_flag_j,med_num_dis_otra,med_num_doses_j,med_num_doses_otra,med_num_dis,med_flag_otra
0,500547,2016,1,,,,,,
1,500547,2016,2,,,,,,
2,500547,2016,3,,,,,,
3,500547,2016,4,,,,,,
4,500547,2016,5,,,,,,
