In [2]:
import datetime
from datetime import timedelta
from urllib.parse import quote

import numpy as np
import pandas as pd
import yaml
from sqlalchemy import create_engine


# Database connections

In [62]:
# Load the connection settings for both databases from the local YAML file.
# The file handle is only needed for parsing, so everything else happens
# after the `with` block has closed it.
with open('config.yml', 'r') as f:
    config = yaml.safe_load(f)

config_co = config['CO_SA']
config_etl = config['ETL_PRO']


def build_db_url(cfg):
    """Build a SQLAlchemy connection URL string from one config section.

    Parameters
    ----------
    cfg : mapping
        Must provide the keys ``drivername``, ``user``, ``password``,
        ``host``, ``port`` and ``dbname``.

    Returns
    -------
    str
        A URL of the form ``drivername://user:password@host:port/dbname``.

    Notes
    -----
    The user name and password are percent-encoded so that characters with
    a meaning inside a URL (``@``, ``:``, ``/``, ``#``, ``%`` ...) cannot
    corrupt it. The values in the config file are therefore expected to be
    the raw credentials, not pre-encoded ones.
    """
    # str() because YAML may parse an all-digit credential as an int.
    user = quote(str(cfg['user']), safe='')
    password = quote(str(cfg['password']), safe='')
    return (f"{cfg['drivername']}://{user}:{password}@{cfg['host']}:"
            f"{cfg['port']}/{cfg['dbname']}")


# Construct the database URLs
url_co = build_db_url(config_co)
url_etl = build_db_url(config_etl)

# Create the SQLAlchemy engines: `co_sa` is the source, `etl_conn` the target.
co_sa = create_engine(url_co)
etl_conn = create_engine(url_etl)

# Extract

In [79]:
# Read the three source tables through the `co_sa` engine, one DataFrame each.
# The tuple order below matches the order of the target names on the left.
df_citas, df_urgencias, df_hosp = (
    pd.read_sql_table(table_name, co_sa)
    for table_name in ('citas_generales', 'urgencias', 'hospitalizaciones')
)

In [70]:
# Print a summary of the hospitalization frame: columns, non-null counts, dtypes.
df_hosp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 893 entries, 0 to 892
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   codigo_hospitalizacion    893 non-null    object        
 1   id_usuario                893 non-null    object        
 2   id_medico                 893 non-null    object        
 3   fecha_solicitud           893 non-null    datetime64[ns]
 4   hora_solicitud            893 non-null    object        
 5   fecha_atencion            893 non-null    datetime64[ns]
 6   hora_atencion             893 non-null    object        
 7   duracion_hospitalizacion  893 non-null    int64         
 8   diagnostico               893 non-null    object        
dtypes: datetime64[ns](2), int64(1), object(6)
memory usage: 62.9+ KB


# Transformations

In [85]:
# Each source table names its key column differently; align all three on a
# common `codigo_servicio` so the frames can be stacked.
df_hosp = df_hosp.rename(columns={'codigo_hospitalizacion': 'codigo_servicio'})
df_urgencias = df_urgencias.rename(columns={'codigo_urgencia': 'codigo_servicio'})
df_citas = df_citas.rename(columns={'codigo_cita': 'codigo_servicio'})

# Tag every row with the service it came from before the frames are merged.
df_citas['tipo_servicio'] = 'citas'
df_urgencias['tipo_servicio'] = 'urgencias'
df_hosp['tipo_servicio'] = 'hospitalizacion'

# Columns that survive into the unified table; everything else is dropped.
keep_columns = ['codigo_servicio', 'id_usuario', 'id_medico', 'fecha_solicitud',
                'fecha_atencion', 'hora_atencion', 'hora_solicitud', 'tipo_servicio']

# ignore_index=True gives the stacked frame a fresh 0..n-1 index. Without it
# every source keeps its own 0-based index, and since the index is later
# written out as `key_trans`, that key column would contain duplicates.
trans_servicio = pd.concat([df_hosp, df_urgencias, df_citas], axis=0, ignore_index=True)

# Boolean column mask (rather than `trans_servicio[keep_columns]`) so the
# columns stay in their concatenated order.
trans_servicio = trans_servicio.loc[:, trans_servicio.columns.isin(keep_columns)].copy()


def _time_to_timedelta(t):
    """Convert a time-of-day value into the timedelta elapsed since midnight.

    `t` must expose `.hour`, `.minute` and `.second`; any sub-second part
    is discarded.
    """
    return timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)


for fecha_col in ('fecha_atencion', 'fecha_solicitud'):
    trans_servicio[fecha_col] = pd.to_datetime(trans_servicio[fecha_col])

for hora_col in ('hora_atencion', 'hora_solicitud'):
    trans_servicio[hora_col] = trans_servicio[hora_col].apply(_time_to_timedelta)

# Date + offset-from-midnight = full timestamp of each event.
trans_servicio['fecha_hora_atencion'] = trans_servicio['fecha_atencion'] + trans_servicio['hora_atencion']
trans_servicio['fecha_hora_solicitud'] = trans_servicio['fecha_solicitud'] + trans_servicio['hora_solicitud']

# Writing the result to the database is done in the load section, not here.
trans_servicio.head()


Unnamed: 0,codigo_servicio,id_usuario,id_medico,fecha_solicitud,hora_solicitud,fecha_atencion,hora_atencion,tipo_servicio,fecha_hora_atencion,fecha_hora_solicitud
0,808809,1705225684180,272113543,2007-03-19,0 days 08:00:00,2007-03-19,0 days 08:30:00,hospitalizacion,2007-03-19 08:30:00,2007-03-19 08:00:00
1,808811,1705225699140,81113543,2006-12-25,0 days 08:00:00,2006-12-25,0 days 08:30:00,hospitalizacion,2006-12-25 08:30:00,2006-12-25 08:00:00
2,808813,1705225788780,166113543,2007-12-16,0 days 08:00:00,2007-12-16,0 days 08:28:00,hospitalizacion,2007-12-16 08:28:00,2007-12-16 08:00:00
3,808815,1705225786530,228113543,2007-01-07,0 days 08:00:00,2007-01-07,0 days 08:34:00,hospitalizacion,2007-01-07 08:34:00,2007-01-07 08:00:00
4,808817,1705225880540,172113543,2006-10-29,0 days 08:00:00,2006-10-29,0 days 08:26:00,hospitalizacion,2006-10-29 08:26:00,2006-10-29 08:00:00


# Load

In [None]:
# Write the unified frame to the `trans_servicio` table through `etl_conn`.
# An existing table of that name is replaced, and the DataFrame index is
# stored in a column called `key_trans`.
trans_servicio.to_sql(
    name='trans_servicio',
    con=etl_conn,
    if_exists='replace',
    index_label='key_trans',
)