In [6]:
from datetime import date
from urllib.parse import quote

import pandas as pd
import yaml
from sqlalchemy import create_engine


# Database connections

In [7]:
def _build_db_url(cfg):
    """Assemble a SQLAlchemy connection URL from one section of config.yml.

    Parameters
    ----------
    cfg : dict
        Mapping that provides the keys ``drivername``, ``user``, ``password``,
        ``host``, ``port`` and ``dbname``.

    Returns
    -------
    str
        ``drivername://user:password@host:port/dbname`` with the user and
        password percent-encoded.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing from ``cfg``.
    """
    # Percent-encode the credentials: a raw '@', ':', '/' or '%' inside the
    # user or password would otherwise be read as a URL delimiter and the
    # engine would connect with the wrong credentials/host.
    # NOTE(review): this assumes config.yml stores the credentials unencoded;
    # values that were already percent-encoded by hand must be stored raw now.
    user = quote(str(cfg['user']), safe='')
    password = quote(str(cfg['password']), safe='')
    return (f"{cfg['drivername']}://{user}:{password}@{cfg['host']}:"
            f"{cfg['port']}/{cfg['dbname']}")


# Read the connection settings; the file is only needed while parsing.
with open('../config.yml', 'r') as f:
    config = yaml.safe_load(f)

# One config section per database used by this notebook.
config_co = config['CO_SA']
config_etl = config['ETL_PRO']

# Construct the database URLs
url_co = _build_db_url(config_co)
url_etl = _build_db_url(config_etl)

# Create the SQLAlchemy engines: co_sa is read from, etl_conn is written to.
co_sa = create_engine(url_co)
etl_conn = create_engine(url_etl)
    

# Extract

In [8]:
# Read the complete 'ips' table through the co_sa engine into a DataFrame.
dim_ips = pd.read_sql_table(table_name='ips', con=co_sa)

# Print column names, non-null counts and dtypes as a quick sanity check.
dim_ips.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86 entries, 0 to 85
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id_ips        86 non-null     object
 1   tipo_ips      86 non-null     object
 2   nombre        86 non-null     object
 3   direccion     86 non-null     object
 4   nivel         86 non-null     object
 5   municipio     86 non-null     object
 6   departamento  86 non-null     object
dtypes: object(7)
memory usage: 4.8+ KB


# Transformations

In [9]:
# Replace cells that hold an empty string with the string '0', in every column.
# The result is rebound instead of using inplace=True, so the cell yields a
# fresh frame and can be re-run safely.
dim_ips = dim_ips.replace({'': '0'})

In [10]:
# Stamp every row with the date on which this cell was executed.
load_date = date.today()
dim_ips["saved"] = load_date

# Summary statistics (count / unique / top / freq) for all columns.
dim_ips.describe(include='all')

Unnamed: 0,id_ips,tipo_ips,nombre,direccion,nivel,municipio,departamento,saved
count,86,86,86,86,86,86,86,86
unique,86,6,83,86,6,23,7,1
top,IPS_1,Hospital,Hospital Isaías Duarte Cancio,Kra 76 # 38-102,0,Cali,Cundinamarca,2024-09-17
freq,1,27,2,1,59,11,15,86


# Load

In [11]:
# Write the frame to the 'dim_ips' table through the etl_conn engine.
# if_exists='replace' drops an existing table before writing, and the
# DataFrame index is stored as the column 'key_dim_ips'.
dim_ips.to_sql(
    name='dim_ips',
    con=etl_conn,
    if_exists='replace',
    index_label='key_dim_ips',
)

86