In [14]:
from datetime import date

import pandas as pd
import numpy as np
import holidays
import yaml

Para inicializar la dimensión, creamos un DataFrame al que vamos a añadir las fechas y los demás campos.

In [15]:
# Build the calendar spine of the date dimension: one row per day from
# 2005-01-01 through 2009-01-01, both endpoints included.
calendar_days = pd.date_range(start='1/1/2005', end='1/1/2009', freq='D')
dim_fecha = pd.DataFrame(data={"date": calendar_days})
dim_fecha.head()

Unnamed: 0,date
0,2005-01-01
1,2005-01-02
2,2005-01-03
3,2005-01-04
4,2005-01-05


Vamos a añadir algunas columnas: el año, el mes, el día, el día de la semana y el trimestre del año.

In [16]:
# Derive the numeric calendar attributes from the "date" column.
# weekday follows the pandas convention: Monday=0 ... Sunday=6.
date_parts = dim_fecha["date"].dt
dim_fecha = dim_fecha.assign(
    year=date_parts.year,
    month=date_parts.month,
    day=date_parts.day,
    weekday=date_parts.weekday,
    quarter=date_parts.quarter,
)

dim_fecha.head()

Unnamed: 0,date,year,month,day,weekday,quarter
0,2005-01-01,2005,1,1,5,1
1,2005-01-02,2005,1,2,6,1
2,2005-01-03,2005,1,3,0,1
3,2005-01-04,2005,1,4,1,1
4,2005-01-05,2005,1,5,2,1


Ahora vamos a añadir el día del año, la cantidad de días del mes y la fecha en formato string (nombre del mes, nombre del día y fecha completa).

In [17]:
date_accessor = dim_fecha["date"].dt

# Ordinal position of the day within its year.
dim_fecha["day_of_year"] = date_accessor.day_of_year
# NOTE: despite its name, this column holds the number of days in the
# month (days_in_month), not the day number, which is already in "day".
dim_fecha["day_of_month"] = date_accessor.days_in_month
# No locale is passed, so month/day names come out in the default language
# (English in the rendered output). Run `locale -a` on Unix to list the
# installed locales (e.g. 'es_CO.UTF8') if localized names are wanted.
dim_fecha["month_str"] = date_accessor.month_name()
dim_fecha["day_str"] = date_accessor.day_name()
# Day/month/year text representation, e.g. 01/01/2005.
dim_fecha["date_str"] = date_accessor.strftime("%d/%m/%Y")
dim_fecha.head()

Unnamed: 0,date,year,month,day,weekday,quarter,day_of_year,day_of_month,month_str,day_str,date_str
0,2005-01-01,2005,1,1,5,1,1,31,January,Saturday,01/01/2005
1,2005-01-02,2005,1,2,6,1,2,31,January,Sunday,02/01/2005
2,2005-01-03,2005,1,3,0,1,3,31,January,Monday,03/01/2005
3,2005-01-04,2005,1,4,1,1,4,31,January,Tuesday,04/01/2005
4,2005-01-05,2005,1,5,2,1,5,31,January,Wednesday,05/01/2005


# holidays and weekend

In [18]:
# Colombian holiday calendar, with holiday names requested in Spanish.
co_holidays = holidays.CO(language="es")


def _is_holiday(day):
    """Return whether ``day`` is contained in the Colombian holiday calendar."""
    return day in co_holidays


def _holiday_name(day):
    """Return the calendar entry for ``day`` (its holiday name), or None if absent."""
    return co_holidays.get(day)


dim_fecha["is_Holiday"] = dim_fecha["date"].apply(_is_holiday)
dim_fecha["holiday"] = dim_fecha["date"].apply(_holiday_name)
# Stamp every row with the date on which this dimension was generated.
dim_fecha["saved"] = date.today()
# weekday is Monday=0 ... Sunday=6, so values above 4 are Saturday/Sunday.
dim_fecha["weekend"] = dim_fecha["weekday"] > 4
dim_fecha.head()



Unnamed: 0,date,year,month,day,weekday,quarter,day_of_year,day_of_month,month_str,day_str,date_str,is_Holiday,holiday,saved,weekend
0,2005-01-01,2005,1,1,5,1,1,31,January,Saturday,01/01/2005,True,Año Nuevo,2024-04-22,True
1,2005-01-02,2005,1,2,6,1,2,31,January,Sunday,02/01/2005,False,,2024-04-22,True
2,2005-01-03,2005,1,3,0,1,3,31,January,Monday,03/01/2005,False,,2024-04-22,False
3,2005-01-04,2005,1,4,1,1,4,31,January,Tuesday,04/01/2005,False,,2024-04-22,False
4,2005-01-05,2005,1,5,2,1,5,31,January,Wednesday,05/01/2005,False,,2024-04-22,False


In [19]:
from urllib.parse import quote_plus

from sqlalchemy import create_engine


def _build_db_url(db_config):
    """Build a SQLAlchemy database URL string from one config section.

    Parameters
    ----------
    db_config : dict
        Mapping with the keys ``drivername``, ``user``, ``password``,
        ``host``, ``port`` and ``dbname``.

    Returns
    -------
    str
        URL of the form ``driver://user:password@host:port/dbname``.

    Notes
    -----
    ``user`` and ``password`` are percent-encoded so that reserved
    characters such as ``@``, ``:``, ``/`` or ``#`` cannot corrupt the URL.
    This assumes config.yml stores the raw (not already URL-encoded)
    credentials -- TODO confirm.
    """
    # str() guards against YAML parsing an all-digit credential as an int.
    user = quote_plus(str(db_config['user']))
    password = quote_plus(str(db_config['password']))
    return (f"{db_config['drivername']}://{user}:{password}@{db_config['host']}:"
            f"{db_config['port']}/{db_config['dbname']}")


# Read the connection settings; only the parsing needs the file handle open.
with open('../config.yml', 'r') as f:
    config = yaml.safe_load(f)

config_co = config['CO_SA']
config_etl = config['ETL_PRO']

# Construct the database URLs
url_co = _build_db_url(config_co)
url_etl = _build_db_url(config_etl)

# Create the SQLAlchemy Engines
co_sa = create_engine(url_co)
etl_conn = create_engine(url_etl)

In [20]:
# Write the dimension to the ETL database, replacing any existing
# 'dim_fecha' table; the DataFrame index is stored as 'key_dim_fecha'.
dim_fecha.to_sql(
    name='dim_fecha',
    con=etl_conn,
    if_exists='replace',
    index_label='key_dim_fecha',
)

462

In [21]:
# Summarize the frame: row count, per-column non-null counts, and dtypes.
dim_fecha.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1462 entries, 0 to 1461
Data columns (total 15 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          1462 non-null   datetime64[ns]
 1   year          1462 non-null   int32         
 2   month         1462 non-null   int32         
 3   day           1462 non-null   int32         
 4   weekday       1462 non-null   int32         
 5   quarter       1462 non-null   int32         
 6   day_of_year   1462 non-null   int32         
 7   day_of_month  1462 non-null   int32         
 8   month_str     1462 non-null   object        
 9   day_str       1462 non-null   object        
 10  date_str      1462 non-null   object        
 11  is_Holiday    1462 non-null   bool          
 12  holiday       73 non-null     object        
 13  saved         1462 non-null   object        
 14  weekend       1462 non-null   bool          
dtypes: bool(2), datetime64[ns](1), int32(7

In [22]:
# Show the column labels of dim_fecha.
dim_fecha.columns

Index(['date', 'year', 'month', 'day', 'weekday', 'quarter', 'day_of_year',
       'day_of_month', 'month_str', 'day_str', 'date_str', 'is_Holiday',
       'holiday', 'saved', 'weekend'],
      dtype='object')

In [23]:
# Map each column name of dim_fecha to its dtype.
dict_types = dict(dim_fecha.dtypes.items())

In [24]:
# Inspect the Python class of the dtype object backing the "date" column.
type(dim_fecha["date"].dtype)

numpy.dtypes.DateTime64DType

In [25]:
# List every column next to its dtype, one pair per line.
for column_name, column_dtype in dict_types.items():
    print(column_name, column_dtype)

date datetime64[ns]
year int32
month int32
day int32
weekday int32
quarter int32
day_of_year int32
day_of_month int32
month_str object
day_str object
date_str object
is_Holiday bool
holiday object
saved object
weekend bool
