## Dependencies

---

In [63]:
import functools

import pandas as pd

## Functions

In [40]:
def add_uid(func):
    """
    Decorate a dimension-building function so its result carries a uid column.

    The wrapped function is called with the arguments it was given; a column
    named ``<column>_id`` holding ``index + 1`` is then added to the frame it
    returned, and that same frame is returned.
    :param func: function that creates dimensions. It receives the feature name
        as ``column`` (keyword) or as its first positional argument.
    :return: wrapper returning the dimension created by ``func`` + an uid.
    """
    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kargs) -> pd.DataFrame:
        dim = func(*args, **kargs)
        # The feature name labels the id column; take it from the first
        # positional argument when given, otherwise from the ``column`` keyword.
        column = args[0] if args else kargs["column"]
        dim[f"{column}_id"] = dim.index + 1
        return dim
    return wrapper

In [41]:
def create_dim(column:str, dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Pull a single feature out of a dataframe as its own one-column dataframe.
    :param column: name of the feature to extract.
    :param dataframe: object the feature is read from.
    :return: dataframe holding only the requested feature.
    """
    feature = dataframe[f"{column}"]
    return feature.to_frame()

In [83]:
def entities_persistence(dataframe:pd.DataFrame) -> None:
    """
    Export a dataframe as a .csv entity to be used in the DWH.

    The first column drives the export: its name becomes the file name and its
    dtype selects the destination. An ``object`` first column is written to
    ``persistence/dimensions/dim_<name>.csv``; an ``int64`` first column is
    written to ``persistence/facts/fact_<name>.csv``. Any other dtype writes
    nothing. The index is not written, and the target folders are not created
    here.
    :param dataframe: entity to be exported.
    """
    file_name = dataframe.columns.to_list()[0]
    # ``dtypes`` is a Series labelled by column name, so the first entry must be
    # read positionally with ``.iloc``; an integer key passed to ``[]`` relies on
    # a deprecated positional fallback.
    type_df = str(dataframe.dtypes.iloc[0])

    if type_df == "object":
        dataframe.to_csv(f"persistence/dimensions/dim_{file_name}.csv", index=False)
    elif type_df == "int64":
        dataframe.to_csv(f"persistence/facts/fact_{file_name}.csv", index=False)


## Dimensions & facts

First, the source dataframe must be split into `Dimensions`, each of which can be related to the `Facts` through a unique id.

In [None]:
# Column names routed to the dimensions and to the facts, respectively.
dimensions_keys = ["model", "description", "engine"]
facts_keys = ["year", "kilometers", "price"]

# Accumulators filled while the source columns are split.
dimensions_df, facts_df = [], []

# Dimension builder whose result also carries the "<column>_id" column.
create_dim_uid = add_uid(create_dim)

In [42]:
# Load the source data and keep its column names for the split below.
df_temp = pd.read_csv("persistence/sedan-cards_march-april.csv")
columns = list(df_temp.columns)

Create one dataframe per column and store it in a list according to whether it is a `Dimension` or a `Fact`. The rule that decides which one it belongs to is given by the keys defined in `dimensions_keys` and `facts_keys`.

In [50]:
for column in columns:
    # Choose the list this column belongs to; columns named in neither key
    # list are skipped.
    if column in dimensions_keys:
        target = dimensions_df
    elif column in facts_keys:
        target = facts_df
    else:
        continue
    target.append(create_dim_uid(column=column, dataframe=df_temp))

Build the model elements:

In [58]:
# Look each dimension up by its feature column (the first column of every
# frame) rather than by list position, so the names below stay correct
# whatever order the columns have in the source file.
dimensions_by_name = {dimension.columns[0]: dimension for dimension in dimensions_df}
dim_model = dimensions_by_name["model"]
dim_engine = dimensions_by_name["engine"]
dim_description = dimensions_by_name["description"]

# Place every fact frame side by side in a single frame.
facts = pd.concat(facts_df, axis=1)
# Rename the id columns generated for the facts to the dimension id names.
facts = facts.rename(columns={"year_id":"model_id","kilometers_id":"engine_id","price_id":"description_id"})

## Export dimensions & facts

In [85]:
# Hand each collected dimension frame to entities_persistence for export.
for dimension in dimensions_df:
    entities_persistence(dimension)

In [86]:
# Hand the combined facts frame to entities_persistence for export; the name
# and dtype of its first column select the output file.
entities_persistence(facts)