## Configuración

In [0]:
from config_loader import Config
import mlflow
import mlflow.pyfunc
from mlflow.models.signature import infer_signature
from mlflow.tracking import MlflowClient

# Load the project configuration once; everything below is read from these
# two mappings.
paths = Config.get_paths()
params = Config.get_params()

# Location pieces used to build fully qualified table / model names.
catalog = paths['catalog']
schemas = paths['schemas']
tables = paths['tables']

# All feature-engineering settings live under a single section of the params.
fe_params = params['feature_engineering']
target_col = fe_params['target_column']
features_list = fe_params['features']
pk_col = fe_params['primary_key']
seed = fe_params['seed']
# Weights handed to DataFrame.randomSplit, in [train, test] order.
split_ratio = [fe_params['train_ratio'], fe_params['test_ratio']]

# Three-part name under which the fitted feature-engineering model is registered.
fe_model_name = f"{catalog}.{schemas['feature_store']}.feature_engineering"

## Extraer Data

In [0]:
# Fully qualified name (<catalog>.<schema>.<table>) of the curated input table.
source_path = f"{catalog}.{schemas['curated']}.{tables['cdz']['df_curated']}"

# Load the table as a Spark DataFrame and preview it in the notebook output.
df = spark.table(source_path)
display(df)

## Train y Test

In [0]:
# Split the curated data into train and test sets using the configured
# weights; the seed keeps the split reproducible across runs.
train_df, test_df = df.randomSplit(split_ratio, seed=seed)

# Report both split sizes explicitly: a notebook cell only echoes its last
# expression, so a bare `train_df.count()` would run a Spark job and then
# throw the result away.
print(f"train rows: {train_df.count()}")
print(f"test rows: {test_df.count()}")

## Feature Engineering

In [0]:
%run "./feature_engineering"

In [0]:
# Materialize both splits as pandas DataFrames; SuperFE is fitted and applied
# on pandas data below. toPandas() collects every row onto the driver.
train_pd = train_df.toPandas()
test_pd = test_df.toPandas()

# FIT
# SuperFE is not defined in this notebook; presumably it is brought into scope
# by the `%run "./feature_engineering"` cell -- confirm.
# Reuse the values already extracted from the config (features_list, pk_col,
# target_col) instead of indexing `params` again, so there is a single source
# of truth for these settings.
fe = SuperFE(
    features_to_scale=features_list,
    pk_col=pk_col,
    target_col=target_col,
)
# Fit on the training split only; the same fitted object is then applied to
# both splits.
fe.fit(train_pd)

# TRANSFORM
# The first positional argument is passed as None; presumably it is the
# MLflow pyfunc `context` parameter, unused outside model serving -- confirm
# against SuperFE.predict.
train_transformed_pd = fe.predict(None, train_pd)
test_transformed_pd = fe.predict(None, test_pd)

# Convert back to Spark DataFrames so the results can be saved as tables.
train_transformed_sp = spark.createDataFrame(train_transformed_pd)
test_transformed_sp = spark.createDataFrame(test_transformed_pd)

# NOTE(review): the experiment owner is hard-coded to one user's workspace
# folder; consider moving it to the config so other users can run this
# notebook unchanged.
user_name = "matiasadell@hotmail.com"
experiment_path = f"/Users/{user_name}/{paths['mlflow']['experiment_prefix']}"
mlflow.set_experiment(experiment_path)

# Single definition of the artifact path so the logged location and the
# registration URI below cannot drift apart.
fe_artifact_path = "feature_engineering"

# Bind the run object from the context manager rather than looking it up
# again through the global mlflow.active_run().
with mlflow.start_run(run_name="feature_engineering") as run:
    # Small input/output example used to infer the model signature.
    sample_input = train_pd[fe.features_to_scale].head(5)
    sample_output = fe.predict(None, sample_input)

    signature = infer_signature(sample_input, sample_output)

    # Log the fitted transformer as a pyfunc model inside this run.
    mlflow.pyfunc.log_model(
        artifact_path=fe_artifact_path,
        python_model=fe,
        signature=signature,
        input_example=sample_input,
        pip_requirements=["scikit-learn", "pandas", "numpy"],
    )

    # Register the logged artifact as a new version of the registered model.
    result = mlflow.register_model(
        model_uri=f"runs:/{run.info.run_id}/{fe_artifact_path}",
        name=fe_model_name,
    )

    # Point the "inference" alias at the version that was just registered.
    client = MlflowClient()
    client.set_registered_model_alias(
        name=fe_model_name,
        alias="inference",
        version=result.version,
    )
    # The original message contained mojibake ("âœ…"): the UTF-8 bytes of the
    # check-mark emoji decoded as cp1252. Restored to the intended character.
    print(f"✅ SuperFE registrada (v{result.version}) con alias 'inference'")


## Guardar en Feature Store

In [0]:
# Fully qualified destination tables for the transformed train / test sets.
train_target = f"{catalog}.{schemas['feature_store']}.{tables['features']['train_set']}"
test_target = f"{catalog}.{schemas['feature_store']}.{tables['features']['test_set']}"

# Write each transformed split to its Delta table, replacing any existing
# contents (train first, then test).
for transformed_sp, target_table in (
    (train_transformed_sp, train_target),
    (test_transformed_sp, test_target),
):
    delta_writer = transformed_sp.write.format("delta").mode("overwrite")
    delta_writer.saveAsTable(target_table)