In [None]:
import joblib
import json
import pandas as pd
import numpy as np

import zipfile
import os

# Archive holding the preprocessing artifacts and the folder it is unpacked into.
ZIP_PATH = "artifacts_preprocesamiento.zip"
DEST = "artifacts_preprocesamiento"

# Unpack every member of the archive under DEST (the folder is created if needed).
with zipfile.ZipFile(ZIP_PATH) as archive:
    archive.extractall(path=DEST)

# Show what was extracted so the artifact file names can be checked by eye.
print("Contenido extraído:", os.listdir(DEST), sep="\n")


In [None]:
# Folder that contains the fitted preprocessing artifacts.
ARTIFACT_DIR = "artifacts_preprocesamiento"

# === Load artifacts ===
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code;
# only load artifacts that come from a trusted source.
num_pipe = joblib.load(f"{ARTIFACT_DIR}/num_pipe.joblib")
preprocessor_cat = joblib.load(f"{ARTIFACT_DIR}/cat_preprocessor.joblib")

# Metadata written next to the artifacts: column lists and output feature names.
with open(f"{ARTIFACT_DIR}/metadata_preprocesamiento.json", "r") as meta_file:
    meta = json.loads(meta_file.read())

# Unpack the metadata entries used by the cells below.
cols_num, cols_cat, cols_onehot, feature_names, cat_out_cols = (
    meta[key]
    for key in ("cols_num", "cols_cat", "cols_onehot", "feature_names", "cat_out_cols")
)

In [None]:
# === New data ===

#######################################################
#######################################################
#######################################################

# Fill in: path (or URL) of the CSV file holding the new rows to transform.
NEW_DATA_PATH = ""

#######################################################
#######################################################
#######################################################

# Fail early with an actionable message instead of the opaque TypeError that
# pd.read_csv() raises when it is called without a path.
if not NEW_DATA_PATH:
    raise ValueError(
        "Define NEW_DATA_PATH con la ruta del CSV de nuevos datos antes de ejecutar esta celda."
    )

new_df = pd.read_csv(NEW_DATA_PATH)

# Display the raw column names so they can be compared with the expected ones.
new_df.columns

In [None]:
#######################################################
#######################################################
#######################################################

# Fill in: name of the target column (leave empty if the new data has none).
target = ""

#######################################################
#######################################################
#######################################################

# If the target column came along by mistake, remove it (no-op when absent).
new_df = new_df.drop(columns=[target], errors="ignore")

# Ensure the raw input columns: missing ones become NaN, extras are dropped,
# and the order is cols_num followed by cols_cat.
expected_raw = cols_num + cols_cat
missing_raw = [c for c in expected_raw if c not in new_df.columns]
if missing_raw:
    print("Aviso: columnas de entrada ausentes rellenadas con NaN:", missing_raw)

# reindex fills absent columns with np.nan (float). Assigning pd.NA instead
# would create object-dtype columns that cannot be converted to float when the
# numeric pipeline validates its input.
new_df = new_df.reindex(columns=expected_raw)

# Absent categorical columns are kept as object dtype (as a pd.NA-filled column
# would have been), only with NaN as the missing marker.
# NOTE(review): assumes the categorical preprocessor treats NaN as missing — confirm.
missing_cat = [c for c in cols_cat if c in missing_raw]
if missing_cat:
    new_df = new_df.astype({c: object for c in missing_cat})

X_new_num = new_df[cols_num]
X_new_cat = new_df[cols_cat]

# === Categorical features (same fitted encoder, never re-fitted here) ===
X_new_cat_proc = preprocessor_cat.transform(X_new_cat)

# The DataFrame constructor below needs a dense array. If the transformer
# returned a SciPy sparse matrix (anything exposing .toarray), densify it first.
if hasattr(X_new_cat_proc, "toarray"):
    X_new_cat_proc = X_new_cat_proc.toarray()

# Rebuild the categorical output names with the col___category renaming.
# This logic is intentionally left as-is: the resulting names are matched
# against cat_out_cols, so any change here would silently zero-fill features.
cols_out_cat = list(preprocessor_cat.get_feature_names_out())

rename_map = {}
if len(cols_onehot) > 0:
    ohe = preprocessor_cat.named_transformers_["onehot"].named_steps["encoder"]
    ohe_names = list(ohe.get_feature_names_out(cols_onehot))
    for name in ohe_names:
        # The first column whose "<col>_" prefix matches wins.
        # NOTE(review): ambiguous if one column name is a prefix of another
        # (e.g. "a" and "a_b"); presumably training used the same rule — confirm.
        for col in cols_onehot:
            prefix = col + "_"
            if name.startswith(prefix):
                cat = name[len(prefix):]
                rename_map[name] = f"{col}___{cat}"
                break

cols_out_cat = [rename_map.get(c, c) for c in cols_out_cat]

df_new_cat_encode = pd.DataFrame(X_new_cat_proc, columns=cols_out_cat, index=new_df.index)

# Align to cat_out_cols: absent columns are filled with 0.0, extras are dropped.
# Report the absent ones so a naming mismatch does not pass unnoticed.
missing_cat_out = [c for c in cat_out_cols if c not in df_new_cat_encode.columns]
if missing_cat_out:
    print("Aviso: columnas categóricas codificadas ausentes rellenadas con 0:", missing_cat_out)
df_new_cat_encode = df_new_cat_encode.reindex(columns=cat_out_cols, fill_value=0.0)

# === Numeric features (same fitted pipeline, never re-fitted here) ===
T_new_num = num_pipe.transform(X_new_num)
T_new_num_df = pd.DataFrame(T_new_num, columns=cols_num, index=new_df.index)

# === Final matrix (numeric + categorical) ===
T_new_final = pd.concat([T_new_num_df, df_new_cat_encode], axis=1)

# Force the exact final column order given by feature_names. Features that are
# absent are filled with 0.0, and reported so the gap is visible rather than silent.
missing_features = [c for c in feature_names if c not in T_new_final.columns]
if missing_features:
    print("Aviso: features ausentes rellenadas con 0:", missing_features)
T_new_final = T_new_final.reindex(columns=feature_names, fill_value=0.0)

T_new_final.to_csv("New_final.csv", index=False)

# float32 NumPy array, in case it is needed for Keras.
X_new_final = T_new_final.to_numpy(dtype=np.float32)

print("Listo:", T_new_final.shape)
