# 🎯 Flujo Completo de Cristian (Data Scientist)

**Modelo**: Autoencoder para detección de anomalías

**Flujo**: Splunk → EDA → Modelo → Telemetría → Producción


In [None]:
# IMPORTS
import sys
sys.path.append("/srv/notebooks_custom/helpers")

# Helpers empresariales
from telemetry_helper import log_metrics, log_training_step, log_error, log_prediction
# Nota: calculate_all_metrics es para modelos supervisados (con target)
# Autoencoder usa MSE/MAE directamente
from metrics_calculator import calculate_all_metrics

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

print("✅ Imports exitosos")


# FASE 1: Consultar datos de Splunk (exploración)


In [None]:
# Build the dataset used for exploration.
# NOTE: local development works on synthetic data; per the original author's
# note, in production DSDL hands the real data to fit() automatically.
print("🔍 Generando datos dummy para desarrollo...")
# Fixed seed so every run of the notebook produces the same 1100 x 5 sample.
np.random.seed(42)
df = pd.DataFrame(
    data=np.random.normal(loc=0, scale=1, size=(1100, 5)),
    columns=["feature_" + str(i) for i in range(5)],
)
print(f"✅ Datos dummy: {df.shape}")


# FASE 2: Exploración (EDA)


In [None]:
# Quick structural and statistical overview of the DataFrame.
print("📊 Info:")
# DataFrame.info() writes its report straight to stdout and returns None;
# wrapping it in print() would append a spurious "None" line to the output.
df.info()
print("\n📈 Stats:")
print(df.describe())


# FASE 3: Funciones del Modelo (init, fit, apply, summary)


In [None]:
def init(param):
    """Reset the module-level model state.

    Clears the ``model``, ``scaler`` and ``n_features`` globals so that a
    later ``fit`` call starts from a clean slate.

    Args:
        param: Configuration object; it is only echoed to stdout here.

    Returns:
        The freshly reset ``model`` global, i.e. always ``None``.
    """
    global model, scaler, n_features
    print(f"🔧 Init: {param}")
    # Nothing is built at init time; the network is created inside fit().
    model = scaler = n_features = None
    return model


In [None]:
def fit(df, param):
    """Train the anomaly-detection autoencoder on ``df``.

    Every column of ``df`` is used as a feature. The data is standardized
    with a ``StandardScaler``, split 80/20 into train/validation sets, and a
    symmetric dense autoencoder (32-16-8-16-32) is trained to reconstruct its
    own input. Validation MSE/MAE are reported through the telemetry helpers.

    The ``model``, ``scaler`` and ``n_features`` globals are replaced together
    and only after training has succeeded, so a failed fit never leaves a new
    scaler paired with an older model.

    Args:
        df: DataFrame whose columns are all numeric features.
        param: Optional dict of hyper-parameters. Recognized keys:
            ``'epochs'`` (default 20) and ``'batch_size'`` (default 32,
            which is also the Keras default). ``None`` means "all defaults".

    Returns:
        The trained ``tf.keras`` model (also stored in the ``model`` global).

    Raises:
        Exception: Any error raised during preprocessing, training or
            telemetry is reported through ``log_error`` and re-raised
            unchanged.
    """
    global model, scaler, n_features
    try:
        print(f"📊 Fit: {df.shape}")
        options = param or {}
        epochs = options.get('epochs', 20)
        batch_size = options.get('batch_size', 32)

        feature_count = len(df.columns)
        new_scaler = StandardScaler()
        X_scaled = new_scaler.fit_transform(df.values)
        X_train, X_val = train_test_split(X_scaled, test_size=0.2, random_state=42)

        new_model = tf.keras.Sequential([
            tf.keras.layers.Dense(32, activation='relu', input_shape=(feature_count,)),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(8, activation='relu'),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(32, activation='relu'),
            # Linear output: the targets are standardized (zero mean, both
            # signs, unbounded), so a sigmoid limited to (0, 1) could never
            # reconstruct them and would inflate every reconstruction error.
            tf.keras.layers.Dense(feature_count, activation='linear'),
        ])
        new_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

        # The autoencoder learns the identity mapping: input == target.
        new_model.fit(
            X_train,
            X_train,
            validation_data=(X_val, X_val),
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
        )

        val_pred = new_model.predict(X_val, verbose=0)
        mse = np.mean((X_val - val_pred) ** 2)
        mae = np.mean(np.abs(X_val - val_pred))

        # Publish the new state atomically, now that training succeeded.
        model, scaler, n_features = new_model, new_scaler, feature_count

        # Telemetry
        log_metrics(model_name='cristian_demo', mae=mae, mse=mse)
        log_training_step(model_name='cristian_demo', epoch=epochs, loss=mse)

        print(f"📊 MSE: {mse:.4f}, MAE: {mae:.4f}")
        return model
    except Exception as e:
        log_error(model_name='cristian_demo', error_message=str(e), error_type='fit')
        # Bare raise keeps the original traceback intact.
        raise


In [None]:
def apply(df):
    """Score each row of ``df`` by its autoencoder reconstruction error.

    The rows are standardized with the scaler stored by ``fit``, passed
    through the trained model, and the mean squared difference between the
    scaled input and its reconstruction is computed per row.

    Args:
        df: DataFrame with the same feature columns that were used in ``fit``.

    Returns:
        A 1-D NumPy array with one reconstruction error per input row;
        larger values indicate rows the model reconstructs poorly.

    Raises:
        ValueError: If no trained model/scaler pair is available.
        Exception: Any other failure is reported through ``log_error`` and
            re-raised unchanged.
    """
    global model, scaler
    try:
        # Both pieces are required; checking the scaler too avoids an opaque
        # AttributeError on None.transform further down.
        if model is None or scaler is None:
            raise ValueError("Modelo no entrenado")
        X_scaled = scaler.transform(df.values)
        reconstructed = model.predict(X_scaled, verbose=0)
        # axis=1 -> average over features, leaving one error per row.
        errors = np.mean((X_scaled - reconstructed) ** 2, axis=1)
        log_prediction(model_name='cristian_demo', num_predictions=len(errors))
        return errors
    except Exception as e:
        log_error(model_name='cristian_demo', error_message=str(e), error_type='apply')
        # Bare raise keeps the original traceback intact.
        raise


In [None]:
def summary(df):
    """Describe the current state of the module-level model.

    Args:
        df: Unused; accepted only to keep the call signature.

    Returns:
        ``{"status": "not initialized"}`` while the ``model`` global is
        ``None``; otherwise a dict with the model type and the value of
        ``model.count_params()``.
    """
    if model is not None:
        param_count = model.count_params()
        return {"model_type": "Autoencoder", "trainable_parameters": param_count}
    return {"status": "not initialized"}


# FASE 4: Probar modelo localmente


In [None]:
# Local smoke test: run the full init -> fit -> apply -> summary cycle
# on the DataFrame built earlier in the notebook.
param = {'epochs': 20, 'batch_size': 32}
# init() resets the module-level state and returns None; the name is
# rebound to the trained model by fit() on the next line.
model = init(param)
model = fit(df, param)
# One reconstruction error per row, scored on the same rows used for training.
predictions = apply(df)
summary_result = summary(df)
print("✅ Test exitoso")
