In [1]:
# Importar librerías
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

In [2]:
# 1. Obtener un set de datos.
df = pd.read_csv("data.csv")

# 2. Buscar valores vacios
print("Missing values:")
print(df.isna().sum())

# 3. Eliminar columna que no agrega valor al entrenamiento
df = df.drop('Employee_ID', axis=1)

Missing values:
Employee_ID                      0
Age                              0
Gender                           0
Marital_Status                   0
Department                       0
Job_Role                         0
Job_Level                        0
Monthly_Income                   0
Hourly_Rate                      0
Years_at_Company                 0
Years_in_Current_Role            0
Years_Since_Last_Promotion       0
Work_Life_Balance                0
Job_Satisfaction                 0
Performance_Rating               0
Training_Hours_Last_Year         0
Overtime                         0
Project_Count                    0
Average_Hours_Worked_Per_Week    0
Absenteeism                      0
Work_Environment_Satisfaction    0
Relationship_with_Manager        0
Job_Involvement                  0
Distance_From_Home               0
Number_of_Companies_Worked       0
Attrition                        0
dtype: int64


In [3]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder

# Guardar las columnas categóricas
categorical_cols = ['Gender', 'Marital_Status', 'Department', 'Job_Role', 'Overtime']

# Guardar las columnas numéricas
numerical_cols = [col for col in df.columns if col not in categorical_cols + ['Attrition']]
numerical_df = df[numerical_cols]

# Inicializar herramienta de encoding 
encoder = OneHotEncoder(sparse_output=False)

# 4. Transformar los datos
encoded_features = encoder.fit_transform(df[categorical_cols])

# Guardar los nombres de los features
encoded_feature_names = encoder.get_feature_names_out(categorical_cols)

# Crear DataFrame con los valores transformados
encoded_df = pd.DataFrame(encoded_features, columns=encoded_feature_names)

target = df['Attrition']

# 5. Combinar los features
X = pd.concat([numerical_df.reset_index(drop=True), encoded_df.reset_index(drop=True)], axis=1)
y = target

# Encoder simple para el target feature
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['Attrition'])

In [4]:
# 6. Hacer la separación de los sets de prueba y entrenamiento.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 7. Aplicar las técnicas de escalamiento
# Obtenemos features numéricas que serán escaladas
numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Inicializamos el escalador de datos
scaler = StandardScaler()

# Transformamos los datos
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

In [5]:
# 8. Implementación de Modelo 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

input_shape = X_train.shape[1:]

model = Sequential([
    Flatten(input_shape=input_shape),  
    Dense(256, activation="relu"),  
    Dense(1, activation="sigmoid")   
])

  super().__init__(**kwargs)


In [6]:
# 9. Compilamos modelo
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [7]:
# 10. Entrenamiento de modelo
history = model.fit(
    X_train, y_train,
    epochs=100,
    validation_data=(X_test, y_test)
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 947us/step - accuracy: 0.7811 - loss: 0.5333 - val_accuracy: 0.7960 - val_loss: 0.5141
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 436us/step - accuracy: 0.8077 - loss: 0.4843 - val_accuracy: 0.7960 - val_loss: 0.5149
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 334us/step - accuracy: 0.7987 - loss: 0.4899 - val_accuracy: 0.7960 - val_loss: 0.5207
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 332us/step - accuracy: 0.8102 - loss: 0.4685 - val_accuracy: 0.7960 - val_loss: 0.5220
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 326us/step - accuracy: 0.8076 - loss: 0.4695 - val_accuracy: 0.7965 - val_loss: 0.5230
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 328us/step - accuracy: 0.7965 - loss: 0.4778 - val_accuracy: 0.7960 - val_loss: 0.5275
Epoch 7/10

In [8]:
# 11. Evaluación inicial del modelo  
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Accuracy: {test_acc:.4f}')

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219us/step - accuracy: 0.7315 - loss: 0.9890
Accuracy: 0.7330
