In [46]:
# Importar librerias
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np


In [47]:
# Load the raw dataset from disk
file_path = 'data_sucia_con_coordenadas.csv'
data = pd.read_csv(file_path)

# Keep only the rows whose resignation type is voluntary
data_voluntaria = data.loc[data['TIPO DE RENUNCIA'].eq('VOLUNTARIA')]

# Predictor columns and the target column
features = ['AREA', 'BANDA', 'EDAD', 'HIJOS']
y = data_voluntaria['ANTIGÜEDAD']

# One-hot encode the non-numeric predictors so the model receives numeric input
X = pd.get_dummies(data_voluntaria[features])

In [48]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build and fit the decision-tree regressor in one step (fit() returns the estimator)
tree_reg = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = tree_reg.predict(X_test)

In [49]:
# Compute regression metrics on the held-out set
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# np.set_printoptions only changes how NumPy arrays are displayed; it does not
# affect the scalar metrics printed below. The call is kept so that any array
# output produced later in the notebook keeps its current formatting.
np.set_printoptions(suppress=True)

# Print the metrics without scientific notation. format_float_positional always
# renders fixed-point using the shortest representation that round-trips, and
# trim='0' keeps a digit after the decimal point (e.g. "1.0" rather than "1.").
print('MSE: ', np.format_float_positional(mse, trim='0'))
print('MAE: ', np.format_float_positional(mae, trim='0'))
print('R2: ', np.format_float_positional(r2, trim='0'))


MSE:  2036192.3340672757
MAE:  1005.0200859611048
R2:  -0.10089277908615912


In [60]:
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Function to load and prepare data
def load_and_prepare_data(file_path, tenure_threshold=1460):
    """Load the CSV and build scaled train/test sets for the tenure classifier.

    Only rows whose 'TIPO DE RENUNCIA' equals 'VOLUNTARIA' are used. The
    predictors are 'BANDA', 'EDAD' and 'HIJOS' (one-hot encoded with
    ``pd.get_dummies``). The binary target is 0 when 'ANTIGÜEDAD' is below
    ``tenure_threshold`` and 1 otherwise.

    The scaler is fitted on the training rows only and then applied to the
    test rows, so no statistics from the held-out data leak into training.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.
    tenure_threshold : int or float, default 1460
        Cut-off applied to 'ANTIGÜEDAD' to build the binary target. The
        default matches the "4 años" wording used by the interface in this
        notebook (presumably 4 x 365 days -- confirm the column's unit).

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test, scaler, feature_columns)`` where
        the X arrays are scaled NumPy arrays, the y arrays are 0/1 NumPy
        arrays, ``scaler`` is the fitted ``StandardScaler`` and
        ``feature_columns`` is the column index of the encoded predictors.
    """
    data = pd.read_csv(file_path)
    data_logistic = data[data['TIPO DE RENUNCIA'] == 'VOLUNTARIA']

    features_logistic = ['BANDA', 'EDAD', 'HIJOS']
    X_logistic = pd.get_dummies(data_logistic[features_logistic])
    y_logistic = np.where(data_logistic['ANTIGÜEDAD'] < tenure_threshold, 0, 1)

    # Split BEFORE scaling: same test_size and seed as before, so the same rows
    # land in each partition.
    X_train_raw, X_test_raw, y_train_logistic, y_test_logistic = train_test_split(
        X_logistic, y_logistic, test_size=0.3, random_state=42
    )

    # Fit the scaler on the training partition only, then reuse its statistics
    # for the test partition.
    scaler = StandardScaler()
    X_train_logistic = scaler.fit_transform(X_train_raw)
    X_test_logistic = scaler.transform(X_test_raw)

    return X_train_logistic, X_test_logistic, y_train_logistic, y_test_logistic, scaler, X_logistic.columns

# Function to train and evaluate model
def train_and_evaluate_model(X_train, X_test, y_train, y_test):
    """Fit a default LogisticRegression and score it on the test split.

    Returns a tuple ``(model, accuracy, report)``: the fitted estimator, the
    fraction of correctly classified test samples, and the text produced by
    ``classification_report`` for the test predictions.
    """
    model = LogisticRegression().fit(X_train, y_train)
    test_predictions = model.predict(X_test)
    return (
        model,
        # accuracy_score returns the fraction of correct predictions by default
        accuracy_score(y_test, test_predictions),
        classification_report(y_test, test_predictions),
    )

# Load and prepare data
# Build the scaled train/test partitions, the fitted scaler and the encoded column layout
file_path = 'data_sucia_con_coordenadas.csv'
(
    X_train,
    X_test,
    y_train,
    y_test,
    scaler,
    feature_columns,
) = load_and_prepare_data(file_path)

# Fit the classifier and collect its evaluation on the test partition
logistic_reg, accuracy, report = train_and_evaluate_model(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)
print(f"Accuracy: {accuracy}\nClassification Report: \n{report}")

# Function for Gradio Interface
def predict_resignation(banda, edad, hijos):
    """Classify a single employee profile with the fitted logistic model.

    The three inputs are one-hot encoded, aligned to the training column
    layout (``feature_columns``), scaled with the module-level ``scaler`` and
    passed to the module-level ``logistic_reg``. Returns one of two Spanish
    messages depending on whether the predicted class is 1 or not.
    """
    raw_columns = ['BANDA', 'EDAD', 'HIJOS']
    encoded = pd.get_dummies(pd.DataFrame([[banda, edad, hijos]], columns=raw_columns))

    # reindex adds every training column that is absent (filled with 0), drops
    # columns the model never saw, and enforces the training column order.
    aligned = encoded.reindex(columns=feature_columns, fill_value=0)

    predicted_class = logistic_reg.predict(scaler.transform(aligned))[0]
    if predicted_class == 1:
        return "Renuncia antes de 4 años poco probable"
    return "Renuncia antes de 4 años probable"

# BANDA options
banda_options = ['E3', 'DNO', 'DO', 'E4', 'SL1']

# Setup Gradio Interface
iface = gr.Interface(fn=predict_resignation, 
                     inputs=[gr.Dropdown(choices=banda_options, label="BANDA"), gr.Slider(label="EDAD", minimum=18, step=1), gr.Slider(label="HIJOS", minimum=0, maximum=10, step=1)], 
                     outputs=gr.Textbox(label="Prediction"),
                     title="Predictor de Renuncia Voluntaria",
                     description="Introduce los detalles del empleado para predecir si es más probable que renuncie antes de 4 años.")
iface.launch()


Accuracy: 0.7479892761394102
Classification Report: 
              precision    recall  f1-score   support

           0       0.78      0.81      0.80       225
           1       0.70      0.65      0.67       148

    accuracy                           0.75       373
   macro avg       0.74      0.73      0.73       373
weighted avg       0.75      0.75      0.75       373

Running on local URL:  http://127.0.0.1:7885

To create a public link, set `share=True` in `launch()`.


