<a href="https://colab.research.google.com/github/santiagonajera/Regresiones-Aplicaciones-en-Logistica/blob/main/RegresionesCodigo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagonajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenar modelo de regresión lineal múltiple
model = LinearRegression()
model.fit(X_train, y_train)

# Realizar predicciones
y_pred = model.predict(X_test)

# Evaluar el modelo
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Error Cuadrático Medio (MSE): {mse:.2f}")
print(f"Raíz del Error Cuadrático Medio (RMSE): {rmse:.2f}")
print(f"Coeficiente de Determinación R²: {r2:.2f}")

# Mostrar coeficientes del modelo
coefficients = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': model.coef_
}).sort_values(by='Coefficient', ascending=False)

print("\nCoeficientes del modelo:")
print(coefficients)

Error Cuadrático Medio (MSE): 25.98
Raíz del Error Cuadrático Medio (RMSE): 5.10
Coeficiente de Determinación R²: 0.87

Coeficientes del modelo:
                  Feature  Coefficient
3              Dia_Semana     2.978725
6             Clima_Nieve     1.075449
0            Distancia_km     0.402274
1      Cantidad_Productos     0.199699
2             Hora_Pedido     0.143415
7           Clima_Soleado    -1.074598
9        Trafico_Moderado    -1.075760
8            Trafico_Leve    -1.472537
4  Tipo_Transporte_Camion    -1.497803
5    Tipo_Transporte_Moto    -3.101193


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (importante para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Escalar el target también
y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Entrenar modelo SVR
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)  # kernel='rbf' es común para problemas no lineales
svr_model.fit(X_train_scaled, y_train_scaled)

# Realizar predicciones
y_pred_scaled = svr_model.predict(X_test_scaled)

# Desescalar las predicciones para interpretarlas en la escala original
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
y_test_original = scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)).ravel()

# Evaluar el modelo
mse = mean_squared_error(y_test_original, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_original, y_pred)

print(f"Error Cuadrático Medio (MSE): {mse:.2f}")
print(f"Raíz del Error Cuadrático Medio (RMSE): {rmse:.2f}")
print(f"Coeficiente de Determinación R²: {r2:.2f}")

# Mostrar parámetros del modelo
print("\nParámetros del modelo SVR:")
print(f"Kernel: {svr_model.kernel}")
print(f"Valor de C: {svr_model.C}")
print(f"Valor de epsilon: {svr_model.epsilon}")

Error Cuadrático Medio (MSE): 26.88
Raíz del Error Cuadrático Medio (RMSE): 5.18
Coeficiente de Determinación R²: 0.86

Parámetros del modelo SVR:
Kernel: rbf
Valor de C: 1.0
Valor de epsilon: 0.1


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (importante para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Escalar el target también
y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Entrenar modelo SVR
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)  # kernel='rbf' es común para problemas no lineales
svr_model.fit(X_train_scaled, y_train_scaled)

# Realizar predicciones
y_pred_scaled = svr_model.predict(X_test_scaled)

# Desescalar las predicciones para interpretarlas en la escala original
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
y_test_original = scaler_y.inverse_transform(y_test_scaled.reshape(-1, 1)).ravel()

# Evaluar el modelo
mse = mean_squared_error(y_test_original, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_original, y_pred)

print(f"Error Cuadrático Medio (MSE): {mse:.2f}")
print(f"Raíz del Error Cuadrático Medio (RMSE): {rmse:.2f}")
print(f"Coeficiente de Determinación R²: {r2:.2f}")

# Interpretación de los resultados
print("\nInterpretación de los resultados:")
print("El modelo SVR no proporciona coeficientes explícitos como en la regresión lineal.")
print("Sin embargo, puedes analizar la importancia relativa de las variables usando técnicas adicionales.")

# Importancia relativa de las variables (aproximación)
# Calculamos la sensibilidad de las predicciones al cambiar cada variable
feature_importance = []
for i in range(X_train_scaled.shape[1]):
    X_temp = X_train_scaled.copy()
    X_temp[:, i] += 0.1  # Pequeño cambio en la variable i
    y_pred_temp = svr_model.predict(X_temp)
    importance = np.mean(np.abs(y_pred_temp - y_train_scaled))
    feature_importance.append(importance)

# Crear un DataFrame con la importancia relativa
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': feature_importance
}).sort_values(by='Importance', ascending=False)

print("\nImportancia relativa de las variables:")
print(importance_df)

Error Cuadrático Medio (MSE): 26.88
Raíz del Error Cuadrático Medio (RMSE): 5.18
Coeficiente de Determinación R²: 0.86

Interpretación de los resultados:
El modelo SVR no proporciona coeficientes explícitos como en la regresión lineal.
Sin embargo, puedes analizar la importancia relativa de las variables usando técnicas adicionales.

Importancia relativa de las variables:
                  Feature  Importance
1      Cantidad_Productos    0.283721
0            Distancia_km    0.278260
3              Dia_Semana    0.276785
4  Tipo_Transporte_Camion    0.276769
5    Tipo_Transporte_Moto    0.276707
8            Trafico_Leve    0.276670
9        Trafico_Moderado    0.276651
7           Clima_Soleado    0.276641
6             Clima_Nieve    0.276513
2             Hora_Pedido    0.276489


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Modelo 1: Regresión Lineal Múltiple
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)  # No necesita escalamiento
y_pred_linear = linear_model.predict(X_test)

# Evaluar modelo de regresión lineal
mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)
r2_linear = r2_score(y_test, y_pred_linear)

# Modelo 2: Support Vector Regression (SVR)
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr_model.fit(X_train_scaled, y_train_scaled)
y_pred_svr_scaled = svr_model.predict(X_test_scaled)
y_pred_svr = scaler_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1)).ravel()

# Evaluar modelo SVR
mse_svr = mean_squared_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_test, y_pred_svr)

# Comparar los modelos
print("Comparación de modelos:")
print("-" * 50)
print(f"Regresión Lineal - MSE: {mse_linear:.2f}, RMSE: {rmse_linear:.2f}, R²: {r2_linear:.2f}")
print(f"SVR - MSE: {mse_svr:.2f}, RMSE: {rmse_svr:.2f}, R²: {r2_svr:.2f}")
print("-" * 50)

# Determinar el mejor modelo
if mse_linear < mse_svr and r2_linear > r2_svr:
    print("El mejor modelo es: Regresión Lineal")
elif mse_svr < mse_linear and r2_svr > r2_linear:
    print("El mejor modelo es: SVR")
else:
    print("Ambos modelos tienen un rendimiento similar. Considera otros factores como interpretabilidad o complejidad.")

# Interpretación adicional
print("\nInterpretación:")
print("- La Regresión Lineal es más interpretable porque proporciona coeficientes explícitos.")
print("- SVR puede capturar relaciones no lineales, pero es menos interpretable y más costoso computacionalmente.")

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 25.98, RMSE: 5.10, R²: 0.87
SVR - MSE: 26.88, RMSE: 5.18, R²: 0.86
--------------------------------------------------
El mejor modelo es: Regresión Lineal

Interpretación:
- La Regresión Lineal es más interpretable porque proporciona coeficientes explícitos.
- SVR puede capturar relaciones no lineales, pero es menos interpretable y más costoso computacionalmente.


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica2.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Modelo 1: Regresión Lineal Múltiple
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)  # No necesita escalamiento
y_pred_linear = linear_model.predict(X_test)

# Evaluar modelo de regresión lineal
mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)
r2_linear = r2_score(y_test, y_pred_linear)

# Modelo 2: Support Vector Regression (SVR)
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr_model.fit(X_train_scaled, y_train_scaled)
y_pred_svr_scaled = svr_model.predict(X_test_scaled)
y_pred_svr = scaler_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1)).ravel()

# Evaluar modelo SVR
mse_svr = mean_squared_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_test, y_pred_svr)

# Comparar los modelos
print("Comparación de modelos:")
print("-" * 50)
print(f"Regresión Lineal - MSE: {mse_linear:.2f}, RMSE: {rmse_linear:.2f}, R²: {r2_linear:.2f}")
print(f"SVR - MSE: {mse_svr:.2f}, RMSE: {rmse_svr:.2f}, R²: {r2_svr:.2f}")
print("-" * 50)

# Determinar el mejor modelo
if mse_linear < mse_svr and r2_linear > r2_svr:
    print("El mejor modelo es: Regresión Lineal")
elif mse_svr < mse_linear and r2_svr > r2_linear:
    print("El mejor modelo es: SVR")
else:
    print("Ambos modelos tienen un rendimiento similar. Considera otros factores como interpretabilidad o complejidad.")

# Interpretación adicional
print("\nInterpretación:")
print("- La Regresión Lineal es más interpretable porque proporciona coeficientes explícitos.")
print("- SVR puede capturar relaciones no lineales, pero es menos interpretable y más costoso computacionalmente.")

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 27.93, RMSE: 5.28, R²: 0.86
SVR - MSE: 28.84, RMSE: 5.37, R²: 0.85
--------------------------------------------------
El mejor modelo es: Regresión Lineal

Interpretación:
- La Regresión Lineal es más interpretable porque proporciona coeficientes explícitos.
- SVR puede capturar relaciones no lineales, pero es menos interpretable y más costoso computacionalmente.


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenar modelo Gradient Boosting
gb_model = GradientBoostingRegressor(
    n_estimators=100,       # Número de árboles
    learning_rate=0.1,      # Tasa de aprendizaje
    max_depth=3,            # Profundidad máxima de cada árbol
    random_state=42
)
gb_model.fit(X_train, y_train)

# Realizar predicciones
y_pred_gb = gb_model.predict(X_test)

# Evaluar el modelo
mse_gb = mean_squared_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mse_gb)
r2_gb = r2_score(y_test, y_pred_gb)

print("Resultados del modelo Gradient Boosting:")
print(f"MSE: {mse_gb:.2f}")
print(f"RMSE: {rmse_gb:.2f}")
print(f"R²: {r2_gb:.2f}")

# Importancia de las variables
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': gb_model.feature_importances_
}).sort_values(by='Importance', ascending=False)

print("\nImportancia de las variables según Gradient Boosting:")
print(importance_df)

Resultados del modelo Gradient Boosting:
MSE: 26.57
RMSE: 5.15
R²: 0.87

Importancia de las variables según Gradient Boosting:
                  Feature  Importance
1      Cantidad_Productos    0.769895
0            Distancia_km    0.194560
3              Dia_Semana    0.012976
5    Tipo_Transporte_Moto    0.007302
2             Hora_Pedido    0.006499
6             Clima_Nieve    0.002556
7           Clima_Soleado    0.002091
4  Tipo_Transporte_Camion    0.002007
8            Trafico_Leve    0.001151
9        Trafico_Moderado    0.000964


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Modelo 1: Regresión Lineal Múltiple
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)
r2_linear = r2_score(y_test, y_pred_linear)

# Modelo 2: Support Vector Regression (SVR)
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr_model.fit(X_train_scaled, y_train_scaled)
y_pred_svr_scaled = svr_model.predict(X_test_scaled)
y_pred_svr = scaler_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1)).ravel()

mse_svr = mean_squared_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_test, y_pred_svr)

# Modelo 3: Gradient Boosting
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

mse_gb = mean_squared_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mse_gb)
r2_gb = r2_score(y_test, y_pred_gb)

# Comparación de modelos
print("Comparación de modelos:")
print("-" * 50)
print(f"Regresión Lineal - MSE: {mse_linear:.2f}, RMSE: {rmse_linear:.2f}, R²: {r2_linear:.2f}")
print(f"SVR - MSE: {mse_svr:.2f}, RMSE: {rmse_svr:.2f}, R²: {r2_svr:.2f}")
print(f"Gradient Boosting - MSE: {mse_gb:.2f}, RMSE: {rmse_gb:.2f}, R²: {r2_gb:.2f}")
print("-" * 50)

# Determinar el mejor modelo
if mse_linear < mse_svr and mse_linear < mse_gb and r2_linear > r2_svr and r2_linear > r2_gb:
    print("El mejor modelo es: Regresión Lineal")
elif mse_svr < mse_linear and mse_svr < mse_gb and r2_svr > r2_linear and r2_svr > r2_gb:
    print("El mejor modelo es: SVR")
else:
    print("El mejor modelo es: Gradient Boosting")

# Resumen de importancia de variables (solo para Gradient Boosting)
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': gb_model.feature_importances_
}).sort_values(by='Importance', ascending=False)

print("\nImportancia de las variables según Gradient Boosting:")
print(importance_df)

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 25.98, RMSE: 5.10, R²: 0.87
SVR - MSE: 26.88, RMSE: 5.18, R²: 0.86
Gradient Boosting - MSE: 26.57, RMSE: 5.15, R²: 0.87
--------------------------------------------------
El mejor modelo es: Regresión Lineal

Importancia de las variables según Gradient Boosting:
                  Feature  Importance
1      Cantidad_Productos    0.769895
0            Distancia_km    0.194560
3              Dia_Semana    0.012976
5    Tipo_Transporte_Moto    0.007302
2             Hora_Pedido    0.006499
6             Clima_Nieve    0.002556
7           Clima_Soleado    0.002091
4  Tipo_Transporte_Camion    0.002007
8            Trafico_Leve    0.001151
9        Trafico_Moderado    0.000964


In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica2.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Modelo 1: Regresión Lineal Múltiple
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)
r2_linear = r2_score(y_test, y_pred_linear)

# Modelo 2: Support Vector Regression (SVR)
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr_model.fit(X_train_scaled, y_train_scaled)
y_pred_svr_scaled = svr_model.predict(X_test_scaled)
y_pred_svr = scaler_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1)).ravel()

mse_svr = mean_squared_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_test, y_pred_svr)

# Modelo 3: Gradient Boosting
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

mse_gb = mean_squared_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mse_gb)
r2_gb = r2_score(y_test, y_pred_gb)

# Comparación de modelos
print("Comparación de modelos:")
print("-" * 50)
print(f"Regresión Lineal - MSE: {mse_linear:.2f}, RMSE: {rmse_linear:.2f}, R²: {r2_linear:.2f}")
print(f"SVR - MSE: {mse_svr:.2f}, RMSE: {rmse_svr:.2f}, R²: {r2_svr:.2f}")
print(f"Gradient Boosting - MSE: {mse_gb:.2f}, RMSE: {rmse_gb:.2f}, R²: {r2_gb:.2f}")
print("-" * 50)

# Determinar el mejor modelo
if mse_linear < mse_svr and mse_linear < mse_gb and r2_linear > r2_svr and r2_linear > r2_gb:
    print("El mejor modelo es: Regresión Lineal")
elif mse_svr < mse_linear and mse_svr < mse_gb and r2_svr > r2_linear and r2_svr > r2_gb:
    print("El mejor modelo es: SVR")
else:
    print("El mejor modelo es: Gradient Boosting")

# Resumen de importancia de variables (solo para Gradient Boosting)
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': gb_model.feature_importances_
}).sort_values(by='Importance', ascending=False)

print("\nImportancia de las variables según Gradient Boosting:")
print(importance_df)

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 27.93, RMSE: 5.28, R²: 0.86
SVR - MSE: 28.84, RMSE: 5.37, R²: 0.85
Gradient Boosting - MSE: 28.40, RMSE: 5.33, R²: 0.86
--------------------------------------------------
El mejor modelo es: Regresión Lineal

Importancia de las variables según Gradient Boosting:
                Feature  Importance
1    Cantidad_Productos    0.777809
0          Distancia_km    0.196797
3            Dia_Semana    0.013182
2           Hora_Pedido    0.006589
5         Clima_Soleado    0.003281
6          Trafico_Leve    0.001204
7      Trafico_Moderado    0.001089
4  Tipo_Transporte_Moto    0.000049


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica3.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Modelo 1: Regresión Lineal Múltiple
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)
r2_linear = r2_score(y_test, y_pred_linear)

# Modelo 2: Support Vector Regression (SVR)
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr_model.fit(X_train_scaled, y_train_scaled)
y_pred_svr_scaled = svr_model.predict(X_test_scaled)
y_pred_svr = scaler_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1)).ravel()

mse_svr = mean_squared_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_test, y_pred_svr)

# Modelo 3: Gradient Boosting
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

mse_gb = mean_squared_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mse_gb)
r2_gb = r2_score(y_test, y_pred_gb)

# Comparación de modelos
print("Comparación de modelos:")
print("-" * 50)
print(f"Regresión Lineal - MSE: {mse_linear:.2f}, RMSE: {rmse_linear:.2f}, R²: {r2_linear:.2f}")
print(f"SVR - MSE: {mse_svr:.2f}, RMSE: {rmse_svr:.2f}, R²: {r2_svr:.2f}")
print(f"Gradient Boosting - MSE: {mse_gb:.2f}, RMSE: {rmse_gb:.2f}, R²: {r2_gb:.2f}")
print("-" * 50)

# Determinar el mejor modelo
if mse_linear < mse_svr and mse_linear < mse_gb and r2_linear > r2_svr and r2_linear > r2_gb:
    print("El mejor modelo es: Regresión Lineal")
elif mse_svr < mse_linear and mse_svr < mse_gb and r2_svr > r2_linear and r2_svr > r2_gb:
    print("El mejor modelo es: SVR")
else:
    print("El mejor modelo es: Gradient Boosting")

# Resumen de importancia de variables (solo para Gradient Boosting)
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': gb_model.feature_importances_
}).sort_values(by='Importance', ascending=False)

print("\nImportancia de las variables según Gradient Boosting:")
print(importance_df)

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 30.01, RMSE: 5.48, R²: 0.83
SVR - MSE: 29.80, RMSE: 5.46, R²: 0.83
Gradient Boosting - MSE: 28.99, RMSE: 5.38, R²: 0.83
--------------------------------------------------
El mejor modelo es: Gradient Boosting

Importancia de las variables según Gradient Boosting:
                Feature  Importance
1    Cantidad_Productos    0.790385
0          Distancia_km    0.184194
3            Dia_Semana    0.012803
2           Hora_Pedido    0.006890
5         Clima_Soleado    0.003485
6      Trafico_Moderado    0.002188
4  Tipo_Transporte_Moto    0.000054


In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Entrenar modelo XGBoost
xgb_model = XGBRegressor(
    n_estimators=100,       # Número de árboles
    learning_rate=0.1,      # Tasa de aprendizaje
    max_depth=3,            # Profundidad máxima de cada árbol
    objective='reg:squarederror',  # Función objetivo para regresión
    random_state=42
)
xgb_model.fit(X_train, y_train)

# Realizar predicciones
y_pred_xgb = xgb_model.predict(X_test)

# Evaluar el modelo
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
rmse_xgb = np.sqrt(mse_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

print("Resultados del modelo XGBoost:")
print(f"MSE: {mse_xgb:.2f}")
print(f"RMSE: {rmse_xgb:.2f}")
print(f"R²: {r2_xgb:.2f}")

# Importancia de las variables
importance_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': xgb_model.feature_importances_
}).sort_values(by='Importance', ascending=False)

print("\nImportancia de las variables según XGBoost:")
print(importance_df)

Resultados del modelo XGBoost:
MSE: 26.55
RMSE: 5.15
R²: 0.87

Importancia de las variables según XGBoost:
                  Feature  Importance
1      Cantidad_Productos    0.626693
0            Distancia_km    0.180773
3              Dia_Semana    0.051328
5    Tipo_Transporte_Moto    0.038378
6             Clima_Nieve    0.024331
4  Tipo_Transporte_Camion    0.022763
7           Clima_Soleado    0.015793
9        Trafico_Moderado    0.014621
2             Hora_Pedido    0.014367
8            Trafico_Leve    0.010952


In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para SVR)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Modelo 1: Regresión Lineal Múltiple
linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

mse_linear = mean_squared_error(y_test, y_pred_linear)
rmse_linear = np.sqrt(mse_linear)
r2_linear = r2_score(y_test, y_pred_linear)

# Modelo 2: Support Vector Regression (SVR)
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr_model.fit(X_train_scaled, y_train_scaled)
y_pred_svr_scaled = svr_model.predict(X_test_scaled)
y_pred_svr = scaler_y.inverse_transform(y_pred_svr_scaled.reshape(-1, 1)).ravel()

mse_svr = mean_squared_error(y_test, y_pred_svr)
rmse_svr = np.sqrt(mse_svr)
r2_svr = r2_score(y_test, y_pred_svr)

# Modelo 3: Gradient Boosting
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

mse_gb = mean_squared_error(y_test, y_pred_gb)
rmse_gb = np.sqrt(mse_gb)
r2_gb = r2_score(y_test, y_pred_gb)

# Modelo 4: XGBoost
xgb_model = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    objective='reg:squarederror',
    random_state=42
)
xgb_model.fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)

mse_xgb = mean_squared_error(y_test, y_pred_xgb)
rmse_xgb = np.sqrt(mse_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

# Comparación de modelos
print("Comparación de modelos:")
print("-" * 50)
print(f"Regresión Lineal - MSE: {mse_linear:.2f}, RMSE: {rmse_linear:.2f}, R²: {r2_linear:.2f}")
print(f"SVR - MSE: {mse_svr:.2f}, RMSE: {rmse_svr:.2f}, R²: {r2_svr:.2f}")
print(f"Gradient Boosting - MSE: {mse_gb:.2f}, RMSE: {rmse_gb:.2f}, R²: {r2_gb:.2f}")
print(f"XGBoost - MSE: {mse_xgb:.2f}, RMSE: {rmse_xgb:.2f}, R²: {r2_xgb:.2f}")
print("-" * 50)

# Determinar el mejor modelo
if mse_linear < mse_svr and mse_linear < mse_gb and mse_linear < mse_xgb and r2_linear > r2_svr and r2_linear > r2_gb and r2_linear > r2_xgb:
    print("El mejor modelo es: Regresión Lineal")
elif mse_svr < mse_linear and mse_svr < mse_gb and mse_svr < mse_xgb and r2_svr > r2_linear and r2_svr > r2_gb and r2_svr > r2_xgb:
    print("El mejor modelo es: SVR")
elif mse_gb < mse_linear and mse_gb < mse_svr and mse_gb < mse_xgb and r2_gb > r2_linear and r2_gb > r2_svr and r2_gb > r2_xgb:
    print("El mejor modelo es: Gradient Boosting")
else:
    print("El mejor modelo es: XGBoost")

# Resumen de importancia de variables (solo para Gradient Boosting y XGBoost)
importance_gb_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance_GB': gb_model.feature_importances_
}).sort_values(by='Importance_GB', ascending=False)

importance_xgb_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance_XGB': xgb_model.feature_importances_
}).sort_values(by='Importance_XGB', ascending=False)

print("\nImportancia de las variables según Gradient Boosting:")
print(importance_gb_df)

print("\nImportancia de las variables según XGBoost:")
print(importance_xgb_df)

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 25.98, RMSE: 5.10, R²: 0.87
SVR - MSE: 26.88, RMSE: 5.18, R²: 0.86
Gradient Boosting - MSE: 26.57, RMSE: 5.15, R²: 0.87
XGBoost - MSE: 26.55, RMSE: 5.15, R²: 0.87
--------------------------------------------------
El mejor modelo es: Regresión Lineal

Importancia de las variables según Gradient Boosting:
                  Feature  Importance_GB
1      Cantidad_Productos       0.769895
0            Distancia_km       0.194560
3              Dia_Semana       0.012976
5    Tipo_Transporte_Moto       0.007302
2             Hora_Pedido       0.006499
6             Clima_Nieve       0.002556
7           Clima_Soleado       0.002091
4  Tipo_Transporte_Camion       0.002007
8            Trafico_Leve       0.001151
9        Trafico_Moderado       0.000964

Importancia de las variables según XGBoost:
                  Feature  Importance_XGB
1      Cantidad_Productos        0.626693
0            Di

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica3.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para SVR y KNN)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Función para evaluar modelos
def evaluate_model(model, X_train, X_test, y_train, y_test, scaled=False):
    if scaled:
        model.fit(X_train_scaled, y_train_scaled)
        y_pred_scaled = model.predict(X_test_scaled)
        y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    return mse, rmse, r2

# Entrenar y evaluar modelos
models = {
    "Regresión Lineal": LinearRegression(),
    "SVR": SVR(kernel='rbf', C=1.0, epsilon=0.1),
    "KNN": KNeighborsRegressor(n_neighbors=5),
    "Decision Tree": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, objective='reg:squarederror', random_state=42),
}

results = []
for name, model in models.items():
    print(f"Entrenando {name}...")
    scaled = name in ["SVR", "KNN"]  # Escalar solo para SVR y KNN
    mse, rmse, r2 = evaluate_model(model, X_train, X_test, y_train, y_test, scaled=scaled)
    results.append((name, mse, rmse, r2))
    print(f"{name} - MSE: {mse:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")

# Modelo de Red Neuronal (Deep Learning)
print("Entrenando Red Neuronal...")
nn_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # Capa de salida para regresión
])
nn_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
nn_model.fit(X_train_scaled, y_train_scaled, epochs=50, batch_size=32, verbose=0)

y_pred_nn_scaled = nn_model.predict(X_test_scaled).ravel()
y_pred_nn = scaler_y.inverse_transform(y_pred_nn_scaled.reshape(-1, 1)).ravel()
mse_nn = mean_squared_error(y_test, y_pred_nn)
rmse_nn = np.sqrt(mse_nn)
r2_nn = r2_score(y_test, y_pred_nn)
results.append(("Red Neuronal", mse_nn, rmse_nn, r2_nn))
print(f"Red Neuronal - MSE: {mse_nn:.2f}, RMSE: {rmse_nn:.2f}, R²: {r2_nn:.2f}")

# Comparación de modelos
print("\nComparación de modelos:")
print("-" * 50)
for name, mse, rmse, r2 in results:
    print(f"{name} - MSE: {mse:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")
print("-" * 50)

# Determinar el mejor modelo
best_model = min(results, key=lambda x: x[1])  # Menor MSE
print(f"\nEl mejor modelo es: {best_model[0]} con MSE: {best_model[1]:.2f}, RMSE: {best_model[2]:.2f}, R²: {best_model[3]:.2f}")

Entrenando Regresión Lineal...
Regresión Lineal - MSE: 30.01, RMSE: 5.48, R²: 0.83
Entrenando SVR...
SVR - MSE: 29.80, RMSE: 5.46, R²: 0.83
Entrenando KNN...
KNN - MSE: 35.75, RMSE: 5.98, R²: 0.79
Entrenando Decision Tree...
Decision Tree - MSE: 60.04, RMSE: 7.75, R²: 0.65
Entrenando Random Forest...
Random Forest - MSE: 33.04, RMSE: 5.75, R²: 0.81
Entrenando Gradient Boosting...
Gradient Boosting - MSE: 28.99, RMSE: 5.38, R²: 0.83
Entrenando XGBoost...
XGBoost - MSE: 28.94, RMSE: 5.38, R²: 0.83
Entrenando Red Neuronal...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Red Neuronal - MSE: 29.33, RMSE: 5.42, R²: 0.83

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 30.01, RMSE: 5.48, R²: 0.83
SVR - MSE: 29.80, RMSE: 5.46, R²: 0.83
KNN - MSE: 35.75, RMSE: 5.98, R²: 0.79
Decision Tree - MSE: 60.04, RMSE: 7.75, R²: 0.65
Random Forest - MSE: 33.04, RMSE: 5.75, R²: 0.81
Gradient Boosting - MSE: 28.99, RMSE: 5.38, R²: 0.83
XGBoost - MSE: 28.94, RMSE: 5.38, R²: 0.83
Red Neuronal - MSE: 29.33, RMSE: 5.42, R²: 0.83
--------------------------------------------------

El mejor modelo es: XGBoost con MSE: 28.94, RMSE: 5.38, R²: 0.83


In [15]:
# Verificar e instalar bibliotecas necesarias
import sys
import subprocess

def install_and_import(package):
    try:
        __import__(package)
    except ImportError:
        print(f"Instalando {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    finally:
        globals()[package] = __import__(package)

# Lista de bibliotecas necesarias
required_packages = ["xgboost", "lightgbm", "catboost", "tensorflow"]

for package in required_packages:
    install_and_import(package)

# Continuar con el código principal
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Resto del código...

Instalando catboost...


In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.naive_bayes import GaussianNB
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Cargar los datos
url = 'https://github.com/santiagoNajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica.csv'
data = pd.read_csv(url)

# Preprocesamiento de datos
# Convertir variables categóricas a numéricas con one-hot encoding
data = pd.get_dummies(data, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Separar features y target
X = data.drop('Tiempo_Entrega', axis=1)
y = data['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Escalar las características (necesario para algunos modelos)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Función para evaluar modelos
def evaluate_model(model, X_train, X_test, y_train, y_test, scaled=False):
    if scaled:
        model.fit(X_train_scaled, y_train_scaled)
        y_pred_scaled = model.predict(X_test_scaled)
        y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    return mse, rmse, r2

# Entrenar y evaluar modelos
models = {
    "Regresión Lineal": LinearRegression(),
    "SVR": SVR(kernel='rbf', C=1.0, epsilon=0.1),
    "KNN": KNeighborsRegressor(n_neighbors=5),
    "Árbol de Decisión": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "AdaBoost": AdaBoostRegressor(n_estimators=100, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, objective='reg:squarederror', random_state=42),
    "LightGBM": LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "CatBoost": CatBoostRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, verbose=0, random_state=42),
}

results = []
for name, model in models.items():
    print(f"Entrenando {name}...")
    scaled = name in ["SVR", "KNN"]  # Escalar solo para SVR y KNN
    mse, rmse, r2 = evaluate_model(model, X_train, X_test, y_train, y_test, scaled=scaled)
    results.append((name, mse, rmse, r2))
    print(f"{name} - MSE: {mse:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")

# Modelo de Red Neuronal (Deep Learning)
print("Entrenando Red Neuronal...")
nn_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # Capa de salida para regresión
])
nn_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
nn_model.fit(X_train_scaled, y_train_scaled, epochs=50, batch_size=32, verbose=0)

y_pred_nn_scaled = nn_model.predict(X_test_scaled).ravel()
y_pred_nn = scaler_y.inverse_transform(y_pred_nn_scaled.reshape(-1, 1)).ravel()
mse_nn = mean_squared_error(y_test, y_pred_nn)
rmse_nn = np.sqrt(mse_nn)
r2_nn = r2_score(y_test, y_pred_nn)
results.append(("Red Neuronal", mse_nn, rmse_nn, r2_nn))
print(f"Red Neuronal - MSE: {mse_nn:.2f}, RMSE: {rmse_nn:.2f}, R²: {r2_nn:.2f}")

# Comparación de modelos
print("\nComparación de modelos:")
print("-" * 50)
for name, mse, rmse, r2 in results:
    print(f"{name} - MSE: {mse:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")
print("-" * 50)

# Determinar el mejor modelo
best_model = min(results, key=lambda x: x[1])  # Menor MSE
print(f"\nEl mejor modelo es: {best_model[0]} con MSE: {best_model[1]:.2f}, RMSE: {best_model[2]:.2f}, R²: {best_model[3]:.2f}")

ModuleNotFoundError: No module named 'catboost'

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Cargar los datos originales para entrenamiento
url_train = 'https://github.com/santiagonajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica3.csv'
data_train = pd.read_csv(url_train)

# Cargar los datos nuevos para predicción
url_predict = 'https://github.com/santiagonajera/Regresiones-Aplicaciones-en-Logistica/raw/refs/heads/main/transporte_datos_logistica_por_pred.csv'
data_predict = pd.read_csv(url_predict)

# Preprocesamiento de datos (entrenamiento)
data_train = pd.get_dummies(data_train, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)
X_train_full = data_train.drop('Tiempo_Entrega', axis=1)
y_train_full = data_train['Tiempo_Entrega']

# Dividir en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_train_full, y_train_full, test_size=0.2, random_state=42)

# Escalar las características (necesario para algunos modelos)
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

y_train_scaled = scaler_y.fit_transform(np.array(y_train).reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(np.array(y_test).reshape(-1, 1)).ravel()

# Función para evaluar modelos
def evaluate_model(model, X_train, X_test, y_train, y_test, scaled=False):
    if scaled:
        model.fit(X_train_scaled, y_train_scaled)
        y_pred_scaled = model.predict(X_test_scaled)
        y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
    else:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    return mse, rmse, r2

# Entrenar y evaluar modelos
models = {
    "Regresión Lineal": LinearRegression(),
    "SVR": SVR(kernel='rbf', C=1.0, epsilon=0.1),
    "KNN": KNeighborsRegressor(n_neighbors=5),
    "Árbol de Decisión": DecisionTreeRegressor(random_state=42),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "AdaBoost": AdaBoostRegressor(n_estimators=100, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, objective='reg:squarederror', random_state=42),
    "LightGBM": LGBMRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "CatBoost": CatBoostRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, verbose=0, random_state=42),
}

results = []
for name, model in models.items():
    print(f"Entrenando {name}...")
    scaled = name in ["SVR", "KNN"]  # Escalar solo para SVR y KNN
    mse, rmse, r2 = evaluate_model(model, X_train, X_test, y_train, y_test, scaled=scaled)
    results.append((name, mse, rmse, r2))
    print(f"{name} - MSE: {mse:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")

# Modelo de Red Neuronal (Deep Learning)
print("Entrenando Red Neuronal...")
nn_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # Capa de salida para regresión
])
nn_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
nn_model.fit(X_train_scaled, y_train_scaled, epochs=50, batch_size=32, verbose=0)

y_pred_nn_scaled = nn_model.predict(X_test_scaled).ravel()
y_pred_nn = scaler_y.inverse_transform(y_pred_nn_scaled.reshape(-1, 1)).ravel()
mse_nn = mean_squared_error(y_test, y_pred_nn)
rmse_nn = np.sqrt(mse_nn)
r2_nn = r2_score(y_test, y_pred_nn)
results.append(("Red Neuronal", mse_nn, rmse_nn, r2_nn))
print(f"Red Neuronal - MSE: {mse_nn:.2f}, RMSE: {rmse_nn:.2f}, R²: {r2_nn:.2f}")

# Comparación de modelos
print("\nComparación de modelos:")
print("-" * 50)
for name, mse, rmse, r2 in results:
    print(f"{name} - MSE: {mse:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}")
print("-" * 50)

# Determinar el mejor modelo
best_model_info = min(results, key=lambda x: x[1])  # Menor MSE
best_model_name = best_model_info[0]
best_model_mse = best_model_info[1]
best_model_rmse = best_model_info[2]
best_model_r2 = best_model_info[3]

print(f"\nEl mejor modelo es: {best_model_name} con MSE: {best_model_mse:.2f}, RMSE: {best_model_rmse:.2f}, R²: {best_model_r2:.2f}")

# Aplicar el mejor modelo a los datos nuevos
# Preprocesamiento de datos nuevos
data_predict = pd.get_dummies(data_predict, columns=['Tipo_Transporte', 'Clima', 'Trafico'], drop_first=True)

# Asegurar que las columnas coincidan con las del conjunto de entrenamiento
missing_cols = set(X_train_full.columns) - set(data_predict.columns)
for col in missing_cols:
    data_predict[col] = 0  # Agregar columnas faltantes con valor 0
data_predict = data_predict[X_train_full.columns]  # Reordenar columnas

# Escalar los datos nuevos si es necesario
scaled_data_predict = scaler_X.transform(data_predict)

# Seleccionar y aplicar el mejor modelo
if best_model_name == "Red Neuronal":
    predictions = nn_model.predict(scaled_data_predict).ravel()
    predictions = scaler_y.inverse_transform(predictions.reshape(-1, 1)).ravel()
else:
    best_model = [model for name, model in models.items() if name == best_model_name][0]
    if best_model_name in ["SVR", "KNN"]:
        predictions = best_model.predict(scaled_data_predict)
        predictions = scaler_y.inverse_transform(predictions.reshape(-1, 1)).ravel()
    else:
        predictions = best_model.predict(data_predict)

# Mostrar las predicciones
data_predict['Tiempo_Entrega_Predicho'] = predictions
print("\nPredicciones de Tiempo_Entrega para los datos nuevos:")
print(data_predict[['Tiempo_Entrega_Predicho']])

Entrenando Regresión Lineal...
Regresión Lineal - MSE: 30.01, RMSE: 5.48, R²: 0.83
Entrenando SVR...
SVR - MSE: 29.80, RMSE: 5.46, R²: 0.83
Entrenando KNN...
KNN - MSE: 35.75, RMSE: 5.98, R²: 0.79
Entrenando Árbol de Decisión...
Árbol de Decisión - MSE: 60.04, RMSE: 7.75, R²: 0.65
Entrenando Random Forest...
Random Forest - MSE: 33.04, RMSE: 5.75, R²: 0.81
Entrenando Gradient Boosting...
Gradient Boosting - MSE: 28.99, RMSE: 5.38, R²: 0.83
Entrenando AdaBoost...
AdaBoost - MSE: 35.25, RMSE: 5.94, R²: 0.79
Entrenando XGBoost...
XGBoost - MSE: 28.94, RMSE: 5.38, R²: 0.83
Entrenando LightGBM...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000608 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 447
[LightGBM] [Info] Number of data points in the train set: 13200, number of used features: 7
[LightGBM] [Info] Start training from score 60.595

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Red Neuronal - MSE: 29.61, RMSE: 5.44, R²: 0.83

Comparación de modelos:
--------------------------------------------------
Regresión Lineal - MSE: 30.01, RMSE: 5.48, R²: 0.83
SVR - MSE: 29.80, RMSE: 5.46, R²: 0.83
KNN - MSE: 35.75, RMSE: 5.98, R²: 0.79
Árbol de Decisión - MSE: 60.04, RMSE: 7.75, R²: 0.65
Random Forest - MSE: 33.04, RMSE: 5.75, R²: 0.81
Gradient Boosting - MSE: 28.99, RMSE: 5.38, R²: 0.83
AdaBoost - MSE: 35.25, RMSE: 5.94, R²: 0.79
XGBoost - MSE: 28.94, RMSE: 5.38, R²: 0.83
LightGBM - MSE: 28.93, RMSE: 5.38, R²: 0.83
CatBoost - MSE: 29.02, RMSE: 5.39, R²: 0.83
Red Neuronal - MSE: 29.61, RMSE: 5.44, R²: 0.83
--------------------------------------------------

El mejor modelo es: LightGBM con MSE: 28.93, RMSE: 5.38, R²: 0.83

Predicciones de Tiempo_Entrega para los datos nuevos:
   Tiempo_Entrega_Predicho
0                48.592036
1                65.620493
2                70.384636
3           