Limpieza de datos requerida para que el algoritmo funcione

In [19]:
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the raw delivery data. Rows without a target value cannot be used for
# fitting or scoring, so they are removed before splitting.
df = pd.read_csv('../datos/Food_Delivery_Times.csv')
df = df.dropna(subset=['Delivery_Time_min'])

# 60 % train / 20 % test / 20 % validation, with fixed seeds for reproducibility.
train_df, temp_df = train_test_split(df, test_size=0.4, random_state=42)
test_df, validation_df = train_test_split(temp_df, test_size=0.5, random_state=42)

target = 'Delivery_Time_min'
# Order_ID is an identifier, not a predictor, so it is removed with the target.
drop_cols = [target, 'Order_ID']

train_x = train_df.drop(columns=drop_cols, errors='ignore')
train_y = train_df[target]

test_x = test_df.drop(columns=drop_cols, errors='ignore')
test_y = test_df[target]

validation_x = validation_df.drop(columns=drop_cols, errors='ignore')
validation_y = validation_df[target]

# The category levels of every non-numeric column are learned from the training
# split only and then applied to all splits. Encoding each split on its own
# with drop_first=True can drop a *different* baseline level per split (when a
# split happens to lack the first level), which silently mis-encodes rows.
categorical_cols = train_x.select_dtypes(exclude=['number', 'bool']).columns.tolist()
category_levels = {
    col: train_x[col].astype('category').cat.categories
    for col in categorical_cols
}


def encode_features(features):
    """One-hot encode `features` using the category levels seen in training.

    Levels that were not present in the training split (and missing values)
    become all-zero dummy rows, i.e. they fall back to the baseline level.
    Remaining missing values are imputed with 0.
    """
    encoded = features.copy()
    for col, levels in category_levels.items():
        encoded[col] = pd.Categorical(encoded[col], categories=levels)
    encoded = pd.get_dummies(encoded, columns=categorical_cols, drop_first=True)
    return encoded.fillna(0)


train_x = encode_features(train_x)
test_x = encode_features(test_x)
validation_x = encode_features(validation_x)

# All splits must expose exactly the same feature columns, in the same order.
assert list(test_x.columns) == list(train_x.columns)
assert list(validation_x.columns) == list(train_x.columns)



Error que se medirá en el algoritmo lasso:
 MSE (error cuadrático medio): promedio de los cuadrados de las diferencias entre cada valor real y su valor predicho; mide qué tan lejos están las predicciones de los valores reales.
 Se usa en Lasso porque dicho algoritmo minimiza el error cuadrático (más una penalización L1 sobre los coeficientes).


Variable dependiente: minutos que demora la entrega (Delivery_Time_min).
Variables independientes: todas las demás columnas del conjunto de datos, excepto Order_ID; entre ellas, la distancia.
Factores controlables: Vehicle_Type, Preparation_Time_min, distance
Factores no controlables: weather, traffic_level

Hiperparámetros:
alpha: fuerza de la penalización [0.001, 0.01, 0.1]
max_iter: número máximo de iteraciones [1000, 2000, 3000]
tol: tolerancia de parada [1e-3, 1e-4, 1e-5]


In [20]:
# Candidate values explored by the grid search, one list per hyper-parameter.
alpha_values = [
    0.001,
    0.01,
    0.1,
]  # strength of the L1 penalty
max_iter_values = [
    1000,
    2000,
    3000,
]  # upper bound on solver iterations
tol_values = [
    1e-3,
    1e-4,
    1e-5,
]  # stopping tolerance of the solver

# Accumulator for one metrics record per evaluated combination.
results = []

In [21]:
# Exhaustive grid search: fit one Lasso model per (alpha, max_iter, tol)
# combination on the training split and score it on the validation split.

# Start from an empty list so that re-running this cell does not append a
# second copy of every record to the results of a previous run.
results = []

for alpha in alpha_values:
    for max_iter in max_iter_values:
        for tol in tol_values:
            model = Lasso(alpha=alpha, max_iter=max_iter, tol=tol, random_state=42)
            model.fit(train_x, train_y)

            validation_pred = model.predict(validation_x)

            # Validation metrics: mean squared error and coefficient of
            # determination.
            mse = mean_squared_error(validation_y, validation_pred)
            r2 = r2_score(validation_y, validation_pred)

            results.append({
                'alpha': alpha,
                'max_iter': max_iter,
                'tol': tol,
                'MSE': mse,
                'R2': r2
            })


In [23]:
# Turn the list of per-combination metric records into a table (one row per
# hyper-parameter combination) and show it as the cell output.
results_df = pd.DataFrame(data=results)
results_df

Unnamed: 0,alpha,max_iter,tol,MSE,R2
0,0.001,1000,0.001,112.617358,0.750812
1,0.001,1000,0.0001,112.617283,0.750812
2,0.001,1000,1e-05,112.617273,0.750812
3,0.001,2000,0.001,112.617358,0.750812
4,0.001,2000,0.0001,112.617283,0.750812
5,0.001,2000,1e-05,112.617273,0.750812
6,0.001,3000,0.001,112.617358,0.750812
7,0.001,3000,0.0001,112.617283,0.750812
8,0.001,3000,1e-05,112.617273,0.750812
9,0.01,1000,0.001,112.700085,0.750629
