In [1]:
import os
import pandas as pd
from sqlalchemy import create_engine, text
from getpass import getpass
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import math
import scipy.stats as stats
from scipy.stats import spearmanr
from sklearn.ensemble import RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import KBinsDiscretizer
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Connection parameters to PostgreSQL
DB_USER = "postgres"         
DB_PASSWORD = getpass()
DB_HOST = "localhost"         
DB_PORT = "5432"             
DB_NAME = "ecommerce"         

# Create connection
engine = create_engine(f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}')

 ········


In [6]:
df = pd.read_sql('''
SELECT *
FROM order_sample_ready_to_ml_features_fixed
''', engine).astype(float)

df.delivery_time = df.delivery_time.clip(lower=0)
df.estimated_delivery_days = df.estimated_delivery_days.clip(lower=0)

df.head()

Unnamed: 0,category_share_other_category_sum,sel2cust_dist,category_count_moveis_decoracao_sum,avg_review_score_bin,product_width_cm_min,category_share_utilidades_domesticas_sum,category_share_informatica_acessorios_sum,product_height_cm_min,category_count_beleza_saude_sum,product_name_lenght_std,...,payment_installments,category_count_telefonia_sum,estimated_delivery_days,category_count_esporte_lazer_sum,category_count_cama_mesa_banho_sum,price_min,category_count_other_category_sum,freight_value_min,category_count_automotivo_sum,delivery_time
0,1.0,0.0,0.0,2.0,14.0,0.0,0.0,9.0,0.0,0.0,...,2.0,0.0,15.0,0.0,0.0,58.9,1.0,13.29,0.0,7.0
1,1.0,5.51012,0.0,1.0,40.0,0.0,0.0,30.0,0.0,0.0,...,3.0,0.0,18.0,0.0,0.0,239.9,1.0,19.93,0.0,16.0
2,0.0,2.865357,1.0,1.0,33.0,0.0,0.0,13.0,0.0,0.0,...,5.0,0.0,21.0,0.0,0.0,199.0,0.0,17.87,0.0,8.0
3,1.0,2.652272,0.0,1.0,15.0,0.0,0.0,10.0,0.0,0.0,...,2.0,0.0,11.0,0.0,0.0,12.99,1.0,12.79,0.0,6.0
4,0.0,6.316069,0.0,1.0,30.0,0.0,0.0,40.0,0.0,0.0,...,3.0,0.0,40.0,0.0,0.0,199.9,0.0,18.14,0.0,25.0


In [28]:
X = df.drop(columns=["delivery_time"])  
y = df["delivery_time"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [9]:
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)  # Выходной слой для регрессии
])

model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01),  
              loss='mse',
              metrics=['mae'])

history = model.fit(X_train_scaled, y_train,  
                    validation_data=(X_test_scaled, y_test),  
                    epochs=50, batch_size=32, verbose=1)


# Train metrics
y_pred = np.clip(model.predict(X_train_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_train), np.log(1+y_pred))

print(f"Metrics Train (simple NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

# Test metrics
y_pred = np.clip(model.predict(X_test_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_test), np.log(1+y_pred))

print(f"\nMetrics Test (simple NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 617us/step - loss: 83.0671 - mae: 5.6558 - val_loss: 68.8673 - val_mae: 5.0769
Epoch 2/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 592us/step - loss: 72.8703 - mae: 5.3077 - val_loss: 66.4605 - val_mae: 5.3358
Epoch 3/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 597us/step - loss: 69.9392 - mae: 5.2959 - val_loss: 65.8847 - val_mae: 5.1747
Epoch 4/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 602us/step - loss: 74.7268 - mae: 5.3257 - val_loss: 66.4892 - val_mae: 5.0854
Epoch 5/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 603us/step - loss: 70.7190 - mae: 5.2644 - val_loss: 66.6240 - val_mae: 5.0966
Epoch 6/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 606us/step - loss: 69.6320 - mae: 5.2680 - val_loss: 65.4442 - val_mae: 5.1633
Epoch 7/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

### Not so good, xgb is better so far. Let's upgrade the model

In [12]:
# Improved NN
model = keras.Sequential([
    layers.Dense(256, input_shape=(X_train.shape[1],)),
    layers.LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(128),
    layers.LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    layers.Dense(64),
    layers.LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    layers.Dense(32),
    layers.LeakyReLU(alpha=0.1),
    layers.BatchNormalization(),

    layers.Dense(1)  # Outer layer
])


model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),  
              loss=keras.losses.Huber(),
              metrics=['mae'])

# Callbacks 
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(X_train_scaled, y_train,  
                    validation_data=(X_test_scaled, y_test),  
                    epochs=20, batch_size=32, verbose=1,
                    callbacks=[early_stop, reduce_lr]) # reduce number of epochs since we don't need so much (see prev NN training log)

# Train metrics
y_pred = np.clip(model.predict(X_train_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_train), np.log(1+y_pred))

print(f"Metrics Train (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

# Test metrics
y_pred = np.clip(model.predict(X_test_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_test), np.log(1+y_pred))

print(f"\nMetrics Test (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 6.5601 - mae: 7.0395 - val_loss: 4.4286 - val_mae: 4.8985 - learning_rate: 0.0010
Epoch 2/20
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.5566 - mae: 5.0285 - val_loss: 4.4235 - val_mae: 4.8945 - learning_rate: 0.0010
Epoch 3/20
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.5465 - mae: 5.0173 - val_loss: 4.4181 - val_mae: 4.8886 - learning_rate: 0.0010
Epoch 4/20
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.5163 - mae: 4.9867 - val_loss: 4.3995 - val_mae: 4.8693 - learning_rate: 0.0010
Epoch 5/20
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.4871 - mae: 4.9580 - val_loss: 4.3951 - val_mae: 4.8666 - learning_rate: 0.0010
Epoch 6/20
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.4895 - mae: 4.9594 - va

## Even better than XGboost and more stability. Out best model

In [15]:
score = np.clip(model.predict(scaler.fit_transform(X)), a_min=0, a_max=None)
score_df = pd.DataFrame(score, columns=['best_NN_score_alpha1'])
score_df.to_csv('./scores/best_NN_score_full_X_alpha1.csv', index=True)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 316us/step


# Let's try to add a penalties for underestimation of delivery_time, since it's more important to business not to underestimate (to avoid negative reviews which is our final goal)

In [17]:
import tensorflow.keras.backend as K

def asymmetric_huber(delta=1.0, alpha=2.0):
    """
    Assymetric Huber Loss: penalty for underestiation is higher than for overestimation.
    
    delta - threshold for Huber Loss
    alpha - penalty coeff (the higher it is, the more penalty for underestimation)
    """
    def loss(y_true, y_pred):
        error = y_true - y_pred
        abs_error = K.abs(error)
        
        # Huber loss standart
        loss = K.switch(abs_error < delta, 
                        0.5 * K.square(error),  
                        delta * (abs_error - 0.5 * delta)) 
        
        # Underestimation penalty
        loss = K.switch(error > 0, alpha * loss, loss)  # error > 0 -> underestimation, increase penalty
        
        return K.mean(loss)
    
    return loss

# Try Alpha = 2 first
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),  
              loss=asymmetric_huber(delta=1.0, alpha=2.0),  
              metrics=['mae'])
# Callbacks 
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(X_train_scaled, y_train,  
                    validation_data=(X_test_scaled, y_test),  
                    epochs=50, batch_size=32, verbose=1,
                    callbacks=[early_stop, reduce_lr])

# Train metrics
y_pred = np.clip(model.predict(X_train_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_train), np.log(1+y_pred))

print(f"Metrics Train (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

# Test metrics
y_pred = np.clip(model.predict(X_test_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_test), np.log(1+y_pred))

print(f"\nMetrics Test (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")


Epoch 1/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 7.0638 - mae: 5.2879 - val_loss: 6.9329 - val_mae: 5.2897 - learning_rate: 0.0010
Epoch 2/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 7.0445 - mae: 5.2993 - val_loss: 6.9156 - val_mae: 5.1846 - learning_rate: 0.0010
Epoch 3/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 7.0264 - mae: 5.2825 - val_loss: 6.9163 - val_mae: 5.1895 - learning_rate: 0.0010
Epoch 4/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 7.0958 - mae: 5.3330 - val_loss: 6.9152 - val_mae: 5.1582 - learning_rate: 0.0010
Epoch 5/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 7.0381 - mae: 5.2796 - val_loss: 6.9036 - val_mae: 5.1899 - learning_rate: 0.0010
Epoch 6/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 7.1522 - mae: 

In [18]:
score = np.clip(model.predict(scaler.fit_transform(X)), a_min=0, a_max=None)
score_df = pd.DataFrame(score, columns=['best_NN_score_alpha2'])
score_df.to_csv('./scores/best_NN_score_full_X_alpha2.csv', index=True)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 316us/step


In [19]:
import tensorflow.keras.backend as K

def asymmetric_huber(delta=1.0, alpha=2.0):
    """
    Assymetric Huber Loss: penalty for underestiation is higher than for overestimation.
    
    delta - threshold for Huber Loss
    alpha - penalty coeff (the higher it is, the more penalty for underestimation)
    """
    def loss(y_true, y_pred):
        error = y_true - y_pred
        abs_error = K.abs(error)
        
        # Huber loss standart
        loss = K.switch(abs_error < delta, 
                        0.5 * K.square(error),  
                        delta * (abs_error - 0.5 * delta)) 
        
        # Underestimation penalty
        loss = K.switch(error > 0, alpha * loss, loss)  # error > 0 -> underestimation, increase penalty
        
        return K.mean(loss)
    
    return loss

# Try Alpha = 1.5
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),  
              loss=asymmetric_huber(delta=1.0, alpha=1.5),  
              metrics=['mae'])
# Callbacks 
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(X_train_scaled, y_train,  
                    validation_data=(X_test_scaled, y_test),  
                    epochs=50, batch_size=32, verbose=1,
                    callbacks=[early_stop, reduce_lr])

# Train metrics
y_pred = np.clip(model.predict(X_train_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_train), np.log(1+y_pred))

print(f"Metrics Train (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

# Test metrics
y_pred = np.clip(model.predict(X_test_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_test), np.log(1+y_pred))

print(f"\nMetrics Test (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")


Epoch 1/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 5.8021 - mae: 5.0149 - val_loss: 5.7306 - val_mae: 4.9149 - learning_rate: 0.0010
Epoch 2/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.8122 - mae: 5.0041 - val_loss: 5.7315 - val_mae: 4.9147 - learning_rate: 0.0010
Epoch 3/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.8203 - mae: 5.0075 - val_loss: 5.7338 - val_mae: 4.9137 - learning_rate: 0.0010
Epoch 4/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.7866 - mae: 4.9892 - val_loss: 5.7282 - val_mae: 4.9724 - learning_rate: 0.0010
Epoch 5/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.8404 - mae: 5.0306 - val_loss: 5.7381 - val_mae: 4.9283 - learning_rate: 0.0010
Epoch 6/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.8314 - mae: 

In [20]:
score = np.clip(model.predict(scaler.fit_transform(X)), a_min=0, a_max=None)
score_df = pd.DataFrame(score, columns=['best_NN_score_alpha1.5'])
score_df.to_csv('./scores/best_NN_score_full_X_alpha1_5.csv', index=True)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 341us/step


In [21]:
import tensorflow.keras.backend as K

def asymmetric_huber(delta=1.0, alpha=2.0):
    """
    Assymetric Huber Loss: penalty for underestiation is higher than for overestimation.
    
    delta - threshold for Huber Loss
    alpha - penalty coeff (the higher it is, the more penalty for underestimation)
    """
    def loss(y_true, y_pred):
        error = y_true - y_pred
        abs_error = K.abs(error)
        
        # Huber loss standart
        loss = K.switch(abs_error < delta, 
                        0.5 * K.square(error),  
                        delta * (abs_error - 0.5 * delta)) 
        
        # Underestimation penalty
        loss = K.switch(error > 0, alpha * loss, loss)  # error > 0 -> underestimation, increase penalty
        
        return K.mean(loss)
    
    return loss

# Try Alpha = 3
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),  
              loss=asymmetric_huber(delta=1.0, alpha=3),  
              metrics=['mae'])
# Callbacks 
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(X_train_scaled, y_train,  
                    validation_data=(X_test_scaled, y_test),  
                    epochs=50, batch_size=32, verbose=1,
                    callbacks=[early_stop, reduce_lr])

# Train metrics
y_pred = np.clip(model.predict(X_train_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_train), np.log(1+y_pred))

print(f"Metrics Train (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

# Test metrics
y_pred = np.clip(model.predict(X_test_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_test), np.log(1+y_pred))

print(f"\nMetrics Test (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")


Epoch 1/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 9.1265 - mae: 5.8036 - val_loss: 8.8130 - val_mae: 5.7459 - learning_rate: 0.0010
Epoch 2/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 9.1421 - mae: 5.9779 - val_loss: 8.8172 - val_mae: 5.7568 - learning_rate: 0.0010
Epoch 3/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 9.1311 - mae: 5.9677 - val_loss: 8.8176 - val_mae: 5.6917 - learning_rate: 0.0010
Epoch 4/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 8.8923 - mae: 5.8474 - val_loss: 8.8428 - val_mae: 5.8305 - learning_rate: 0.0010
Epoch 5/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 8.9591 - mae: 5.9279 - val_loss: 8.8439 - val_mae: 5.9225 - learning_rate: 0.0010
Epoch 6/50
[1m2410/2410[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 8.9112 - mae: 

In [22]:
score = np.clip(model.predict(scaler.fit_transform(X)), a_min=0, a_max=None)
score_df = pd.DataFrame(score, columns=['best_NN_score_alpha3'])
score_df.to_csv('./scores/best_NN_score_full_X_alpha3.csv', index=True)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 333us/step


### Try to truncate target (to drop abnormal high values)

In [29]:
X_trun = df[df.delivery_time <= 50].drop(columns=["delivery_time"])  
y_trun = df[df.delivery_time <= 50]["delivery_time"]

X_train, X_test, y_train, y_test = train_test_split(X_trun, y_trun, test_size=0.2, random_state=42)

# Normalize data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
import tensorflow.keras.backend as K

def asymmetric_huber(delta=1.0, alpha=2.0):
    """
    Assymetric Huber Loss: penalty for underestiation is higher than for overestimation.
    
    delta - threshold for Huber Loss
    alpha - penalty coeff (the higher it is, the more penalty for underestimation)
    """
    def loss(y_true, y_pred):
        error = y_true - y_pred
        abs_error = K.abs(error)
        
        # Huber loss standart
        loss = K.switch(abs_error < delta, 
                        0.5 * K.square(error),  
                        delta * (abs_error - 0.5 * delta)) 
        
        # Underestimation penalty
        loss = K.switch(error > 0, alpha * loss, loss)  # error > 0 -> underestimation, increase penalty
        
        return K.mean(loss)
    
    return loss

# Try Alpha = 1
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),  
              loss=asymmetric_huber(delta=1.0, alpha=1),  
              metrics=['mae'])
# Callbacks 
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(X_train_scaled, y_train,  
                    validation_data=(X_test_scaled, y_test),  
                    epochs=50, batch_size=32, verbose=1,
                    callbacks=[early_stop, reduce_lr])

# Train metrics
y_pred = np.clip(model.predict(X_train_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_train), np.log(1+y_pred))

print(f"Metrics Train (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

# Test metrics
y_pred = np.clip(model.predict(X_test_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_test), np.log(1+y_pred))

print(f"\nMetrics Test (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")


Epoch 1/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 4.1831 - mae: 4.6542 - val_loss: 3.9260 - val_mae: 4.3941 - learning_rate: 0.0010
Epoch 2/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.1008 - mae: 4.5702 - val_loss: 3.9372 - val_mae: 4.4073 - learning_rate: 0.0010
Epoch 3/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.0977 - mae: 4.5677 - val_loss: 3.9192 - val_mae: 4.3880 - learning_rate: 0.0010
Epoch 4/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.0888 - mae: 4.5582 - val_loss: 3.9272 - val_mae: 4.3966 - learning_rate: 0.0010
Epoch 5/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.0611 - mae: 4.5301 - val_loss: 3.9317 - val_mae: 4.3996 - learning_rate: 0.0010
Epoch 6/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.0765 - mae: 

In [30]:
score = np.clip(model.predict(scaler.fit_transform(X)), a_min=0, a_max=None)
score_df = pd.DataFrame(score, columns=['best_NN_score_alpha1_tr'])
score_df.to_csv('./scores/best_NN_score_full_X_alpha1_tr.csv', index=True)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 321us/step


In [31]:
# Alpha 1.5, truncated
import tensorflow.keras.backend as K

def asymmetric_huber(delta=1.0, alpha=2.0):
    """
    Assymetric Huber Loss: penalty for underestiation is higher than for overestimation.
    
    delta - threshold for Huber Loss
    alpha - penalty coeff (the higher it is, the more penalty for underestimation)
    """
    def loss(y_true, y_pred):
        error = y_true - y_pred
        abs_error = K.abs(error)
        
        # Huber loss standart
        loss = K.switch(abs_error < delta, 
                        0.5 * K.square(error),  
                        delta * (abs_error - 0.5 * delta)) 
        
        # Underestimation penalty
        loss = K.switch(error > 0, alpha * loss, loss)  # error > 0 -> underestimation, increase penalty
        
        return K.mean(loss)
    
    return loss

# Try Alpha = 1
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),  
              loss=asymmetric_huber(delta=1.0, alpha=1.5),  
              metrics=['mae'])
# Callbacks 
early_stop = keras.callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-6)

history = model.fit(X_train_scaled, y_train,  
                    validation_data=(X_test_scaled, y_test),  
                    epochs=50, batch_size=32, verbose=1,
                    callbacks=[early_stop, reduce_lr])

# Train metrics
y_pred = np.clip(model.predict(X_train_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_train), np.log(1+y_pred))

print(f"Metrics Train (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")

# Test metrics
y_pred = np.clip(model.predict(X_test_scaled), a_min=0, a_max=None)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(np.log(1+y_test), np.log(1+y_pred))

print(f"\nMetrics Test (complex NN):")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score (log space): {r2:.4f}")


Epoch 1/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - loss: 5.2915 - mae: 4.6284 - val_loss: 5.1171 - val_mae: 4.5165 - learning_rate: 0.0010
Epoch 2/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.3608 - mae: 4.7001 - val_loss: 5.1202 - val_mae: 4.4899 - learning_rate: 0.0010
Epoch 3/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.3520 - mae: 4.6814 - val_loss: 5.1460 - val_mae: 4.5108 - learning_rate: 0.0010
Epoch 4/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.3487 - mae: 4.6851 - val_loss: 5.1251 - val_mae: 4.5180 - learning_rate: 0.0010
Epoch 5/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.3172 - mae: 4.6684 - val_loss: 5.1321 - val_mae: 4.4678 - learning_rate: 0.0010
Epoch 6/50
[1m2395/2395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 5.3800 - mae: 

In [32]:
score = np.clip(model.predict(scaler.fit_transform(X)), a_min=0, a_max=None)
score_df = pd.DataFrame(score, columns=['best_NN_score_alpha1_5_tr'])
score_df.to_csv('./scores/best_NN_score_full_X_alpha1_5_tr.csv', index=True)

[1m3012/3012[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 315us/step


# Let's proceed to further analysis ("Score Analysis" notebook)