In [5]:
#Random Forest
import pandas as pd

train_df = pd.read_csv("train.csv")
val_df = pd.read_csv("val.csv")
test_df = pd.read_csv("test.csv")


from sklearn.ensemble import RandomForestRegressor

features = [
    # Numerical features
    "store_nbr", "item_nbr", "transactions", "dcoilwtico", "dayofweek",
    "is_weekend", "lag_7", "rolling_7", "promo_lag_interaction",
    
    # One-hot encoded categorical features
    "type_y_Work Day",  # Only one category found; check if more exist
    
    # All one-hot encoded 'family' features
    "family_BEAUTY", "family_BEVERAGES", "family_BREAD/BAKERY", "family_CLEANING",
    "family_DAIRY", "family_DELI", "family_EGGS", "family_FROZEN FOODS", "family_GROCERY I",
    "family_GROCERY II", "family_HARDWARE", "family_HOME APPLIANCES", "family_LAWN AND GARDEN",
    "family_LINGERIE", "family_LIQUOR,WINE,BEER", "family_MEATS", "family_PERSONAL CARE",
    "family_POULTRY", "family_PREPARED FOODS", "family_SEAFOOD",

    # All one-hot encoded 'city' features
    "city_Babahoyo", "city_Cayambe", "city_Cuenca", "city_Daule", "city_El Carmen",
    "city_Esmeraldas", "city_Guaranda", "city_Guayaquil", "city_Ibarra", "city_Latacunga",
    "city_Loja", "city_Machala", "city_Playas", "city_Quevedo", "city_Quito", "city_Riobamba",
    "city_Salinas", "city_Santo Domingo",

    # All one-hot encoded 'state' features
    "state_Bolivar", "state_Chimborazo", "state_Cotopaxi", "state_El Oro", "state_Esmeraldas",
    "state_Guayas", "state_Imbabura", "state_Loja", "state_Los Rios", "state_Manabi",
    "state_Pichincha", "state_Santa Elena", "state_Santo Domingo de los Tsachilas", "state_Tungurahua"
]

target = "unit_sales"
# Selecting the data
X_train = train_df[features]
y_train = train_df[target]

X_val = val_df[features]
y_val = val_df[target]

X_test = test_df[features]



rf_model = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_val)

# Evaluate
from sklearn.metrics import mean_absolute_error, mean_squared_error

mae_rf = mean_absolute_error(y_val, y_pred_rf)
rmse_rf = mean_squared_error(y_val, y_pred_rf, squared=False)

print(f"Random Forest MAE: {mae_rf:.4f}")
print(f"Random Forest RMSE: {rmse_rf:.4f}")


Random Forest MAE: 5.4446
Random Forest RMSE: 12.0783




In [6]:
#Cat Boost

from catboost import CatBoostRegressor

cat_model = CatBoostRegressor(iterations=500, learning_rate=0.1, depth=7, random_state=42, verbose=100)
cat_model.fit(X_train, y_train)

y_pred_cat = cat_model.predict(X_val)

mae_cat = mean_absolute_error(y_val, y_pred_cat)
rmse_cat = mean_squared_error(y_val, y_pred_cat, squared=False)

print(f"CatBoost MAE: {mae_cat:.4f}")
print(f"CatBoost RMSE: {rmse_cat:.4f}")


0:	learn: 16.6129981	total: 59.6ms	remaining: 29.7s
100:	learn: 9.5432157	total: 1.14s	remaining: 4.51s
200:	learn: 8.9234522	total: 2.19s	remaining: 3.27s
300:	learn: 8.5510413	total: 3.26s	remaining: 2.15s
400:	learn: 8.2677927	total: 4.32s	remaining: 1.07s
499:	learn: 8.0465132	total: 5.37s	remaining: 0us
CatBoost MAE: 5.2393
CatBoost RMSE: 11.1545




In [7]:
#Neural Networks

import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([
    keras.layers.Dense(128, activation="relu", input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(1)
])

model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

y_pred_nn = model.predict(X_val).flatten()

mae_nn = mean_absolute_error(y_val, y_pred_nn)
rmse_nn = mean_squared_error(y_val, y_pred_nn, squared=False)

print(f"Neural Network MAE: {mae_nn:.4f}")
print(f"Neural Network RMSE: {rmse_nn:.4f}")


2025-03-31 18:38:02.925200: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 120125680.0000 - mae: 1686.4803 - val_loss: 888.4341 - val_mae: 23.2576
Epoch 2/50
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 185582.5312 - mae: 102.4474 - val_loss: 2603.6816 - val_mae: 42.6879
Epoch 3/50
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 79118.0938 - mae: 72.2735 - val_loss: 770.1690 - val_mae: 17.5233
Epoch 4/50
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 71166.4766 - mae: 111.0583 - val_loss: 3234.3845 - val_mae: 44.9426
Epoch 5/50
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 29540.8867 - mae: 62.1198 - val_loss: 1132.7050 - val_mae: 24.3507
Epoch 6/50
[1m4375/4375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 110835.0703 - mae: 95.9657 - val_loss: 1871.1566 - val_mae: 37.2867
Epoch 7/50
[1m4375/4375

