In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Bidirectional
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import Callback, EarlyStopping
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import tensorflow as tf

2024-02-19 20:49:58.435585: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-19 20:49:58.435640: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-19 20:49:58.436605: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-19 20:49:58.442812: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Read the three input CSV files into dataframes.
sales_train = pd.read_csv('sales_train_evaluation.csv')
sell_prices = pd.read_csv('sell_prices.csv')
calendar = pd.read_csv('calendar.csv')

# Preview the first rows of each frame, one per line group, in load order.
print(
    sales_train.head(),
    sell_prices.head(),
    calendar.head(),
    sep='\n',
)

                              id        item_id    dept_id   cat_id store_id  \
0  HOBBIES_1_001_CA_1_evaluation  HOBBIES_1_001  HOBBIES_1  HOBBIES     CA_1   
1  HOBBIES_1_002_CA_1_evaluation  HOBBIES_1_002  HOBBIES_1  HOBBIES     CA_1   
2  HOBBIES_1_003_CA_1_evaluation  HOBBIES_1_003  HOBBIES_1  HOBBIES     CA_1   
3  HOBBIES_1_004_CA_1_evaluation  HOBBIES_1_004  HOBBIES_1  HOBBIES     CA_1   
4  HOBBIES_1_005_CA_1_evaluation  HOBBIES_1_005  HOBBIES_1  HOBBIES     CA_1   

  state_id  d_1  d_2  d_3  d_4  ...  d_1932  d_1933  d_1934  d_1935  d_1936  \
0       CA    0    0    0    0  ...       2       4       0       0       0   
1       CA    0    0    0    0  ...       0       1       2       1       1   
2       CA    0    0    0    0  ...       1       0       2       0       0   
3       CA    0    0    0    0  ...       1       1       0       4       0   
4       CA    0    0    0    0  ...       0       0       0       2       1   

   d_1937  d_1938  d_1939  d_1940  d_1941  


In [3]:
# Downcast data types to reduce memory usage
def downcast_dtypes(df):
    """Shrink numeric columns of ``df`` in place to reduce memory usage.

    float64 columns are cast to float32. int64/int32 columns are cast to the
    narrowest of int16 or int32 that can hold every value in the column; a
    column whose values fit in neither is left unchanged. (A blind cast to
    int16 would silently wrap out-of-range values.)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify. Columns of other dtypes are not touched.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenient reassignment.
    """
    float_cols = [c for c in df if df[c].dtype == "float64"]
    int_cols = [c for c in df if df[c].dtype in ["int64", "int32"]]

    if float_cols:
        df[float_cols] = df[float_cols].astype(np.float32)

    for col in int_cols:
        values = df[col]
        if values.empty:
            # Nothing can overflow; keep the historical int16 target.
            df[col] = values.astype(np.int16)
            continue
        lowest, highest = values.min(), values.max()
        for target in (np.int16, np.int32):
            limits = np.iinfo(target)
            if limits.min <= lowest and highest <= limits.max:
                df[col] = values.astype(target)
                break
        # No break: the values need 64 bits, so the column stays as it is.
    return df

# Apply the memory-saving downcast to each of the three loaded frames.
sales_train, sell_prices, calendar = map(
    downcast_dtypes,
    (sales_train, sell_prices, calendar),
)

In [4]:
# Mean sell_price per item_id, computed over every row of sell_prices, kept
# as a two-column frame (item_id, sell_price).
average_price = sell_prices.groupby('item_id', as_index=False)['sell_price'].mean()

# Left-join so every sales_train row is kept and gains a 'sell_price' column.
sales_train = sales_train.merge(average_price, on='item_id', how='left')

sales_train.head()

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941,sell_price
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,4,0,0,0,0,3,3,0,1,8.298076
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,2,1,1,0,0,0,0,0,3.967459
2,HOBBIES_1_003_CA_1_evaluation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,2,0,0,0,2,3,0,1,2.965734
3,HOBBIES_1_004_CA_1_evaluation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,4,0,1,3,0,2,6,4.502845
4,HOBBIES_1_005_CA_1_evaluation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,2,1,0,0,2,1,0,2.880902


In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Feature engineering for the calendar dataframe: parse 'date' and derive
# day-of-week, month and year columns from it.
if 'date' not in calendar.columns:
    raise ValueError("Error: 'date' column not found in the calendar dataframe.")
calendar['date'] = pd.to_datetime(calendar['date'])
calendar['day_of_week'] = calendar['date'].dt.dayofweek
calendar['month'] = calendar['date'].dt.month
calendar['year'] = calendar['date'].dt.year

# Feature engineering for the sales_train dataframe.
if not all(col in sales_train.columns for col in ['d_345', 'd_1941']):
    raise ValueError("Error: Columns 'd_345' to 'd_1941' are required for summing up sales data.")

# Per-row sum over the label slice 'd_345'..'d_1941' (both ends inclusive).
total_sales = sales_train.loc[:, 'd_345':'d_1941'].sum(axis=1)

# Collect every new column first and attach them with a single concat.
# Inserting them one at a time into this very wide frame triggers pandas'
# "DataFrame is highly fragmented" PerformanceWarning.
engineered = {'total_sales': total_sales}
# NOTE(review): shift(i) moves values along the row index, so row k receives
# the total_sales of row k-i (a different row of sales_train), not a value
# from an earlier time period. Kept as-is so downstream results do not
# change; confirm whether a time-based lag was intended.
for i in range(1, 8):
    engineered[f'sales_lag_{i}'] = total_sales.shift(i)
engineered = pd.DataFrame(engineered)

# Drop any same-named columns first so re-running the cell replaces them
# rather than duplicating them; new columns land at the end, in the same order
# as before (total_sales, sales_lag_1 .. sales_lag_7).
sales_train = pd.concat(
    [sales_train.drop(columns=engineered.columns, errors='ignore'), engineered],
    axis=1,
)

# Handle missing values consistently (the leading rows of each lag column
# are NaN after shifting).
sales_train = sales_train.fillna(0)
calendar = calendar.fillna(0)

# Transpose the sales_train DataFrame.
# NOTE(review): this result is not referenced again in the visible notebook,
# and transposing a frame with mixed dtypes is costly; consider removing it.
sales_train_transposed = sales_train.T

# Select data for the past year only.
# NOTE(review): this takes the LAST 365 COLUMNS by position. Because columns
# were appended above (and possibly earlier), the slice contains total_sales,
# the sales_lag_* columns and any other trailing non-day columns alongside the
# d_* columns, so it covers fewer than 365 days. Rows remain one per series.
last_year_sales = sales_train.iloc[:, -365:]
last_year_calendar = calendar.iloc[-365:]

# Scale features to [0, 1] column by column; 'total_sales' is excluded.
numeric_columns = [col for col in last_year_sales.columns if col not in ['date', 'd', 'total_sales']]
if not numeric_columns:
    raise ValueError("Error: No numeric columns found for scaling.")
scaler = MinMaxScaler()
scaled_sales = scaler.fit_transform(last_year_sales[numeric_columns])

# Window length used later when building supervised sequences.
timesteps = 7
sales_train.head()

  sales_train['total_sales'] = sales_train.loc[:, 'd_345':'d_1941'].sum(axis=1)
  sales_train[f'sales_lag_{i}'] = sales_train['total_sales'].shift(i)


Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1941,sell_price,total_sales,sales_lag_1,sales_lag_2,sales_lag_3,sales_lag_4,sales_lag_5,sales_lag_6,sales_lag_7
0,HOBBIES_1_001_CA_1_evaluation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,8.298076,633,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,HOBBIES_1_002_CA_1_evaluation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,3.967459,441,633.0,0.0,0.0,0.0,0.0,0.0,0.0
2,HOBBIES_1_003_CA_1_evaluation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,2.965734,309,441.0,633.0,0.0,0.0,0.0,0.0,0.0
3,HOBBIES_1_004_CA_1_evaluation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,6,4.502845,2903,309.0,441.0,633.0,0.0,0.0,0.0,0.0
4,HOBBIES_1_005_CA_1_evaluation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,2.880902,1608,2903.0,309.0,441.0,633.0,0.0,0.0,0.0


In [6]:
# Split data into train and test sets: the first 80% of the rows of
# scaled_sales go to train, the remaining rows to test (no shuffling).
train_size = int(len(scaled_sales) * 0.8)
test_size = len(scaled_sales) - train_size
train = scaled_sales[:train_size, :]
test = scaled_sales[train_size:, :]

In [7]:
# Convert an array of values into a dataset matrix
def create_dataset(dataset, timesteps=1):
    """Build sliding-window samples from a 2-D array.

    For every start row ``s`` in ``range(len(dataset) - timesteps)`` the
    input sample is rows ``s .. s+timesteps-1`` (all columns) and the target
    is row ``s+timesteps`` (all columns).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` built with ``np.array`` from the collected windows and
        targets; both are empty arrays when no window fits.
    """
    n_windows = len(dataset) - timesteps
    windows = [dataset[start:start + timesteps, :] for start in range(n_windows)]
    targets = [dataset[start + timesteps, :] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Turn each split into (window, next-row) pairs using the shared window length.
X_train, y_train = create_dataset(train, timesteps=timesteps)
X_test, y_test = create_dataset(test, timesteps=timesteps)


In [8]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.activations import relu

class CustomCallback(Callback):
    """Keras callback that stops training when a batch reports a NaN loss."""

    def on_batch_end(self, batch, logs=None):
        """Check the batch loss and request a stop if it is NaN.

        Parameters
        ----------
        batch : int
            Index of the batch that just finished (used in the message).
        logs : dict or None
            Metrics for the batch. ``None`` is treated as an empty dict so
            the hook never fails when no logs are supplied, and no mutable
            default object is shared between calls.
        """
        loss = (logs or {}).get('loss')
        if loss is not None and np.isnan(loss):
            print('Batch %d: Invalid loss, terminating training' % (batch))
            # Keras checks this flag to end the training loop.
            self.model.stop_training = True

# Early stopping callback: watches 'val_loss' with a patience of 7 and
# restore_best_weights enabled.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
)


In [15]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, Conv1D, MaxPooling1D

# Number of columns in the scaled matrix. create_dataset uses full rows of
# that matrix as both inputs and targets, so this is also the target width.
n_features = scaled_sales.shape[1]

# Stacked LSTM regressor: three LSTM layers (256 -> 128 -> 64 units) with
# dropout and batch normalization in between, followed by a linear Dense head.
model = Sequential([
    LSTM(256, return_sequences=True, input_shape=(timesteps, n_features)),
    Dropout(0.3),
    BatchNormalization(),
    LSTM(128, return_sequences=True),
    Dropout(0.3),
    LSTM(64),
    BatchNormalization(),
    # One output unit per target column. This was hard-coded to 364, which
    # only works while the scaled matrix happens to have exactly 364 columns.
    Dense(n_features)
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 7, 256)            635904    
                                                                 
 dropout_2 (Dropout)         (None, 7, 256)            0         
                                                                 
 batch_normalization_2 (Bat  (None, 7, 256)            1024      
 chNormalization)                                                
                                                                 
 lstm_4 (LSTM)               (None, 7, 128)            197120    
                                                                 
 dropout_3 (Dropout)         (None, 7, 128)            0         
                                                                 
 lstm_5 (LSTM)               (None, 64)                49408     
                                                      

In [16]:
# Adam optimizer configured with gradient clipping (clipvalue=0.5); the model
# is trained against mean absolute error.
optimizer = Adam(
    learning_rate=0.001,
    clipvalue=0.5,
)
model.compile(loss='mae', optimizer=optimizer)

In [17]:
# Train with two callbacks attached: the NaN-loss guard and early stopping.
# The final 30% of the training samples are held out for validation.
model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=64,
    validation_split=0.3,
    verbose=2,
    callbacks=[CustomCallback(), early_stopping],
)

Epoch 1/150
267/267 - 11s - loss: 37.7558 - val_loss: 37.6844 - 11s/epoch - 43ms/step
Epoch 2/150
267/267 - 8s - loss: 37.3186 - val_loss: 36.9894 - 8s/epoch - 28ms/step
Epoch 3/150
267/267 - 7s - loss: 36.4419 - val_loss: 35.8233 - 7s/epoch - 28ms/step
Epoch 4/150
267/267 - 8s - loss: 35.2224 - val_loss: 35.0522 - 8s/epoch - 28ms/step
Epoch 5/150
267/267 - 7s - loss: 33.7575 - val_loss: 32.5225 - 7s/epoch - 28ms/step
Epoch 6/150
267/267 - 7s - loss: 32.2855 - val_loss: 31.7174 - 7s/epoch - 28ms/step
Epoch 7/150
267/267 - 7s - loss: 30.9571 - val_loss: 30.7414 - 7s/epoch - 28ms/step
Epoch 8/150
267/267 - 8s - loss: 29.8609 - val_loss: 29.4762 - 8s/epoch - 29ms/step
Epoch 9/150
267/267 - 8s - loss: 29.0152 - val_loss: 29.9290 - 8s/epoch - 29ms/step
Epoch 10/150
267/267 - 8s - loss: 28.3892 - val_loss: 28.7982 - 8s/epoch - 28ms/step
Epoch 11/150
267/267 - 8s - loss: 27.9410 - val_loss: 28.4602 - 8s/epoch - 28ms/step
Epoch 12/150
267/267 - 8s - loss: 27.6333 - val_loss: 28.2265 - 8s/epoch

<keras.src.callbacks.History at 0x2afa23c44eb0>

In [18]:
# Run the trained model over the training windows and the test windows.
train_predict, test_predict = model.predict(X_train), model.predict(X_test)




In [19]:
# Map predictions and targets back to the original value range with the
# fitted scaler. The names are rebound to the un-scaled arrays, so this cell
# must run exactly once after prediction.
train_predict, y_train, test_predict, y_test = [
    scaler.inverse_transform(scaled)
    for scaled in (train_predict, y_train, test_predict, y_test)
]


In [20]:
def calculate_rmsse(actual, predicted, insample=None):
    """Root mean squared scaled error, pooled over all elements.

    The numerator is the RMSE between ``predicted`` and ``actual``. The
    denominator is the root mean square of the first differences (along
    axis 0) of the scaling series, i.e. the error of a one-step naive
    forecast on that series.

    Parameters
    ----------
    actual : array-like
        Observed values.
    predicted : array-like
        Forecasts; must have exactly the same shape as ``actual`` (a
        mismatch would otherwise broadcast silently and give a wrong score).
    insample : array-like, optional
        History used to compute the scaling denominator. When omitted,
        ``actual`` itself is used, which is the original behaviour.

    Returns
    -------
    float
        The scaled error, or ``nan`` when it is undefined: no data, fewer
        than two rows to difference, or a constant scaling series.

    Raises
    ------
    ValueError
        If ``actual`` and ``predicted`` differ in shape.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    if actual.shape != predicted.shape:
        raise ValueError(
            "actual and predicted must have the same shape, got %s and %s"
            % (actual.shape, predicted.shape)
        )

    scale_source = actual if insample is None else np.asarray(insample, dtype=float)
    # Differencing needs at least two rows; bail out before numpy warns about
    # taking the mean of an empty array.
    if actual.size == 0 or scale_source.ndim == 0 or scale_source.shape[0] < 2:
        return np.nan
    steps = np.diff(scale_source, axis=0)
    if steps.size == 0:
        return np.nan

    denominator = np.sqrt(np.mean(np.square(steps)))
    if denominator == 0:
        return np.nan
    rmsse = np.sqrt(np.mean(np.square(predicted - actual))) / denominator
    return rmsse

# Score both splits with the same metric, then report them.
train_rmsse, test_rmsse = (
    calculate_rmsse(y_train, train_predict),
    calculate_rmsse(y_test, test_predict),
)

print("Train RMSSE:", train_rmsse)
print("Test RMSSE:", test_rmsse)

Train RMSSE: 0.46759127209908874
Test RMSSE: 0.3963367169201146
