In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Read the raw CSV and report its dimensions before touching it.
df = pd.read_csv("dataset/data.csv")
print(df.shape)

# Positional rename: assigning four names requires the file to have four columns.
df.columns = ['date', 'sales', 'stock', 'price']

# Parse the dates, order the rows chronologically and renumber them from 0.
df = (
    df.assign(date=pd.to_datetime(df['date']))
      .sort_values('date')
      .reset_index(drop=True)
)

# Quick look at the first rows
print(df.head())

(937, 4)
        date  sales  stock  price
0 2014-01-01      0   4972   1.29
1 2014-01-02     70   4902   1.29
2 2014-01-03     59   4843   1.29
3 2014-01-04     93   4750   1.29
4 2014-01-05     96   4654   1.29


In [3]:
target_col = 'sales'

# Order the model inputs so the target is always column 0 of the scaled array
# (the sequence builder reads the label from column 0). 'date' is left out
# because it is not fed to the network.
feature_cols = [target_col] + [col for col in df.columns if col not in ('date', target_col)]

# Scale features with a min-max scaler.
# NOTE(review): the scaler is fit on the whole series, i.e. also on rows that later
# fall into the validation/test windows - consider fitting on the training rows only.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[feature_cols])

# Make sure downstream code receives a plain numpy array.
scaled_data = np.asarray(scaled_data)

# Function to create sequences for LSTM
def create_sequences(data, seq_length, target_idx=0):
    """Turn a 2-D time series into sliding windows and next-step targets.

    Args:
        data: Array-like of shape ``(n_rows, n_features)``, ordered in time.
        seq_length: Number of consecutive rows in each input window (must be >= 1).
        target_idx: Column whose value in the row right after a window is used as
            that window's target. Defaults to 0, the first column.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_rows - seq_length, seq_length, n_features)``
        and ``y`` has shape ``(n_rows - seq_length,)``. If ``data`` has too few rows to
        form a single window, both arrays are empty but keep those trailing dimensions.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be at least 1.")

    data = np.asarray(data)
    n_windows = len(data) - seq_length

    # Not enough rows for even one window: return correctly shaped empty arrays so
    # callers can still read X.shape[1] / X.shape[2].
    if n_windows <= 0:
        empty_X = np.empty((0, seq_length, data.shape[1]), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + seq_length, :] for start in range(n_windows)])
    # The target of window i is row i + seq_length; copy so y does not alias data.
    y = data[seq_length:, target_idx].copy()
    return X, y

# Window length: every sample holds 14 consecutive days and is labelled with the day after.
SEQ_LENGTH = 14
X, y = create_sequences(scaled_data, seq_length=SEQ_LENGTH)

print(f"Shape of X: {X.shape}, Shape of y: {y.shape}")


Shape of X: (923, 14, 3), Shape of y: (923,)


In [4]:
# Chronological split (no shuffling): first 70% train, next 15% validation, remainder test.
train_size = int(len(X) * 0.7)
val_size = int(len(X) * 0.15)

# Cut both arrays at the same two boundaries so samples and targets stay aligned.
X_train, X_val, X_test = np.split(X, [train_size, train_size + val_size])
y_train, y_val, y_test = np.split(y, [train_size, train_size + val_size])

print(f"Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}")

Train: (646, 14, 3), Val: (138, 14, 3), Test: (139, 14, 3)


In [13]:
import os
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam, AdamW
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.models import load_model

In [12]:
def create_lstm_model(input_shape, dropout_rate=0.3):
    """Build an uncompiled LSTM regressor that predicts one value per input window.

    Args:
        input_shape: Shape of a single sample, ``(timesteps, n_features)``.
        dropout_rate: Dropout fraction used after the recurrent block and after
            the hidden Dense layer.

    Returns:
        A Keras ``Sequential`` model whose output has shape ``(batch, 1)``.

    Raises:
        Exception: Any error raised while assembling the model is printed and re-raised.
    """
    try:
        model = Sequential([
            InputLayer(shape=input_shape),

            # return_sequences=False: keep only the last time step so the network
            # emits one prediction per window, matching the 1-D target array.
            # With return_sequences=True the output would be (batch, timesteps, 1).
            LSTM(32, return_sequences=False, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4), recurrent_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
            BatchNormalization(),
            Dropout(dropout_rate),

            Dense(32, activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
            Dropout(dropout_rate),

            # Single linear unit: unbounded regression output.
            Dense(1, activation='linear')
        ])
        return model
    except Exception as e:
        print(f"Error in create_lstm_model: {e}")
        raise

def create_gru_model(input_shape, dropout_rate=0.3):
    """Build an uncompiled GRU regressor that predicts one value per input window.

    Args:
        input_shape: Shape of a single sample, ``(timesteps, n_features)``.
        dropout_rate: Dropout fraction used after the recurrent block and after
            the hidden Dense layer.

    Returns:
        A Keras ``Sequential`` model whose output has shape ``(batch, 1)``.

    Raises:
        Exception: Any error raised while assembling the model is printed and re-raised.
    """
    try:
        model = Sequential([
            InputLayer(shape=input_shape),

            # return_sequences=False: keep only the last time step so the network
            # emits one prediction per window, matching the 1-D target array.
            # With return_sequences=True the output would be (batch, timesteps, 1).
            GRU(32, return_sequences=False, kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
            BatchNormalization(),
            Dropout(dropout_rate),

            Dense(32, activation='relu', kernel_regularizer=l1_l2(l1=1e-5, l2=1e-4)),
            Dropout(dropout_rate),

            # Single linear unit: unbounded regression output.
            Dense(1, activation='linear')
        ])
        return model
    except Exception as e:
        print(f"Error in create_gru_model: {e}")
        raise

In [18]:
# Training function with callbacks
def train_model(model, X_train, y_train, X_val, y_val, model_path,
                batch_size=32, epochs=5, patience=15, loss_function='mse', learning_rate=1e-3):
    """Compile ``model`` and fit it with early stopping, checkpointing and LR decay.

    Args:
        model: Uncompiled Keras model to train (compiled in place).
        X_train, y_train: Training samples and targets.
        X_val, y_val: Validation samples and targets; ``val_loss`` drives the callbacks.
        model_path: File path where the best checkpoint is written.
        batch_size: Mini-batch size passed to ``fit``.
        epochs: Maximum number of epochs.
        patience: Epochs without ``val_loss`` improvement before early stopping.
        loss_function: Short loss name, ``'mse'`` or ``'mae'``.
        learning_rate: Initial learning rate for the AdamW optimizer.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: If ``loss_function`` is not a supported name.
        Exception: Any other failure is printed and re-raised.
    """
    try:
        # Resolve the loss first so a bad name fails fast, before any callback
        # or optimizer is constructed.
        supported_losses = {
            'mse': 'mean_squared_error',
            'mae': 'mean_absolute_error',
        }
        loss = supported_losses.get(loss_function) if isinstance(loss_function, str) else None
        if loss is None:
            raise ValueError("Invalid loss function specified.")

        callbacks = [
            # Stop when val_loss stalls and roll back to the best weights seen.
            EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True),
            # Only overwrite the checkpoint when the monitored metric improves.
            ModelCheckpoint(model_path, save_best_only=True),
            # Halve the learning rate after 5 stagnant epochs, never below 1e-6.
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
        ]

        model.compile(
            optimizer=AdamW(learning_rate=learning_rate),
            loss=loss,
            metrics=[
                'mse',
                'mae',
                'root_mean_squared_error'
            ]
        )

        history = model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            batch_size=batch_size,
            epochs=epochs,
            callbacks=callbacks,
            verbose=1
        )
        return history
    except Exception as e:
        print(f"Error in train_model: {e}")
        raise

In [14]:
# Evaluation function with focus on false negatives
def evaluate_model(model, X_test, y_test, threshold=0.5):
    """Score a model's predictions with MSE, MAE and a thresholded false-negative rate.

    Args:
        model: Object exposing ``predict(X)`` that returns one value per sample.
        X_test: Inputs passed to ``model.predict``.
        y_test: True target values, one per sample (any shape; flattened internally).
        threshold: Cut-off above which a value counts as "positive" when the
            false-negative rate is computed.

    Returns:
        dict with keys ``'mse'``, ``'mae'`` and ``'false_negative_rate'``. The rate is
        NaN when no true value exceeds ``threshold`` (it is undefined in that case).

    Raises:
        ValueError: If the number of predictions differs from the number of targets.
        Exception: Any failure is printed and re-raised.
    """
    try:
        # Flatten both sides: comparing a (n, 1) column with a (n,) vector would
        # otherwise broadcast to an (n, n) matrix and silently corrupt the metrics.
        predictions = np.asarray(model.predict(X_test)).ravel()
        actual = np.asarray(y_test).ravel()
        if predictions.shape != actual.shape:
            raise ValueError(
                f"Prediction count ({predictions.size}) does not match target count ({actual.size})."
            )

        # Regression metrics
        errors = actual - predictions
        mse = np.mean(errors ** 2)
        mae = np.mean(np.abs(errors))

        # False-negative rate: truly above the threshold but predicted at or below it.
        binary_actual = actual > threshold
        binary_pred = predictions > threshold
        false_negatives = np.sum(binary_actual & ~binary_pred)
        positives = np.sum(binary_actual)
        # Avoid a 0/0 division (and its RuntimeWarning) when there are no positives.
        false_negative_rate = false_negatives / positives if positives > 0 else float('nan')

        return {
            'mse': mse,
            'mae': mae,
            'false_negative_rate': false_negative_rate
        }
    except Exception as e:
        print(f"Error in evaluate_model: {e}")
        raise

In [15]:
def plot_predictions(model, X_data, y_data, label, start=50, end=500, ylabel='Price', title_suffix=''):
    """
    Predict on a dataset, print the prediction/actual table and plot a slice of it.

    Args:
    - model: Trained model exposing ``predict``; its output is flattened to 1-D.
    - X_data: Inputs passed to ``model.predict``.
    - y_data: True target values, stored in the 'Actual Values' column.
    - label: Dataset tag used in the column name, legend and title (e.g. 'Train', 'Val', 'Test').
    - start, end: Row slice of the results that is drawn (default: 50 to 500).
    - ylabel: Label for the y-axis (default: 'Price').
    - title_suffix: Optional text appended to the plot title.

    Returns:
    - A DataFrame with one predictions column and one 'Actual Values' column.
    """
    pred_col = f'{label} Predictions'
    actual_col = 'Actual Values'

    # Collect predictions and ground truth side by side
    results_df = pd.DataFrame({pred_col: model.predict(X_data).flatten(), actual_col: y_data})
    print(results_df)

    # Draw both series over the requested row window
    plt.figure(figsize=(10, 6))
    series_styles = (
        (pred_col, 'blue', '-'),
        (actual_col, 'orange', '--'),
    )
    for column, colour, dash in series_styles:
        plt.plot(results_df[column][start:end], label=column, color=colour, linestyle=dash)

    # Axis labels, title, legend and grid
    plt.xlabel('Time Stamps', fontsize=12)
    plt.ylabel(ylabel, fontsize=12)
    plt.title(f'{label} Predictions vs Actual Values {title_suffix}', fontsize=14)
    plt.legend(loc='upper right')
    plt.grid(True)
    plt.show()

    return results_df

In [None]:
# Main execution
def main():
    """Train, evaluate and report the LSTM and GRU models for each configured loss.

    For every entry of ``loss_functions`` this creates
    ``artifacts/results/cycle_1/test_1/<loss>``, trains both models on the
    notebook-level ``X_train``/``y_train`` (validating on ``X_val``/``y_val``) and
    evaluates them on ``X_test``/``y_test``. Per model it writes the checkpoint,
    the training history (CSV and PNG) and the evaluation metrics (TXT), then shows
    prediction-vs-actual plots for the train, validation and test sets.

    A model whose training or evaluation fails is reported and skipped.

    Returns:
        dict: ``{model_name: metrics}`` for the last loss function processed
        (empty if no loss function ran or every model failed).

    Raises:
        Exception: Anything not handled per model is printed and re-raised.
    """
    try:
        # Define loss functions to iterate over
        loss_functions = ['mse']

        # Bound up front so the final return is defined even if the list is empty.
        results = {}

        for loss_function in loss_functions:
            # Create a directory for the current loss function
            results_dir = f'artifacts/results/cycle_1/test_1/{loss_function}'
            os.makedirs(results_dir, exist_ok=True)

            # Build fresh, untrained models for this loss
            input_shape = (X_train.shape[1], X_train.shape[2])
            models = {
                'lstm': create_lstm_model(input_shape=input_shape),
                'gru': create_gru_model(input_shape=input_shape),
            }

            results = {}
            for name, model in models.items():
                print(f"\nTraining {name.upper()} model with {loss_function.upper()} loss...")

                try:
                    model_path = os.path.join(results_dir, f'model_{name}_{loss_function}.keras')
                    history = train_model(
                        model, X_train, y_train, X_val, y_val,
                        model_path,
                        epochs=5, loss_function=loss_function, learning_rate=1e-3
                    )
                except Exception as e:
                    print(f"Error training {name.upper()} model: {e}")
                    continue

                try:
                    results[name] = evaluate_model(model, X_test, y_test)
                except Exception as e:
                    print(f"Error evaluating {name.upper()} model: {e}")
                    continue

                # Save training history to CSV
                history_df = pd.DataFrame(history.history)
                history_df.to_csv(os.path.join(results_dir, f'{name}_history.csv'), index=False)

                # Plot training history
                plt.figure(figsize=(10, 6))
                plt.plot(history.history['loss'], label='Training Loss')
                plt.plot(history.history['val_loss'], label='Validation Loss')
                plt.title(f'{name.upper()} Model Training History')
                plt.xlabel('Epoch')
                plt.ylabel('Loss')
                plt.legend()
                plt.savefig(os.path.join(results_dir, f'{name}_training_history.png'))
                plt.close()  # Close the plot to free memory

                # Save evaluation results to a text file
                with open(os.path.join(results_dir, f'{name}_evaluation.txt'), 'w') as f:
                    for metric_name, value in results[name].items():
                        f.write(f"{metric_name}: {value:.4f}\n")

                # Plot predictions for the Train, Val and Test datasets. This runs
                # after the metrics file has been closed rather than inside the
                # `with` block, so the file handle is not held open while plotting.
                for dataset, data, true_values in zip(['Train', 'Val', 'Test'],
                                                      [X_train, X_val, X_test],
                                                      [y_train, y_val, y_test]):
                    plot_predictions(
                        model=model,
                        X_data=data,
                        y_data=true_values,
                        label=name + ' ' + dataset,
                        start=100,
                        end=500
                    )
                    plt.close()  # Close the plot to free memory

            print(f"Results for loss function '{loss_function}' saved in '{results_dir}'.")

        return results
    except Exception as e:
        print(f"Error in main: {e}")
        raise

# Run the full pipeline and print every model's metrics. Failures are reported
# on stdout instead of propagating.
if __name__ == "__main__":
    try:
        results = main()
        print("\nModel Evaluation Results:")
        for model_name in results:
            metrics = results[model_name]
            print(f"\n{model_name.upper()}:")
            for metric_name in metrics:
                value = metrics[metric_name]
                print(f"{metric_name}: {value:.4f}")
    except Exception as err:
        print(f"Unhandled error in execution: {err}")


Training LSTM model with MAE loss...
Epoch 1/5
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 225ms/step - loss: 0.5028 - mae: 0.4908 - mse: 0.5035 - root_mean_squared_error: 0.7096 - val_loss: 0.1371 - val_mae: 0.1252 - val_mse: 0.0309 - val_root_mean_squared_error: 0.1759 - learning_rate: 0.0010
Epoch 2/5
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - loss: 0.3091 - mae: 0.2973 - mse: 0.1771 - root_mean_squared_error: 0.4208 - val_loss: 0.1571 - val_mae: 0.1454 - val_mse: 0.0393 - val_root_mean_squared_error: 0.1983 - learning_rate: 0.0010
Epoch 3/5
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - loss: 0.2329 - mae: 0.2213 - mse: 0.0977 - root_mean_squared_error: 0.3125 - val_loss: 0.1543 - val_mae: 0.1428 - val_mse: 0.0381 - val_root_mean_squared_error: 0.1952 - learning_rate: 0.0010
Epoch 4/5
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step - loss: 0.1895 - mae: 0.1781 - mse: 0.0644

In [4]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Copy the numpy splits into float32 tensors; targets become (n, 1) columns so
# they line up with the model's (batch, 1) output.
X_train_t = torch.tensor(X_train, dtype=torch.float32)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_test_t = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)

# Pair inputs with targets and batch them; only the training batches are shuffled.
train_dataset = TensorDataset(X_train_t, y_train_t)
test_dataset = TensorDataset(X_test_t, y_test_t)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32)

# ✅ Define LSTM Model
class LSTMModel(nn.Module):
    """LSTM followed by a linear head that maps the last time step to one value."""

    def __init__(self, input_size, hidden_size, num_layers=1):
        """Create the recurrent layer(s) and the single-output linear head.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,  # inputs arrive as (batch, seq, feature)
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return one prediction per sequence, shape ``(batch, 1)``."""
        hidden_seq, _state = self.lstm(x)
        # Only the output at the final time step feeds the regression head.
        last_step = hidden_seq[:, -1]
        return self.fc(last_step)

# Hyper-parameters: one input per feature column, a 64-unit hidden state, one layer.
input_size = X_train.shape[2]
hidden_size = 64
num_layers = 1

model = LSTMModel(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)

# Mean-squared-error objective optimized with Adam.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training loop: one optimizer step per mini-batch; the per-epoch average of the
# batch losses is printed.
epochs = 50
for epoch in range(epochs):
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        # Gradients accumulate by default, so clear them before each backward pass.
        optimizer.zero_grad()

        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss/len(train_loader):.4f}")


Epoch [1/50], Loss: 0.0255
Epoch [2/50], Loss: 0.0160
Epoch [3/50], Loss: 0.0155
Epoch [4/50], Loss: 0.0150
Epoch [5/50], Loss: 0.0152
Epoch [6/50], Loss: 0.0146
Epoch [7/50], Loss: 0.0138
Epoch [8/50], Loss: 0.0133
Epoch [9/50], Loss: 0.0133
Epoch [10/50], Loss: 0.0129
Epoch [11/50], Loss: 0.0124
Epoch [12/50], Loss: 0.0122
Epoch [13/50], Loss: 0.0172
Epoch [14/50], Loss: 0.0119
Epoch [15/50], Loss: 0.0120
Epoch [16/50], Loss: 0.0118
Epoch [17/50], Loss: 0.0113
Epoch [18/50], Loss: 0.0113
Epoch [19/50], Loss: 0.0115
Epoch [20/50], Loss: 0.0120
Epoch [21/50], Loss: 0.0110
Epoch [22/50], Loss: 0.0109
Epoch [23/50], Loss: 0.0107
Epoch [24/50], Loss: 0.0107
Epoch [25/50], Loss: 0.0106
Epoch [26/50], Loss: 0.0111
Epoch [27/50], Loss: 0.0125
Epoch [28/50], Loss: 0.0108
Epoch [29/50], Loss: 0.0106
Epoch [30/50], Loss: 0.0105
Epoch [31/50], Loss: 0.0107
Epoch [32/50], Loss: 0.0104
Epoch [33/50], Loss: 0.0103
Epoch [34/50], Loss: 0.0105
Epoch [35/50], Loss: 0.0104
Epoch [36/50], Loss: 0.0103
E

In [5]:
# Switch the network to inference behavior before scoring the test set.
model.eval()

predictions, actuals = [], []

# No gradients are needed for evaluation.
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        predictions.extend(outputs.numpy())
        actuals.extend(y_batch.numpy())

predictions = np.asarray(predictions)
actuals = np.asarray(actuals)

# Error metrics on the scaled targets
mae = np.abs(predictions - actuals).mean()
rmse = np.sqrt(np.square(predictions - actuals).mean())

print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")


MAE: 0.0984
RMSE: 0.1384
