# In [None]:
# Libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import zipfile
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt



# Prepare the dataset.
#
# Outputs used by the later sections of this script:
#   data            - fully preprocessed frame (scaled numerics + one-hot columns)
#   sales_scaler    - StandardScaler for 'Weekly_Sales', needed to undo the scaling
#   X_train_np, X_val_np, y_train_np, y_val_np - float32 arrays for the model

# Unzip the archive ('Walmart_Store_sales.zip') into a working directory.
with zipfile.ZipFile('Walmart_Store_sales.zip', 'r') as zip_ref:
    zip_ref.extractall('unzipped_dataset')

# Load the CSV file (replace 'Walmart.csv' with your actual file name)
file_path = 'unzipped_dataset/Walmart.csv'  # Update with the correct file name
data = pd.read_csv(file_path)

# Print the column names to verify
print("Columns in the dataset:", data.columns)

# Forward-fill gaps on the raw frame, before any type conversion: a missing
# date filled only after the numeric conversion would already have failed or
# been turned into a bogus number. `DataFrame.ffill` replaces the deprecated
# `fillna(method='ffill')` spelling.
data = data.ffill()

# Convert 'Date' to datetime format with dayfirst=True to match 'dd-mm-yyyy' format
data['Date'] = pd.to_datetime(data['Date'], dayfirst=True)

# Convert 'Date' to seconds since the Unix epoch (float). Timestamp/Timedelta
# arithmetic does not depend on the platform's default integer width or on the
# internal datetime resolution, unlike `astype(int) / 10**9`.
data['Date'] = (data['Date'] - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)

feature_columns = ['Date', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment']

# Fix the train/validation rows *before* fitting any scaler, so the scaling
# statistics come from training rows only and nothing about the validation
# rows leaks into training. Splitting row positions with the same test_size
# and random_state selects the same rows (in the same order) as splitting
# X and y directly, because the shuffle depends only on the row count and seed.
# NOTE(review): this is a random split; if the rows are time-ordered weekly
# observations, a chronological split may be a better validation scheme.
row_positions = np.arange(len(data))
train_rows, val_rows = train_test_split(row_positions, test_size=0.2, random_state=42)

# Fit on the training rows, then apply the same transform to every row.
feature_scaler = StandardScaler().fit(data.iloc[train_rows][feature_columns])
data[feature_columns] = feature_scaler.transform(data[feature_columns])

# Kept as a named object: the evaluation step uses it to map predictions back
# to the original sales units.
sales_scaler = StandardScaler().fit(data.iloc[train_rows][['Weekly_Sales']])
data['Weekly_Sales'] = sales_scaler.transform(data[['Weekly_Sales']]).ravel()

# One-hot encode the categorical identifiers.
data = pd.get_dummies(data, columns=['Store', 'Holiday_Flag'])

# Split the data using the row positions chosen above.
X = data.drop(columns=['Weekly_Sales'])
y = data['Weekly_Sales']
X_train, X_val = X.iloc[train_rows], X.iloc[val_rows]
y_train, y_val = y.iloc[train_rows], y.iloc[val_rows]

# Convert to numpy arrays and ensure correct data types (the one-hot columns
# may be boolean, so everything is cast to float32 explicitly).
X_train_np = X_train.to_numpy(dtype=np.float32)
X_val_np = X_val.to_numpy(dtype=np.float32)
y_train_np = y_train.to_numpy(dtype=np.float32)
y_val_np = y_val.to_numpy(dtype=np.float32)



# LSTM and training
#
# Builds a two-layer LSTM regressor on the prepared arrays and trains it.
# Outputs used later: `model` (trained network) and `history` (fit log).

# Reshape the data to 3D for LSTM: (samples, timesteps, features). Every row
# becomes a sequence of exactly one timestep.
X_train_np = np.expand_dims(X_train_np, axis=1)
X_val_np = np.expand_dims(X_val_np, axis=1)

# Check the shapes of the arrays
print("Reshaped training data shape:", X_train_np.shape)
print("Reshaped validation data shape:", X_val_np.shape)

# Define the LSTM model
model = Sequential()

# Declare the per-sample input shape (timesteps, features) with an explicit
# Input object. Passing `input_shape=` to the first layer is deprecated in
# Keras 3 and triggers a warning there.
model.add(tf.keras.Input(shape=X_train_np.shape[1:]))

# First LSTM returns the full sequence so the second LSTM receives 3-D input.
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
# Second LSTM collapses the sequence to a single 50-unit vector per sample.
model.add(LSTM(units=50))
model.add(Dropout(0.2))

# Add the output layer: one unit, i.e. a single regression value per sample.
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Model summary
model.summary()

# Train the model. shuffle=False keeps the batches in the array's row order
# on every epoch.
history = model.fit(
    X_train_np,
    y_train_np,
    epochs=50,
    batch_size=32,
    validation_data=(X_val_np, y_val_np),
    shuffle=False,
)



# Model evaluation, predictions and plotting.

# Score the trained network on the validation arrays. The model was compiled
# with a loss only and no extra metrics, so this is the mean squared error
# in scaled units.
val_loss = model.evaluate(X_val_np, y_val_np)
print("Validation Loss:", val_loss)

# Predict in scaled space and immediately map the result back to the original
# sales units with the scaler fitted on 'Weekly_Sales'.
predictions = sales_scaler.inverse_transform(model.predict(X_val_np))

# The targets are a 1-D array; the scaler expects a single column, so add a
# trailing axis before undoing the scaling.
y_val_actual = sales_scaler.inverse_transform(y_val_np[:, np.newaxis])

# Show the first five predictions next to the first five actual values.
print("Predicted values:", predictions[:5])
print("Actual values:", y_val_actual[:5])

# Visualize the Results
# Open a new figure with figsize (14, 5) for the chart.
plt.figure(figsize=(14, 5))
# Draw the actual validation sales (already mapped back to original units by
# sales_scaler.inverse_transform) as a blue line. Only y-values are passed,
# so the x-axis is the row position within the validation split.
# NOTE(review): the label is only shown if a legend is drawn later; that call
# is not visible in this section - confirm it exists.
plt.plot(y_val_actual, color='blue', label='Actual Weekly Sales')
