# In [1]:
# Step 1: Import the required libraries and the SalesPrediction class from sales_prediction.py
import sys
import os
sys.path.append(os.path.abspath("../scripts"))
from sales_prediction import SalesPrediction
import matplotlib.pyplot as plt
import pandas as pd


# Step 2: Define file paths for your data
# The data directory defaults to the original author's local checkout. Set the
# ROSSMANN_DATA_DIR environment variable to point the notebook at another
# location without editing the code. An unset or empty variable falls back to
# the default.
data_dir = os.environ.get('ROSSMANN_DATA_DIR') or (
    'C:/Users/user/Desktop/Github/Rossmann_Pharmaceuticals/Data'
)
# Drop any trailing separator so the joined paths never contain a doubled one.
data_dir = data_dir.rstrip('/\\')
store_data_path = f'{data_dir}/store.csv'
train_data_path = f'{data_dir}/train.csv'

# Step 3: Create an instance of the SalesPrediction class
# The predictor receives both CSV paths; the remaining steps all go through
# this one object.
sales_predictor = SalesPrediction(store_data_path, train_data_path)

# Step 4: Load and preprocess data
# `df` is kept because the diagnostic calls in step 5 take it directly, while
# (X, y) feed the split/train/evaluate steps.
df = sales_predictor.load_data()
X, y = sales_predictor.preprocess_data(df)

# Step 5: Check stationarity and visualize ACF/PACF plots
# Both diagnostics are given the full loaded frame `df`, not the (X, y) pair
# produced by preprocessing.
sales_predictor.check_stationarity(df)
sales_predictor.plot_acf_pacf(df)

# Step 6: Split data and train the model
# split_and_scale_data returns four values, unpacked here as train/test
# features and train/test targets; only the training pair is used for fitting.
# NOTE(review): the "_scaled" names suggest only the features are scaled --
# confirm in sales_prediction.py.
X_train_scaled, X_test_scaled, y_train, y_test = sales_predictor.split_and_scale_data(X, y)
sales_predictor.build_and_train_model(X_train_scaled, y_train)

# Step 7: Evaluate the model
# evaluate_model returns a pair: the error value printed below and the
# predictions for the test features. `y_pred` and `y_test` are reused by the
# plot in step 8.
mse, y_pred = sales_predictor.evaluate_model(X_test_scaled, y_test)
print(f"Mean Squared Error: {mse}")

# Step 8: Plot the predicted vs actual sales
# Draw the first 100 entries of y_test and of y_pred on a single axes, using
# the explicit Figure/Axes interface rather than pyplot's implicit state.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_test.values[:100], label='Actual Sales')
ax.plot(y_pred[:100], label='Predicted Sales')
ax.set_title('Actual vs Predicted Sales')
ax.legend()
plt.show()

# Step 9: Feature Importance Visualization
# The predictor is asked for importances given the feature matrix X; the
# returned object is drawn through its own `.plot` method as a bar chart.
feature_importance = sales_predictor.feature_importance(X)
feature_importance.plot(kind='bar', title='Feature Importance')
plt.show()
