In [1]:
# Importing
from walmart_forecaster import WalmartDemandForecaster

# Running the model
forecaster = WalmartDemandForecaster()
forecaster.load_data("walmart_dummy_sales.csv", "walmart_dummy_weather.csv", "walmart_dummy_events.csv")
forecaster.create_features()
forecaster.train_models()

# Making predictions
predictions = forecaster.predict_demand("store_001", "ice_cream", 7)
print(predictions)

Loading datasets...
Loaded 10950 sales records
Loaded 2190 weather records
Loaded 211 event records
Creating features...
Creating lag features...
Created 17 features
Feature columns: ['temperature', 'is_rainy', 'humidity', 'is_weekend', 'final_event_impact', 'day_of_week', 'day_of_month', 'month', 'sales_lag_7', 'sales_lag_14', 'sales_lag_30', 'sales_rolling_7', 'sales_rolling_14', 'sales_rolling_30', 'product_encoded', 'season_encoded', 'store_id_encoded']
Training models...
Training data: 5550 records
Testing data: 5535 records
Training models for 15 store-product combinations
Trained store_001_hot_coffee: MAPE=11.9%, RMSE=17.1
Trained store_001_ice_cream: MAPE=16.5%, RMSE=15.6
Trained store_001_soup_cans: MAPE=11.2%, RMSE=11.6
Trained store_001_sunscreen: MAPE=16.9%, RMSE=4.8
Trained store_001_umbrellas: MAPE=11.9%, RMSE=13.0
Trained store_002_hot_coffee: MAPE=13.3%, RMSE=29.7
Trained store_002_ice_cream: MAPE=18.0%, RMSE=8.2
Trained store_002_soup_cans: MAPE=11.7%, RMSE=18.8
Traine

In [12]:
import numpy as np
# Testing Model Accuracy
print("=" * 50)
print("TESTING MODEL ACCURACY ON KNOWN DATA")
print("=" * 50)

# Getting test data from 2023 (data the model has seen during testing)
test_sample = forecaster.processed_df[
    (forecaster.processed_df['year'] == 2023) & 
    (forecaster.processed_df['store_id'] == 'store_001') & 
    (forecaster.processed_df['product'] == 'ice_cream')
].head(7)  # First 7 days of 2023

print("ACTUAL vs PREDICTED for store_001 ice_cream:")
print("Date\t\tActual\tPredicted\tDifference")
print("-" * 50)

for _, row in test_sample.iterrows():
    # Getting the model for this store-product
    model_key = f"{row['store_id']}_{row['product']}"
    if model_key in forecaster.models:
        model = forecaster.models[model_key]
        
        # Creating features for this specific day
        features = [row[col] for col in forecaster.feature_columns]
        X_test = np.array(features).reshape(1, -1)
        
        # Making prediction
        predicted = int(model.predict(X_test)[0])
        actual = int(row['sales_quantity'])
        difference = actual - predicted
        
        print(f"{row['date'].strftime('%Y-%m-%d')}\t{actual}\t{predicted}\t\t{difference:+d}")

TESTING MODEL ACCURACY ON KNOWN DATA
ACTUAL vs PREDICTED for store_001 ice_cream:
Date		Actual	Predicted	Difference
--------------------------------------------------
2023-01-01	19	22		-3
2023-01-02	16	17		-1
2023-01-03	16	15		+1
2023-01-04	13	14		-1
2023-01-05	18	18		+0
2023-01-06	21	17		+4
2023-01-07	20	20		+0


In [13]:
# Testing Different Combinations
print("\n" + "=" * 50)
print("TESTING DIFFERENT STORE-PRODUCT COMBINATIONS")
print("=" * 50)

# Testing all available combinations
available_stores = forecaster.processed_df['store_id'].unique()
available_products = forecaster.processed_df['product'].unique()

print(f"Available stores: {list(available_stores)}")
print(f"Available products: {list(available_products)}")

# Testing each combination
for store in available_stores:
    for product in available_products:
        predictions = forecaster.predict_demand(store, product, forecast_days=3)
        if predictions:
            print(f"\n{store} - {product}:")
            for pred in predictions:
                print(f"   {pred['date']}: {pred['predicted_demand']} units")
        else:
            print(f"No model available for {store} - {product}")


TESTING DIFFERENT STORE-PRODUCT COMBINATIONS
Available stores: ['store_001', 'store_002', 'store_003']
Available products: ['hot_coffee', 'ice_cream', 'soup_cans', 'sunscreen', 'umbrellas']

store_001 - hot_coffee:
   2024-01-01: 129 units
   2024-01-02: 134 units
   2024-01-03: 102 units

store_001 - ice_cream:
   2024-01-01: 13 units
   2024-01-02: 21 units
   2024-01-03: 15 units

store_001 - soup_cans:
   2024-01-01: 90 units
   2024-01-02: 90 units
   2024-01-03: 93 units

store_001 - sunscreen:
   2024-01-01: 1 units
   2024-01-02: 3 units
   2024-01-03: 2 units

store_001 - umbrellas:
   2024-01-01: 6 units
   2024-01-02: 7 units
   2024-01-03: 7 units

store_002 - hot_coffee:
   2024-01-01: 170 units
   2024-01-02: 172 units
   2024-01-03: 301 units

store_002 - ice_cream:
   2024-01-01: 14 units
   2024-01-02: 36 units
   2024-01-03: 37 units

store_002 - soup_cans:
   2024-01-01: 150 units
   2024-01-02: 95 units
   2024-01-03: 95 units

store_002 - sunscreen:
   2024-01-01:

In [15]:
# Interactive Testing Function
def test_prediction(store_id, product, days=7):
    """Easy function to test predictions"""
    print(f"\nPREDICTING {days} days for {store_id} - {product}")
    print("=" * 50)
    
    # Check if model exists
    model_key = f"{store_id}_{product}"
    if model_key not in forecaster.models:
        print(f"No model trained for {store_id} - {product}")
        return None
    
    # Get model performance
    performance = forecaster.model_performance[model_key]
    print(f"Model Accuracy: {performance['mape']:.1f}% MAPE, {performance['rmse']:.1f} RMSE")
    
    # Make predictions
    predictions = forecaster.predict_demand(store_id, product, days)
    
    if predictions:
        print(f"\nPredictions for next {days} days:")
        total_demand = 0
        for pred in predictions:
            print(f"   {pred['date']}: {pred['predicted_demand']} units")
            total_demand += pred['predicted_demand']
        
        print(f"\nTotal predicted demand: {total_demand} units")
        print(f"Average daily demand: {total_demand/days:.1f} units")
        
        return predictions
    else:
        print("Could not generate predictions")
        return None

# Test it out!
test_prediction("store_001", "ice_cream", 7)
test_prediction("store_002", "umbrellas", 5)
test_prediction("store_003", "hot_coffee", 10)


PREDICTING 7 days for store_001 - ice_cream
Model Accuracy: 16.5% MAPE, 15.6 RMSE

Predictions for next 7 days:
   2024-01-01: 22 units
   2024-01-02: 50 units
   2024-01-03: 23 units
   2024-01-04: 12 units
   2024-01-05: 15 units
   2024-01-06: 23 units
   2024-01-07: 20 units

Total predicted demand: 165 units
Average daily demand: 23.6 units

PREDICTING 5 days for store_002 - umbrellas
Model Accuracy: 11.8% MAPE, 14.0 RMSE

Predictions for next 5 days:
   2024-01-01: 179 units
   2024-01-02: 10 units
   2024-01-03: 11 units
   2024-01-04: 181 units
   2024-01-05: 186 units

Total predicted demand: 567 units
Average daily demand: 113.4 units

PREDICTING 10 days for store_003 - hot_coffee
Model Accuracy: 12.5% MAPE, 20.8 RMSE

Predictions for next 10 days:
   2024-01-01: 190 units
   2024-01-02: 184 units
   2024-01-03: 181 units
   2024-01-04: 240 units
   2024-01-05: 245 units
   2024-01-06: 156 units
   2024-01-07: 157 units
   2024-01-08: 253 units
   2024-01-09: 188 units
   20

[{'date': '2024-01-01', 'predicted_demand': 190, 'confidence': 'medium'},
 {'date': '2024-01-02', 'predicted_demand': 184, 'confidence': 'medium'},
 {'date': '2024-01-03', 'predicted_demand': 181, 'confidence': 'medium'},
 {'date': '2024-01-04', 'predicted_demand': 240, 'confidence': 'medium'},
 {'date': '2024-01-05', 'predicted_demand': 245, 'confidence': 'medium'},
 {'date': '2024-01-06', 'predicted_demand': 156, 'confidence': 'medium'},
 {'date': '2024-01-07', 'predicted_demand': 157, 'confidence': 'medium'},
 {'date': '2024-01-08', 'predicted_demand': 253, 'confidence': 'medium'},
 {'date': '2024-01-09', 'predicted_demand': 188, 'confidence': 'medium'},
 {'date': '2024-01-10', 'predicted_demand': 188, 'confidence': 'medium'}]

In [16]:
# Comparing with Historical Patterns
print("\n" + "=" * 50)
print("COMPARING PREDICTIONS WITH HISTORICAL PATTERNS")
print("=" * 50)

# Picking a store-product combination
store_test = "store_001"
product_test = "ice_cream"

# Getting historical data
historical = forecaster.processed_df[
    (forecaster.processed_df['store_id'] == store_test) & 
    (forecaster.processed_df['product'] == product_test)
].sort_values('date')

print(f"Historical sales for {store_test} - {product_test}:")
print(f"   Average daily sales: {historical['sales_quantity'].mean():.1f} units")
print(f"   Maximum daily sales: {historical['sales_quantity'].max()} units")
print(f"   Minimum daily sales: {historical['sales_quantity'].min()} units")

# Get recent predictions
recent_predictions = forecaster.predict_demand(store_test, product_test, 7)
if recent_predictions:
    pred_values = [p['predicted_demand'] for p in recent_predictions]
    print(f"\nCurrent predictions:")
    print(f"   Average predicted sales: {np.mean(pred_values):.1f} units")
    print(f"   Maximum predicted sales: {max(pred_values)} units")
    print(f"   Minimum predicted sales: {min(pred_values)} units")
    
    # Compare
    historical_avg = historical['sales_quantity'].mean()
    predicted_avg = np.mean(pred_values)
    difference = predicted_avg - historical_avg
    
    print(f"\nComparison:")
    print(f"   Historical average: {historical_avg:.1f} units")
    print(f"   Predicted average: {predicted_avg:.1f} units")
    print(f"   Difference: {difference:+.1f} units ({difference/historical_avg*100:+.1f}%)")


COMPARING PREDICTIONS WITH HISTORICAL PATTERNS
Historical sales for store_001 - ice_cream:
   Average daily sales: 66.4 units
   Maximum daily sales: 261 units
   Minimum daily sales: 9 units

Current predictions:
   Average predicted sales: 19.4 units
   Maximum predicted sales: 36 units
   Minimum predicted sales: 13 units

Comparison:
   Historical average: 66.4 units
   Predicted average: 19.4 units
   Difference: -47.0 units (-70.7%)


In [22]:
#Investigating the Sunscreen Anomaly
print("\n" + "=" * 50)
print("INVESTIGATING THE SUNSCREEN ANOMALY")
print("=" * 50)

# Checking the problematic store_002_sunscreen
sunscreen_data = forecaster.processed_df[
    (forecaster.processed_df['store_id'] == 'store_002') & 
    (forecaster.processed_df['product'] == 'sunscreen')
]

print(f"Sunscreen data for store_002:")
print(f"   Total records: {len(sunscreen_data)}")
print(f"   Sales quantity range: {sunscreen_data['sales_quantity'].min()} to {sunscreen_data['sales_quantity'].max()}")
print(f"   Average sales: {sunscreen_data['sales_quantity'].mean():.2f}")
print(f"   Zero sales days: {(sunscreen_data['sales_quantity'] == 0).sum()}")

# Showing some sample data
print(f"\nSample sunscreen sales data:")
print(sunscreen_data[['date', 'sales_quantity', 'temperature', 'season']].head(10))

# Testing prediction anyway
print(f"\nTesting sunscreen predictions:")
sunscreen_pred = forecaster.predict_demand('store_002', 'sunscreen', 3)
if sunscreen_pred:
    for pred in sunscreen_pred:
        print(f"   {pred['date']}: {pred['predicted_demand']} units")


INVESTIGATING THE SUNSCREEN ANOMALY
Sunscreen data for store_002:
   Total records: 751
   Sales quantity range: 0 to 58
   Average sales: 10.91
   Zero sales days: 7

Sample sunscreen sales data:
           date  sales_quantity  temperature  season
3674 2022-01-01               2         37.1  winter
3675 2022-01-01               2         37.1  winter
3676 2022-01-01               2         37.1  winter
3683 2022-01-02               3         37.4  winter
3688 2022-01-03               1         37.0  winter
3693 2022-01-04               2         40.4  winter
3698 2022-01-05               0         32.8  winter
3703 2022-01-06               0         38.3  winter
3708 2022-01-07               1         36.9  winter
3713 2022-01-08               2         30.1  winter

Testing sunscreen predictions:
   2024-01-01: 4 units
   2024-01-02: 2 units
   2024-01-03: 9 units


In [25]:
# test