In [7]:
from faker import Faker
import random
import pandas as pd
import numpy as np

In [8]:
fake = Faker()

In [9]:
def generate_inventory_data(num_records=1000):
    data = []
    for _ in range(num_records):
        record = {
            "Product_ID": fake.uuid4()[:8],
            "Product_Category": random.choice(["Electronics", "Groceries", "Clothing", "Furniture"]),
            "Price": round(random.uniform(10, 5000), 2),
            "Stock_Quantity": random.randint(0, 500),
            "Demand": random.randint(0, 300),
            "Season": random.choice(["Summer", "Winter", "Spring", "Fall"]),
            "Promotions": random.choice(["Yes", "No"]),
            "Weekday": random.choice(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]),
            "Sales_Channel": random.choice(["Online", "In-Store"]),
            "Weather": random.choice(["Sunny", "Rainy", "Snowy", "Cloudy"]),
            "Lead_Time": random.randint(1, 30),
            "Supplier_Reliability": round(random.uniform(1, 5), 2),
            "Price_Change": random.choice(["Yes", "No"]),
            "Economic_Indicator": round(random.uniform(0.5, 5.0), 2),
            "Customer_Sentiment": random.choice(["Positive", "Neutral", "Negative"]),
        }
        data.append(record)
    df = pd.DataFrame(data)
    
    # Introduce inconsistencies
    # 1. Missing values
    for col in ["Demand", "Stock_Quantity"]:
        df.loc[df.sample(frac=0.1).index, col] = np.nan

    # 2. Outliers
    df.loc[df.sample(frac=0.05).index, "Price"] = df["Price"].max() * 10
    df.loc[df.sample(frac=0.05).index, "Demand"] = df["Demand"].max() * 5

    # 3. Duplicates
    duplicate_rows = df.sample(frac=0.05)
    df = pd.concat([df, duplicate_rows], ignore_index=True)

    # 4. Noisy data
    df.loc[df.sample(frac=0.1).index, "Weather"] = "Unknown"
    df.loc[df.sample(frac=0.1).index, "Sales_Channel"] = "Other"

    return df


In [10]:
# Generate dataset
inventory_data = generate_inventory_data()
# Save the dataset to a CSV file
file_path = "Inventory_Demand_Forecasting_Synthetic_Dataset.csv"
inventory_data.to_csv(file_path, index=False)

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report

# Load the raw dataset
file_path = "Inventory_Demand_Forecasting_Synthetic_Dataset.csv"
df = pd.read_csv(file_path)

# Separate features and target
X = df.drop(columns=["Demand"])  # Features
y = df["Demand"]                # Target variable

# Handle missing values in the target variable
y = y.fillna(y.mean())  # Fill missing values in the target variable with its mean

# Handle missing values in features
numerical_columns = X.select_dtypes(include=["float64", "int64"]).columns
categorical_columns = X.select_dtypes(include=["object"]).columns

# Impute numerical features with mean
numerical_imputer = SimpleImputer(strategy="mean")
X[numerical_columns] = numerical_imputer.fit_transform(X[numerical_columns])

# Impute categorical features with most frequent value
categorical_imputer = SimpleImputer(strategy="most_frequent")
X[categorical_columns] = categorical_imputer.fit_transform(X[categorical_columns])

# Encode categorical variables using one-hot encoding
X = pd.get_dummies(X, drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate Random Forest
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print("\nRandom Forest:")
print(f"  Mean Squared Error (MSE): {mse_rf:.2f}")
print(f"  R-squared (R²): {r2_rf:.2f}")
print(classification_report(y_test,y_pred_rf))


Random Forest:
  Mean Squared Error (MSE): 102959.37
  R-squared (R²): 0.08


ValueError: continuous is not supported