In [1]:
# Inventory Demand Forecasting GUI with Gradio (with CSV Upload)
# Google Colab Implementation with User-Uploaded Dataset

# Step 1: Install required libraries
!pip install gradio pandas numpy scikit-learn matplotlib seaborn joblib -q

# Step 2: Import libraries
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import joblib
from datetime import datetime
import io

In [2]:
# Step 3: Download and load dataset from Kaggle
# Note: the Kaggle CLI needs your personal API token (kaggle.json) to be present in Colab.
# To obtain it:
# 1. Go to Kaggle > Your Profile > Account > Create New API Token
# 2. Download kaggle.json and upload it through the dialog opened by files.upload() below
# kaggle.json is a credential: do not commit it or leave it in shared notebook output.

from google.colab import files
files.upload()  # Upload kaggle.json

# Set up Kaggle API: copy the token into ~/.kaggle and restrict it to
# owner read/write only (mode 600), since it is a secret.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json


In [3]:
# Download the retail sales dataset archive into /content with the Kaggle CLI,
# then extract it to /content/retail-data (the CSV read in the next cell).
!kaggle datasets download -d mohammadtalib786/retail-sales-dataset -p /content
!unzip /content/retail-sales-dataset.zip -d /content/retail-data

Dataset URL: https://www.kaggle.com/datasets/mohammadtalib786/retail-sales-dataset
License(s): CC0-1.0
Downloading retail-sales-dataset.zip to /content
  0% 0.00/11.2k [00:00<?, ?B/s]
100% 11.2k/11.2k [00:00<00:00, 47.9MB/s]
Archive:  /content/retail-sales-dataset.zip
  inflating: /content/retail-data/retail_sales_dataset.csv  


In [4]:
# Load the extracted Kaggle CSV for ad-hoc inspection in the notebook.
# The Gradio app further down reads whatever CSV the user uploads instead.
data = pd.read_csv(
    '/content/retail-data/retail_sales_dataset.csv'
)

In [6]:
# Step 3: Load and preprocess data
def _read_uploaded_csv(file):
    """Read an uploaded CSV into a DataFrame from a path string or a file-like handle."""
    if isinstance(file, str):
        return pd.read_csv(file)
    if hasattr(file, 'read'):
        raw = file.read()
        # Binary handles yield bytes, text handles yield str; accept both.
        if isinstance(raw, (bytes, bytearray)):
            raw = raw.decode('utf-8')
        return pd.read_csv(io.StringIO(raw))
    # Last resort: an upload wrapper that only exposes the temp-file path via `.name`.
    path = getattr(file, 'name', None)
    if isinstance(path, str):
        return pd.read_csv(path)
    raise TypeError(f"unsupported upload object of type {type(file).__name__}")


def _add_time_features(frame):
    """Add calendar features derived from frame['Date'] (mutates and returns frame)."""
    dates = frame['Date']
    frame['Year'] = dates.dt.year
    frame['Month'] = dates.dt.month
    frame['WeekOfYear'] = dates.dt.isocalendar().week
    frame['DayOfWeek'] = dates.dt.dayofweek
    frame['IsWeekend'] = frame['DayOfWeek'].isin([5, 6]).astype(int)
    frame['DaysFromStart'] = (dates - dates.min()).dt.days
    return frame


def _most_common(values):
    """Return the most frequent non-null value, or NaN when every value is missing."""
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else np.nan


def load_and_preprocess_data(file):
    """Load a retail-sales CSV and build the aggregated training table.

    Parameters
    ----------
    file : str or file-like or None
        Path to the CSV, an open handle (binary or text mode), or None when
        the upload widget has been cleared.

    Returns
    -------
    tuple
        ``(data, weekly_data, message)``. ``data`` is the row-level frame with
        calendar features added. ``weekly_data`` holds one row per
        (Date, Product Category) pair: summed Quantity and Total Amount, mean
        Price per Unit and Age, most common Gender, plus the same calendar
        features. Despite its name, the aggregation key is the calendar date,
        not the week. On any failure the first two items are None and
        ``message`` starts with "Error".
    """
    if file is None:
        return None, None, "Error: No file uploaded"
    try:
        data = _read_uploaded_csv(file)

        # Validate required columns
        required_columns = ['Date', 'Product Category', 'Quantity', 'Price per Unit', 'Age', 'Gender', 'Total Amount']
        missing_columns = [col for col in required_columns if col not in data.columns]
        if missing_columns:
            return None, None, f"Error: CSV missing required columns: {', '.join(missing_columns)}"
        if data.empty:
            return None, None, "Error: CSV contains no data rows"

        # Convert Date to datetime; reject the file if any value cannot be parsed
        data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
        if data['Date'].isna().any():
            return None, None, "Error: Invalid or missing dates in 'Date' column"

        # Feature engineering on the row-level data
        _add_time_features(data)

        # Aggregate per calendar date and product category
        weekly_data = data.groupby(['Date', 'Product Category']).agg({
            'Quantity': 'sum',
            'Total Amount': 'sum',
            'Price per Unit': 'mean',
            'Age': 'mean',
            # A group whose Gender is entirely missing yields NaN instead of raising.
            'Gender': _most_common
        }).reset_index()

        # Recompute time features on the aggregated rows
        _add_time_features(weekly_data)

        return data, weekly_data, "Dataset loaded successfully!"
    except Exception as e:
        return None, None, f"Error loading CSV: {str(e)}"

In [7]:
# Step 4: Train model
def train_model(weekly_data):
    """Fit the demand-forecasting pipeline on the aggregated sales table.

    The target is ``Quantity``; ``Date`` and ``Total Amount`` are excluded from
    the features. The last 20% of rows (no shuffling) are held out as the test
    split. The fitted pipeline is also written to 'inventory_model_retail.pkl'.

    Returns
    -------
    tuple
        ``(model, X_train, X_test, y_train, y_test, message)``. If anything
        raises, the first five items are None and ``message`` carries the error.
    """
    numeric_cols = ['Price per Unit', 'Age', 'Year', 'Month', 'WeekOfYear', 'DayOfWeek', 'IsWeekend', 'DaysFromStart']
    categorical_cols = ['Product Category', 'Gender']

    try:
        predictors = weekly_data.drop(['Quantity', 'Date', 'Total Amount'], axis=1)
        target = weekly_data['Quantity']

        # Ordered hold-out split: [X_train, X_test, y_train, y_test]
        splits = train_test_split(
            predictors, target, test_size=0.2, random_state=42, shuffle=False
        )
        train_X, train_y = splits[0], splits[2]

        # Numeric columns: median imputation followed by standardisation
        numeric_steps = [
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ]
        # Categorical columns: mode imputation followed by one-hot encoding
        categorical_steps = [
            ('imputer', SimpleImputer(strategy='most_frequent')),
            ('onehot', OneHotEncoder(handle_unknown='ignore')),
        ]
        column_prep = ColumnTransformer(
            transformers=[
                ('num', Pipeline(steps=numeric_steps), numeric_cols),
                ('cat', Pipeline(steps=categorical_steps), categorical_cols),
            ])

        forest = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)
        model = Pipeline(steps=[
            ('preprocessor', column_prep),
            ('regressor', forest),
        ])
        model.fit(train_X, train_y)

        # Persist the fitted pipeline next to the notebook
        joblib.dump(model, 'inventory_model_retail.pkl')
        return (model, *splits, "Model trained successfully!")
    except Exception as e:
        return (None,) * 5 + (f"Error training model: {str(e)}",)

In [8]:
# Step 5: Prediction function
def predict_demand(date, product_category, price_per_unit, age, gender, model, weekly_data):
    """Predict the quantity sold for one scenario and format it for display.

    Parameters
    ----------
    date : str or datetime-like
        Date of the scenario; parsed with ``pd.to_datetime``.
    product_category, gender : str
        Categorical inputs passed straight to the model.
    price_per_unit, age : number or numeric str
        Converted with ``float``.
    model : fitted estimator exposing ``predict`` or None
    weekly_data : DataFrame or None
        Aggregated training table; only its earliest ``Date`` is used, as the
        origin of the ``DaysFromStart`` feature.

    Returns
    -------
    str
        The formatted prediction, or a message starting with
        "Error in prediction:" when inputs are missing or anything raises.
    """
    # Nothing uploaded/trained yet: report that explicitly instead of surfacing
    # a "'NoneType' object is not subscriptable" message.
    if model is None or weekly_data is None:
        return "Error in prediction: please upload a dataset and train the model first"
    try:
        date = pd.to_datetime(date)
        if pd.isna(date):
            return "Error in prediction: invalid or missing date"
        features = pd.DataFrame({
            'Product Category': [product_category],
            'Price per Unit': [float(price_per_unit)],
            'Age': [float(age)],
            'Gender': [gender],
            'Year': [date.year],
            'Month': [date.month],
            'WeekOfYear': [date.isocalendar().week],
            'DayOfWeek': [date.dayofweek],
            'IsWeekend': [1 if date.dayofweek in [5, 6] else 0],
            'DaysFromStart': [(date - weekly_data['Date'].min()).days]
        })
        prediction = model.predict(features)[0]
        return f"Predicted weekly quantity for {product_category} on {date.strftime('%Y-%m-%d')}: {prediction:.2f} units"
    except Exception as e:
        return f"Error in prediction: {str(e)}"


In [9]:
# Step 6: Visualization functions
def plot_historical_trend(product_category, weekly_data):
    """Plot the historical quantity sold for one product category.

    Parameters
    ----------
    product_category : str
        Value of the 'Product Category' column to plot.
    weekly_data : DataFrame or None
        Aggregated table with 'Date', 'Product Category' and 'Quantity' columns.

    Returns
    -------
    matplotlib.figure.Figure or None
        None when no dataset is loaded.
    """
    if weekly_data is None:
        return None
    cat_data = weekly_data[weekly_data['Product Category'] == product_category]

    # Use an explicit Figure/Axes rather than pyplot's "current figure", which
    # is shared global state across callbacks.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(cat_data['Date'], cat_data['Quantity'], label=product_category, color='teal')
    ax.set_title(f'Historical Quantity Sold for {product_category}')
    ax.set_xlabel('Date')
    ax.set_ylabel('Quantity')
    ax.legend()
    ax.grid(alpha=0.3)
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    # Deregister from pyplot so figures do not pile up in a long-running app;
    # the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

def plot_feature_importance(model):
    """Plot the random-forest feature importances of the fitted pipeline.

    Parameters
    ----------
    model : sklearn Pipeline or None
        Must expose a 'preprocessor' step whose second transformer contains a
        'onehot' step, and a 'regressor' step with ``feature_importances_``.

    Returns
    -------
    matplotlib.figure.Figure or None
        None when no model is available.
    """
    if model is None:
        return None
    rf_model = model.named_steps['regressor']
    onehot = model.named_steps['preprocessor'].transformers_[1][1].named_steps['onehot']
    # Numeric names first, then the one-hot expanded categorical names. This
    # assumes the preprocessor emits numeric columns before categorical ones.
    feature_names = ['Price per Unit', 'Age', 'Year', 'Month', 'WeekOfYear', 'DayOfWeek', 'IsWeekend', 'DaysFromStart'] + \
                    list(onehot.get_feature_names_out(['Product Category', 'Gender']))
    importances = rf_model.feature_importances_

    feature_importance = pd.DataFrame({
        'Feature': feature_names,
        'Importance': importances
    }).sort_values('Importance', ascending=False)

    # Explicit Figure/Axes instead of pyplot's shared "current figure".
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.barplot(x='Importance', y='Feature', data=feature_importance, palette='viridis', ax=ax)
    ax.set_title('Feature Importance')
    fig.tight_layout()
    # Deregister from pyplot so repeated uploads do not accumulate open figures;
    # the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

def plot_future_forecast(model, weekly_data, horizon_days=28):
    """Plot predicted quantity per product category for the days after the data ends.

    The forecast covers ``horizon_days`` consecutive calendar days starting the
    day after the latest ``Date`` in ``weekly_data`` (28 days = the "4-week"
    forecast shown in the UI). Every scenario uses the same placeholder
    customer/price inputs: price 50, age 40, gender 'Male'.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` or None
    weekly_data : DataFrame or None
        Aggregated table with 'Date' and 'Product Category' columns.
    horizon_days : int, default 28
        Number of future days to forecast.

    Returns
    -------
    matplotlib.figure.Figure or None
        None when the model or data is missing, the data has no categories,
        or ``horizon_days`` is not positive.
    """
    if model is None or weekly_data is None:
        return None
    categories = weekly_data['Product Category'].unique()
    if horizon_days <= 0 or len(categories) == 0:
        return None

    first_date = weekly_data['Date'].min()
    last_date = weekly_data['Date'].max()
    # Daily steps: the title promises a 4-week window, whereas 28 *weekly*
    # steps would span roughly half a year and land only on Sundays.
    future_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=horizon_days, freq='D')

    # Build every (date, category) scenario up front so the model is invoked
    # once instead of once per row.
    scenarios = pd.DataFrame([
        {
            'Date': date,
            'Product Category': category,
            'Price per Unit': 50,
            'Age': 40,
            'Gender': 'Male',
            'Year': date.year,
            'Month': date.month,
            'WeekOfYear': date.isocalendar().week,
            'DayOfWeek': date.dayofweek,
            'IsWeekend': 1 if date.dayofweek in [5, 6] else 0,
            'DaysFromStart': (date - first_date).days,
        }
        for date in future_dates
        for category in categories
    ])
    forecast_df = scenarios[['Date', 'Product Category']].copy()
    forecast_df['Predicted_Quantity'] = model.predict(scenarios.drop(columns=['Date']))

    if horizon_days % 7 == 0:
        span_label = f'{horizon_days // 7}-Week'
    else:
        span_label = f'{horizon_days}-Day'

    # Explicit Figure/Axes instead of pyplot's shared "current figure".
    fig, ax = plt.subplots(figsize=(10, 5))
    for category in categories:
        cat_data = forecast_df[forecast_df['Product Category'] == category]
        ax.plot(cat_data['Date'], cat_data['Predicted_Quantity'], '--o', label=category)
    ax.set_title(f'{span_label} Quantity Forecast by Product Category')
    ax.set_xlabel('Date')
    ax.set_ylabel('Predicted Quantity')
    ax.legend()
    ax.grid(alpha=0.3)
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    # Deregister from pyplot so repeated uploads do not accumulate open figures;
    # the returned Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

In [10]:
# Step 7: Process uploaded file and update interface
def process_uploaded_file(file):
    """Load an uploaded CSV, train the model, and build the dashboard outputs.

    Rebinds the module-level ``weekly_data`` and ``model`` that the prediction
    and trend callbacks read.

    Returns
    -------
    tuple
        ``(trend_figure, importance_figure, forecast_figure, status_message,
        dropdown_update)``. The last item is a ``gr.update`` for the product
        category dropdown: new choices on success, no change on failure.
    """
    global weekly_data, model
    data, weekly_data, message = load_and_preprocess_data(file)
    if data is None:
        # Do not keep a model trained on a previous upload once loading fails.
        model = None
        return None, None, None, message, gr.update()

    model, _, _, _, _, train_message = train_model(weekly_data)
    if model is None:
        return None, None, None, train_message, gr.update()

    # Get unique product categories for dropdown
    categories = weekly_data['Product Category'].unique().tolist()

    return (
        plot_historical_trend(categories[0], weekly_data),
        plot_feature_importance(model),
        plot_future_forecast(model, weekly_data),
        f"{message}\n{train_message}",
        # A bare list would be taken as the dropdown's *value*; send an update
        # that replaces the choices and selects the first category instead.
        gr.update(choices=categories, value=categories[0])
    )

In [11]:
# Step 8: Custom CSS for styling and animations
# Raw CSS only: gr.Blocks(css=...) takes stylesheet text, so the string must not be
# wrapped in <style> tags (the tag text would be parsed as CSS and break the first rule).
custom_css = """
/* General layout */
.gradio-container {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    font-family: 'Arial', sans-serif;
}

/* Title styling */
h1 {
    color: #2c3e50;
    text-align: center;
    font-size: 2.5em;
    margin-bottom: 20px;
    animation: fadeIn 1s ease-in;
}

/* Input components */
.gr-input, .gr-select, .gr-slider, .gr-file {
    background-color: white;
    border-radius: 8px;
    padding: 10px;
    margin: 10px 0;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    transition: transform 0.3s ease;
}

.gr-input:hover, .gr-select:hover, .gr-slider:hover, .gr-file:hover {
    transform: scale(1.02);
}

/* Button styling */
.gr-button {
    background-color: #3498db;
    color: white;
    border: none;
    padding: 12px 24px;
    border-radius: 8px;
    font-size: 1.1em;
    cursor: pointer;
    transition: background-color 0.3s ease, transform 0.2s ease;
}

.gr-button:hover {
    background-color: #2980b9;
    transform: translateY(-2px);
}

/* Output area */
.output-text {
    background-color: #ecf0f1;
    padding: 15px;
    border-radius: 8px;
    font-size: 1.2em;
    color: #2c3e50;
    animation: slideIn 0.5s ease;
}

/* Plot containers */
.output-image {
    background-color: white;
    padding: 10px;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    margin-top: 20px;
}

/* Animations */
@keyframes fadeIn {
    0% { opacity: 0; }
    100% { opacity: 1; }
}

@keyframes slideIn {
    0% { transform: translateY(20px); opacity: 0; }
    100% { transform: translateY(0); opacity: 1; }
}

/* Responsive design */
@media (max-width: 768px) {
    h1 {
        font-size: 2em;
    }
    .gr-input, .gr-select, .gr-slider, .gr-file {
        padding: 8px;
    }
    .gr-button {
        padding: 10px 20px;
    }
}
"""



In [12]:
# Step 9: Gradio Interface

# Shared app state. process_uploaded_file() rebinds these globals after an upload;
# binding them here (keeping any value from an earlier run of the notebook) stops
# the callbacks below from raising NameError when used before the first upload.
model = globals().get('model')
weekly_data = globals().get('weekly_data')

# Categories offered before any dataset has been uploaded.
DEFAULT_CATEGORIES = ['Beauty', 'Clothing', 'Electronics']


def _loaded_categories_update():
    """Return a dropdown update listing the categories of the loaded dataset."""
    if weekly_data is None:
        return gr.update()
    categories = weekly_data['Product Category'].unique().tolist()
    if not categories:
        return gr.update()
    return gr.update(choices=categories, value=categories[0])


def _on_upload(file):
    """Run the upload pipeline and refresh both category dropdowns."""
    trend_fig, importance_fig, forecast_fig, status, _ = process_uploaded_file(file)
    # The dropdown updates are rebuilt from the loaded data, so both dropdowns
    # receive choices (not a bare list, which Gradio would treat as a value).
    return (
        trend_fig,
        importance_fig,
        forecast_fig,
        status,
        _loaded_categories_update(),
        _loaded_categories_update(),
    )


with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# Inventory Demand Forecasting System")
    gr.Markdown("Upload a CSV file with retail sales data to predict weekly demand and visualize trends.")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Upload Dataset")
            file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
            upload_status = gr.Textbox(label="Upload Status")

            gr.Markdown("### Input Parameters")
            date_input = gr.Textbox(label="Date (YYYY-MM-DD)", value="2024-01-01")
            category_input = gr.Dropdown(
                choices=DEFAULT_CATEGORIES,
                label="Product Category",
                value="Beauty"
            )
            price_input = gr.Slider(0, 500, value=50, step=1, label="Price per Unit ($)")
            age_input = gr.Slider(18, 64, value=40, step=1, label="Customer Age")
            gender_input = gr.Dropdown(choices=['Male', 'Female'], label="Customer Gender", value="Male")
            predict_button = gr.Button("Predict Demand")

        with gr.Column(scale=2):
            gr.Markdown("### Prediction Output")
            prediction_output = gr.Textbox(label="Prediction Result")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Historical Sales Trend")
            category_trend = gr.Dropdown(
                choices=DEFAULT_CATEGORIES,
                label="Select Product Category for Trend",
                value="Beauty"
            )
            trend_plot = gr.Plot(label="Historical Trend")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Feature Importance")
            feature_plot = gr.Plot(label="Feature Importance")

        with gr.Column():
            gr.Markdown("### 4-Week Forecast")
            forecast_plot = gr.Plot(label="Future Forecast")

    # Bind functions to inputs
    file_input.change(
        fn=_on_upload,
        inputs=[file_input],
        outputs=[trend_plot, feature_plot, forecast_plot, upload_status, category_input, category_trend]
    )

    # The lambdas look up `model` / `weekly_data` at call time, so they always
    # see the objects produced by the most recent upload.
    predict_button.click(
        fn=lambda date, category, price, age, gender: predict_demand(date, category, price, age, gender, model, weekly_data),
        inputs=[date_input, category_input, price_input, age_input, gender_input],
        outputs=prediction_output
    )

    category_trend.change(
        fn=lambda category: plot_historical_trend(category, weekly_data),
        inputs=[category_trend],
        outputs=trend_plot
    )

    # Initial page state: empty plots, a prompt, and the default category list.
    demo.load(
        fn=lambda: [
            None,
            None,
            None,
            "Please upload a CSV file to begin.",
            gr.update(choices=DEFAULT_CATEGORIES, value=DEFAULT_CATEGORIES[0])
        ],
        outputs=[trend_plot, feature_plot, forecast_plot, upload_status, category_input]
    )

# Step 10: Launch the interface
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f6cc01c22731ff75e5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


