<a href="https://colab.research.google.com/github/sineka232/data_science/blob/main/E_Commerce_Demand_Forecasting_and_Price_Optimization_Engine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install necessary Python libraries and the tunneling tool
!pip install streamlit pandas numpy xgboost matplotlib scikit-learn seaborn
!npm install localtunnel

[1G[0Kâ ™[1G[0Kâ ¹[1G[0Kâ ¸[1G[0Kâ ¼[1G[0Kâ ´[1G[0Kâ ¦[1G[0Kâ §[1G[0Kâ ‡[1G[0K
up to date, audited 23 packages in 1s
[1G[0Kâ ‡[1G[0K
[1G[0Kâ ‡[1G[0K3 packages are looking for funding
[1G[0Kâ ‡[1G[0K  run `npm fund` for details
[1G[0Kâ ‡[1G[0K
2 high severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0Kâ ‡[1G[0K

In [5]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# Set Streamlit Page Config
st.set_page_config(page_title="Retail Demand AI", layout="wide")

# --- 1. DATA LOADING & PROCESSING ---
@st.cache_data
def _read_retail_prices(url):
    """Download the retail price CSV at ``url`` and derive the model features.

    This is the cached part of ``load_data``. It deliberately contains no
    error handling: an exception raised here propagates to the caller, so a
    failed download is never stored in the cache.

    Args:
        url: Location of the CSV, passed straight to ``pd.read_csv``.

    Returns:
        DataFrame holding the selected source columns (``unit_price``, ``qty``,
        ``product_category_name`` and ``product_score`` renamed to ``Price``,
        ``Demand``, ``Category`` and ``Rating``) plus the derived ``Date``,
        ``DayOfWeek``, ``Month``, ``Quarter`` and ``DayOfYear`` columns.
    """
    df = pd.read_csv(url)

    # Select relevant columns and rename for clarity
    df = df[[
        'product_id', 'product_category_name', 'month_year', 'qty',
        'total_price', 'freight_price', 'unit_price', 'product_score',
        'customers', 'weekday', 'weekend',
    ]]
    df = df.rename(columns={
        'unit_price': 'Price',
        'qty': 'Demand',
        'product_category_name': 'Category',
        'product_score': 'Rating'
    })

    # The row position is treated as a time progression: each row gets one
    # synthetic calendar day, counting up from 2023-01-01. The month_year
    # column is kept in the frame but is not used to build Date.
    df['Date'] = pd.to_datetime('2023-01-01') + pd.to_timedelta(df.index, unit='D')

    # Feature Engineering (all calendar features come from the synthetic Date)
    df['DayOfWeek'] = df['Date'].dt.dayofweek
    df['Month'] = df['Date'].dt.month
    df['Quarter'] = df['Date'].dt.quarter
    df['DayOfYear'] = df['Date'].dt.dayofyear

    return df


def load_data():
    """Return the prepared retail price dataset, or an empty frame on failure.

    Successful loads are served from the ``st.cache_data`` cache of
    ``_read_retail_prices``. The try/except lives out here, outside the cached
    function, so that the empty fallback frame is never cached and the next
    script run retries the download.

    Returns:
        The prepared DataFrame, or an empty ``pd.DataFrame()`` after showing
        the error with ``st.error`` when loading or processing fails.
    """
    # Using a public Retail Price Optimization dataset
    url = "https://raw.githubusercontent.com/Mohshaikh23/Retail-Price-Optimization/master/retail_price.csv"

    try:
        return _read_retail_prices(url)
    except Exception as e:
        st.error(f"Error loading data: {e}")
        return pd.DataFrame()


# load_data used to be the cached function itself; keep its cache-clearing hook
# available under the same name.
load_data.clear = _read_retail_prices.clear

# --- 2. MODEL TRAINING ---
@st.cache_resource
def train_model(df, selected_category):
    """Fit an XGBoost demand model and score it on a held-out split.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached: the model is refitted only when ``df`` or
    ``selected_category`` changes, not each time a simulator control moves.
    The cached objects are shared between reruns and must be treated as
    read-only by callers.

    Args:
        df: Prepared dataset containing the feature columns listed below plus
            ``Demand`` and ``Category``.
        selected_category: A value of the ``Category`` column to restrict the
            training data to, or ``"All"`` to use every row.

    Returns:
        Tuple ``(model, mae, r2, X_test, y_test, preds)``: the fitted
        regressor, its mean absolute error and R² on the 20% test split, the
        test features and targets, and the predictions for the test features.
    """
    # Filter data for the specific category to make the model specific
    if selected_category != "All":
        df = df[df['Category'] == selected_category]

    # Features used for prediction
    features = ['Price', 'Rating', 'DayOfWeek', 'Month', 'Quarter', 'DayOfYear']
    X = df[features]
    y = df['Demand']

    # Split Data (fixed seed so the same rows land in the test split each time)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # XGBoost Regressor
    model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=200, learning_rate=0.05, max_depth=5)
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    mae = mean_absolute_error(y_test, preds)
    r2 = r2_score(y_test, preds)

    return model, mae, r2, X_test, y_test, preds

# --- 3. DASHBOARD UI ---
st.title("ðŸ›’ AI Demand Forecasting & Price Optimizer")
st.markdown("Using **XGBoost** to predict sales volume based on price changes.")

# Load Data
df = load_data()

# load_data() reports a failure with st.error and hands back an empty frame.
# Halt the script here so the column lookups below do not raise a KeyError on
# top of the message that is already on screen.
if df.empty:
    st.stop()

# Sidebar Controls
st.sidebar.header("Configuration")
categories = ["All"] + list(df['Category'].unique())
selected_category = st.sidebar.selectbox("Select Product Category", categories)

if st.sidebar.checkbox("Show Raw Data"):
    st.write(df.head())

# Train Model
model, mae, r2, X_test, y_test, preds = train_model(df, selected_category)

# Rows behind the scores shown below: train_model() restricts the data to the
# selected category unless "All" is chosen, so count the same subset here
# instead of the whole dataset.
if selected_category == "All":
    n_points = len(df)
else:
    n_points = int((df['Category'] == selected_category).sum())

# Display Metrics
col1, col2, col3 = st.columns(3)
col1.metric("Model Accuracy (RÂ²)", f"{r2:.2%}")
col2.metric("Avg. Error (MAE)", f"{mae:.1f} Units")
col3.metric("Data Points", n_points)

# --- VISUALIZATIONS ---
st.markdown("---")
c1, c2 = st.columns(2)

with c1:
    st.subheader("ðŸ“‰ Actual vs Predicted Demand")
    # Scatter plot for regression performance. Everything is drawn through the
    # explicit Axes object rather than pyplot's implicit "current axes".
    fig, ax = plt.subplots()
    sns.scatterplot(x=y_test, y=preds, alpha=0.6, color='blue', ax=ax)
    demand_min, demand_max = y_test.min(), y_test.max()
    ax.plot([demand_min, demand_max], [demand_min, demand_max], 'r--')  # Perfect prediction line
    ax.set_xlabel("Actual Demand")
    ax.set_ylabel("Predicted Demand")
    st.pyplot(fig)
    # Figures made with plt.subplots stay registered with pyplot until closed;
    # release this one so reruns of the script do not pile them up.
    plt.close(fig)

with c2:
    st.subheader("ðŸ“Š Feature Importance")
    # Which factors drive demand the most?
    fig2, ax2 = plt.subplots()
    xgb.plot_importance(model, ax=ax2, max_num_features=10, height=0.5)
    st.pyplot(fig2)
    plt.close(fig2)

# --- WHAT-IF SIMULATOR ---
st.markdown("---")
st.header("ðŸ§ª Price Optimization Simulator")

# Simulation Controls
sim_col1, sim_col2 = st.columns([1, 2])

with sim_col1:
    st.markdown("### Setup Context")
    # min_value keeps the simulated price range from going negative.
    base_price = st.number_input("Base Unit Price ($)", min_value=0.0, value=float(df['Price'].mean()))
    product_rating = st.slider("Product Rating (1-5)", 1.0, 5.0, 4.0)
    sim_date = st.date_input("Forecast Date", value=pd.to_datetime("2024-06-01"))

with sim_col2:
    st.markdown("### Price Elasticity Curve")

    # Generate range of prices (-50% to +50%)
    price_range = np.linspace(base_price * 0.5, base_price * 1.5, 50)

    # Calendar features for the forecast date (parsed once)
    forecast_ts = pd.to_datetime(sim_date)

    # One row per candidate price, using the same feature columns in the same
    # order that train_model() fits on. The scalar values are broadcast to
    # every row, so a single predict call covers the whole price range.
    scenarios = pd.DataFrame({
        'Price': price_range,
        'Rating': product_rating,
        'DayOfWeek': forecast_ts.dayofweek,
        'Month': forecast_ts.month,
        'Quarter': forecast_ts.quarter,
        'DayOfYear': forecast_ts.dayofyear,
    })

    # Ensure no negative demand
    demand_preds = np.maximum(model.predict(scenarios), 0)
    revenue_preds = demand_preds * price_range

    # Find Optimal Price (Max Revenue)
    max_rev_index = np.argmax(revenue_preds)
    optimal_price = price_range[max_rev_index]
    max_revenue = revenue_preds[max_rev_index]

    # Plotting
    fig3, ax3 = plt.subplots(figsize=(10, 4))

    # Plot Demand (Green)
    ax3.plot(price_range, demand_preds, color='green', label='Projected Demand')
    ax3.set_xlabel("Unit Price ($)")
    ax3.set_ylabel("Demand (Qty)", color='green')
    ax3.tick_params(axis='y', labelcolor='green')

    # Plot Revenue (Blue) on twin axis
    ax4 = ax3.twinx()
    ax4.plot(price_range, revenue_preds, color='blue', linestyle='--', label='Projected Revenue')
    ax4.set_ylabel("Revenue ($)", color='blue')
    ax4.tick_params(axis='y', labelcolor='blue')

    # Mark Optimal Price
    ax4.axvline(optimal_price, color='red', linestyle=':', alpha=0.8, label=f'Optimum: ${optimal_price:.2f}')

    # The lines above carry labels; draw one legend that merges the entries of
    # both y-axes so those labels are actually shown.
    demand_handles, demand_labels = ax3.get_legend_handles_labels()
    revenue_handles, revenue_labels = ax4.get_legend_handles_labels()
    ax4.legend(demand_handles + revenue_handles, demand_labels + revenue_labels, loc='best')

    # The title contains two dollar signs. Left unescaped, Matplotlib treats a
    # pair of '$' as mathtext delimiters and typesets everything between them
    # as a formula, so both are escaped to render as literal currency symbols.
    ax3.set_title(f"Optimal Price: \\${optimal_price:.2f} | Max Revenue: \\${max_revenue:.2f}")
    st.pyplot(fig3)
    # Release the figure so reruns of the script do not accumulate open figures.
    plt.close(fig3)

Overwriting app.py


In [7]:
# Cell 3: Run with Stability Flags
print("----------------------------------------------------------")
print("PASSWORD (Copy this IP):")
!wget -q -O - ipv4.icanhazip.com
print("----------------------------------------------------------")

# We added '--server.enableCORS false' and '--server.enableXsrfProtection false'
print("Click the link below named 'your url is'. Paste the IP above as the password.")
!streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false & npx localtunnel --port 8501

----------------------------------------------------------
PASSWORD (Copy this IP):
34.106.135.251
----------------------------------------------------------
Click the link below named 'your url is'. Paste the IP above as the password.
[1G[0Kâ ™[1G[0Kâ ¹[1G[0Kâ ¸[1G[0K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
your url is: https://hot-times-juggle.loca.lt
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.106.135.251:8501[0m
[0m
[34m  Stopping...[0m
^C
