<a href="https://colab.research.google.com/github/prasansree/BusBookingApp/blob/main/ShortageOrder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
import warnings
warnings.filterwarnings('ignore')

# Embedded sample of order records, kept inline as CSV text so the notebook
# needs no external file.
data_str = '''Id,Status,StartDateTime,DispatchDateTime,Description,PackedQuantity,OrderedQuantity,ShortenedQuantity,OrderlineCount,PackageCount,Version
6220A6103ADFBE45BB6AD7B9068632EE,2,2/2/2026 11:27:00.0000000 AM,2/2/2026 1:27:00.0000000 PM,363600001548092142,0,48,48,38,0,3
013C4F65347C64498B07750F81300CC6,3,2/2/2026 2:12:00.0000000 PM,2/2/2026 4:12:00.0000000 PM,581100001548092157,0,115,0,96,0,2
FC5A488305177B45B553DCD271B47682,3,2/2/2026 2:15:00.0000000 PM,2/2/2026 4:15:00.0000000 PM,583900001548092160,0,127,0,109,0,2
48E5D07D19B50F48B6C143C097291883,2,2/2/2026 10:15:00.0000000 AM,2/2/2026 12:15:00.0000000 PM,301500001548092081,98,100,2,76,13,16
4ECECC7E4A9CD244BEBCCA80B7773DAC,2,2/2/2026 4:30:00.0000000 PM,2/2/2026 6:30:00.0000000 PM,304600001548092094,64,64,0,42,6,9
805D31201E47C74690F3A804A75B4A26,2,2/2/2026 3:36:00.0000000 PM,2/2/2026 5:36:00.0000000 PM,304700001548092095,65,65,0,44,6,9'''

# Parse the CSV text into the raw orders frame used by the rest of the notebook.
df = pd.read_csv(StringIO(data_str))

# Report banner: rule, title, rule.
print("\n".join((
    "=" * 100,
    "QUANTITY-RELATED PREDICTIONS - MACHINE LEARNING MODELS",
    "=" * 100,
)))

# Dump every raw row (without the index column), then a closing rule
# padded by blank lines.
print("\nOriginal Data:", df.to_string(index=False), sep="\n")
print("\n{}\n".format("=" * 100))

# ==========================================
# FEATURE ENGINEERING
# ==========================================

def create_quantity_features(df):
    """Return a copy of ``df`` extended with engineered quantity features.

    The input frame is not modified. It must provide the columns
    ``StartDateTime``, ``DispatchDateTime``, ``OrderedQuantity``,
    ``PackedQuantity``, ``ShortenedQuantity``, ``OrderlineCount`` and
    ``PackageCount``.

    Columns added, in this order: ``Start_Hour``, ``Start_DayOfWeek``,
    ``ProcessingDuration_Minutes``, ``Items_Per_Orderline``,
    ``Shortage_Rate``, ``Fulfillment_Rate``, ``Packages_Per_Item``,
    ``Items_Per_Package``, ``Order_Complexity``, ``Has_Shortage``,
    ``Fully_Packed`` and ``Has_Packages``.

    Every ratio swaps a zero denominator for 1 before dividing, and any
    remaining +/-inf value anywhere in the frame is replaced by 0.
    """
    features = df.copy()

    # Parse both timestamp columns into real datetimes.
    for stamp_col in ('StartDateTime', 'DispatchDateTime'):
        features[stamp_col] = pd.to_datetime(features[stamp_col])

    # Time-of-day / weekday of the start, and start-to-dispatch span in minutes.
    started = features['StartDateTime']
    features['Start_Hour'] = started.dt.hour
    features['Start_DayOfWeek'] = started.dt.dayofweek
    elapsed = features['DispatchDateTime'] - started
    features['ProcessingDuration_Minutes'] = elapsed.dt.total_seconds() / 60

    # Shared operands for the ratio features; the *_safe series have 0 -> 1
    # so the divisions below never divide by zero.
    ordered = features['OrderedQuantity']
    ordered_safe = ordered.replace(0, 1)
    orderlines_safe = features['OrderlineCount'].replace(0, 1)
    packages_safe = features['PackageCount'].replace(0, 1)

    features['Items_Per_Orderline'] = ordered / orderlines_safe
    features['Shortage_Rate'] = features['ShortenedQuantity'] / ordered_safe
    features['Fulfillment_Rate'] = features['PackedQuantity'] / ordered_safe
    features['Packages_Per_Item'] = features['PackageCount'] / ordered_safe
    features['Items_Per_Package'] = ordered / packages_safe

    # Orderline count times items per orderline. Note: this reproduces
    # OrderedQuantity (up to float rounding) whenever OrderlineCount is
    # non-zero, and is 0 when OrderlineCount is 0.
    features['Order_Complexity'] = (
        features['OrderlineCount'] * features['Items_Per_Orderline']
    )

    # 0/1 indicator columns.
    features['Has_Shortage'] = features['ShortenedQuantity'].gt(0).astype(int)
    features['Fully_Packed'] = features['PackedQuantity'].eq(ordered).astype(int)
    features['Has_Packages'] = features['PackageCount'].gt(0).astype(int)

    # Neutralise any infinities left in the frame.
    return features.replace([np.inf, -np.inf], 0)

# Build the engineered feature table from the raw orders.
df_processed = create_quantity_features(df)

print("Feature Engineering Complete!")
print("\nKey Metrics:")
print(df_processed[['OrderedQuantity', 'PackedQuantity', 'ShortenedQuantity',
                     'Shortage_Rate', 'Fulfillment_Rate', 'Items_Per_Orderline']].describe())
print("\n" + "="*100 + "\n")

# ==========================================
# PREDICTION 1: SHORTAGE QUANTITY
# ==========================================

print("PREDICTION 1: PREDICTING SHORTAGE QUANTITY")
print("-" * 100)

# Features for shortage prediction. Only columns that do not directly encode
# the target are listed (Shortage_Rate, Fulfillment_Rate, Has_Shortage and
# PackedQuantity are deliberately left out).
shortage_features = [
    'Status', 'OrderedQuantity', 'OrderlineCount', 'Version',
    'Start_Hour', 'Start_DayOfWeek', 'Items_Per_Orderline', 'Order_Complexity'
]

X_shortage = df_processed[shortage_features].fillna(0)
y_shortage = df_processed['ShortenedQuantity']

print(f"\nDataset: {len(X_shortage)} samples")
print(f"Features: {shortage_features}")
print(f"\nTarget Statistics (ShortenedQuantity):")
print(f"  Mean: {y_shortage.mean():.2f}")
print(f"  Std:  {y_shortage.std():.2f}")
print(f"  Min:  {y_shortage.min():.0f}")
print(f"  Max:  {y_shortage.max():.0f}")

# Train a single shallow regression tree (seeded for reproducibility).
ml_model = DecisionTreeRegressor(max_depth=3, random_state=42)

print("\n" + "-" * 100)
print("Model Performance:")
print("-" * 100)

ml_model.fit(X_shortage, y_shortage)
y_pred = ml_model.predict(X_shortage)
# Ensure non-negative predictions
y_pred = np.maximum(y_pred, 0)

# In-sample metrics: computed on the very rows the tree was fitted on, so
# they describe how well the tree memorised the data, not how it generalises.
mae = mean_absolute_error(y_shortage, y_pred)
rmse = np.sqrt(mean_squared_error(y_shortage, y_pred))
r2 = r2_score(y_shortage, y_pred)

# Out-of-sample estimate. An integer `cv` on a regressor means K-fold; with as
# many folds as rows (the case for this tiny sample) that is leave-one-out.
# Folds are capped at 10 so the cell stays cheap if more data is loaded.
# cross_val_score fits fresh copies of the estimator, so `ml_model` keeps the
# full-data fit from above. Only MAE is cross-validated: R² is undefined on a
# single-row test fold.
cv_folds = min(len(X_shortage), 10)
if cv_folds >= 2:
    cv_scores = cross_val_score(ml_model, X_shortage, y_shortage,
                                cv=cv_folds, scoring='neg_mean_absolute_error')
    cv_mae = float(-cv_scores.mean())
else:
    # Cross-validation needs at least two rows.
    cv_mae = float('nan')

shortage_result = {
    'model': ml_model,
    'predictions': y_pred,
    'MAE': mae,
    'RMSE': rmse,
    'R2': r2,
    'CV_MAE': cv_mae,
}

print(f"\n{ml_model}:")
print("  In-sample fit (same rows used for training - optimistic):")
print(f"  MAE:  {mae:.2f} units")
print(f"  RMSE: {rmse:.2f} units")
print(f"  R²:   {r2:.4f}")
print(f"  Cross-validated MAE ({cv_folds}-fold): {cv_mae:.2f} units")

print("\n" + "="*100 + "\n")

# Only one candidate is trained, so it is the selected model by construction.
best_shortage_model = ml_model
print(f" best_shortage_model: {best_shortage_model}")

def predict_quantities(ordered_qty, orderline_count, status=2, version=1,
                       start_hour=10, packed_qty=None, start_dayofweek=0):
    """
    Predict the shortage quantity for a new order.

    Relies on the module-level names ``shortage_features`` (feature column
    order) and ``best_shortage_model`` (fitted estimator with ``predict``).

    Parameters:
    -----------
    ordered_qty : int
        Ordered quantity
    orderline_count : int
        Number of order lines
    status : int
        Order status (default: 2)
    version : int
        Version number (default: 1)
    start_hour : int
        Hour of day order started (default: 10)
    packed_qty : int, optional
        Packed quantity if known. Accepted for interface compatibility;
        it is not used by the shortage prediction.
    start_dayofweek : int
        Day of week the order started, 0 = Monday (default: 0)

    Returns:
    --------
    dict with keys
        'ordered_quantity'   : the ``ordered_qty`` passed in
        'predicted_shortage' : float rounded to 2 decimals, clamped to
                               the range [0, ordered_qty]
        'models_used'        : {'shortage': <estimator used>}
    """

    # Derived features, computed the same way as in create_quantity_features:
    # a zero orderline count is treated as 1 for the division only.
    items_per_orderline = ordered_qty / max(orderline_count, 1)
    order_complexity = orderline_count * items_per_orderline

    # Single-row frame, re-ordered to the exact column order used for training.
    X_shortage_new = pd.DataFrame([{
        'Status': status,
        'OrderedQuantity': ordered_qty,
        'OrderlineCount': orderline_count,
        'Version': version,
        'Start_Hour': start_hour,
        'Start_DayOfWeek': start_dayofweek,
        'Items_Per_Orderline': items_per_orderline,
        'Order_Complexity': order_complexity
    }])[shortage_features]

    # Predict with the same estimator that is reported back in 'models_used',
    # so the two can never diverge.
    model = best_shortage_model
    raw_shortage = float(model.predict(X_shortage_new)[0])

    # A shortage cannot be negative and cannot exceed what was ordered; the
    # tree only returns target values seen in training, which may be larger
    # than this order.
    upper_bound = max(float(ordered_qty), 0.0)
    predicted_shortage = min(max(raw_shortage, 0.0), upper_bound)

    return {
        'ordered_quantity': ordered_qty,
        'predicted_shortage': round(predicted_shortage, 2),
        'models_used': {
            'shortage': model,
        }
    }

# ==========================================
# EXAMPLE PREDICTIONS
# ==========================================

def _show_example(title, **order):
    """Print a titled section, run predict_quantities(**order), print and return the result."""
    divider = "-" * 100
    print("\n" + divider)
    print(title)
    print(divider)

    result = predict_quantities(**order)

    print(f"\nInput:")
    print(f"  Ordered Quantity: {result['ordered_quantity']}")
    print(f"\nPredictions:")
    print(f"  Shortage Amount:        {result['predicted_shortage']:.2f} units")
    return result


# Scenario 1: a small morning order.
example1 = _show_example(
    "EXAMPLE 1: Small Order",
    ordered_qty=50,
    orderline_count=35,
    status=2,
    version=2,
    start_hour=9,
)

# Scenario 2: a large afternoon order.
example2 = _show_example(
    "EXAMPLE 2: Large Order",
    ordered_qty=200,
    orderline_count=150,
    status=2,
    version=5,
    start_hour=14,
)

# Closing caveat about the size of the training sample.
print("\n" + "=" * 100)
for note_line in (
    "\nIMPORTANT NOTE:",
    "With only 6 training samples, these predictions are for DEMONSTRATION purposes.",
    "For reliable production predictions, collect at least 100-1000 historical orders.",
    "=" * 100,
):
    print(note_line)

QUANTITY-RELATED PREDICTIONS - MACHINE LEARNING MODELS

Original Data:
                              Id  Status                StartDateTime             DispatchDateTime        Description  PackedQuantity  OrderedQuantity  ShortenedQuantity  OrderlineCount  PackageCount  Version
6220A6103ADFBE45BB6AD7B9068632EE       2 2/2/2026 11:27:00.0000000 AM  2/2/2026 1:27:00.0000000 PM 363600001548092142               0               48                 48              38             0        3
013C4F65347C64498B07750F81300CC6       3  2/2/2026 2:12:00.0000000 PM  2/2/2026 4:12:00.0000000 PM 581100001548092157               0              115                  0              96             0        2
FC5A488305177B45B553DCD271B47682       3  2/2/2026 2:15:00.0000000 PM  2/2/2026 4:15:00.0000000 PM 583900001548092160               0              127                  0             109             0        2
48E5D07D19B50F48B6C143C097291883       2 2/2/2026 10:15:00.0000000 AM 2/2/2026 12:15:00.00000