### Overall Sequence of Table Creation
- 1. Calendar
- 2. Suppliers
- 3. Customers
- 4. Products
- 5. Warehouses
- 6. Stores
- 7. Manufacturing
- 8. Inventory Records
- 9. Production Schedule
- 10. Sales Orders and Order Line Items
- 11. Purchase Orders and Purchase Order Line Items
- 12. Material Requirements
- 13. Forecast


## Setup

In [1]:
import pandas as pd
import numpy as np
import random
from datetime import timedelta
from itertools import product

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)


## 1. Calendar

In [2]:
# Create date range
start_date = '2022-01-01'
end_date = '2023-12-31'
date_range = pd.date_range(start=start_date, end=end_date)

# List of holidays
holidays = pd.to_datetime([
    '2022-01-01', '2022-12-25', '2022-07-04',
    '2023-01-01', '2023-12-25', '2023-07-04'
])

# Create calendar DataFrame
calendar_df = pd.DataFrame({'date': date_range})
calendar_df['day_of_week'] = calendar_df['date'].dt.day_name()
calendar_df['week_number'] = calendar_df['date'].dt.isocalendar().week
calendar_df['month'] = calendar_df['date'].dt.month_name()
calendar_df['quarter'] = calendar_df['date'].dt.quarter
calendar_df['year'] = calendar_df['date'].dt.year
calendar_df['is_weekend'] = calendar_df['day_of_week'].isin(['Saturday', 'Sunday'])
calendar_df['is_holiday'] = calendar_df['date'].isin(holidays)
calendar_df['is_working_day'] = ~calendar_df['is_weekend'] & ~calendar_df['is_holiday']

# Add maintenance days (e.g., first Monday of each quarter)
maintenance_days = calendar_df[
    (calendar_df['day_of_week'] == 'Monday') &
    (calendar_df['date'].dt.is_month_start)
]['date'].tolist()
calendar_df['is_maintenance_day'] = calendar_df['date'].isin(maintenance_days)
calendar_df['is_working_day'] = calendar_df['is_working_day'] & ~calendar_df['is_maintenance_day']

# Add notes
calendar_df['notes'] = ''
calendar_df.loc[calendar_df['is_holiday'], 'notes'] = 'Holiday'
calendar_df.loc[calendar_df['is_maintenance_day'], 'notes'] = 'Maintenance Day'

# Reset index
calendar_df.reset_index(drop=True, inplace=True)



In [3]:
# Display the calendar DataFrame
print(f"Calendar DataFrame shape: {calendar_df.shape}")
calendar_df


Calendar DataFrame shape: (730, 11)


Unnamed: 0,date,day_of_week,week_number,month,quarter,year,is_weekend,is_holiday,is_working_day,is_maintenance_day,notes
0,2022-01-01,Saturday,52,January,1,2022,True,True,False,False,Holiday
1,2022-01-02,Sunday,52,January,1,2022,True,False,False,False,
2,2022-01-03,Monday,1,January,1,2022,False,False,True,False,
3,2022-01-04,Tuesday,1,January,1,2022,False,False,True,False,
4,2022-01-05,Wednesday,1,January,1,2022,False,False,True,False,
...,...,...,...,...,...,...,...,...,...,...,...
725,2023-12-27,Wednesday,52,December,4,2023,False,False,True,False,
726,2023-12-28,Thursday,52,December,4,2023,False,False,True,False,
727,2023-12-29,Friday,52,December,4,2023,False,False,True,False,
728,2023-12-30,Saturday,52,December,4,2023,True,False,False,False,


## 2. Suppliers

In [4]:
# Number of suppliers
num_suppliers = 30

# Generate supplier IDs
supplier_ids = range(1, num_suppliers + 1)

# Sample data
supplier_names = [f"Supplier_{i}" for i in supplier_ids]
contact_names = [f"Contact_{i}" for i in supplier_ids]
phone_numbers = [f"+1-555-{random.randint(1000,9999)}" for _ in supplier_ids]
emails = [f"contact{i}@supplier.com" for i in supplier_ids]
countries = ['USA', 'Canada', 'China', 'Vietnam', 'Bangladesh', 'Germany']
cities = ['New York', 'Toronto', 'Shanghai', 'Hanoi', 'Dhaka', 'Berlin']

# Simulate supplier disruptions (e.g., 10% chance)
supplier_disruption_flags = np.random.choice([True, False], size=num_suppliers, p=[0.1, 0.9])

# Create DataFrame
suppliers_df = pd.DataFrame({
    'supplier_id': supplier_ids,
    'supplier_name': supplier_names,
    'contact_name': contact_names,
    'phone_number': phone_numbers,
    'email': emails,
    'address': [f"{random.randint(100,999)} Main St" for _ in supplier_ids],
    'city': random.choices(cities, k=num_suppliers),
    'state_province': ['State']*num_suppliers,
    'postal_code': [f"{random.randint(10000,99999)}" for _ in supplier_ids],
    'country': random.choices(countries, k=num_suppliers),
    'lead_time_days': np.random.randint(5, 30, size=num_suppliers),
    'lead_time_variability': np.random.randint(1, 5, size=num_suppliers),
    'on_time_delivery_rate': np.random.uniform(0.7, 1.0, size=num_suppliers).round(2),
    'minimum_order_quantity': np.random.randint(100, 1000, size=num_suppliers),
    'payment_terms': ['Net 30']*num_suppliers,
    'last_inspection_date': pd.to_datetime('2023-01-01') - pd.to_timedelta(np.random.randint(1, 365, size=num_suppliers), unit='d'),
    'sustainability_score': np.random.randint(1, 5, size=num_suppliers),
    'certifications': ['ISO 9001']*num_suppliers,
    'preferred_supplier': np.random.choice([True, False], size=num_suppliers),
    'supplier_rating': np.random.uniform(3, 5, size=num_suppliers).round(2),
    'is_disrupted': supplier_disruption_flags
})

# Reset index
suppliers_df.reset_index(drop=True, inplace=True)



In [5]:
# Display the suppliers DataFrame
print(f"Suppliers DataFrame shape: {suppliers_df.shape}")
suppliers_df

Suppliers DataFrame shape: (30, 21)


Unnamed: 0,supplier_id,supplier_name,contact_name,phone_number,email,address,city,state_province,postal_code,country,...,lead_time_variability,on_time_delivery_rate,minimum_order_quantity,payment_terms,last_inspection_date,sustainability_score,certifications,preferred_supplier,supplier_rating,is_disrupted
0,1,Supplier_1,Contact_1,+1-555-2824,contact1@supplier.com,881 Main St,New York,State,84341,Bangladesh,...,4,0.99,602,Net 30,2022-07-25,4,ISO 9001,True,4.46,False
1,2,Supplier_2,Contact_2,+1-555-1409,contact2@supplier.com,444 Main St,Toronto,State,51245,China,...,2,0.88,866,Net 30,2022-02-08,3,ISO 9001,False,3.74,False
2,3,Supplier_3,Contact_3,+1-555-5506,contact3@supplier.com,204 Main St,Toronto,State,37869,USA,...,1,0.78,497,Net 30,2022-06-28,1,ISO 9001,True,4.26,False
3,4,Supplier_4,Contact_4,+1-555-5012,contact4@supplier.com,194 Main St,New York,State,95909,China,...,3,0.79,970,Net 30,2022-05-03,2,ISO 9001,True,4.27,False
4,5,Supplier_5,Contact_5,+1-555-4657,contact5@supplier.com,489 Main St,Toronto,State,75435,Germany,...,2,0.75,894,Net 30,2022-10-07,1,ISO 9001,True,4.07,False
5,6,Supplier_6,Contact_6,+1-555-3286,contact6@supplier.com,199 Main St,New York,State,61856,Vietnam,...,2,0.7,492,Net 30,2022-03-23,1,ISO 9001,False,3.18,False
6,7,Supplier_7,Contact_7,+1-555-2679,contact7@supplier.com,467 Main St,Toronto,State,94259,Germany,...,4,0.83,306,Net 30,2022-10-27,3,ISO 9001,False,4.67,True
7,8,Supplier_8,Contact_8,+1-555-9935,contact8@supplier.com,967 Main St,Hanoi,State,70142,Germany,...,2,0.82,114,Net 30,2022-07-15,2,ISO 9001,False,3.64,False
8,9,Supplier_9,Contact_9,+1-555-2424,contact9@supplier.com,452 Main St,Shanghai,State,28726,USA,...,2,0.79,957,Net 30,2022-11-17,4,ISO 9001,False,3.37,False
9,10,Supplier_10,Contact_10,+1-555-7912,contact10@supplier.com,718 Main St,Shanghai,State,44718,Bangladesh,...,2,0.7,653,Net 30,2022-10-31,4,ISO 9001,True,3.08,False


## 3. Customers

In [6]:
# Number of customers
num_customers = 2000

# Generate customer IDs
customer_ids = range(1, num_customers + 1)

# Sample data
customer_names = [f"Customer_{i}" for i in customer_ids]
contact_names = [f"Contact_{i}" for i in customer_ids]
customer_types = random.choices(['Retail', 'Wholesale', 'E-commerce'], weights=[0.6, 0.2, 0.2], k=num_customers)
phone_numbers = [f"+1-555-{random.randint(1000,9999)}" for _ in customer_ids]
emails = [f"contact{i}@customer.com" for i in customer_ids]
countries = ['USA', 'Canada']
cities = ['New York', 'Los Angeles', 'Chicago', 'Toronto', 'Vancouver']

# Assign payment terms and credit limits based on customer type
payment_terms_list = []
credit_limit_list = []

for ctype in customer_types:
    if ctype == 'Retail':
        payment_terms_list.append('Prepaid')
        credit_limit_list.append(0)
    elif ctype == 'Wholesale':
        payment_terms_list.append('Net 60')
        credit_limit_list.append(np.random.randint(10000, 50000))
    else:  # E-commerce
        payment_terms_list.append('Prepaid')
        credit_limit_list.append(0)

# Create DataFrame
customers_df = pd.DataFrame({
    'customer_id': customer_ids,
    'customer_name': customer_names,
    'customer_type': customer_types,
    'contact_name': contact_names,
    'phone_number': phone_numbers,
    'email': emails,
    'billing_address': [f"{random.randint(100,999)} Market St" for _ in customer_ids],
    'shipping_address': [f"{random.randint(100,999)} Market St" for _ in customer_ids],
    'city': random.choices(cities, k=num_customers),
    'state_province': ['State']*num_customers,
    'postal_code': [f"{random.randint(10000,99999)}" for _ in customer_ids],
    'country': random.choices(countries, k=num_customers),
    'payment_terms': payment_terms_list,
    'credit_limit': credit_limit_list,
    'account_manager': [f"Manager_{random.randint(1,5)}" for _ in customer_ids],
    'customer_segment': random.choices(['Outdoor Enthusiast', 'Casual Shopper', 'Professional'], k=num_customers),
    'preferred_customer': np.random.choice([True, False], size=num_customers),
    'date_created': pd.to_datetime('2022-01-01') + pd.to_timedelta(np.random.randint(0, 365, size=num_customers), unit='d')
})

# Reset index
customers_df.reset_index(drop=True, inplace=True)




In [7]:
# Display the customers DataFrame
print(f"Customers DataFrame shape: {customers_df.shape}")
customers_df

Customers DataFrame shape: (2000, 18)


Unnamed: 0,customer_id,customer_name,customer_type,contact_name,phone_number,email,billing_address,shipping_address,city,state_province,postal_code,country,payment_terms,credit_limit,account_manager,customer_segment,preferred_customer,date_created
0,1,Customer_1,Wholesale,Contact_1,+1-555-1194,contact1@customer.com,370 Market St,803 Market St,New York,State,87777,Canada,Net 60,12049,Manager_5,Professional,True,2022-01-27
1,2,Customer_2,Retail,Contact_2,+1-555-1274,contact2@customer.com,271 Market St,968 Market St,Vancouver,State,66861,Canada,Prepaid,0,Manager_3,Professional,True,2022-06-06
2,3,Customer_3,Retail,Contact_3,+1-555-9673,contact3@customer.com,841 Market St,708 Market St,Los Angeles,State,29473,USA,Prepaid,0,Manager_3,Casual Shopper,False,2022-08-27
3,4,Customer_4,Retail,Contact_4,+1-555-5501,contact4@customer.com,802 Market St,992 Market St,New York,State,92178,Canada,Prepaid,0,Manager_4,Casual Shopper,True,2022-06-05
4,5,Customer_5,Retail,Contact_5,+1-555-9777,contact5@customer.com,450 Market St,189 Market St,Los Angeles,State,62952,USA,Prepaid,0,Manager_2,Casual Shopper,False,2022-10-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,1996,Customer_1996,Wholesale,Contact_1996,+1-555-4594,contact1996@customer.com,634 Market St,475 Market St,Vancouver,State,90264,Canada,Net 60,18702,Manager_1,Outdoor Enthusiast,False,2022-09-29
1996,1997,Customer_1997,Retail,Contact_1997,+1-555-7730,contact1997@customer.com,218 Market St,457 Market St,New York,State,26116,USA,Prepaid,0,Manager_1,Casual Shopper,False,2022-02-28
1997,1998,Customer_1998,Retail,Contact_1998,+1-555-3338,contact1998@customer.com,104 Market St,928 Market St,New York,State,66814,Canada,Prepaid,0,Manager_2,Professional,True,2022-10-14
1998,1999,Customer_1999,Retail,Contact_1999,+1-555-3979,contact1999@customer.com,418 Market St,859 Market St,Vancouver,State,69641,USA,Prepaid,0,Manager_2,Casual Shopper,False,2022-07-21


## 4. Products

In [8]:
# Number of products
num_products = 300

# Categories and attributes
categories = ['Apparel', 'Equipment']
sub_categories_apparel = ['Jackets', 'Pants', 'Shirts']
sub_categories_equipment = ['Backpacks', 'Tents', 'Sleeping Bags']
sizes = ['S', 'M', 'L', 'XL']
colors = ['Red', 'Blue', 'Green', 'Black', 'Gray']
materials = ['Nylon', 'Polyester', 'Cotton']
genders = ['Male', 'Female', 'Unisex']
seasons = ['Spring', 'Summer', 'Fall', 'Winter']

# Generate product IDs
product_ids = range(1, num_products + 1)

# Initialize lists
product_list = []
bom_list = []
component_id_counter = 1000  # Start IDs for components

for i in product_ids:
    category = random.choice(categories)
    if category == 'Apparel':
        sub_category = random.choice(sub_categories_apparel)
        size = random.choice(sizes)
        components_needed = ['Fabric', 'Thread', 'Buttons', 'Zipper']
    else:
        sub_category = random.choice(sub_categories_equipment)
        size = ''
        components_needed = ['Material', 'Straps', 'Buckles']
    product = {
        'product_id': i,
        'product_name': f"{sub_category} {i}",
        'category': category,
        'sub_category': sub_category,
        'brand': 'OutdoorBrand',
        'description': f"High-quality {sub_category.lower()} for outdoor activities.",
        'sku': f"SKU{i:05d}",
        'upc': f"{random.randint(100000000000,999999999999)}",
        'price': round(random.uniform(50, 500), 2),
        'cost_price': round(random.uniform(30, 300), 2),
        'size': size,
        'color': random.choice(colors),
        'material': random.choice(materials),
        'weight': round(random.uniform(0.5, 5), 2),
        'dimensions': '',
        'season': random.choice(seasons) + ' 2023',
        'gender': random.choice(genders),
        'launch_date': pd.to_datetime('2022-01-01') + pd.to_timedelta(random.randint(0, 365), unit='d'),
        'discontinue_date': pd.NaT,
        'tax_class': 'Standard',
        'status': 'Active'
    }
    product_list.append(product)
    
    # Create BOM for product
    for component in components_needed:
        bom_entry = {
            'product_id': i,
            'component_id': component_id_counter,
            'component_name': component,
            'quantity_required': random.randint(1, 5)
        }
        bom_list.append(bom_entry)
        component_id_counter += 1

# Create DataFrames
products_df = pd.DataFrame(product_list)
bom_df = pd.DataFrame(bom_list)

# Reset index
products_df.reset_index(drop=True, inplace=True)
bom_df.reset_index(drop=True, inplace=True)




In [9]:
# Display the products and BOM DataFrames
print(f"Products DataFrame shape: {products_df.shape}")
print(f"BOM DataFrame shape: {bom_df.shape}")



Products DataFrame shape: (300, 21)
BOM DataFrame shape: (1058, 4)


In [10]:
products_df

Unnamed: 0,product_id,product_name,category,sub_category,brand,description,sku,upc,price,cost_price,...,color,material,weight,dimensions,season,gender,launch_date,discontinue_date,tax_class,status
0,1,Jackets 1,Apparel,Jackets,OutdoorBrand,High-quality jackets for outdoor activities.,SKU00001,733576897548,138.27,298.43,...,Red,Cotton,2.89,,Spring 2023,Female,2022-11-26,NaT,Standard,Active
1,2,Backpacks 2,Equipment,Backpacks,OutdoorBrand,High-quality backpacks for outdoor activities.,SKU00002,498864110266,390.01,135.34,...,Gray,Polyester,3.59,,Spring 2023,Male,2022-12-11,NaT,Standard,Active
2,3,Shirts 3,Apparel,Shirts,OutdoorBrand,High-quality shirts for outdoor activities.,SKU00003,511683273707,170.75,79.29,...,Gray,Polyester,2.57,,Winter 2023,Male,2022-03-13,NaT,Standard,Active
3,4,Shirts 4,Apparel,Shirts,OutdoorBrand,High-quality shirts for outdoor activities.,SKU00004,687051272299,382.71,258.62,...,Black,Nylon,4.40,,Summer 2023,Unisex,2022-10-21,NaT,Standard,Active
4,5,Backpacks 5,Equipment,Backpacks,OutdoorBrand,High-quality backpacks for outdoor activities.,SKU00005,850264082581,462.20,80.38,...,Green,Cotton,4.95,,Winter 2023,Male,2022-06-25,NaT,Standard,Active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,296,Backpacks 296,Equipment,Backpacks,OutdoorBrand,High-quality backpacks for outdoor activities.,SKU00296,528398973780,492.33,263.70,...,Gray,Polyester,1.42,,Summer 2023,Female,2022-02-10,NaT,Standard,Active
296,297,Pants 297,Apparel,Pants,OutdoorBrand,High-quality pants for outdoor activities.,SKU00297,864032049666,340.56,87.78,...,Black,Nylon,0.99,,Spring 2023,Male,2022-04-09,NaT,Standard,Active
297,298,Shirts 298,Apparel,Shirts,OutdoorBrand,High-quality shirts for outdoor activities.,SKU00298,152463507577,263.70,70.31,...,Black,Polyester,3.63,,Summer 2023,Male,2022-03-19,NaT,Standard,Active
298,299,Pants 299,Apparel,Pants,OutdoorBrand,High-quality pants for outdoor activities.,SKU00299,249127456579,81.54,127.31,...,Black,Cotton,3.13,,Spring 2023,Female,2022-08-27,NaT,Standard,Active


In [11]:
bom_df

Unnamed: 0,product_id,component_id,component_name,quantity_required
0,1,1000,Fabric,2
1,1,1001,Thread,2
2,1,1002,Buttons,3
3,1,1003,Zipper,1
4,2,1004,Material,1
...,...,...,...,...
1053,299,2053,Zipper,1
1054,300,2054,Fabric,1
1055,300,2055,Thread,2
1056,300,2056,Buttons,1


## 5. Warehouses

In [12]:
# Number of warehouses
num_warehouses = 5

# Generate warehouse IDs
warehouse_ids = range(1, num_warehouses + 1)

# Sample data
warehouse_names = [f"Warehouse_{i}" for i in warehouse_ids]
locations = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
capacities = np.random.randint(10000, 50000, size=num_warehouses)

# Create DataFrame
warehouses_df = pd.DataFrame({
    'warehouse_id': warehouse_ids,
    'warehouse_name': warehouse_names,
    'location': locations,
    'capacity': capacities,
    'manager_name': [f"Manager_{random.randint(1,5)}" for _ in warehouse_ids],
    'contact_number': [f"+1-555-{random.randint(1000,9999)}" for _ in warehouse_ids],
    'opening_date': pd.to_datetime('2020-01-01') + pd.to_timedelta(np.random.randint(0, 365, size=num_warehouses), unit='d'),
    'status': 'Active'
})

# Reset index
warehouses_df.reset_index(drop=True, inplace=True)




In [13]:
# Display the warehouses DataFrame
print(f"Warehouses DataFrame shape: {warehouses_df.shape}")
warehouses_df

Warehouses DataFrame shape: (5, 8)


Unnamed: 0,warehouse_id,warehouse_name,location,capacity,manager_name,contact_number,opening_date,status
0,1,Warehouse_1,New York,47306,Manager_2,+1-555-9856,2020-05-18,Active
1,2,Warehouse_2,Los Angeles,40881,Manager_2,+1-555-7236,2020-01-10,Active
2,3,Warehouse_3,Chicago,41486,Manager_1,+1-555-1675,2020-08-01,Active
3,4,Warehouse_4,Houston,22541,Manager_1,+1-555-9344,2020-09-29,Active
4,5,Warehouse_5,Phoenix,18712,Manager_2,+1-555-8743,2020-11-19,Active


## 6. Stores

In [14]:
# Number of stores
num_stores = 15

# Generate store IDs
store_ids = range(1, num_stores + 1)

# Sample data
store_names = [f"Store_{i}" for i in store_ids]
regions = ['East', 'West', 'North', 'South', 'Central']
store_types = ['Retail', 'Outlet']

# Create DataFrame
stores_df = pd.DataFrame({
    'store_id': store_ids,
    'store_name': store_names,
    'region': random.choices(regions, k=num_stores),
    'store_size': np.random.randint(1000, 5000, size=num_stores),
    'opening_date': pd.to_datetime('2021-01-01') + pd.to_timedelta(np.random.randint(0, 365, size=num_stores), unit='d'),
    'manager_name': [f"Manager_{random.randint(1,5)}" for _ in store_ids],
    'store_type': random.choices(store_types, k=num_stores),
    'contact_number': [f"+1-555-{random.randint(1000,9999)}" for _ in store_ids],
    'address': [f"{random.randint(100,999)} Store St" for _ in store_ids],
    'status': 'Active'
})

# Reset index
stores_df.reset_index(drop=True, inplace=True)




In [15]:
# Display the stores DataFrame
print(f"Stores DataFrame shape: {stores_df.shape}")
stores_df

Stores DataFrame shape: (15, 10)


Unnamed: 0,store_id,store_name,region,store_size,opening_date,manager_name,store_type,contact_number,address,status
0,1,Store_1,Central,2583,2021-03-29,Manager_1,Retail,+1-555-8773,767 Store St,Active
1,2,Store_2,South,3483,2021-07-13,Manager_4,Retail,+1-555-3953,576 Store St,Active
2,3,Store_3,North,2134,2021-04-14,Manager_1,Retail,+1-555-7157,717 Store St,Active
3,4,Store_4,East,1152,2021-09-28,Manager_4,Retail,+1-555-1370,937 Store St,Active
4,5,Store_5,East,2658,2021-04-06,Manager_3,Retail,+1-555-4411,407 Store St,Active
5,6,Store_6,Central,3964,2021-07-17,Manager_3,Outlet,+1-555-3992,837 Store St,Active
6,7,Store_7,West,3916,2021-01-01,Manager_2,Retail,+1-555-7892,670 Store St,Active
7,8,Store_8,Central,4173,2021-06-16,Manager_4,Retail,+1-555-6785,124 Store St,Active
8,9,Store_9,South,3800,2021-05-06,Manager_1,Outlet,+1-555-6174,793 Store St,Active
9,10,Store_10,South,2856,2021-11-09,Manager_2,Outlet,+1-555-2546,237 Store St,Active


## 7. Manufacturing

In [16]:
# Number of manufacturing facilities
num_manufacturing = 3

# Generate manufacturing IDs
manufacturing_ids = range(1, num_manufacturing + 1)

# Sample data
manufacturing_names = [f"Manufacturing_{i}" for i in manufacturing_ids]
locations = ['China', 'Vietnam', 'Bangladesh']
capacities = np.random.randint(5000, 20000, size=num_manufacturing)

# Create DataFrame
manufacturing_df = pd.DataFrame({
    'manufacturing_id': manufacturing_ids,
    'manufacturing_name': manufacturing_names,
    'factory_id': manufacturing_ids,  # Assuming factory_id is same as manufacturing_id
    'location': locations,
    'capacity_units': capacities,
    'production_lead_time': np.random.randint(5, 20, size=num_manufacturing),
    'on_time_in_full_rate': np.random.uniform(0.8, 1.0, size=num_manufacturing).round(2),
    'operating_hours': 16,
    'shift_count': 2,
    'machine_count': np.random.randint(50, 200, size=num_manufacturing),
    'machine_downtime_rate': np.random.uniform(0.01, 0.05, size=num_manufacturing).round(3),
    'labor_availability': np.random.uniform(0.8, 1.0, size=num_manufacturing).round(2),
    'last_audit_date': pd.to_datetime('2023-01-01') - pd.to_timedelta(np.random.randint(1, 365, size=num_manufacturing), unit='d'),
    'compliance_certificates': ['ISO 9001']*num_manufacturing,
    'status': 'Active'
})

# Reset index
manufacturing_df.reset_index(drop=True, inplace=True)




In [17]:
# Display the manufacturing DataFrame
print(f"Manufacturing DataFrame shape: {manufacturing_df.shape}")
manufacturing_df

Manufacturing DataFrame shape: (3, 15)


Unnamed: 0,manufacturing_id,manufacturing_name,factory_id,location,capacity_units,production_lead_time,on_time_in_full_rate,operating_hours,shift_count,machine_count,machine_downtime_rate,labor_availability,last_audit_date,compliance_certificates,status
0,1,Manufacturing_1,1,China,17869,8,0.82,16,2,158,0.025,0.95,2022-10-15,ISO 9001,Active
1,2,Manufacturing_2,2,Vietnam,9357,9,0.95,16,2,114,0.017,0.88,2022-02-21,ISO 9001,Active
2,3,Manufacturing_3,3,Bangladesh,6363,7,0.93,16,2,130,0.041,0.81,2022-03-18,ISO 9001,Active


## 8. Inventory Records

In [18]:
# Create location list
warehouse_locations = pd.DataFrame({
    'location_id': warehouses_df['warehouse_id'],
    'location_type': 'Warehouse'
})

store_locations = pd.DataFrame({
    'location_id': stores_df['store_id'],
    'location_type': 'Store'
})

locations_df = pd.concat([warehouse_locations, store_locations], ignore_index=True)

# Initialize inventory records
inventory_records_list = []

for _, product in products_df.iterrows():
    for _, location in locations_df.iterrows():
        # Base initial quantity on seasonality
        product_season = product['season'].split(' ')[0]
        if product_season == 'Summer' and location['location_type'] == 'Store':
            quantity_on_hand = np.random.randint(50, 200)
        else:
            quantity_on_hand = np.random.randint(10, 50)
        quantity_reserved = 0  # Will update based on sales orders
        quantity_available = quantity_on_hand - quantity_reserved
        inventory_turnover_rate = 0  # Will calculate later based on sales

        inventory_record = {
            'inventory_id': len(inventory_records_list) + 1,
            'product_id': product['product_id'],
            'location_type': location['location_type'],
            'location_id': location['location_id'],
            'quantity_on_hand': quantity_on_hand,
            'quantity_reserved': quantity_reserved,
            'quantity_available': quantity_available,
            'reorder_point': np.random.randint(20, 50),
            'safety_stock_level': np.random.randint(10, 20),
            'last_restock_date': pd.to_datetime('2023-01-01') - pd.to_timedelta(np.random.randint(1, 90), unit='d'),
            'next_restock_date': pd.NaT,
            'inventory_turnover_rate': inventory_turnover_rate,
            'storage_conditions': '',
            'expiration_date': pd.NaT
        }

        inventory_records_list.append(inventory_record)

# Create DataFrame
inventory_records_df = pd.DataFrame(inventory_records_list)

# Reset index
inventory_records_df.reset_index(drop=True, inplace=True)



In [19]:
# Display the inventory records DataFrame
print(f"Inventory Records DataFrame shape: {inventory_records_df.shape}")
inventory_records_df

Inventory Records DataFrame shape: (6000, 14)


Unnamed: 0,inventory_id,product_id,location_type,location_id,quantity_on_hand,quantity_reserved,quantity_available,reorder_point,safety_stock_level,last_restock_date,next_restock_date,inventory_turnover_rate,storage_conditions,expiration_date
0,1,1,Warehouse,1,23,0,23,21,16,2022-12-28,NaT,0,,NaT
1,2,1,Warehouse,2,33,0,33,46,12,2022-12-26,NaT,0,,NaT
2,3,1,Warehouse,3,42,0,42,36,17,2022-10-10,NaT,0,,NaT
3,4,1,Warehouse,4,10,0,10,24,15,2022-12-19,NaT,0,,NaT
4,5,1,Warehouse,5,44,0,44,38,19,2022-10-04,NaT,0,,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5995,5996,300,Store,11,139,0,139,42,17,2022-12-03,NaT,0,,NaT
5996,5997,300,Store,12,108,0,108,44,15,2022-10-22,NaT,0,,NaT
5997,5998,300,Store,13,72,0,72,25,17,2022-12-18,NaT,0,,NaT
5998,5999,300,Store,14,78,0,78,35,16,2022-12-22,NaT,0,,NaT


## 9. Production Schedule

In [20]:
# Generate production schedules based on capacity and forecasts
production_schedule_list = []
production_schedule_id_counter = 1

for _, manufacturing in manufacturing_df.iterrows():
    # Available capacity
    capacity_units = manufacturing['capacity_units']
    # Generate production orders to fill capacity
    total_units_scheduled = 0
    while total_units_scheduled < capacity_units * 10:  # Multiply by 10 to get enough orders
        product = products_df.sample(1).iloc[0]
        planned_quantity = min(np.random.randint(500, 2000), (capacity_units * 10) - total_units_scheduled)
        planned_start_date = pd.to_datetime('2022-01-01') + pd.to_timedelta(np.random.randint(0, 730), unit='d')
        production_schedule = {
            'production_schedule_id': production_schedule_id_counter,
            'manufacturing_id': manufacturing['manufacturing_id'],
            'product_id': product['product_id'],
            'planned_start_date': planned_start_date,
            'planned_end_date': planned_start_date + pd.to_timedelta(np.random.randint(1, 10), unit='d'),
            'actual_start_date': pd.NaT,
            'actual_end_date': pd.NaT,
            'planned_quantity': planned_quantity,
            'actual_quantity': 0,
            'production_status': 'Scheduled',
            'shift': random.choice(['Day', 'Night']),
            'operator_id': random.randint(1, 50),
            'machine_id': random.randint(1, manufacturing['machine_count']),
            'notes': ''
        }
        production_schedule_list.append(production_schedule)
        total_units_scheduled += planned_quantity
        production_schedule_id_counter += 1
        if production_schedule_id_counter > 10000:
            break
    if production_schedule_id_counter > 10000:
        break

# Create DataFrame
production_schedule_df = pd.DataFrame(production_schedule_list)

# Reset index
production_schedule_df.reset_index(drop=True, inplace=True)



In [21]:
# Display the production schedule DataFrame
print(f"Production Schedule DataFrame shape: {production_schedule_df.shape}")
production_schedule_df

Production Schedule DataFrame shape: (262, 14)


Unnamed: 0,production_schedule_id,manufacturing_id,product_id,planned_start_date,planned_end_date,actual_start_date,actual_end_date,planned_quantity,actual_quantity,production_status,shift,operator_id,machine_id,notes
0,1,1,50,2022-05-26,2022-06-02,NaT,NaT,1255,0,Scheduled,Night,8,98,
1,2,1,112,2022-05-03,2022-05-11,NaT,NaT,1873,0,Scheduled,Day,9,28,
2,3,1,196,2022-08-09,2022-08-16,NaT,NaT,1746,0,Scheduled,Day,37,29,
3,4,1,288,2023-08-05,2023-08-13,NaT,NaT,609,0,Scheduled,Night,35,149,
4,5,1,243,2023-09-25,2023-09-28,NaT,NaT,654,0,Scheduled,Day,11,111,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,258,3,111,2023-07-02,2023-07-06,NaT,NaT,1318,0,Scheduled,Day,4,129,
258,259,3,274,2023-09-14,2023-09-16,NaT,NaT,1582,0,Scheduled,Night,8,93,
259,260,3,77,2022-03-18,2022-03-22,NaT,NaT,815,0,Scheduled,Night,17,124,
260,261,3,203,2023-05-28,2023-05-29,NaT,NaT,1562,0,Scheduled,Day,31,16,


## 10. Sales Orders and Order Line Items

In [22]:
# Number of sales orders
num_sales_orders = 100000

sales_orders_list = []
order_line_items_list = []
backorders_list = []  # New list to store backorders
order_id_counter = 1
order_line_id_counter = 1

for i in range(num_sales_orders):
    customer = customers_df.sample(1).iloc[0]
    customer_id = customer['customer_id']
    customer_type = customer['customer_type']
    
    # Adjust order date based on customer behavior
    if customer_type == 'Retail':
        order_date = pd.to_datetime('2022-06-01') + pd.to_timedelta(np.random.randint(0, 180), unit='d')
    else:
        order_date = pd.to_datetime('2022-01-01') + pd.to_timedelta(np.random.randint(0, 730), unit='d')
    
    num_items = np.random.randint(1, 3) if customer_type == 'Retail' else np.random.randint(3, 10)
    total_order_value = 0
    sales_order = {
        'order_id': order_id_counter,
        'customer_id': customer_id,
        'order_date': order_date,
        'order_status': random.choice(['Pending', 'Confirmed', 'Shipped', 'Delivered']),
        'total_order_value': 0,  # Will calculate
        'currency': 'USD',
        'payment_status': 'Paid' if customer['payment_terms'] == 'Prepaid' else random.choice(['Unpaid', 'Partially Paid']),
        'shipping_method': random.choice(['Standard', 'Express']),
        'shipping_cost': round(random.uniform(5, 20), 2),
        'delivery_date': order_date + pd.to_timedelta(np.random.randint(1, 10), unit='d'),
        'discount_code': '',
        'tax_amount': 0,  # Will calculate
        'notes': ''
    }
    for _ in range(num_items):
        product = products_df.sample(1).iloc[0]
        # Increase demand for seasonal products
        current_month = order_date.month
        product_season = product['season'].split(' ')[0]
        season_months = {'Spring': [3, 4, 5], 'Summer': [6, 7, 8], 'Fall': [9, 10, 11], 'Winter': [12, 1, 2]}
        if current_month in season_months.get(product_season, []):
            quantity_ordered = np.random.randint(5, 10)
        else:
            quantity_ordered = np.random.randint(1, 5)
        unit_price = product['price']
        
        # Update inventory records with backorder logic
        inventory_mask = (
            (inventory_records_df['product_id'] == product['product_id']) &
            (inventory_records_df['location_type'] == 'Warehouse')  # Assuming sales fulfill from warehouses
        )
        available_stock = inventory_records_df.loc[inventory_mask, 'quantity_on_hand'].sum()
        
        if available_stock >= quantity_ordered:
            # Sufficient stock available
            inventory_records_df.loc[inventory_mask, 'quantity_on_hand'] -= quantity_ordered
            inventory_records_df.loc[inventory_mask, 'quantity_available'] -= quantity_ordered
            # Proceed to create order line item
            line_total = quantity_ordered * unit_price
            total_order_value += line_total
            tax_amount = line_total * 0.1  # Assuming 10% tax
            sales_order['tax_amount'] += tax_amount
            order_line_item = {
                'order_line_id': order_line_id_counter,
                'order_id': order_id_counter,
                'product_id': product['product_id'],
                'quantity_ordered': quantity_ordered,
                'unit_price': unit_price,
                'line_total': line_total
            }
            order_line_items_list.append(order_line_item)
            order_line_id_counter += 1
        elif available_stock > 0:
            # Partial fulfillment
            quantity_fulfilled = available_stock
            backorder_quantity = quantity_ordered - quantity_fulfilled
            # Update inventory to zero
            inventory_records_df.loc[inventory_mask, 'quantity_on_hand'] = 0
            inventory_records_df.loc[inventory_mask, 'quantity_available'] = 0
            # Create order line item with fulfilled quantity
            line_total = quantity_fulfilled * unit_price
            total_order_value += line_total
            tax_amount = line_total * 0.1
            sales_order['tax_amount'] += tax_amount
            order_line_item = {
                'order_line_id': order_line_id_counter,
                'order_id': order_id_counter,
                'product_id': product['product_id'],
                'quantity_ordered': quantity_fulfilled,
                'unit_price': unit_price,
                'line_total': line_total
            }
            order_line_items_list.append(order_line_item)
            order_line_id_counter += 1
            # Log backorder
            backorder = {
                'backorder_id': len(backorders_list) + 1,
                'order_id': order_id_counter,
                'product_id': product['product_id'],
                'backorder_quantity': backorder_quantity,
                'order_date': order_date,
                'customer_id': customer_id,
                'expected_delivery_date': order_date + pd.to_timedelta(np.random.randint(5, 15), unit='d'),
                'status': 'Backordered'
            }
            backorders_list.append(backorder)
        else:
            # No stock available
            backorder_quantity = quantity_ordered
            # Log backorder
            backorder = {
                'backorder_id': len(backorders_list) + 1,
                'order_id': order_id_counter,
                'product_id': product['product_id'],
                'backorder_quantity': backorder_quantity,
                'order_date': order_date,
                'customer_id': customer_id,
                'expected_delivery_date': order_date + pd.to_timedelta(np.random.randint(5, 15), unit='d'),
                'status': 'Backordered'
            }
            backorders_list.append(backorder)
            # Optionally, you can create an order line item with zero quantity
            order_line_item = {
                'order_line_id': order_line_id_counter,
                'order_id': order_id_counter,
                'product_id': product['product_id'],
                'quantity_ordered': 0,
                'unit_price': unit_price,
                'line_total': 0
            }
            order_line_items_list.append(order_line_item)
            order_line_id_counter += 1
    sales_order['total_order_value'] = total_order_value + sales_order['tax_amount'] + sales_order['shipping_cost']
    sales_orders_list.append(sales_order)
    order_id_counter += 1
    if order_id_counter % 10000 == 0:
        print(f"Generated {order_id_counter} sales orders")
    
# Create DataFrames
sales_orders_df = pd.DataFrame(sales_orders_list)
order_line_items_df = pd.DataFrame(order_line_items_list)
backorders_df = pd.DataFrame(backorders_list)

# Reset index
sales_orders_df.reset_index(drop=True, inplace=True)
order_line_items_df.reset_index(drop=True, inplace=True)
backorders_df.reset_index(drop=True, inplace=True)


Generated 10000 sales orders
Generated 20000 sales orders
Generated 30000 sales orders
Generated 40000 sales orders
Generated 50000 sales orders
Generated 60000 sales orders
Generated 70000 sales orders
Generated 80000 sales orders
Generated 90000 sales orders
Generated 100000 sales orders


In [25]:
# Display the sales orders DataFrame
print(f"Sales Orders DataFrame shape: {sales_orders_df.shape}")
print(f"Order Line Items DataFrame shape: {order_line_items_df.shape}")
print(f"Backorders DataFrame shape: {backorders_df.shape}")

Sales Orders DataFrame shape: (100000, 13)
Order Line Items DataFrame shape: (334791, 6)
Backorders DataFrame shape: (332179, 8)


In [26]:
sales_orders_df

Unnamed: 0,order_id,customer_id,order_date,order_status,total_order_value,currency,payment_status,shipping_method,shipping_cost,delivery_date,discount_code,tax_amount,notes
0,1,1196,2022-08-10,Shipped,10636.693,USD,Paid,Express,19.68,2022-08-11,,965.183,
1,2,1398,2023-02-21,Pending,3308.031,USD,Unpaid,Express,9.45,2023-02-24,,299.871,
2,3,1266,2022-08-20,Pending,9143.236,USD,Partially Paid,Express,15.59,2022-08-28,,829.786,
3,4,953,2022-10-05,Confirmed,4479.645,USD,Partially Paid,Standard,7.43,2022-10-07,,406.565,
4,5,1324,2023-05-16,Confirmed,12316.680,USD,Paid,Express,8.89,2023-05-21,,1118.890,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,99996,350,2022-09-01,Pending,5.720,USD,Paid,Standard,5.72,2022-09-08,,0.000,
99996,99997,571,2023-01-21,Confirmed,15.140,USD,Unpaid,Express,15.14,2023-01-27,,0.000,
99997,99998,769,2023-06-25,Pending,6.180,USD,Unpaid,Express,6.18,2023-06-28,,0.000,
99998,99999,331,2022-11-22,Delivered,18.540,USD,Paid,Express,18.54,2022-11-23,,0.000,


In [27]:
order_line_items_df

Unnamed: 0,order_line_id,order_id,product_id,quantity_ordered,unit_price,line_total
0,1,1,259,1,431.65,431.65
1,2,1,123,1,498.41,498.41
2,3,1,69,7,146.00,1022.00
3,4,1,93,2,175.48,350.96
4,5,1,32,7,344.19,2409.33
...,...,...,...,...,...,...
334786,334787,99998,253,0,57.78,0.00
334787,334788,99998,92,0,335.47,0.00
334788,334789,99999,222,0,54.65,0.00
334789,334790,99999,269,0,228.22,0.00


In [28]:
backorders_df

Unnamed: 0,backorder_id,order_id,product_id,backorder_quantity,order_date,customer_id,expected_delivery_date,status
0,1,87,167,7,2022-07-04,1006,2022-07-17,Backordered
1,2,142,167,2,2022-04-27,96,2022-05-03,Backordered
2,3,175,167,8,2022-08-13,62,2022-08-22,Backordered
3,4,198,167,7,2023-08-16,1466,2023-08-27,Backordered
4,5,223,147,6,2022-06-26,362,2022-07-08,Backordered
...,...,...,...,...,...,...,...,...
332174,332175,99998,253,1,2023-06-25,769,2023-07-06,Backordered
332175,332176,99998,92,4,2023-06-25,769,2023-07-06,Backordered
332176,332177,99999,222,4,2022-11-22,331,2022-12-06,Backordered
332177,332178,99999,269,1,2022-11-22,331,2022-12-03,Backordered


## 11. Purchase Orders and Purchase Order Line Items

In [29]:
# Number of purchase orders
num_purchase_orders = 10000

purchase_orders_list = []
po_line_items_list = []
purchase_order_id_counter = 1
po_line_id_counter = 1

for i in range(num_purchase_orders):
    supplier = suppliers_df.sample(1).iloc[0]
    supplier_id = supplier['supplier_id']
    order_date = pd.to_datetime('2022-01-01') + pd.to_timedelta(np.random.randint(0, 730), unit='d')
    num_items = np.random.randint(1, 4)
    total_order_value = 0
    purchase_order = {
        'purchase_order_id': purchase_order_id_counter,
        'supplier_id': supplier_id,
        'order_date': order_date,
        'expected_delivery_date': order_date + pd.to_timedelta(supplier['lead_time_days'], unit='d'),
        'total_order_value': 0,  # Will calculate
        'currency': 'USD',
        'payment_terms': supplier['payment_terms'],
        'order_status': random.choice(['Pending', 'Confirmed', 'Received']),
        'notes': ''
    }
    for _ in range(num_items):
        component = bom_df.sample(1).iloc[0]
        component_id = component['component_id']
        quantity_ordered = np.random.randint(100, 1000)
        unit_price = round(random.uniform(5, 20), 2)
        line_total = quantity_ordered * unit_price
        total_order_value += line_total
        po_line_item = {
            'po_line_id': po_line_id_counter,
            'purchase_order_id': purchase_order_id_counter,
            'component_id': component_id,
            'quantity_ordered': quantity_ordered,
            'unit_price': unit_price,
            'line_total': line_total
        }
        po_line_items_list.append(po_line_item)
        po_line_id_counter += 1
    purchase_order['total_order_value'] = total_order_value
    purchase_orders_list.append(purchase_order)
    purchase_order_id_counter += 1
    if purchase_order_id_counter % 1000 == 0:
        print(f"Generated {purchase_order_id_counter} purchase orders")
    # Optional: Break early for testing
    # if purchase_order_id_counter > 1000:
    #     break

# Create DataFrames
purchase_orders_df = pd.DataFrame(purchase_orders_list)
po_line_items_df = pd.DataFrame(po_line_items_list)

# Reset index
purchase_orders_df.reset_index(drop=True, inplace=True)
po_line_items_df.reset_index(drop=True, inplace=True)




Generated 1000 purchase orders
Generated 2000 purchase orders
Generated 3000 purchase orders
Generated 4000 purchase orders
Generated 5000 purchase orders
Generated 6000 purchase orders
Generated 7000 purchase orders
Generated 8000 purchase orders
Generated 9000 purchase orders
Generated 10000 purchase orders


In [30]:
# Display the purchase orders DataFrame
print(f"Purchase Orders DataFrame shape: {purchase_orders_df.shape}")
print(f"Purchase Order Line Items DataFrame shape: {po_line_items_df.shape}")


Purchase Orders DataFrame shape: (10000, 9)
Purchase Order Line Items DataFrame shape: (20010, 6)


In [31]:
purchase_orders_df

Unnamed: 0,purchase_order_id,supplier_id,order_date,expected_delivery_date,total_order_value,currency,payment_terms,order_status,notes
0,1,16,2023-09-23,2023-10-04,33775.84,USD,Net 30,Confirmed,
1,2,28,2023-01-30,2023-02-09,24028.01,USD,Net 30,Confirmed,
2,3,30,2023-04-09,2023-04-23,14701.60,USD,Net 30,Pending,
3,4,28,2022-06-04,2022-06-14,6110.34,USD,Net 30,Received,
4,5,7,2023-07-02,2023-07-10,23771.85,USD,Net 30,Pending,
...,...,...,...,...,...,...,...,...,...
9995,9996,10,2022-07-28,2022-08-19,11172.08,USD,Net 30,Received,
9996,9997,25,2023-02-06,2023-02-18,31322.27,USD,Net 30,Pending,
9997,9998,25,2022-03-24,2022-04-05,3919.86,USD,Net 30,Received,
9998,9999,29,2022-06-10,2022-07-06,9400.54,USD,Net 30,Received,


In [32]:
po_line_items_df

Unnamed: 0,po_line_id,purchase_order_id,component_id,quantity_ordered,unit_price,line_total
0,1,1,1208,976,18.70,18251.20
1,2,1,1056,513,13.60,6976.80
2,3,1,1684,448,19.08,8547.84
3,4,2,1810,321,18.98,6092.58
4,5,2,1861,153,13.59,2079.27
...,...,...,...,...,...,...
20005,20006,9999,2006,306,19.97,6110.82
20006,20007,9999,1040,166,14.68,2436.88
20007,20008,9999,1980,103,8.28,852.84
20008,20009,10000,1212,117,13.81,1615.77


## 12. Material Requirements

In [33]:
material_requirements_list = []
material_requirement_id_counter = 1

for _, schedule in production_schedule_df.iterrows():
    product_id = schedule['product_id']
    planned_quantity = schedule['planned_quantity']
    # Get BOM for the product
    bom_entries = bom_df[bom_df['product_id'] == product_id]
    for _, bom_entry in bom_entries.iterrows():
        component_id = bom_entry['component_id']
        quantity_required = bom_entry['quantity_required'] * planned_quantity
        available_quantity = np.random.randint(0, quantity_required // 2)
        shortage_quantity = max(0, quantity_required - available_quantity)
        supplier = suppliers_df.sample(1).iloc[0]
        supplier_id = supplier['supplier_id']
        lead_time = supplier['lead_time_days']
        if supplier['is_disrupted']:
            lead_time += supplier['lead_time_variability'] * 2  # Simulate delay
        expected_delivery_date = schedule['planned_start_date'] - pd.to_timedelta(lead_time, unit='d')
        material_requirement = {
            'material_requirement_id': material_requirement_id_counter,
            'production_schedule_id': schedule['production_schedule_id'],
            'component_id': component_id,
            'required_quantity': quantity_required,
            'available_quantity': available_quantity,
            'shortage_quantity': shortage_quantity,
            'order_status': 'Pending' if shortage_quantity > 0 else 'Available',
            'supplier_id': supplier_id,
            'expected_delivery_date': expected_delivery_date,
            'unit_cost': round(random.uniform(5, 20), 2),
            'total_cost': quantity_required * round(random.uniform(5, 20), 2),
            'lead_time_days': lead_time,
            'last_updated': pd.Timestamp('today')
        }
        material_requirements_list.append(material_requirement)
        material_requirement_id_counter += 1
        if material_requirement_id_counter > 50000:
            break
    if material_requirement_id_counter > 50000:
        break

# Create DataFrame
material_requirements_df = pd.DataFrame(material_requirements_list)

# Reset index
material_requirements_df.reset_index(drop=True, inplace=True)




In [34]:
# Display the material requirements DataFrame
print(f"Material Requirements DataFrame shape: {material_requirements_df.shape}")
material_requirements_df

Material Requirements DataFrame shape: (938, 13)


Unnamed: 0,material_requirement_id,production_schedule_id,component_id,required_quantity,available_quantity,shortage_quantity,order_status,supplier_id,expected_delivery_date,unit_cost,total_cost,lead_time_days,last_updated
0,1,1,1176,5020,2076,2944,Pending,7,2022-05-10,14.08,60993.00,16,2024-11-04 10:26:19.352631
1,2,1,1177,5020,83,4937,Pending,17,2022-05-10,8.47,97940.20,16,2024-11-04 10:26:19.353804
2,3,1,1178,2510,1068,1442,Pending,11,2022-05-05,15.30,37348.80,21,2024-11-04 10:26:19.354634
3,4,1,1179,1255,400,855,Pending,1,2022-05-03,11.34,8948.15,23,2024-11-04 10:26:19.355285
4,5,2,1388,9365,2076,7289,Pending,14,2022-04-09,11.31,53193.20,24,2024-11-04 10:26:19.356515
...,...,...,...,...,...,...,...,...,...,...,...,...,...
933,934,261,1715,4686,1761,2925,Pending,18,2023-05-16,16.27,68931.06,12,2024-11-04 10:26:19.681770
934,935,262,1641,3148,1485,1663,Pending,15,2022-04-24,18.71,44544.20,19,2024-11-04 10:26:19.682234
935,936,262,1642,2361,1019,1342,Pending,14,2022-04-19,10.62,44882.61,24,2024-11-04 10:26:19.682483
936,937,262,1643,3148,536,2612,Pending,19,2022-04-24,14.95,25215.48,19,2024-11-04 10:26:19.682737


## 13.Forecast

In [35]:
# Import 'product' from itertools with an alias
from itertools import product as itertools_product

# Prepare combinations using 'itertools_product'
product_location_combinations = list(itertools_product(products_df['product_id'], stores_df['store_id']))

forecast_list = []
forecast_id_counter = 1

for (product_id, store_id) in product_location_combinations:
    # Rename variables to avoid shadowing
    product_row = products_df[products_df['product_id'] == product_id].iloc[0]
    store_row = stores_df[stores_df['store_id'] == store_id].iloc[0]
    num_periods = 12  # Monthly forecasts for a year
    for month in range(1, num_periods + 1):
        forecast_date = pd.to_datetime('2023-01-01') + pd.DateOffset(months=month-1)
        current_month = forecast_date.month
        product_season = product_row['season'].split(' ')[0]
        season_months = {'Spring': [3, 4, 5], 'Summer': [6, 7, 8], 'Fall': [9, 10, 11], 'Winter': [12, 1, 2]}
        # Base forecast quantity on seasonality
        if current_month in season_months.get(product_season, []):
            forecast_quantity = np.random.randint(100, 500)
        else:
            forecast_quantity = np.random.randint(10, 100)
        forecast = {
            'forecast_id': forecast_id_counter,
            'product_id': product_id,
            'location_id': store_id,
            'forecast_date': forecast_date,
            'forecast_period': 'Monthly',
            'forecast_quantity': forecast_quantity,
            'forecast_model': random.choice(['Seasonal ARIMA', 'Holt-Winters']),
            'forecast_accuracy': round(np.random.uniform(0.7, 0.95), 2),
            'confidence_interval': round(np.random.uniform(0.8, 0.99), 2),
            'seasonal_adjustment': round(np.random.uniform(0.9, 1.1), 2),
            'last_actuals_date': forecast_date - pd.DateOffset(months=1)
        }
        forecast_list.append(forecast)
        forecast_id_counter += 1

# Create DataFrame
forecast_df = pd.DataFrame(forecast_list)

# Reset index
forecast_df.reset_index(drop=True, inplace=True)

# Display the first few rows
forecast_df.head()



Unnamed: 0,forecast_id,product_id,location_id,forecast_date,forecast_period,forecast_quantity,forecast_model,forecast_accuracy,confidence_interval,seasonal_adjustment,last_actuals_date
0,1,1,1,2023-01-01,Monthly,79,Holt-Winters,0.81,0.97,0.97,2022-12-01
1,2,1,1,2023-02-01,Monthly,79,Seasonal ARIMA,0.87,0.83,1.06,2023-01-01
2,3,1,1,2023-03-01,Monthly,101,Holt-Winters,0.76,0.88,1.05,2023-02-01
3,4,1,1,2023-04-01,Monthly,290,Seasonal ARIMA,0.94,0.89,1.09,2023-03-01
4,5,1,1,2023-05-01,Monthly,328,Holt-Winters,0.92,0.85,1.05,2023-04-01


In [37]:
# Display the forecast DataFrame
print(f"Forecast DataFrame shape: {forecast_df.shape}")
forecast_df

Forecast DataFrame shape: (54000, 11)


Unnamed: 0,forecast_id,product_id,location_id,forecast_date,forecast_period,forecast_quantity,forecast_model,forecast_accuracy,confidence_interval,seasonal_adjustment,last_actuals_date
0,1,1,1,2023-01-01,Monthly,79,Holt-Winters,0.81,0.97,0.97,2022-12-01
1,2,1,1,2023-02-01,Monthly,79,Seasonal ARIMA,0.87,0.83,1.06,2023-01-01
2,3,1,1,2023-03-01,Monthly,101,Holt-Winters,0.76,0.88,1.05,2023-02-01
3,4,1,1,2023-04-01,Monthly,290,Seasonal ARIMA,0.94,0.89,1.09,2023-03-01
4,5,1,1,2023-05-01,Monthly,328,Holt-Winters,0.92,0.85,1.05,2023-04-01
...,...,...,...,...,...,...,...,...,...,...,...
53995,53996,300,15,2023-08-01,Monthly,468,Seasonal ARIMA,0.87,0.89,1.08,2023-07-01
53996,53997,300,15,2023-09-01,Monthly,23,Seasonal ARIMA,0.88,0.81,1.03,2023-08-01
53997,53998,300,15,2023-10-01,Monthly,45,Seasonal ARIMA,0.80,0.97,1.02,2023-09-01
53998,53999,300,15,2023-11-01,Monthly,22,Seasonal ARIMA,0.77,0.80,1.07,2023-10-01


In [38]:
# List of DataFrames and their filenames
dataframes = [
    (calendar_df, 'calendar.csv'),
    (suppliers_df, 'suppliers.csv'),
    (customers_df, 'customers.csv'),
    (products_df, 'products.csv'),
    (bom_df, 'bom.csv'),
    (warehouses_df, 'warehouses.csv'),
    (stores_df, 'stores.csv'),
    (manufacturing_df, 'manufacturing.csv'),
    (inventory_records_df, 'inventory_records.csv'),
    (production_schedule_df, 'production_schedule.csv'),
    (sales_orders_df, 'sales_orders.csv'),
    (order_line_items_df, 'order_line_items.csv'),
    (backorders_df, 'backorders.csv'),
    (purchase_orders_df, 'purchase_orders.csv'),
    (po_line_items_df, 'purchase_order_line_items.csv'),
    (material_requirements_df, 'material_requirements.csv'),
    (forecast_df, 'forecast.csv')
]

# Save each DataFrame to a CSV file
for df, filename in dataframes:
    df.to_csv(filename, index=False)
    print(f"Saved {filename}")


Saved calendar.csv
Saved suppliers.csv
Saved customers.csv
Saved products.csv
Saved bom.csv
Saved warehouses.csv
Saved stores.csv
Saved manufacturing.csv
Saved inventory_records.csv
Saved production_schedule.csv
Saved sales_orders.csv
Saved order_line_items.csv
Saved backorders.csv
Saved purchase_orders.csv
Saved purchase_order_line_items.csv
Saved material_requirements.csv
Saved forecast.csv


In [39]:
# Iterate over each dataframe and print its columns
for df, filename in dataframes:
    print(f"---\nColumns in {filename}:\n")
    for column in df.columns:
        print(f"- {column}")
    print()


---
Columns in calendar.csv:

- date
- day_of_week
- week_number
- month
- quarter
- year
- is_weekend
- is_holiday
- is_working_day
- is_maintenance_day
- notes

---
Columns in suppliers.csv:

- supplier_id
- supplier_name
- contact_name
- phone_number
- email
- address
- city
- state_province
- postal_code
- country
- lead_time_days
- lead_time_variability
- on_time_delivery_rate
- minimum_order_quantity
- payment_terms
- last_inspection_date
- sustainability_score
- certifications
- preferred_supplier
- supplier_rating
- is_disrupted

---
Columns in customers.csv:

- customer_id
- customer_name
- customer_type
- contact_name
- phone_number
- email
- billing_address
- shipping_address
- city
- state_province
- postal_code
- country
- payment_terms
- credit_limit
- account_manager
- customer_segment
- preferred_customer
- date_created

---
Columns in products.csv:

- product_id
- product_name
- category
- sub_category
- brand
- description
- sku
- upc
- price
- cost_price
- size
- col