In [None]:
import random
from faker import Faker
import pandas as pd

In [None]:
# Fixed seed so a fresh Restart & Run All regenerates the same synthetic dataset.
# Re-running individual cells out of order still advances the generators.
SEED = 42
random.seed(SEED)  # seeds the random.randint / random.choice calls used by the generators

fake = Faker('en_US')
Faker.seed(SEED)  # seeds the generator behind the `fake.*` provider calls

In [None]:
# Scratch check: sample one company suffix (the same call fills 'CompanyType' in generate_customers).
fake.company_suffix()

In [None]:
# Generate Customers
def generate_customers(num_customers=100):
    """Build a list of synthetic customer records.

    Args:
        num_customers: How many customer dicts to create (default 100).

    Returns:
        A list of dicts, one per customer. Text fields come from the
        module-level ``fake`` instance; 'CreditLimit' is a random integer in
        [10000, 100000] and 'CreditRating' is drawn from a fixed set of labels.
    """
    credit_ratings = ['very poor', 'fair', 'good', 'very good', 'exceptional']

    def _new_customer():
        # Key order matters: it fixes the order of the generator calls.
        return {
            'CustomerID': fake.uuid4(),
            'Name': fake.company(),
            'CompanyType': fake.company_suffix(),
            'ContactDetails': fake.phone_number(),
            'EmailDetails': fake.company_email(),
            'AddressDetails': fake.address(),
            'AdminDetails': fake.administrative_unit(),
            'CreditLimit': random.randint(10000, 100000),
            'CreditRating': random.choice(credit_ratings),
        }

    return [_new_customer() for _ in range(num_customers)]

In [None]:
# Generate the customer records with the function's default count (num_customers=100).
# NOTE(review): `customers` is rebound further down to a plain list of CustomerID values.
customers = generate_customers()

In [None]:
# Tabulate the customer dicts: one row per customer, one column per dict key.
df_customers = pd.DataFrame(customers)

In [None]:
# Preview the first few customer rows rather than rendering the whole frame.
df_customers.head()

In [None]:
# Product types available in each product category (five per category).
# The list order is significant: random.choice indexes into these lists.
product_types = {
    'Office Supplies': [
        "Laser Printer", "Ergonomic Chair", "Standing Desk",
        "Wireless Keyboard", "Office Stationery Set",
    ],
    'Healthcare': [
        "Digital Thermometer", "Blood Pressure Monitor", "Glucometer",
        "Surgical Masks", "Hand Sanitizer",
    ],
    'Consumer Goods': [
        "LED Light Bulbs", "Vacuum Cleaner", "Air Purifier",
        "Electric Kettle", "Microwave Oven",
    ],
    'Automotive': [
        "Car Battery", "Engine Oil", "Brake Pads",
        "Tires", "Car Air Freshener",
    ],
    'Construction Materials': [
        "Cement", "Steel Beams", "Insulation Material",
        "PVC Pipes", "Roofing Sheets",
    ],
    'Food Beverages': [
        "Organic Coffee Beans", "Bottled Water", "Energy Drink",
        "Protein Bars", "Gourmet Chocolate",
    ],
    'Industrial Equipment': [
        "Forklift", "Conveyor Belt", "Industrial Robot",
        "3D Printer", "CNC Machine",
    ],
    'Electronics': [
        "Laptop", "Smartphone", "Tablet",
        "Smartwatch", "Bluetooth Headphones",
    ],
}

In [None]:
# Per-product-type settings, keyed by product type name.
# Each value is [max_order_limit, unit_price], unpacked in that order by
# assign_order_quantity_and_price.
products = {
    'Laser Printer': [100, 425],
    'Ergonomic Chair': [1000, 299],
    'Standing Desk': [1000, 420],
    'Digital Thermometer': [2500, 36],
    'Blood Pressure Monitor': [2500, 44],
    'Glucometer': [2500, 29],
    'Vacuum Cleaner': [100, 364],
    'Electric Kettle': [500, 199],
    'Microwave Oven': [500, 325],
    'Car Battery': [1000, 99],
    'Engine Oil': [1000, 40],
    'Brake Pads': [1000, 60],
    'Tires': [2000, 120],
    'Car Air Freshener': [2500, 9],
    'Cement': [500, 100],
    'Forklift': [50, 21000],
    'Conveyor Belt': [1000, 1000],
    'Industrial Robot': [50, 30000],
    '3D Printer': [100, 980],
    'CNC Machine': [100, 25000],
    'Wireless Keyboard': [1000, 78],
    'LED Light Bulbs': [5000, 5],
    'Air Purifier': [1000, 99],
    'Steel Beams': [1000, 600],
    'Insulation Material': [10000, 2],
    'PVC Pipes': [5000, 3],
    'Roofing Sheets': [5000, 7],
    'Laptop': [1000, 799],
    'Smartphone': [1000, 675],
    'Tablet': [1000, 499],
    'Smartwatch': [1000, 160],
    'Bluetooth Headphones': [1000, 139],
    'Organic Coffee Beans': [5000, 19],
    'Bottled Water': [10000, 5],
    'Energy Drink': [5000, 7],
    'Protein Bars': [5000, 5],
    'Gourmet Chocolate': [5000, 9],
    'Office Stationery Set': [1000, 15],
    'Surgical Masks': [10000, 2],
    'Hand Sanitizer': [5000, 14],
}

In [None]:
# Pick a product type for a given product category
def assign_product_type(product_category):
    """Return a randomly chosen product type for ``product_category``.

    Looks the category up in the module-level ``product_types`` mapping and
    returns one of its entries at random. Returns None when the category is
    not a key of that mapping.
    """
    candidates = product_types.get(product_category)
    if candidates is None:
        return None
    return random.choice(candidates)

# Categories that generate_products samples from. Derived from product_types so the
# two can never drift apart; dict order is preserved, so this yields the same list,
# in the same order, as the previously hand-written copy.
product_categories = list(product_types)

In [None]:
def assign_order_quantity_and_price(sample_product_type, catalog=None):
    """Pick a random quantity for a product type and price it.

    Args:
        sample_product_type: Product type name to look up.
        catalog: Optional mapping of product type -> [max_order_limit, unit_price].
            Defaults to the notebook-level ``products`` catalog. Passing it
            explicitly avoids depending on that global, which a later cell
            rebinds to a different structure.

    Returns:
        Tuple ``(order_quantity, order_price, unit_price)`` where
        ``order_price == order_quantity * unit_price`` and the quantity lies
        within 50 units below the product's maximum order limit.

    Raises:
        ValueError: If ``sample_product_type`` is not in the catalog
            (previously this surfaced as an UnboundLocalError).
    """
    if catalog is None:
        catalog = products
    if sample_product_type not in catalog:
        raise ValueError(f"Unknown product type: {sample_product_type!r}")

    max_order_limit, unit_price = catalog[sample_product_type]
    # Clamp the lower bound so small limits (e.g. 50) never yield a zero or
    # negative quantity.
    min_order_quantity = max(1, max_order_limit - 50)
    order_quantity = random.randint(min_order_quantity, max_order_limit)
    order_price = order_quantity * unit_price
    return order_quantity, order_price, unit_price

In [None]:
# Generate Products
def generate_products(num_products=30):
    """Build a list of synthetic product records.

    For each record a category is drawn from ``product_categories``, a product
    type is drawn for that category, and a quantity and price are derived via
    ``assign_order_quantity_and_price``.

    Args:
        num_products: How many product dicts to create (default 30).

    Returns:
        A list of dicts with keys 'ProductID', 'ProductCategory',
        'ProductType', 'Price' (quantity * unit price), 'Quantity' and
        'UnitPrice'.
    """
    # Named `generated` rather than `products` so the module-level catalog of
    # the same name is not shadowed inside this function.
    generated = []
    for _ in range(num_products):
        category = random.choice(product_categories)
        product_type = assign_product_type(category)
        quantity, total_price, unit_price = assign_order_quantity_and_price(product_type)
        generated.append({
            'ProductID': fake.uuid4(),
            'ProductCategory': category,
            'ProductType': product_type,
            'Price': total_price,
            'Quantity': quantity,
            'UnitPrice': unit_price,
        })
    return generated

In [None]:
# Generate the product records with the function's default count (num_products=30).
products_input = generate_products()

In [None]:
# Tabulate the product dicts: one row per product, one column per dict key.
df_products = pd.DataFrame(products_input)

In [None]:
# Preview the first few product rows rather than rendering the whole frame.
df_products.head()

In [None]:
# Reduce the tables to what order generation needs: the customer IDs, and each
# product's ID and unit price as a list of {'ProductID', 'UnitPrice'} records.
# NOTE(review): both names are rebound to a different shape here. `customers` was the
# list of customer dicts and `products` was the type -> [limit, price] catalog. After
# this cell runs, re-running the product-generation cells fails until the catalog cell
# is executed again.
customers = df_customers['CustomerID'].tolist()
products = df_products[['ProductID', 'UnitPrice']].to_dict('records')

In [None]:
# Date window for generated orders; generate_orders passes both bounds to fake.date_between.
# NOTE(review): this import would normally sit with the other imports in the first cell.
from datetime import datetime
start_date = datetime(2022, 8, 1)
end_date = datetime(2024, 8, 1)

# Generate Orders
def generate_orders(num_orders=500):
    """Build a list of synthetic order records.

    Args:
        num_orders: How many order dicts to create (default 500).

    Returns:
        A list of dicts with keys 'OrderID', 'CustomerID' (drawn from the
        module-level ``customers``), 'OrderDate' (from ``fake.date_between``
        over ``start_date``..``end_date``) and 'OrderStatus'.
    """
    statuses = ['Pending', 'Shipped', 'Delivered', 'Cancelled']

    def _new_order():
        # Key order matters: it fixes the order of the generator calls.
        return {
            'OrderID': fake.uuid4(),
            'CustomerID': random.choice(customers),
            'OrderDate': fake.date_between(start_date, end_date),
            'OrderStatus': random.choice(statuses),
        }

    return [_new_order() for _ in range(num_orders)]


In [None]:
# Generate the order records with the function's default count (num_orders=500).
orders = generate_orders()

# Tabulate the order dicts; `orders` itself stays a list and is passed to generate_order_items.
df_orders = pd.DataFrame(orders)

In [None]:
# Preview the first few order rows rather than rendering the whole frame.
df_orders.head()

In [None]:
# Generate Order Items
def generate_order_items(orders, num_order_items=100, product_records=None):
    """Build a list of synthetic order-item records.

    Each item pairs a randomly chosen order with a randomly chosen product and
    a random quantity between 1 and 10 (inclusive).

    Args:
        orders: Sequence of order dicts; each must have an 'OrderID' key.
        num_order_items: How many order-item dicts to create (default 100).
        product_records: Optional sequence of dicts with 'ProductID' and
            'UnitPrice' keys. Defaults to the notebook-level ``products`` list,
            so existing calls keep working; passing it explicitly removes the
            dependency on that global.

    Returns:
        A list of dicts with keys 'Order_Item_ID', 'OrderID', 'ProductID',
        'Quantity', 'Unit_Price' and 'Total_Price' (quantity * unit price).
    """
    if product_records is None:
        product_records = products

    order_items = []
    for _ in range(num_order_items):
        order = random.choice(orders)
        product = random.choice(product_records)
        quantity = random.randint(1, 10)
        unit_price = product['UnitPrice']
        order_items.append({
            'Order_Item_ID': fake.uuid4(),
            'OrderID': order['OrderID'],
            'ProductID': product['ProductID'],
            'Quantity': quantity,
            'Unit_Price': unit_price,
            'Total_Price': quantity * unit_price,
        })
    return order_items

In [None]:
# Generate order items against the orders list, using the default count (num_order_items=100).
order_items = generate_order_items(orders)

# Tabulate the order-item dicts: one row per item, one column per dict key.
df_order_items = pd.DataFrame(order_items)

In [None]:
# Preview the first few rows of the order-items DataFrame.
df_order_items.head()