# Use O2C_Template Customized Notebook Template

In [1]:
import random
from faker import Faker
import pandas as pd

In [2]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
fake = Faker('en_US')

# Customer Data Setup

In [4]:
# Generate Customers
def generate_customers(num_customers=100):
    customers = []
    for _ in range(num_customers):
        customer = {
            'CustomerID': fake.uuid4(),
            'Name': fake.company(),
            'CompanyType':fake.company_suffix(),
            'ContactDetails': fake.phone_number(),
            'EmailDetails': fake.company_email(),
            'AddressDetails':fake.address(),
            'AdminDetails':fake.administrative_unit(),
            'CreditLimit': random.randint(10000, 100000),
            'CreditRating': random.choice(['very poor', 'fair', 'good', 'very good','exceptional'])
        }
        customers.append(customer)
    return customers

In [5]:
# Generate Data
customers = generate_customers()

In [6]:
df_customers = pd.DataFrame(customers)

In [7]:
df_customers.head()

Unnamed: 0,CustomerID,Name,CompanyType,ContactDetails,EmailDetails,AddressDetails,AdminDetails,CreditLimit,CreditRating
0,46c382d3-500e-4230-8273-28c2cda27df2,Mercer and Sons,Ltd,001-709-874-8375x386,ghines@jones.com,"097 Joe Wall\nPort Renee, MI 99038",Missouri,29952,very poor
1,7eaae3d3-01a2-4ae6-9108-5b621498139e,Martinez and Sons,Inc,5853233936,martinezlee@salazar.com,"93121 Diane Ridges Suite 667\nLucasburgh, PA 6...",Maine,62213,fair
2,bc265871-9cd9-48d7-95a6-3dd12f4e9cf8,"Gonzales, Williams and Austin",and Sons,3797898844,twebb@mason.com,"06353 Christine Viaduct\nJohnstonhaven, NM 33661",Hawaii,38855,fair
3,8aba078a-5e0c-4541-aaa2-d32471e67e23,Kirby LLC,and Sons,+1-536-714-3746,iburgess@schultz.com,"29216 Glenn Cape Suite 112\nLake Stephenview, ...",Vermont,36885,fair
4,0e9dda7c-b148-4f3d-b8f2-1989b1448b89,Baldwin PLC,LLC,769.558.7109,scott21@donovan-murphy.com,"72447 Barnes Mission\nWest Heather, NJ 97315",Alabama,59956,fair


In [8]:
df_customers.columns = ['CustomerID', 'Name', 'CompanyType', 'ContactDetails', 'EmailDetails',
       'AddressDetails', 'AdminDetails', 'CreditLimit', 'CreditRating']

In [9]:
df_train_sf=my_session.createDataFrame(
        df_customers.values.tolist(),
        schema=df_customers.columns.tolist())
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_SCHEMA.BRONZE_CUSTOMER_DATA")

# Product Data setup

In [10]:
df_product = pd.read_csv('product_files.csv')

In [11]:
df_product.head()

Unnamed: 0,ProductCategory,ProductType,Quantity,UnitPrice
0,Office Supplies,Laser Printer,100,425
1,Office Supplies,Ergonomic Chair,1000,299
2,Office Supplies,Standing Desk,1000,420
3,Office Supplies,Wireless Keyboard,1000,78
4,Office Supplies,Office Stationery Set,1000,15


In [12]:
def generate_products(data):
    data['ProductID'] = ''
    # Iterate over rows and columns
    for index, row in data.iterrows():
        data.at[index, 'ProductID'] = fake.uuid4()
    return data

In [13]:
df_product = generate_products(df_product)

In [14]:
df_train_sf=my_session.createDataFrame(
        df_product.values.tolist(),
        schema=df_product.columns.tolist())
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_SCHEMA.BRONZE_PRODUCT_DATA")

# Order Items Data Set up

In [None]:
# Assuming you already have the customers and products dataframes
customers = df_customers['CustomerID'].tolist()
products = df_products[['ProductID', 'UnitPrice']].to_dict('records')

In [None]:
# Define the date range
start_date = datetime(2022, 8, 1)
end_date = datetime(2024, 8, 1)

In [None]:
# Generate Order Items and Orders
order_items = []
orders = []

for _ in range(100000):  # Generate 100,000 orders
    order_id = fake.uuid4()
    customer_id = random.choice(customers)
    order_date = fake.date_between(start_date, end_date)
    order_status = random.choice(['Pending', 'Shipped', 'Delivered', 'Cancelled'])
    
    # Create order items for this order
    num_items = random.randint(1, 5)  # Each order has a random number of items between 1 and 5
    for _ in range(num_items):
        product = random.choice(products)
        quantity = random.randint(1, 10)  # Generate a random quantity for each order item
        unit_price = product['UnitPrice']
        total_price = quantity * unit_price
        item_status = random.choice(['Fulfilled', 'Partially Fulfilled', 'Cancelled', 'Delayed'])
        
        order_item = {
            'Order_Item_ID': fake.uuid4(),
            'OrderID': order_id,
            'ProductID': product['ProductID'],
            'Quantity': quantity,
            'Unit_Price': unit_price,
            'Total_Price': total_price,
            'ItemStatus': item_status
        }
        order_items.append(order_item)
    
    # Create the order entry
    order = {
        'OrderID': order_id,
        'CustomerID': customer_id,
        'OrderDate': order_date,
        'OrderStatus': order_status
    }
    orders.append(order)

In [15]:
# Generate Orders and Order Items
def generate_orders_and_items(customers, products, num_orders=100000):
    orders = []
    order_items = []
    shipments = []
    invoices = []
    payments = []
    statuses = ['Fulfilled', 'Delayed Fulfillment', 'Pending Payment']

    for _ in range(num_orders):
        customer = random.choice(customers)
        order_status = random.choice(statuses)
        order_id = fake.uuid4()
        order_date = fake.date_this_year()
        total_amount = 0

        order = {
            'OrderID': order_id,
            'CustomerID': customer['CustomerID'],
            'OrderDate': order_date,
            'OrderStatus': order_status,
            'TotalAmount': total_amount  # Will be updated later
        }
        orders.append(order)

        num_items = random.randint(1, 5)
        for _ in range(num_items):
            product = random.choice(products)
            quantity = random.randint(1, 10)
            total_price = product['Price'] * quantity
            total_amount += total_price

            order_item = {
                'OrderItemID': fake.uuid4(),
                'OrderID': order_id,
                'ProductID': product['ProductID'],
                'Quantity': quantity,
                'UnitPrice': product['Price'],
                'TotalPrice': total_price
            }
            order_items.append(order_item)

        # Update the total amount for the order
        order['TotalAmount'] = total_amount

        shipment = {
            'ShipmentID': fake.uuid4(),
            'OrderID': order_id,
            'ShipmentDate': fake.date_this_year(),
            'Carrier': fake.company(),
            'TrackingNumber': fake.uuid4(),
            'ShipmentStatus': 'Shipped' if order_status == 'Fulfilled' else 'Pending'
        }
        shipments.append(shipment)

        invoice = {
            'InvoiceID': fake.uuid4(),
            'OrderID': order_id,
            'InvoiceDate': fake.date_this_year(),
            'DueDate': fake.date_this_year(),
            'TotalAmount': total_amount,
            'PaymentStatus': 'Paid' if order_status == 'Fulfilled' else 'Pending'
        }
        invoices.append(invoice)

        if order_status == 'Fulfilled':
            payment = {
                'PaymentID': fake.uuid4(),
                'InvoiceID': invoice['InvoiceID'],
                'PaymentDate': fake.date_this_year(),
                'PaymentAmount': total_amount,
                'PaymentMethod': random.choice(['Credit Card', 'Bank Transfer', 'Cash'])
            }
            payments.append(payment)

    return orders, order_items, shipments, invoices, payments

In [16]:
orders, order_items, shipments, invoices, payments = generate_orders_and_items(customers, products)

NameError: name 'products' is not defined

In [None]:
# Convert to DataFrames for easy manipulation


df_orders = pd.DataFrame(orders)
df_order_items = pd.DataFrame(order_items)
df_shipments = pd.DataFrame(shipments)
df_invoices = pd.DataFrame(invoices)
df_payments = pd.DataFrame(payments)

In [None]:
orders