# Use O2C_Template Customized Notebook Template

In [1]:
import random
from datetime import datetime, timedelta

import pandas as pd
from faker import Faker

In [2]:
# Obtain the Snowflake session object through fosforml; later cells call
# createDataFrame / table on it to write and read the generated tables.
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
# Shared Faker generator (en_US locale) used by the generate_* helpers and the
# order-generation loop below.
# NOTE(review): neither Faker nor `random` is seeded in the visible cells, so
# every run produces different data — confirm that is intended.
fake = Faker('en_US')

# Customer Data Setup

In [4]:
# Generate Customers
def generate_customers(num_customers=100):
    """Create synthetic customer records using the shared Faker generator.

    Args:
        num_customers: Number of customer dicts to produce (default 100).

    Returns:
        list[dict]: One dict per customer with the keys CustomerID, Name,
        CompanyType, ContactDetails, EmailDetails, AddressDetails,
        AdminDetails, CreditLimit and CreditRating, in that order.
    """
    rating_levels = ['very poor', 'fair', 'good', 'very good', 'exceptional']

    def _new_customer():
        # Keyword order defines the key order of each record.
        return dict(
            CustomerID=fake.uuid4(),
            Name=fake.company(),
            CompanyType=fake.company_suffix(),
            ContactDetails=fake.phone_number(),
            EmailDetails=fake.company_email(),
            AddressDetails=fake.address(),
            AdminDetails=fake.administrative_unit(),
            CreditLimit=random.randint(10000, 100000),
            CreditRating=random.choice(rating_levels),
        )

    return [_new_customer() for _ in range(num_customers)]

In [5]:
# Generate Data (generate_customers defaults to 100 customers)
customers = generate_customers()

In [6]:
# Tabulate the list of customer dicts: one row per customer, one column per dict key.
df_customers = pd.DataFrame(customers)

In [7]:
# Preview the first rows of the generated customer table.
df_customers.head()

Unnamed: 0,CustomerID,Name,CompanyType,ContactDetails,EmailDetails,AddressDetails,AdminDetails,CreditLimit,CreditRating
0,46c382d3-500e-4230-8273-28c2cda27df2,Mercer and Sons,Ltd,001-709-874-8375x386,ghines@jones.com,"097 Joe Wall\nPort Renee, MI 99038",Missouri,29952,very poor
1,7eaae3d3-01a2-4ae6-9108-5b621498139e,Martinez and Sons,Inc,5853233936,martinezlee@salazar.com,"93121 Diane Ridges Suite 667\nLucasburgh, PA 6...",Maine,62213,fair
2,bc265871-9cd9-48d7-95a6-3dd12f4e9cf8,"Gonzales, Williams and Austin",and Sons,3797898844,twebb@mason.com,"06353 Christine Viaduct\nJohnstonhaven, NM 33661",Hawaii,38855,fair
3,8aba078a-5e0c-4541-aaa2-d32471e67e23,Kirby LLC,and Sons,+1-536-714-3746,iburgess@schultz.com,"29216 Glenn Cape Suite 112\nLake Stephenview, ...",Vermont,36885,fair
4,0e9dda7c-b148-4f3d-b8f2-1989b1448b89,Baldwin PLC,LLC,769.558.7109,scott21@donovan-murphy.com,"72447 Barnes Mission\nWest Heather, NJ 97315",Alabama,59956,fair


In [8]:
# Explicitly (re)assign the column labels. These are the same nine names, in
# the same order, as the dict keys produced by generate_customers.
df_customers.columns = ['CustomerID', 'Name', 'CompanyType', 'ContactDetails', 'EmailDetails',
       'AddressDetails', 'AdminDetails', 'CreditLimit', 'CreditRating']

In [9]:
# Hand the customer rows and column names to the session, then save the result
# in "overwrite" mode as the bronze customer table.
customer_rows = df_customers.values.tolist()
customer_columns = df_customers.columns.tolist()
df_train_sf = my_session.createDataFrame(customer_rows, schema=customer_columns)
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_SCHEMA.BRONZE_CUSTOMER_DATA")

# Product Data Setup

In [10]:
# Load the product catalogue from a CSV file; the path is relative, so it is
# resolved against the notebook's working directory.
df_product = pd.read_csv('product_files.csv')

In [11]:
# Preview the first rows of the product catalogue.
df_product.head()

Unnamed: 0,ProductCategory,ProductType,Quantity,UnitPrice
0,Office Supplies,Laser Printer,100,425
1,Office Supplies,Ergonomic Chair,1000,299
2,Office Supplies,Standing Desk,1000,420
3,Office Supplies,Wireless Keyboard,1000,78
4,Office Supplies,Office Stationery Set,1000,15


In [12]:
def generate_products(data):
    """Attach a freshly generated UUID to every product row.

    Args:
        data: pandas DataFrame of products. It is modified in place: a
            'ProductID' column is added, or overwritten if already present.

    Returns:
        The same DataFrame object, now carrying the 'ProductID' column.
    """
    # One UUID per row, assigned positionally in a single column write. This
    # replaces the per-row iterrows()/.at loop and, unlike label-based writes,
    # still gives each row its own ID when index labels repeat.
    data['ProductID'] = [fake.uuid4() for _ in range(len(data))]
    return data

In [13]:
# Add a ProductID column to the catalogue. generate_products mutates its
# argument and returns that same DataFrame, so this rebinding keeps one object.
df_product = generate_products(df_product)

In [14]:
# Hand the product rows and column names to the session, then save the result
# in "overwrite" mode as the bronze product table.
product_rows = df_product.values.tolist()
product_columns = df_product.columns.tolist()
df_train_sf = my_session.createDataFrame(product_rows, schema=product_columns)
df_train_sf.write.mode("overwrite").save_as_table("FDC_HORIZONTAL.O2C_SCHEMA.BRONZE_PRODUCT_DATA")

# Order Items and Order Details Data Setup

In [18]:
# Assuming you already have the customers and products dataframes
# NOTE: `customers` is rebound here from the list of customer dicts returned by
# generate_customers() to a plain list of CustomerID values.
customers = df_customers['CustomerID'].tolist()
# One {'ProductID': ..., 'UnitPrice': ...} dict per product row.
products = df_product[['ProductID', 'UnitPrice']].to_dict('records')

In [20]:
# Define the date range
# (start_date and end_date are the bounds passed to fake.date_between when
# order dates are generated in the next cell)
from datetime import datetime
start_date = datetime(2022, 8, 1)
end_date = datetime(2024, 8, 1)

In [21]:
# Generate Order Items and Orders
order_items = []
orders = []

ORDER_STATUS_CHOICES = ['Pending', 'Shipped', 'Delivered', 'Cancelled']
ITEM_STATUS_CHOICES = ['Fulfilled', 'Partially Fulfilled', 'Cancelled', 'Delayed']

for _order_idx in range(100000):  # 100,000 orders in total
    order_id = fake.uuid4()
    customer_id = random.choice(customers)
    order_date = fake.date_between(start_date, end_date)
    order_status = random.choice(ORDER_STATUS_CHOICES)

    # Between 1 and 5 line items per order, each with a quantity of 1-10.
    num_items = random.randint(1, 5)
    for _item_idx in range(num_items):
        product = random.choice(products)
        quantity = random.randint(1, 10)
        unit_price = product['UnitPrice']
        total_price = quantity * unit_price
        item_status = random.choice(ITEM_STATUS_CHOICES)

        order_items.append(dict(
            Order_Item_ID=fake.uuid4(),
            OrderID=order_id,
            ProductID=product['ProductID'],
            Quantity=quantity,
            Unit_Price=unit_price,
            Total_Price=total_price,
            ItemStatus=item_status,
        ))

    # Record the order header once its line items have been generated.
    orders.append(dict(
        OrderID=order_id,
        CustomerID=customer_id,
        OrderDate=order_date,
        OrderStatus=order_status,
    ))

In [22]:
# Convert to DataFrames: one row per order item and one row per order
df_order_items = pd.DataFrame(order_items)
df_orders = pd.DataFrame(orders)

In [23]:
# Display the first few rows of the DataFrames (plain-text rendering via print)
print(df_order_items.head())
print(df_orders.head())

                          Order_Item_ID                               OrderID  \
0  b559eb34-52da-420e-a7d2-9b6841cff6fd  4306cafa-fc49-4078-9d04-58e41f390cb6   
1  9f611308-cea2-494e-99de-9c3207bfe90b  4306cafa-fc49-4078-9d04-58e41f390cb6   
2  458f1492-7264-4cfa-a6ac-31f1e9e566ab  4306cafa-fc49-4078-9d04-58e41f390cb6   
3  805bc8b7-4a02-47ba-87d0-851eb536d16f  4306cafa-fc49-4078-9d04-58e41f390cb6   
4  865518d7-f2c0-4f6c-a520-542d2fa55f11  4306cafa-fc49-4078-9d04-58e41f390cb6   

                              ProductID  Quantity  Unit_Price  Total_Price  \
0  8c59693d-0330-47a4-904f-35ae43a56bf1         5           2           10   
1  7bd16343-58e9-4fbc-8e43-10690b1a27ff         2         425          850   
2  2a08c84e-ea05-46dd-aa60-7f5d9f13c54d         5         325         1625   
3  b1c94193-fbb2-4dcd-b703-c5e60f37f62d         7           5           35   
4  a5624c16-d276-4518-861f-43744eee578c         3         600         1800   

            ItemStatus  
0              Dela

In [24]:
# Check the number of rows in the DataFrames
# (each order gets 1-5 items, so the item count should fall between 1x and 5x
# the order count)
print(f"Number of rows in df_order_items: {len(df_order_items)}")
print(f"Number of rows in df_orders: {len(df_orders)}")

Number of rows in df_order_items: 300034
Number of rows in df_orders: 100000


In [25]:
# Re-acquire the session, upload the order items in "overwrite" mode and show
# the stored table.
# NOTE(review): the table name here is unqualified, whereas the customer and
# product tables are written to FDC_HORIZONTAL.O2C_SCHEMA.* — confirm the
# session's current database/schema is the intended target.
my_session = get_session()
sf_df = my_session.createDataFrame(df_order_items)
sf_df.write.mode("overwrite").save_as_table("BRONZE_ORDER_ITEM_DETAILS")
my_session.table("BRONZE_ORDER_ITEM_DETAILS").show()

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"Order_Item_ID"                       |"OrderID"                             |"ProductID"                           |"Quantity"  |"Unit_Price"  |"Total_Price"  |"ItemStatus"         |
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|b559eb34-52da-420e-a7d2-9b6841cff6fd  |4306cafa-fc49-4078-9d04-58e41f390cb6  |8c59693d-0330-47a4-904f-35ae43a56bf1  |5           |2             |10             |Delayed              |
|9f611308-cea2-494e-99de-9c3207bfe90b  |4306cafa-fc49-4078-9d04-58e41f390cb6  |7bd16343-58e9-4fbc-8e43-106

In [26]:
# Upload the order headers in "overwrite" mode and show the stored table.
# NOTE(review): unqualified table name — confirm it resolves to the same
# database/schema as the FDC_HORIZONTAL.O2C_SCHEMA.* tables written earlier.
sf_df = my_session.createDataFrame(df_orders)
sf_df.write.mode("overwrite").save_as_table("BRONZE_ORDER_DETAILS")
my_session.table("BRONZE_ORDER_DETAILS").show()

-------------------------------------------------------------------------------------------------------------
|"OrderID"                             |"CustomerID"                          |"OrderDate"  |"OrderStatus"  |
-------------------------------------------------------------------------------------------------------------
|4306cafa-fc49-4078-9d04-58e41f390cb6  |3b939ffb-2896-4e03-9460-6cc763880617  |2024-04-17   |Cancelled      |
|f3cb6ec4-f636-48a6-9965-e64f08090eab  |aef1fc2c-25f4-473a-99b0-1ba11c446b4f  |2023-11-26   |Cancelled      |
|be7e7467-ed28-4fb8-b8d5-1abbb385f87b  |22de95c5-4fd7-4a00-8f80-a5590e1ccce1  |2022-11-24   |Delivered      |
|eda22c3b-037f-4237-a53c-c8c0933dc9e8  |0e9dda7c-b148-4f3d-b8f2-1989b1448b89  |2023-08-30   |Shipped        |
|4aa25840-2371-4218-8173-8652ad6e979c  |85b449ea-32da-41eb-9cdb-db56b3f6e05e  |2023-10-31   |Delivered      |
|ae4d4400-1a7e-4914-a2d9-4052cf2216c9  |4fc29fa6-c18d-492b-a80b-6f98f3b650a2  |2024-04-11   |Shipped        |
|2db64f00-

# Invoice Data Setup

In [27]:
# Assuming you already have the orders and order_items dataframes
# NOTE: `orders` is rebound here from the list of order dicts built earlier to
# a plain list of OrderID values; generate_invoices and generate_shipments
# iterate over this list.
orders = df_orders['OrderID'].tolist()

In [42]:
# Generate Invoices
def generate_invoices():
    """Build one synthetic invoice per order ID in the notebook-level ``orders``.

    Reads these notebook-level names:
        orders: iterable of OrderID values.
        df_orders: DataFrame with OrderID, CustomerID, OrderDate, OrderStatus.
        df_order_items: DataFrame with OrderID and Total_Price.

    Returns:
        list[dict]: One dict per order with the keys Invoice_ID, Order_ID,
        Invoice_Date, Due_Date, Total_Amount, Payment_Status, CustomerID and
        OrderDate.

    Raises:
        KeyError: If an ID in ``orders`` has no row in ``df_orders``.
    """
    # Build both lookups once instead of re-filtering the full DataFrames with
    # a boolean mask for every order, which is quadratic in the order count.
    # drop_duplicates keeps the first row per OrderID, i.e. the row that a
    # filtered `.values[0]` lookup would return.
    order_info = (
        df_orders.drop_duplicates(subset='OrderID')
        .set_index('OrderID')[['CustomerID', 'OrderDate', 'OrderStatus']]
        .to_dict('index')
    )
    order_totals = df_order_items.groupby('OrderID')['Total_Price'].sum().to_dict()

    invoices = []
    for order_id in orders:
        info = order_info[order_id]
        order_date = info['OrderDate']

        # The invoice is dated 3-10 days after the order and falls due 7 days later.
        invoice_date = pd.to_datetime(order_date) + timedelta(days=random.randint(3, 10))
        due_date = invoice_date + timedelta(days=7)
        payment_status = 'Paid' if info['OrderStatus'] in ['Shipped', 'Delivered'] else 'Pending'

        invoices.append({
            'Invoice_ID': fake.uuid4(),
            'Order_ID': order_id,
            'Invoice_Date': invoice_date,
            'Due_Date': due_date,
            # An order without any line items totals 0, like an empty sum.
            'Total_Amount': order_totals.get(order_id, 0),
            'Payment_Status': payment_status,
            'CustomerID': info['CustomerID'],
            'OrderDate': order_date,
        })
    return invoices

In [43]:
# Generate Data: one invoice dict per entry in `orders`, then tabulate them
invoices = generate_invoices()

df_invoices = pd.DataFrame(invoices)

In [47]:
# Preview the first invoice rows (plain-text rendering via print).
print(df_invoices.head())

                             Invoice_ID                              Order_ID  \
0  cfabc68a-43db-4ebe-ba60-4d0dc14411d8  4306cafa-fc49-4078-9d04-58e41f390cb6   
1  fe8172e8-cbce-4810-8cc3-335367ce0443  f3cb6ec4-f636-48a6-9965-e64f08090eab   
2  cd51f250-f695-4244-ba9d-0e1f1f9ed639  be7e7467-ed28-4fb8-b8d5-1abbb385f87b   
3  7e7890d4-78e8-4440-816d-d67d2a35951b  eda22c3b-037f-4237-a53c-c8c0933dc9e8   
4  c508616f-d1df-478a-bb5b-957f4a66d346  4aa25840-2371-4218-8173-8652ad6e979c   

  Invoice_Date   Due_Date  Total_Amount Payment_Status  \
0   2024-04-25 2024-05-02          4320        Pending   
1   2023-12-04 2023-12-11           396        Pending   
2   2022-11-27 2022-12-04        228545           Paid   
3   2023-09-08 2023-09-15          7159           Paid   
4   2023-11-05 2023-11-12          1533           Paid   

                             CustomerID   OrderDate  
0  3b939ffb-2896-4e03-9460-6cc763880617  2024-04-17  
1  aef1fc2c-25f4-473a-99b0-1ba11c446b4f  2023-11-26  
2 

In [48]:
# Sanity check: generate_invoices emits one invoice per entry in `orders`.
print(f"Number of rows in df_invoices: {len(df_invoices)}")

Number of rows in df_invoices: 100000


In [49]:
# Save to Snowflake in "overwrite" mode, then show the stored table.
# NOTE(review): unqualified table name — confirm it resolves to the same
# database/schema as the FDC_HORIZONTAL.O2C_SCHEMA.* tables written earlier.
sf_df = my_session.createDataFrame(df_invoices)
sf_df.write.mode("overwrite").save_as_table("BRONZE_INVOICE_DETAILS")
my_session.table("BRONZE_INVOICE_DETAILS").show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"Invoice_ID"                          |"Order_ID"                            |"Invoice_Date"       |"Due_Date"           |"Total_Amount"  |"Payment_Status"  |"CustomerID"                          |"OrderDate"  |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|cfabc68a-43db-4ebe-ba60-4d0dc14411d8  |4306cafa-fc49-4078-9d04-58e41f390cb6  |2024-04-25 00:00:00  |2024-05-02 00:00:00  |4320            |Pending           |3b939ffb-2896-4e03-9460-6cc763880617  |2024-04-17   |
|fe8172e8-cbce-4810-8cc3-335367ce0443  |f3cb6ec4-f636-48a6-9965-e64f08090eab  |2023-12-04 00:00:00  |2023-12-11 00:00:00  |396             |Pending 

# Shipments Data Setup

In [50]:
# Generate Shipments
def generate_shipments():
    """Build one synthetic shipment per order ID in the notebook-level ``orders``.

    Reads these notebook-level names:
        orders: iterable of OrderID values.
        df_orders: DataFrame with OrderID, OrderDate and OrderStatus.

    Returns:
        list[dict]: One dict per order with the keys Shipment_ID, Order_ID,
        Shipment_Date, Carrier, Tracking_Number and Shipment_Status.

    Raises:
        KeyError: If an ID in ``orders`` has no row in ``df_orders``.
    """
    # Order status -> shipment status; any other status maps to 'Unknown'.
    shipment_status_for = {
        'Pending': 'Pending',
        'Shipped': 'In Transit',
        'Delivered': 'Delivered',
        'Cancelled': 'Cancelled',
    }

    # Build the lookup once instead of filtering df_orders twice per order,
    # which is quadratic in the order count. drop_duplicates keeps the first
    # row per OrderID, i.e. the row a filtered `.values[0]` would return.
    order_info = (
        df_orders.drop_duplicates(subset='OrderID')
        .set_index('OrderID')[['OrderDate', 'OrderStatus']]
        .to_dict('index')
    )

    shipments = []
    for order_id in orders:
        info = order_info[order_id]

        # The shipment is dated 1-5 days after the order date.
        shipment_date = pd.to_datetime(info['OrderDate']) + timedelta(days=random.randint(1, 5))
        carrier = random.choice(['FedEx', 'UPS', 'DHL', 'USPS'])
        tracking_number = fake.uuid4()

        shipments.append({
            'Shipment_ID': fake.uuid4(),
            'Order_ID': order_id,
            'Shipment_Date': shipment_date,
            'Carrier': carrier,
            'Tracking_Number': tracking_number,
            'Shipment_Status': shipment_status_for.get(info['OrderStatus'], 'Unknown'),
        })
    return shipments


In [None]:
# Generate Data: one shipment dict per entry in `orders`, then tabulate them
shipments = generate_shipments()

df_shipments = pd.DataFrame(shipments)

In [None]:
# Display the first few rows of the DataFrame (plain-text rendering via print)
print(df_shipments.head())

In [None]:
# Save to Snowflake in "overwrite" mode, then show the stored table.
# NOTE(review): unqualified table name — confirm it resolves to the same
# database/schema as the FDC_HORIZONTAL.O2C_SCHEMA.* tables written earlier.
sf_df = my_session.createDataFrame(df_shipments)
sf_df.write.mode("overwrite").save_as_table("BRONZE_SHIPMENT_DETAILS")
my_session.table("BRONZE_SHIPMENT_DETAILS").show()


In [44]:
# Generate Orders and Order Items
def generate_orders_and_items(customers, products, num_orders=100000):
    """Generate linked orders, order items, shipments, invoices and payments.

    Args:
        customers: Sequence of customers. Each entry is either a dict with a
            'CustomerID' key or a bare customer ID value.
        products: Sequence of product dicts with a 'ProductID' key and a unit
            price stored under 'Price' or, failing that, 'UnitPrice'.
        num_orders: Number of orders to generate (default 100000).

    Returns:
        tuple: (orders, order_items, shipments, invoices, payments), each a
        list of dicts. Every order gets exactly one shipment and one invoice;
        a payment is created only for orders whose status is 'Fulfilled'.
    """
    def _customer_id(customer):
        # Accept both full customer records and bare IDs.
        return customer['CustomerID'] if isinstance(customer, dict) else customer

    def _unit_price(product):
        # 'Price' is the key this function originally required; 'UnitPrice' is
        # the key carried by the product records built from df_product.
        return product['Price'] if 'Price' in product else product['UnitPrice']

    orders = []
    order_items = []
    shipments = []
    invoices = []
    payments = []
    statuses = ['Fulfilled', 'Delayed Fulfillment', 'Pending Payment']

    for _order_idx in range(num_orders):
        customer = random.choice(customers)
        order_status = random.choice(statuses)
        order_id = fake.uuid4()
        order_date = fake.date_this_year()
        total_amount = 0

        order = {
            'OrderID': order_id,
            'CustomerID': _customer_id(customer),
            'OrderDate': order_date,
            'OrderStatus': order_status,
            'TotalAmount': total_amount  # Will be updated later
        }
        orders.append(order)

        # Each order gets 1-5 line items with a quantity of 1-10 each.
        num_items = random.randint(1, 5)
        for _item_idx in range(num_items):
            product = random.choice(products)
            quantity = random.randint(1, 10)
            unit_price = _unit_price(product)
            total_price = unit_price * quantity
            total_amount += total_price

            order_item = {
                'OrderItemID': fake.uuid4(),
                'OrderID': order_id,
                'ProductID': product['ProductID'],
                'Quantity': quantity,
                'UnitPrice': unit_price,
                'TotalPrice': total_price
            }
            order_items.append(order_item)

        # Update the total amount for the order
        order['TotalAmount'] = total_amount

        # NOTE(review): ShipmentDate, InvoiceDate, DueDate and PaymentDate are
        # each drawn independently with fake.date_this_year(), so they are not
        # ordered relative to OrderDate or to each other (DueDate can precede
        # InvoiceDate). Confirm whether downstream use needs them ordered.
        shipment = {
            'ShipmentID': fake.uuid4(),
            'OrderID': order_id,
            'ShipmentDate': fake.date_this_year(),
            'Carrier': fake.company(),
            'TrackingNumber': fake.uuid4(),
            'ShipmentStatus': 'Shipped' if order_status == 'Fulfilled' else 'Pending'
        }
        shipments.append(shipment)

        invoice = {
            'InvoiceID': fake.uuid4(),
            'OrderID': order_id,
            'InvoiceDate': fake.date_this_year(),
            'DueDate': fake.date_this_year(),
            'TotalAmount': total_amount,
            'PaymentStatus': 'Paid' if order_status == 'Fulfilled' else 'Pending'
        }
        invoices.append(invoice)

        # Only fulfilled orders are treated as paid in full.
        if order_status == 'Fulfilled':
            payment = {
                'PaymentID': fake.uuid4(),
                'InvoiceID': invoice['InvoiceID'],
                'PaymentDate': fake.date_this_year(),
                'PaymentAmount': total_amount,
                'PaymentMethod': random.choice(['Credit Card', 'Bank Transfer', 'Cash'])
            }
            payments.append(payment)

    return orders, order_items, shipments, invoices, payments

In [16]:
# NOTE(review): the recorded NameError and the execution count (In [16]) show
# this cell was last run before `products` was defined. Running it rebinds
# orders, order_items, shipments and invoices produced by the earlier sections.
orders, order_items, shipments, invoices, payments = generate_orders_and_items(customers, products)

NameError: name 'products' is not defined

In [None]:
# Convert to DataFrames for easy manipulation
# NOTE: this rebinds df_orders, df_order_items, df_shipments and df_invoices
# created in earlier cells; df_payments is new here.


df_orders = pd.DataFrame(orders)
df_order_items = pd.DataFrame(order_items)
df_shipments = pd.DataFrame(shipments)
df_invoices = pd.DataFrame(invoices)
df_payments = pd.DataFrame(payments)

In [None]:
# Preview only the first few orders; echoing the whole list would flood the
# cell output.
orders[:5]