In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import pandas as pd

In [2]:
# Read csv files to dataframes
project_path = os.getenv("ecomm")

customer_csv_path = os.path.join(project_path, "faker_dataset", "faker_csv", "fake_customers.csv")
product_csv_path = os.path.join(project_path, "faker_dataset", "faker_csv", "fake_products.csv")
output_path = os.path.join(project_path, "faker_dataset", "faker_csv", "fake_email_orders.csv")

df_customers = pd.read_csv(customer_csv_path) 
df_products = pd.read_csv(product_csv_path)

### Generate Orders Dataset for Emails

In [3]:
import random
from datetime import datetime, timedelta
from faker import Faker
from faker.providers import DynamicProvider

In [4]:
# Define a function to generate random number for order number and payment reference
def random_number():
    return f"#{random.randint(100000000000, 999999999999)}"

# Define the list for customers, products, and payment methods
customers_list = df_customers['Full Name'].tolist()
products_list = df_products['Title'].tolist()
payment_methods = ["PayPal","Digital Wallet","Cash on Delivery","Bank Transfer"]

# Get yesterdays date 
yesterday = datetime.now().date() - timedelta(days=1)

In [7]:
data = []

for _ in range(random.randint(3, 4)):  # number_of_rows
    order_number = random_number()
    order_date = datetime.combine(yesterday, datetime.min.time()) + timedelta(seconds=random.randint(0, 86399))
    billing_name = random.choice(customers_list)
    payment_method = random.choice(payment_methods)
    payment_reference = random_number()

    # Each order has 1â€“3 products (line items)
    for _ in range(random.randint(1, 3)):
        lineitem_name = random.choice(products_list)
        lineitem_qty = random.randint(1, 3)

        # Merge customer info
        customer_info = df_customers.loc[df_customers['Full Name'] == billing_name].to_dict('records')[0]
        # Merge product info
        product_info = df_products.loc[df_products['Title'] == lineitem_name].to_dict('records')[0]

        order_dict = {
            'order_number': order_number,
            'order_date': order_date,
            'billing_name': billing_name,
            'lineitem_name': lineitem_name,
            'lineitem_qty': lineitem_qty,
            'payment_method': payment_method,
            'payment_reference': payment_reference,
            'payment_date': order_date + timedelta(days=random.uniform(0, 1)),
            'fulfillment_date': order_date + timedelta(days=random.uniform(1, 2)),
        }

        # Merge additional customer & product fields, standardizing column names
        order_dict.update({k.lower().replace(' ', '_'): v for k, v in customer_info.items() if k != 'Full Name'})
        order_dict.update({k.lower().replace(' ', '_'): v for k, v in product_info.items() if k != 'Title'})

        data.append(order_dict)

# Convert to DataFrame
orders_df = pd.DataFrame(data)

# Drop any unnecessary columns if needed
columns_to_drop = ['first_name', 'last_name', 'product_description', 'product_category', 'image_src']
orders_df = orders_df.drop(columns=[col for col in columns_to_drop if col in orders_df.columns])


In [8]:
orders_df.head()

Unnamed: 0,order_number,order_date,billing_name,lineitem_name,lineitem_qty,payment_method,payment_reference,payment_date,fulfillment_date,email,address_company,address_city,address_province,address_zip,phone,product_sku,vendor,unit_price
0,#600765296260,2025-11-10 00:35:51,Cameron Arnold,"Ben 10 6"" Deluxe Power Up Figures - Four Arms",3,Cash on Delivery,#881266016737,2025-11-10 14:58:58.960023,2025-11-11 16:43:13.104356,arnold.cameron@gmail.com,"B11 L36 Patel Homes Phase 8, 39th Road",Pagadian,Zamboanga del Sur,7016,+63 9819938045,TOY128,BEN 10,18.99
1,#205033743697,2025-11-10 11:41:56,Roger Miranda,Barbie Pizza Chef Doll and Playset,2,PayPal,#812867371322,2025-11-11 03:49:14.901572,2025-11-12 04:36:52.925594,miranda.roger@gmail.com,7153 Venus Avenue,Bayugan,Agusan del Sur,8502,+63 9427532705,TOY188,BARBIE,29.99
2,#205033743697,2025-11-10 11:41:56,Roger Miranda,PLAYSKOOL MR. POTATO HEAD,3,PayPal,#812867371322,2025-11-10 21:23:56.148569,2025-11-12 02:11:06.094868,miranda.roger@gmail.com,7153 Venus Avenue,Bayugan,Agusan del Sur,8502,+63 9427532705,TOY380,PLAYSKOOL,7.99
3,#180319809527,2025-11-10 11:23:26,Tiffany Wright,Lego Captain Phasma,3,PayPal,#975689981056,2025-11-11 05:59:59.355854,2025-11-11 16:45:23.145854,wright.tiffany@gmail.com,"B08 L07 Balete Estates, Carrot Road",Marawi,Lanao del Sur,9700,+63 9742439422,TOY10,LEGO,43.0
4,#180319809527,2025-11-10 11:23:26,Tiffany Wright,Elefun and Friends Barrel of Monkeys Game,1,PayPal,#975689981056,2025-11-11 00:45:40.782622,2025-11-12 08:01:47.051685,wright.tiffany@gmail.com,"B08 L07 Balete Estates, Carrot Road",Marawi,Lanao del Sur,9700,+63 9742439422,TOY307,PLAYSKOOL,4.99


In [None]:
break

In [None]:
data = []

for orders in range(10):
    order_number = random_number()
    order_date = fake.date_time_between_dates(datetime_start=datetime_start, datetime_end=datetime_end)
    billing_name = random.choice(customers_list)
    payment_method = random.choice(payment_methods)
    payment_reference = random_number()

    # Each order has 1-5 products (line items)
    for order_item in range(random.randint(1,3)):
        lineitem_name = random.choice(products_list)
        lineitem_qty = random.randint(1,3)

        data.append({
            'order_number': order_number,
            'order_date': order_date,
            'billing_name': billing_name,
            'lineitem_name': lineitem_name,
            'lineitem_qty': lineitem_qty,
            'payment_method': payment_method,
            'payment_reference': payment_reference
        })


In [None]:
orders_df = pd.DataFrame(data)
len(orders_df)

In [None]:
products_df.columns.tolist()