## Generate cartItems dataset

In [4]:
import pandas as pd
import numpy as np

# Standard-library import kept inside this cell so the cell runs on its own.
import os

# Seed NumPy's legacy global RNG so every run produces the same cart items.
np.random.seed(42)

# Dataset dimensions. IDs are drawn uniformly from 1..N inclusive
# (np.random.randint excludes its upper bound, hence the "+ 1" below).
n_customers = 20
n_product_sizes = 30
n_cart_items = 500

# Only n_customers * n_product_sizes distinct (customer, product size) pairs
# exist; asking for more would make the rejection-sampling loop spin forever.
if n_cart_items > n_customers * n_product_sizes:
    raise ValueError(
        f"Cannot generate {n_cart_items} unique cart items from "
        f"{n_customers * n_product_sizes} possible combinations"
    )

# Pairs that have already been emitted. A set keeps the membership test O(1)
# instead of scanning a growing list on every draw.
customer_product_pairs = set()
records = []
cart_item_id = 1

while len(records) < n_cart_items:
    customer_id = np.random.randint(1, n_customers + 1)
    product_size_id = np.random.randint(1, n_product_sizes + 1)

    # Reject pairs we have already used and draw again.
    pair = (customer_id, product_size_id)
    if pair in customer_product_pairs:
        continue
    customer_product_pairs.add(pair)

    # Quantity (1..5) is drawn only for accepted pairs, so rejected draws
    # consume exactly two random numbers each.
    quantity = np.random.randint(1, 6)

    records.append({
        'CartItemID': cart_item_id,
        'CustomerID': customer_id,
        'ProductSizeID': product_size_id,
        'Quantity': quantity
    })
    cart_item_id += 1

# Create DataFrame
df = pd.DataFrame(records)

# Sort by CustomerID for better readability. The original row index is kept,
# so the index shows the order in which rows were generated.
df = df.sort_values('CustomerID')

path="./dataset"
# Make sure the output directory exists; to_csv does not create it.
os.makedirs(path, exist_ok=True)
# Export to CSV
df.to_csv(f'{path}/cart_data.csv', index=False)

print("Generated", len(df), "unique cart records")
print(df.head())

Generated 500 unique cart records
     CartItemID  CustomerID  ProductSizeID  Quantity
458         459           1             24         1
187         188           1             22         4
213         214           1             18         1
421         422           1              9         5
150         151           1              1         5


## Generate Orders dataset

In [7]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Standard-library import kept inside this cell so the cell runs on its own.
import os

# Seed NumPy's legacy global RNG so every run produces the same orders.
np.random.seed(42)

# Generate Orders
n_orders = 1000
start_date = datetime(2023, 1, 1)
end_date = datetime(2024, 3, 1)

# Number of whole days in the window; computed once because it does not
# change between orders. np.random.randint excludes its upper bound, so
# generated dates fall in [start_date, end_date) -- end_date itself is
# never produced.
days_between = (end_date - start_date).days

# Transaction numbers handed out so far, used to keep TransactionID unique.
used_txn_numbers = set()

orders = []
for order_id in range(1, n_orders + 1):
    # The random draws below are made in a fixed order (date, customer,
    # payment type, status, transaction number) so the seeded output is stable.
    random_days = np.random.randint(0, days_between)
    date = start_date + timedelta(days=random_days)

    customer_id = np.random.randint(1, 21)
    payment_type = np.random.choice(['VNPay', 'Paypal'])
    status = np.random.choice(['Pending', 'Processing', 'Shipped', 'Delivered', 'Cancelled'],
                              p=[0.1, 0.2, 0.2, 0.4, 0.1])

    # Independent draws can repeat; redraw until the number is unused so no
    # two orders share a TransactionID.
    txn_number = np.random.randint(100000, 999999)
    while txn_number in used_txn_numbers:
        txn_number = np.random.randint(100000, 999999)
    used_txn_numbers.add(txn_number)

    orders.append({
        'OrderID': order_id,
        'DateTime': date,
        'CustomerID': customer_id,
        'PaymentType': payment_type,
        'Status': status,
        'TransactionID': f'TXN{txn_number}',
    })

# Create Orders DataFrame
df_orders = pd.DataFrame(orders)

# Placeholder only: TotalPrice is meant to be calculated later, after
# generating OrderItems. Nothing in this cell fills it in.
df_orders['TotalPrice'] = 0.0

path="./dataset"
# Make sure the output directory exists; to_csv does not create it.
os.makedirs(path, exist_ok=True)
# Export Orders
df_orders.to_csv(f'{path}/orders.csv', index=False)

print("Generated", len(df_orders), "unique orders records")
print(df_orders.head())

Generated 1000 unique orders records
   OrderID   DateTime  CustomerID PaymentType      Status TransactionID  \
0        1 2023-04-13          20       VNPay  Processing     TXN744167   
1        2 2023-07-08           7      Paypal  Processing     TXN187498   
2        3 2023-03-29           4      Paypal   Delivered     TXN429365   
3        4 2023-11-05           2      Paypal   Delivered     TXN876997   
4        5 2024-01-21           1      Paypal   Delivered     TXN358795   

   TotalPrice  
0         0.0  
1         0.0  
2         0.0  
3         0.0  
4         0.0  


## Generate orderItems dataset

In [8]:
import pandas as pd
import numpy as np

# Standard-library import kept inside this cell so the cell runs on its own.
import os

# Seed NumPy's legacy global RNG so every run produces the same order items.
np.random.seed(42)

# Dataset dimensions. OrderIDs run 1..n_order_ids and ProductSizeIDs are drawn
# from 1..n_product_sizes (np.random.randint excludes its upper bound).
n_order_ids = 1000
n_product_sizes = 30

# Generate OrderItems
order_items = []
order_item_id = 1
# (OrderID, ProductSizeID) pairs already emitted; a product size may appear
# at most once per order.
used_combinations = set()

# Generate 1-5 items for each order
for order_id in range(1, n_order_ids + 1):
    n_items = np.random.randint(1, 6)

    for _ in range(n_items):
        # Redraw until this order gets a product size it does not have yet.
        # At most 5 of the 30 product sizes are used per order, so a free
        # one always exists.
        product_size_id = np.random.randint(1, n_product_sizes + 1)
        while (order_id, product_size_id) in used_combinations:
            product_size_id = np.random.randint(1, n_product_sizes + 1)
        used_combinations.add((order_id, product_size_id))

        # Quantity (1..5) is drawn only after a product size is accepted.
        quantity = np.random.randint(1, 6)

        order_items.append({
            'OrderItemID': order_item_id,
            'OrderID': order_id,
            'ProductSizeID': product_size_id,
            'Quantity': quantity
        })
        order_item_id += 1

# Create OrderItems DataFrame
df_order_items = pd.DataFrame(order_items)

# Export OrderItems next to the other generated CSVs (cart_data.csv,
# orders.csv) instead of a machine-specific absolute path.
path="./dataset"
# Make sure the output directory exists; to_csv does not create it.
os.makedirs(path, exist_ok=True)
df_order_items.to_csv(f'{path}/order_items.csv', index=False)

print("Generated", len(df_order_items), "unique order items records")
print(df_order_items.head())

Generated 2982 unique order items records
   OrderItemID  OrderID  ProductSizeID  Quantity
0            1        1             29         3
1            2        1              8         5
2            3        1             21         2
3            4        1             19         3
4            5        2             24         5
