## Mock Data Generation for E-commerce Chatbot

In [2]:
import pandas as pd
from faker import Faker
from datetime import datetime, timedelta
import random
import numpy as np

In [4]:
# Initialize Faker
fake = Faker()

# Define the number of rows for the dataset
num_rows = 50

# Helper function to generate random dates
def random_date(start, end):
    return fake.date_between(start_date=start, end_date=end)

# Helper function to calculate expected delivery date
def expected_delivery_date(shipment_date):
    delivery_days = random.randint(3, 10)  # Random delivery time window between 3 and 10 days
    return shipment_date + timedelta(days=delivery_days)

# Helper function to calculate business days between two dates
def business_days_between(start_date, end_date):
    day_generator = (start_date + timedelta(x + 1) for x in range((end_date - start_date).days))
    business_days = sum(1 for day in day_generator if day.weekday() < 5)
    return business_days

In [12]:
# Generate initial data
data = {
    "Order ID": [f"ORD{1000 + i}" for i in range(num_rows)],
    "Tracking Number": [f"{123456789 + i}" for i in range(num_rows)],
    "Shipment Date": [],  # Placeholder for shipment dates
    "Expected Delivery Date": [],  # Placeholder for expected delivery dates
    "Delivery Date": [],  # Placeholder for delivery dates
    "Status": [random.choice(["in process", "in transit", "delivered", "pending", "cancel"]) for _ in range(num_rows)],
    "Customer ID": [f"CUST{1000 + i}" for i in range(num_rows)],
    "Customer Name": [fake.name() for _ in range(num_rows)],
    "Customer Email": [fake.email() for _ in range(num_rows)],
    "Customer Phone": [fake.phone_number() for _ in range(num_rows)],
    "Order Date": [random_date('-60d', '-30d') for _ in range(num_rows)],
    "Refund Requested": [],  # Placeholder for refund requested
    "Refund Reason": [],  # Placeholder for refund reasons
    "Refund Status": [],  # Placeholder for refund statuses
    "Refund Date": [],  # Placeholder for refund dates
    "Notification Status": [random.choice([True, False]) for _ in range(num_rows)]
}

In [13]:
# Calculate expected delivery and delivery dates based on shipment dates
for i in range(num_rows):
    status = data["Status"][i]
    
    if status == "in process":
        if random.choice([True, False]):  # Randomly decide if the address is out of scope
            data["Shipment Date"].append(None)
            data["Expected Delivery Date"].append(None)
            data["Delivery Date"].append(None)
            data["Refund Requested"].append(True)
            data["Refund Reason"].append("Delivery Region Out of Scope")
            data["Refund Status"].append("approved")
            data["Refund Date"].append(None)
        else:
            data["Shipment Date"].append(None)
            data["Expected Delivery Date"].append(None)
            data["Delivery Date"].append(None)
            data["Refund Requested"].append(False)
            data["Refund Reason"].append("")
            data["Refund Status"].append("")
            data["Refund Date"].append(None)
    else:
        shipment_date = random_date('-30d', 'today')
        expected_delivery = expected_delivery_date(shipment_date)
        
        data["Shipment Date"].append(shipment_date)
        data["Expected Delivery Date"].append(expected_delivery)
        
        if status == "delivered":
            delivery_date = expected_delivery + timedelta(days=random.randint(0, 10))
            data["Delivery Date"].append(delivery_date)
            refund_requested = random.choice([True, False])
            
            if refund_requested:
                data["Refund Requested"].append(True)
                days_since_delivery = (datetime.now() - datetime.combine(delivery_date, datetime.min.time())).days
                if days_since_delivery <= 7:
                    reason = random.choice(["Damaged Item", "Defective Item"])
                    data["Refund Reason"].append(reason)
                    data["Refund Status"].append("approved")
                    refund_process_date = delivery_date + timedelta(days=random.randint(1, 3))
                    refund_receive_date = refund_process_date + timedelta(days=business_days_between(refund_process_date, refund_process_date + timedelta(days=7)))
                    data["Refund Date"].append(refund_receive_date)
                else:
                    data["Refund Reason"].append("Request Exceeded 7 Days")
                    data["Refund Status"].append("rejected")
                    data["Refund Date"].append(None)
            else:
                data["Refund Requested"].append(False)
                data["Refund Reason"].append("")
                data["Refund Status"].append("")
                data["Refund Date"].append(None)
        elif status == "cancel":
            delivery_date = expected_delivery + timedelta(days=random.randint(0, 10))
            data["Delivery Date"].append(delivery_date)
            refund_requested = True
            reason = random.choice(["Damaged Item", "Defective Item"])
            data["Refund Requested"].append(refund_requested)
            data["Refund Reason"].append(reason)
            data["Refund Status"].append("approved")
            refund_process_date = delivery_date + timedelta(days=random.randint(1, 3))
            refund_receive_date = refund_process_date + timedelta(days=business_days_between(refund_process_date, refund_process_date + timedelta(days=7)))
            data["Refund Date"].append(refund_receive_date)
        else:
            data["Delivery Date"].append(None)
            data["Refund Requested"].append(False)
            data["Refund Reason"].append("")
            data["Refund Status"].append("")
            data["Refund Date"].append(None)

In [14]:
for key, value in data.items():
    print(f"{key}: {len(value)}")

Order ID: 50
Tracking Number: 50
Shipment Date: 50
Expected Delivery Date: 50
Delivery Date: 50
Status: 50
Customer ID: 50
Customer Name: 50
Customer Email: 50
Customer Phone: 50
Order Date: 50
Refund Requested: 50
Refund Reason: 50
Refund Status: 50
Refund Date: 50
Notification Status: 50


In [15]:
# Create DataFrame
df = pd.DataFrame(data)

In [16]:
df

Unnamed: 0,Order ID,Tracking Number,Shipment Date,Expected Delivery Date,Delivery Date,Status,Customer ID,Customer Name,Customer Email,Customer Phone,Order Date,Refund Requested,Refund Reason,Refund Status,Refund Date,Notification Status
0,ORD1000,123456789,2024-05-10,2024-05-19,2024-05-28,cancel,CUST1000,William Ramirez,dana83@example.com,408.218.3405,2024-03-27,True,Damaged Item,approved,2024-06-03,False
1,ORD1001,123456790,2024-05-05,2024-05-08,2024-05-18,cancel,CUST1001,Marie Alexander,brittanyallen@example.net,001-696-591-4933x380,2024-03-29,True,Damaged Item,approved,2024-05-26,True
2,ORD1002,123456791,,,,in process,CUST1002,Carol Bates,wcain@example.org,+1-913-590-1321x129,2024-04-08,False,,,,True
3,ORD1003,123456792,2024-05-12,2024-05-16,2024-05-23,cancel,CUST1003,Jessica Cooley,jameschen@example.net,690-480-9975,2024-03-25,True,Defective Item,approved,2024-05-30,True
4,ORD1004,123456793,,,,in process,CUST1004,Ronald Jackson,higginsmichele@example.org,001-805-234-1133,2024-03-31,True,Delivery Region Out of Scope,approved,,False
5,ORD1005,123456794,2024-05-11,2024-05-16,2024-05-24,cancel,CUST1005,Christopher Abbott,johnsoncharles@example.org,(611)676-9397,2024-03-19,True,Damaged Item,approved,2024-06-01,True
6,ORD1006,123456795,2024-05-15,2024-05-19,2024-05-19,delivered,CUST1006,Mark Hobbs,andreawalter@example.org,001-820-611-4520,2024-04-16,False,,,,True
7,ORD1007,123456796,2024-04-29,2024-05-06,,in transit,CUST1007,Katherine Vargas,rothshane@example.com,001-400-543-0291,2024-03-29,False,,,,False
8,ORD1008,123456797,2024-05-14,2024-05-24,2024-06-01,delivered,CUST1008,Terri Rios,cory45@example.net,368-387-0547x4120,2024-04-08,True,Defective Item,approved,2024-06-08,False
9,ORD1009,123456798,2024-05-02,2024-05-08,,in transit,CUST1009,Frederick Wong,xfrench@example.org,413-344-2505,2024-03-27,False,,,,False


In [17]:
# Define the product types and descriptions
product_types = {
    'Basic-G': 'Basic Eyeglasses',
    'SunP-G': 'Sun Protection Eyeglasses',
    'Sport-G': 'Sports Eyeglasses',
    'Artist-G': 'Designer Eyeglasses',
    'Thinker-G': 'Reading Eyeglasses'
}

descriptions = {
    'Basic-G': 'Simple yet stylish, perfect for daily wear.',
    'SunP-G': 'Features UV protection for sunny days.',
    'Sport-G': 'Durable frames designed for athletic use.',
    'Artist-G': 'Indie-style frames, ideal for the modern thinker.',
    'Thinker-G': 'Enhanced lenses for close-up reading tasks.'
}

# Randomly assign products and descriptions
np.random.seed(42)  # For reproducibility
random_products = np.random.choice(list(product_types.keys()), size=len(df))
df['Product'] = [product_types[prod] for prod in random_products]
df['Description'] = [descriptions[prod] for prod in random_products]

# Display the updated DataFrame to confirm changes
df.head()


Unnamed: 0,Order ID,Tracking Number,Shipment Date,Expected Delivery Date,Delivery Date,Status,Customer ID,Customer Name,Customer Email,Customer Phone,Order Date,Refund Requested,Refund Reason,Refund Status,Refund Date,Notification Status,Product,Description
0,ORD1000,123456789,2024-05-10,2024-05-19,2024-05-28,cancel,CUST1000,William Ramirez,dana83@example.com,408.218.3405,2024-03-27,True,Damaged Item,approved,2024-06-03,False,Designer Eyeglasses,"Indie-style frames, ideal for the modern thinker."
1,ORD1001,123456790,2024-05-05,2024-05-08,2024-05-18,cancel,CUST1001,Marie Alexander,brittanyallen@example.net,001-696-591-4933x380,2024-03-29,True,Damaged Item,approved,2024-05-26,True,Reading Eyeglasses,Enhanced lenses for close-up reading tasks.
2,ORD1002,123456791,,,,in process,CUST1002,Carol Bates,wcain@example.org,+1-913-590-1321x129,2024-04-08,False,,,,True,Sports Eyeglasses,Durable frames designed for athletic use.
3,ORD1003,123456792,2024-05-12,2024-05-16,2024-05-23,cancel,CUST1003,Jessica Cooley,jameschen@example.net,690-480-9975,2024-03-25,True,Defective Item,approved,2024-05-30,True,Reading Eyeglasses,Enhanced lenses for close-up reading tasks.
4,ORD1004,123456793,,,,in process,CUST1004,Ronald Jackson,higginsmichele@example.org,001-805-234-1133,2024-03-31,True,Delivery Region Out of Scope,approved,,False,Reading Eyeglasses,Enhanced lenses for close-up reading tasks.


In [18]:
# Save DataFrame to CSV
df.to_csv('mock_dataset.csv', index=False)

print("Mock dataset generated and saved as 'mock_dataset.csv'.")

Mock dataset generated and saved as 'mock_dataset.csv'.
