Libraries Installation

In [7]:
%pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
%pip install pandas faker numpy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


Libraries Initialization

In [9]:
import pandas as pd
import numpy as np
from faker import Faker
import datetime
import json

# Initialize the Faker generator
fake = Faker()

Configuration

In [10]:
# Dataset sizes.
# NUM_PRODUCTS is used as the upper bound when orders and reviews draw a random
# product_id, so it should match the number of entries in the product catalog.

# Use these for development (small, fast)
NUM_PRODUCTS = 50
NUM_CUSTOMERS = 1000
NUM_ORDERS = 5000
NUM_REVIEWS = 2000

# Use these for the final "production" run (large, impressive)
# NUM_PRODUCTS = 50
# NUM_CUSTOMERS = 10000
# NUM_ORDERS = 100000
# NUM_REVIEWS = 50000

# Seed both random sources so that Restart Kernel -> Run All regenerates the
# same prices, ids, quantities, ratings, names and review texts.
# NOTE: Faker's relative dates ('-2y', 'now') are still anchored to the moment
# the notebook runs, so timestamps shift with the run date.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)  # numpy: prices, foreign keys, quantities, ratings, template choice
Faker.seed(RANDOM_SEED)      # Faker: names, emails, cities, uuids, datetimes

products.csv

In [18]:
# Product catalog: category -> list of (product name, (min price, max price)).
# Each product later receives one random price drawn from its range.
product_catalog = {
    'Electronics': [
        ("Smartwatch", (150, 500)),
        ("Wireless Headphones", (80, 350)),
        ("4K Monitor", (300, 1500)),
        ("USB-C Hub", (25, 100)),
        ("Portable Charger", (30, 120)),
        ("Bluetooth Speaker", (40, 200)),
        ("Gaming Mouse", (50, 150)),
        ("Webcam", (60, 250)),
        ("Mechanical Keyboard", (100, 300)),
        ("Tablet Stand", (20, 80)),
    ],
    'Home Goods': [
        ("Air Fryer", (80, 250)),
        ("Scented Candle", (10, 40)),
        ("Plush Blanket", (40, 150)),
        ("Picture Frame", (15, 60)),
        ("Knife Set", (50, 400)),
        ("Coffee Maker", (100, 400)),
        ("Throw Pillow", (20, 60)),
        ("Wall Clock", (30, 120)),
        ("Essential Oil Diffuser", (40, 100)),
        ("Storage Basket", (25, 75)),
    ],
    'Books': [
        ("The Midnight Library", (12, 25)),
        ("Project Hail Mary", (15, 30)),
        ("Atomic Habits", (18, 35)),
        ("Dune", (10, 25)),
        ("Klara and the Sun", (14, 28)),
        ("Where the Crawdads Sing", (13, 26)),
        ("The Seven Husbands of Evelyn Hugo", (16, 32)),
        ("Educated", (17, 34)),
        ("Becoming", (19, 38)),
        ("The Silent Patient", (15, 30)),
    ],
    'Groceries': [
        ("Avocado", (1, 4)),
        ("Sourdough Bread", (5, 10)),
        ("Organic Coffee", (12, 25)),
        ("Greek Yogurt", (4, 8)),
        ("Quinoa", (8, 15)),
        ("Almond Milk", (3, 7)),
        ("Dark Chocolate", (6, 12)),
        ("Olive Oil", (15, 30)),
        ("Honey", (8, 18)),
        ("Pasta", (2, 6)),
    ],
    'Apparel': [
        ("Classic T-Shirt", (15, 50)),
        ("Running Shoes", (60, 200)),
        ("Denim Jacket", (70, 250)),
        ("Beanie Hat", (12, 40)),
        ("Sunglasses", (50, 300)),
        ("Hoodie", (40, 120)),
        ("Sneakers", (80, 180)),
        ("Leather Belt", (30, 100)),
        ("Baseball Cap", (15, 45)),
        ("Wool Scarf", (25, 80)),
    ],
}

# Walk the catalog in declaration order so product ids run 1..N and the random
# price draws happen in the same sequence as the catalog is written.
catalog_entries = (
    (category, item_name, price_range)
    for category, items in product_catalog.items()
    for item_name, price_range in items
)

products_data = [
    {
        'product_id': product_id,
        'product_name': item_name,
        'category': category,
        # One uniform draw inside the product's price range, rounded to cents
        'price': round(np.random.uniform(*price_range), 2),
    }
    for product_id, (category, item_name, price_range) in enumerate(catalog_entries, start=1)
]

products_df = pd.DataFrame(products_data)

print("\nproducts_df:")
print(products_df)


products_df:
    product_id                       product_name     category   price
0            1                         Smartwatch  Electronics  213.43
1            2                Wireless Headphones  Electronics  331.93
2            3                         4K Monitor  Electronics  799.64
3            4                          USB-C Hub  Electronics   31.33
4            5                   Portable Charger  Electronics  112.92
5            6                  Bluetooth Speaker  Electronics   91.20
6            7                       Gaming Mouse  Electronics  127.46
7            8                             Webcam  Electronics  149.99
8            9                Mechanical Keyboard  Electronics  127.12
9           10                       Tablet Stand  Electronics   28.35
10          11                          Air Fryer   Home Goods  157.54
11          12                     Scented Candle   Home Goods   18.18
12          13                      Plush Blanket   Home Goods 

In [23]:
# Write products_df to CSV, trying each candidate location until one is writable
import os

save_successful = False
file_path = None

# Candidate locations in priority order:
# (directory to create first or None, target path, message printed on success)
save_attempts = [
    ('data', 'data/products.csv', "✅ products.csv saved successfully in 'data' folder!"),
    (None, 'products.csv', "✅ products.csv saved successfully in current directory!"),
    (
        None,
        os.path.join(os.path.expanduser("~"), "Downloads", "products.csv"),
        "✅ products.csv saved successfully to Downloads folder!",
    ),
]

for method_no, (required_dir, candidate_path, success_message) in enumerate(save_attempts, start=1):
    try:
        if required_dir is not None:
            os.makedirs(required_dir, exist_ok=True)
        file_path = candidate_path
        products_df.to_csv(file_path, index=False)
        save_successful = True
        print(success_message)
        break  # first writable location wins
    except Exception as e:
        print(f"❌ Method {method_no} failed: {e}")

# Report the outcome
if not save_successful:
    print("\n❌ All save methods failed. Let's try displaying the data instead:")
    print(products_df.head())
else:
    print(f"📊 File contains {len(products_df)} products")
    print(f"📁 File saved as: {file_path}")
    print(f"📏 File size: {os.path.getsize(file_path)} bytes")

    print("\n🔽 TO DOWNLOAD:")
    saved_in_data_dir = file_path.startswith('data/')
    if not saved_in_data_dir and 'Downloads' in file_path:
        print(f"File is already in your Downloads folder: {file_path}")
    else:
        print("1. Look at the VS Code file explorer (left sidebar)")
        if saved_in_data_dir:
            print("2. Expand the 'data' folder")
        else:
            print("2. Find 'products.csv' in the file list")
        print("3. Right-click on 'products.csv'")
        print("4. Select 'Download' from the menu")

    print("\n📋 First 5 rows of your data:")
    print(products_df.head())

❌ Method 1 failed: [Errno 30] Read-only file system: 'data'
❌ Method 2 failed: [Errno 30] Read-only file system: 'products.csv'
✅ products.csv saved successfully to Downloads folder!
📊 File contains 50 products
📁 File saved as: /Users/mehul/Downloads/products.csv
📏 File size: 1628 bytes

🔽 TO DOWNLOAD:
File is already in your Downloads folder: /Users/mehul/Downloads/products.csv

📋 First 5 rows of your data:
   product_id         product_name     category   price
0           1           Smartwatch  Electronics  213.43
1           2  Wireless Headphones  Electronics  331.93
2           3           4K Monitor  Electronics  799.64
3           4            USB-C Hub  Electronics   31.33
4           5     Portable Charger  Electronics  112.92


In [24]:
# Troubleshooting: show where the kernel is running and which files it can see
import os

print("📂 Current working directory:")
print(os.getcwd())

# List the directory once and reuse the listing for both reports below
cwd_entries = os.listdir('.')

print("\n📄 All files in current directory:")
for entry in cwd_entries:
    if os.path.isfile(entry):
        print(f"  - {entry}")

print("\n🔍 CSV files specifically:")
csv_files = [f for f in cwd_entries if f.endswith('.csv')]
if csv_files:
    for csv_file in csv_files:
        print(f"  ✓ {csv_file}")
else:
    print("  No CSV files found")

# Alternative: Save to Downloads folder (if accessible)
try:
    home_dir = os.path.expanduser("~")
    downloads_path = os.path.join(home_dir, "Downloads", "products.csv")
    products_df.to_csv(downloads_path, index=False)
    print(f"\n💾 Also saved to: {downloads_path}")
except Exception as e:
    # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still
    # propagate, and report the reason instead of hiding it.
    print(f"\n❌ Could not save to Downloads folder: {e}")

📂 Current working directory:
/

📄 All files in current directory:
  - .file

🔍 CSV files specifically:
  No CSV files found

💾 Also saved to: /Users/mehul/Downloads/products.csv


customers.csv

In [26]:
def _fake_customer(customer_id):
    """Build one synthetic customer record for the given id.

    The sign-up timestamp is drawn first (between three years and one year
    ago), then the name, email, city and state fields, all from Faker.
    """
    signup_time = fake.date_time_between(start_date='-3y', end_date='-1y')
    return {
        'customer_id': customer_id,
        'first_name': fake.first_name(),
        'last_name': fake.last_name(),
        'email': fake.email(),
        'city': fake.city(),
        'state': fake.state_abbr(),
        # Second-resolution string, so no microseconds end up in the CSV
        'created_at': signup_time.strftime('%Y-%m-%d %H:%M:%S'),
    }


# Customer ids run 1..NUM_CUSTOMERS inclusive
customers_data = [_fake_customer(customer_id) for customer_id in range(1, NUM_CUSTOMERS + 1)]

customers_df = pd.DataFrame(customers_data)

# Write customers_df to CSV, trying each candidate location until one is writable
import os

save_successful = False
file_path = None

# Candidate locations in priority order:
# (directory to create first or None, target path, message printed on success)
save_attempts = [
    ('data', 'data/customers.csv', "✅ customers.csv saved successfully in 'data' folder!"),
    (None, 'customers.csv', "✅ customers.csv saved successfully in current directory!"),
    (
        None,
        os.path.join(os.path.expanduser("~"), "Downloads", "customers.csv"),
        "✅ customers.csv saved successfully to Downloads folder!",
    ),
]

for method_no, (required_dir, candidate_path, success_message) in enumerate(save_attempts, start=1):
    try:
        if required_dir is not None:
            os.makedirs(required_dir, exist_ok=True)
        file_path = candidate_path
        customers_df.to_csv(file_path, index=False)
        save_successful = True
        print(success_message)
        break  # first writable location wins
    except Exception as e:
        print(f"❌ Method {method_no} failed: {e}")

# Report the outcome
if not save_successful:
    print("\n❌ All save methods failed. Here's the customer data:")
    print(customers_df.head())
else:
    print(f"📊 File contains {len(customers_df)} customers")
    print(f"📁 File saved as: {file_path}")
    print(f"📏 File size: {os.path.getsize(file_path)} bytes")

    print("\n🔽 TO DOWNLOAD:")
    saved_in_data_dir = file_path.startswith('data/')
    if not saved_in_data_dir and 'Downloads' in file_path:
        print(f"File is already in your Downloads folder: {file_path}")
    else:
        print("1. Look at the VS Code file explorer (left sidebar)")
        if saved_in_data_dir:
            print("2. Expand the 'data' folder")
        else:
            print("2. Find 'customers.csv' in the file list")
        print("3. Right-click on 'customers.csv'")
        print("4. Select 'Download' from the menu")

    print("\n📋 First 5 customers:")
    print(customers_df.head())

❌ Method 1 failed: [Errno 30] Read-only file system: 'data'
❌ Method 2 failed: [Errno 30] Read-only file system: 'customers.csv'
✅ customers.csv saved successfully to Downloads folder!
📊 File contains 1000 customers
📁 File saved as: /Users/mehul/Downloads/customers.csv
📏 File size: 77101 bytes

🔽 TO DOWNLOAD:
File is already in your Downloads folder: /Users/mehul/Downloads/customers.csv

📋 First 5 customers:
   customer_id first_name   last_name                       email  \
0            1  Stephanie       Woods        rachel63@example.org   
1            2      Sarah       Ramos          fallen@example.com   
2            3     Daniel      Wilson      connorshaw@example.org   
3            4   Danielle        Diaz  martinveronica@example.net   
4            5    Michael  Mccullough     carterariel@example.org   

              city state           created_at  
0       Amandafort    WY  2023-04-04 09:54:05  
1      Sanchezland    MH  2024-05-06 10:37:02  
2       Adamsmouth    NV  202

orders.csv

In [27]:
def _fake_order(order_id):
    """Build one synthetic order row for the given id.

    The order timestamp is drawn first (within the last two years), then the
    customer id, product id and quantity (1-5) from numpy's global generator.
    """
    placed_at = fake.date_time_between(start_date='-2y', end_date='now')
    return {
        'order_id': order_id,
        'customer_id': np.random.randint(1, NUM_CUSTOMERS + 1),
        'product_id': np.random.randint(1, NUM_PRODUCTS + 1),
        'quantity': np.random.randint(1, 6),
        # Second-resolution string, so no microseconds end up in the CSV
        'order_date': placed_at.strftime('%Y-%m-%d %H:%M:%S'),
    }


# Order ids run 1..NUM_ORDERS inclusive
orders_data = [_fake_order(order_id) for order_id in range(1, NUM_ORDERS + 1)]

orders_df = pd.DataFrame(orders_data)

# Write orders_df to CSV, trying each candidate location until one is writable
import os

save_successful = False
file_path = None

# Candidate locations in priority order:
# (directory to create first or None, target path, message printed on success)
save_attempts = [
    ('data', 'data/orders.csv', "✅ orders.csv saved successfully in 'data' folder!"),
    (None, 'orders.csv', "✅ orders.csv saved successfully in current directory!"),
    (
        None,
        os.path.join(os.path.expanduser("~"), "Downloads", "orders.csv"),
        "✅ orders.csv saved successfully to Downloads folder!",
    ),
]

for method_no, (required_dir, candidate_path, success_message) in enumerate(save_attempts, start=1):
    try:
        if required_dir is not None:
            os.makedirs(required_dir, exist_ok=True)
        file_path = candidate_path
        orders_df.to_csv(file_path, index=False)
        save_successful = True
        print(success_message)
        break  # first writable location wins
    except Exception as e:
        print(f"❌ Method {method_no} failed: {e}")

# Report the outcome
if not save_successful:
    print("\n❌ All save methods failed. Here's the order data:")
    print(orders_df.head())
else:
    print(f"📊 File contains {len(orders_df)} orders")
    print(f"📁 File saved as: {file_path}")
    print(f"📏 File size: {os.path.getsize(file_path)} bytes")

    print("\n🔽 TO DOWNLOAD:")
    saved_in_data_dir = file_path.startswith('data/')
    if not saved_in_data_dir and 'Downloads' in file_path:
        print(f"File is already in your Downloads folder: {file_path}")
    else:
        print("1. Look at the VS Code file explorer (left sidebar)")
        if saved_in_data_dir:
            print("2. Expand the 'data' folder")
        else:
            print("2. Find 'orders.csv' in the file list")
        print("3. Right-click on 'orders.csv'")
        print("4. Select 'Download' from the menu")

    print("\n📋 First 5 orders:")
    print(orders_df.head())

❌ Method 1 failed: [Errno 30] Read-only file system: 'data'
❌ Method 2 failed: [Errno 30] Read-only file system: 'orders.csv'
✅ orders.csv saved successfully to Downloads folder!
📊 File contains 5000 orders
📁 File saved as: /Users/mehul/Downloads/orders.csv
📏 File size: 167555 bytes

🔽 TO DOWNLOAD:
File is already in your Downloads folder: /Users/mehul/Downloads/orders.csv

📋 First 5 orders:
   order_id  customer_id  product_id  quantity           order_date
0         1          445          27         1  2025-01-10 04:29:00
1         2           77          14         2  2024-04-20 10:21:32
2         3          281          41         3  2025-07-13 06:53:25
3         4          897          32         2  2024-12-10 06:27:39
4         5          135          33         3  2024-05-01 05:28:58


product_reviews.json

In [30]:
reviews_data = []

# Review templates keyed by product category, then by star rating (1-5).
# Built once at definition time; "{product_name}" is filled in per review by
# generate_realistic_review, so the table is not rebuilt on every call.
_REVIEW_TEMPLATES = {
    'Electronics': {
        5: [
            "Amazing {product_name}! Crystal clear sound/display and excellent battery life.",
            "Perfect {product_name}! Easy setup and works flawlessly. Highly recommend!",
            "Love this {product_name}! Great build quality and all features work perfectly.",
            "Excellent {product_name}! Fast performance and great value for money.",
            "Outstanding {product_name}! Connects easily and battery lasts all day.",
        ],
        4: [
            "Great {product_name}! Works well overall, minor issues with setup but good quality.",
            "Really good {product_name}. Does everything I need, would recommend.",
            "Solid {product_name}! Good performance and battery life is decent.",
            "Happy with this {product_name}. Good features and reliable connection.",
        ],
        3: [
            "Okay {product_name}. Does the job but nothing special. Average quality.",
            "The {product_name} works fine but could be better for the price.",
            "Decent {product_name}. Battery life could be longer but functional.",
            "Average {product_name}. Gets the job done but room for improvement.",
        ],
        2: [
            "Disappointing {product_name}. Battery dies quickly and connection issues.",
            "Poor {product_name}. Stops working randomly and hard to connect.",
            "Not great {product_name}. Laggy performance and short battery life.",
            "Mediocre {product_name}. Frequently disconnects and slow to respond.",
        ],
        1: [
            "Terrible {product_name}! Died after 2 days, complete waste of money.",
            "Awful {product_name}. Never worked properly, constant connection problems.",
            "Horrible {product_name}! Broke immediately, don't buy this junk.",
            "Worst {product_name} ever. Stopped working after a few hours.",
        ]
    },
    'Home Goods': {
        5: [
            "Love this {product_name}! Perfect size and excellent quality. Looks amazing!",
            "Beautiful {product_name}! Well-made and exactly what I was looking for.",
            "Fantastic {product_name}! Sturdy construction and great design.",
            "Perfect {product_name}! High quality materials and beautiful finish.",
            "Amazing {product_name}! Easy to use and looks great in my home.",
        ],
        4: [
            "Great {product_name}! Good quality and nice appearance, very satisfied.",
            "Really nice {product_name}. Well-built and functions perfectly.",
            "Good {product_name}! Solid construction and looks nice.",
            "Happy with this {product_name}. Good value and works well.",
        ],
        3: [
            "Okay {product_name}. Does what it should but nothing extraordinary.",
            "Average {product_name}. Functional but could be better quality.",
            "Decent {product_name}. Works fine but not the best materials.",
            "The {product_name} is fine. Gets the job done but nothing special.",
        ],
        2: [
            "Disappointing {product_name}. Feels cheap and flimsy construction.",
            "Poor quality {product_name}. Already showing wear after light use.",
            "Not impressed with this {product_name}. Cheaply made and wobbly.",
            "Mediocre {product_name}. Materials feel cheap for the price.",
        ],
        1: [
            "Terrible {product_name}! Broke within days, awful quality.",
            "Horrible {product_name}. Fell apart immediately, complete junk.",
            "Worst {product_name} ever! Cheaply made and broke right away.",
            "Awful {product_name}. Damaged on arrival and poor construction.",
        ]
    },
    'Books': {
        5: [
            "Incredible book! {product_name} kept me hooked from start to finish.",
            "Amazing read! {product_name} has great characters and engaging plot.",
            "Loved {product_name}! Couldn't put it down, highly recommend.",
            "Fantastic book! {product_name} is beautifully written and captivating.",
            "Outstanding! {product_name} exceeded all my expectations.",
        ],
        4: [
            "Really enjoyed {product_name}! Good story and well-written.",
            "Great book! {product_name} was engaging and worth reading.",
            "Good read! {product_name} has interesting characters and plot.",
            "Enjoyed {product_name}. Well-written and entertaining.",
        ],
        3: [
            "Okay book. {product_name} was fine but nothing extraordinary.",
            "Average read. {product_name} was decent but predictable.",
            "{product_name} was alright. Not bad but not amazing either.",
            "Decent book. {product_name} was readable but forgettable.",
        ],
        2: [
            "Disappointing. {product_name} was boring and hard to get through.",
            "Not great. {product_name} had a slow plot and weak characters.",
            "Poor book. {product_name} was confusing and poorly written.",
            "Mediocre. {product_name} was dull and predictable.",
        ],
        1: [
            "Terrible book! {product_name} was impossible to finish.",
            "Awful! {product_name} was boring and poorly written.",
            "Horrible! {product_name} made no sense and wasted my time.",
            "Worst book ever! {product_name} was completely unreadable.",
        ]
    },
    'Groceries': {
        5: [
            "Excellent {product_name}! Fresh, delicious, and great quality.",
            "Perfect {product_name}! Tastes amazing and arrived fresh.",
            "Love this {product_name}! Great flavor and excellent packaging.",
            "Outstanding {product_name}! Fresh delivery and delicious taste.",
            "Amazing {product_name}! Best quality I've found, will order again.",
        ],
        4: [
            "Great {product_name}! Good quality and tastes fresh.",
            "Really good {product_name}. Fresh and flavorful, satisfied.",
            "Good {product_name}! Arrived fresh and tastes great.",
            "Happy with this {product_name}. Good quality and fresh.",
        ],
        3: [
            "Okay {product_name}. Fresh enough but nothing special.",
            "Average {product_name}. Decent quality but could be fresher.",
            "The {product_name} was fine. Not bad but not great either.",
            "Decent {product_name}. Acceptable quality for the price.",
        ],
        2: [
            "Disappointing {product_name}. Not very fresh and bland taste.",
            "Poor {product_name}. Arrived stale and didn't taste good.",
            "Not great {product_name}. Quality was poor and not fresh.",
            "Mediocre {product_name}. Tasted off and poor packaging.",
        ],
        1: [
            "Terrible {product_name}! Arrived spoiled and inedible.",
            "Awful {product_name}! Moldy and disgusting, had to throw away.",
            "Horrible {product_name}! Expired and tasted terrible.",
            "Worst {product_name} ever! Completely spoiled on arrival.",
        ]
    },
    'Apparel': {
        5: [
            "Love this {product_name}! Perfect fit, great material, and comfortable.",
            "Amazing {product_name}! Excellent quality fabric and true to size.",
            "Perfect {product_name}! Comfortable, stylish, and well-made.",
            "Fantastic {product_name}! Great fit and high-quality material.",
            "Outstanding {product_name}! Comfortable and looks exactly like pictures.",
        ],
        4: [
            "Great {product_name}! Good fit and comfortable material.",
            "Really nice {product_name}. Good quality and fits well.",
            "Good {product_name}! Comfortable and decent quality fabric.",
            "Happy with this {product_name}. Nice fit and good material.",
        ],
        3: [
            "Okay {product_name}. Fits fine but material could be better.",
            "Average {product_name}. Decent fit but nothing special.",
            "The {product_name} is fine. Fits okay but cheap feeling material.",
            "Decent {product_name}. Acceptable quality but thin fabric.",
        ],
        2: [
            "Disappointing {product_name}. Poor fit and cheap material.",
            "Not great {product_name}. Runs small and fabric feels cheap.",
            "Poor {product_name}. Uncomfortable and poor quality material.",
            "Mediocre {product_name}. Wrong size and fabric is scratchy.",
        ],
        1: [
            "Terrible {product_name}! Fell apart after one wash, awful quality.",
            "Horrible {product_name}! Completely wrong size and cheap fabric.",
            "Awful {product_name}! Ripped immediately and terrible fit.",
            "Worst {product_name} ever! Shrunk drastically and poor material.",
        ]
    }
}


def generate_realistic_review(rating, product_name, category):
    """Generate realistic review text based on rating and product category.

    Args:
        rating: Star rating; 1-5 selects a category-specific template.
        product_name: Name inserted into the chosen template.
        category: Product category used to pick the template group.

    Returns:
        A plain ``str``. When ``category``/``rating`` has no template group, a
        generic sentence is returned instead: positive for ``rating >= 4``,
        neutral for ``rating == 3``, negative otherwise.

    One template is picked with ``np.random.choice``, so the result follows
    numpy's global random state.
    """
    candidates = _REVIEW_TEMPLATES.get(category, {}).get(rating)
    if candidates is not None:
        # Choose first, then format only the selected template. str() turns
        # numpy's str_ scalar into a built-in str before formatting.
        chosen_template = str(np.random.choice(candidates))
        return chosen_template.format(product_name=product_name)

    # Fallback generic review
    if rating >= 4:
        return f"Great {product_name}! Really satisfied with this purchase."
    elif rating == 3:
        return f"The {product_name} is okay. Does what it should."
    else:
        return f"Not happy with this {product_name}. Poor quality."

# Build a product_id -> (product_name, category) lookup once, instead of
# filtering products_df for every review.
product_lookup = {}
try:
    for known_id, known_name, known_category in zip(
        products_df['product_id'].tolist(),
        products_df['product_name'].tolist(),
        products_df['category'].tolist(),
    ):
        # setdefault keeps the first row if an id ever appears twice
        product_lookup.setdefault(known_id, (known_name, known_category))
except NameError:
    # products_df not available (products cell not run): every review falls
    # back to the generic product name/category below.
    product_lookup = {}

for _ in range(1, NUM_REVIEWS + 1):
    # Generate datetime and format to remove microseconds
    review_datetime = fake.date_time_between(start_date='-2y', end_date='now')
    review_formatted = review_datetime.strftime('%Y-%m-%d %H:%M:%S')

    # Pick a random product and resolve its name/category for the review text;
    # ids missing from the lookup get the generic fallback.
    product_id = np.random.randint(1, NUM_PRODUCTS + 1)
    product_name, category = product_lookup.get(product_id, ("product", "General"))

    rating = np.random.randint(1, 6)

    reviews_data.append({
        'review_id': fake.uuid4(),
        'product_id': product_id,
        'customer_id': np.random.randint(1, NUM_CUSTOMERS + 1),
        'rating': rating,
        'review_text': generate_realistic_review(rating, product_name, category),
        'review_date': review_formatted
    })

reviews_df = pd.DataFrame(reviews_data)

# Write reviews_df to JSON, trying each candidate location until one is writable
import os

save_successful = False
file_path = None

# Candidate locations in priority order:
# (directory to create first or None, target path, message printed on success)
save_attempts = [
    ('data', 'data/product_reviews.json', "✅ product_reviews.json saved successfully in 'data' folder!"),
    (None, 'product_reviews.json', "✅ product_reviews.json saved successfully in current directory!"),
    (
        None,
        os.path.join(os.path.expanduser("~"), "Downloads", "product_reviews.json"),
        "✅ product_reviews.json saved successfully to Downloads folder!",
    ),
]

for method_no, (required_dir, candidate_path, success_message) in enumerate(save_attempts, start=1):
    try:
        if required_dir is not None:
            os.makedirs(required_dir, exist_ok=True)
        file_path = candidate_path
        reviews_df.to_json(file_path, orient='records', indent=2)
        save_successful = True
        print(success_message)
        break  # first writable location wins
    except Exception as e:
        print(f"❌ Method {method_no} failed: {e}")

# Report the outcome
if not save_successful:
    print("\n❌ All save methods failed. Here's the review data:")
    print(reviews_df.head())
else:
    print(f"📊 File contains {len(reviews_df)} reviews")
    print(f"📁 File saved as: {file_path}")
    print(f"📏 File size: {os.path.getsize(file_path)} bytes")

    print("\n🔽 TO DOWNLOAD:")
    saved_in_data_dir = file_path.startswith('data/')
    if not saved_in_data_dir and 'Downloads' in file_path:
        print(f"File is already in your Downloads folder: {file_path}")
    else:
        print("1. Look at the VS Code file explorer (left sidebar)")
        if saved_in_data_dir:
            print("2. Expand the 'data' folder")
        else:
            print("2. Find 'product_reviews.json' in the file list")
        print("3. Right-click on 'product_reviews.json'")
        print("4. Select 'Download' from the menu")

    # Show the first review found for a high, middle and low rating
    print("\n📋 Sample reviews by rating:")
    for rating in (5, 3, 1):
        matching_reviews = reviews_df[reviews_df['rating'] == rating]
        if matching_reviews.empty:
            continue
        first_match = matching_reviews.iloc[0]
        print(f"\n⭐ {rating}-star review:")
        print(f"Product ID: {first_match['product_id']}")
        print(f"Review: {first_match['review_text']}")

❌ Method 1 failed: [Errno 30] Read-only file system: 'data'
❌ Method 2 failed: [Errno 30] Read-only file system: 'product_reviews.json'
✅ product_reviews.json saved successfully to Downloads folder!
📊 File contains 2000 reviews
📁 File saved as: /Users/mehul/Downloads/product_reviews.json
📏 File size: 495027 bytes

🔽 TO DOWNLOAD:
File is already in your Downloads folder: /Users/mehul/Downloads/product_reviews.json

📋 Sample reviews by rating:

⭐ 5-star review:
Product ID: 20
Review: Love this Storage Basket! Perfect size and excellent quality. Looks amazing!

⭐ 3-star review:
Product ID: 24
Review: Decent book. Dune was readable but forgettable.

⭐ 1-star review:
Product ID: 18
Review: Horrible Wall Clock. Fell apart immediately, complete junk.
