In [1]:
pip install pandas names

Collecting names
  Downloading names-0.3.0.tar.gz (789 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m789.1/789.1 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: names
  Building wheel for names (setup.py) ... [?25l[?25hdone
  Created wheel for names: filename=names-0.3.0-py3-none-any.whl size=803681 sha256=d6093484e7524182bbee645d4d393b2487147e6c81ed0779ce745b0ee1610e29
  Stored in directory: /root/.cache/pip/wheels/8d/db/fc/50ec19a89a8dcbbd158a4aae44123cb525cda1f07dae287197
Successfully built names
Installing collected packages: names
Successfully installed names-0.3.0


In [2]:
!pip install --quiet google-cloud-storage

In [3]:
import pandas as pd
import random
import uuid
import time
import os
from datetime import datetime, timedelta
import names
from google.cloud import storage

# Configuration

# Point the Google client libraries at the service-account key file, but only
# when the variable is not already set, so credentials supplied by the
# environment are not silently overwritten.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/content/atomcampxdatapilot-460118-921a89b28fa2.json",
)

LOG_FILE = "sales_log.txt"  # text file that write_log() appends to
SIMULATION_DURATION = 60 * 60 * 3  # 3 hours
SLEEP_BETWEEN_BATCHES = 60  # seconds
SALES_PER_BATCH = 30  # sale attempts per batch (one new customer each)
GCS_BUCKET_NAME = "istore_salesdata_atomcamp"  # destination bucket for uploads

# Logging Utility

def write_log(message: str) -> None:
    """Append a timestamped line to LOG_FILE and echo the same line to stdout.

    Args:
        message: Text to log; it is prefixed with ``[YYYY-MM-DD HH:MM:SS]``.

    The file is opened as UTF-8 explicitly because the messages logged in this
    notebook contain emoji, which raise UnicodeEncodeError when the platform's
    default text encoding is not UTF-8.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    log_line = f"[{timestamp}] {message}"
    with open(LOG_FILE, "a", encoding="utf-8") as f:
        f.write(log_line + "\n")
    print(log_line)

# GCS Upload Utility

def upload_to_gcs(file_path):
    """Upload a local file to the GCS_BUCKET_NAME bucket under the same name.

    The object name in the bucket is ``file_path`` itself. This is best effort:
    any exception raised along the way is caught and written to the log
    instead of propagating to the caller.
    """
    try:
        destination = storage.Client().bucket(GCS_BUCKET_NAME).blob(file_path)
        destination.upload_from_filename(file_path)
        write_log(f"☁️ Uploaded {file_path} to GCS.")
    except Exception as upload_error:
        write_log(f"❌ GCS Upload Failed for {file_path}: {upload_error}")

# Data Generators

def generate_products():
    """Build the fixed product catalogue as a DataFrame.

    Returns:
        One row per catalogue entry with columns product_id (1-based, in
        catalogue order), model_name, variant, os_version, weight_grams,
        base_price, launch_date (parsed with pd.to_datetime) and
        discontinued (always False).
    """
    # (model, variant, os_version, weight, price, launch date)
    catalogue = [
        ("iPhone 13", "128GB Blue", "iOS 15", 174, 230000, "2021-09-24"),
        ("iPhone 13 Pro", "256GB Silver", "iOS 15", 204, 290000, "2021-09-24"),
        ("iPhone 14", "128GB Black", "iOS 16", 172, 250000, "2022-09-16"),
        ("iPhone 14 Pro Max", "512GB Gold", "iOS 16", 240, 370000, "2022-09-16"),
        ("iPhone 15", "256GB Blue", "iOS 17", 171, 270000, "2023-09-22"),
        ("iMac M1", "8GB/256GB", "macOS Monterey", 4300, 320000, "2021-04-20"),
        ("iMac M3", "16GB/512GB", "macOS Sonoma", 4500, 450000, "2023-11-07"),
        ("MacBook Air M2", "8GB/256GB", "macOS Ventura", 1290, 280000, "2022-07-15"),
        ("MacBook Air M3", "16GB/512GB", "macOS Sonoma", 1350, 340000, "2024-03-08"),
        ("AirPods Pro 2", "MagSafe Case", "iOS Compatible", 50, 75000, "2022-09-23"),
        ("AirPods 3", "Lightning Case", "iOS Compatible", 46, 50000, "2021-10-26"),
    ]

    records = []
    for product_id, entry in enumerate(catalogue, start=1):
        model, variant, os_version, weight, price, launch = entry
        records.append({
            "product_id": product_id,
            "model_name": model,
            "variant": variant,
            "os_version": os_version,
            "weight_grams": weight,
            "base_price": price,
            "launch_date": pd.to_datetime(launch),
            "discontinued": False,
        })
    return pd.DataFrame(records)

def generate_new_customer(customer_id):
    """Create one synthetic customer record.

    Args:
        customer_id: Identifier stored in the record's "customer_id" field.

    Returns:
        dict with keys customer_id, full_name, age, gender, email, city,
        created_at, loyalty_tier, purchase_count, total_spent and order_goal.
        purchase_count and total_spent start at zero.
    """
    # Pick the gender first and ask `names` for a matching first name, so the
    # "gender" field cannot contradict the generated "full_name".
    gender = random.choice(["Male", "Female"])
    name = names.get_full_name(gender=gender.lower())
    # A random 4-digit suffix makes e-mail collisions between namesakes unlikely.
    email = f"{name.replace(' ', '.').lower()}.{random.randint(1000, 9999)}@example.com"
    return {
        "customer_id": customer_id,
        "full_name": name,
        "age": random.randint(18, 60),
        "gender": gender,
        "email": email,
        "city": random.choice(["Karachi", "Lahore", "Quetta", "Islamabad", "Multan", "Rawalpindi", "Peshawar"]),
        "created_at": datetime.now(),
        "loyalty_tier": random.choice(["Bronze", "Silver", "Gold"]),
        "purchase_count": 0,
        "total_spent": 0.0,
        "order_goal": random.randint(1, 100)
    }

def generate_stores():
    """Build the four-row store table.

    Returns:
        DataFrame with columns store_id (1-4), store_name, location,
        manager_name (a freshly generated random name per store), opened_on
        and region.
    """
    store_names = ["iStore Karachi", "iStore Lahore", "iStore Quetta", "iStore Islamabad"]
    locations = ["Karachi", "Lahore", "Quetta", "Islamabad"]
    opening_dates = [
        datetime(2020, 5, 1),
        datetime(2021, 6, 15),
        datetime(2022, 7, 10),
        datetime(2021, 8, 25),
    ]
    regions = ["South", "Central", "West", "North"]

    # One generated manager name per store.
    managers = []
    for _ in range(4):
        managers.append(names.get_full_name())

    columns = {
        "store_id": range(1, 5),
        "store_name": store_names,
        "location": locations,
        "manager_name": managers,
        "opened_on": opening_dates,
        "region": regions,
    }
    return pd.DataFrame(columns)

def generate_inventory(stores_df, products_df):
    """Create one inventory row for every (store, product) combination.

    Args:
        stores_df: DataFrame whose rows expose a ``store_id`` field.
        products_df: DataFrame whose rows expose a ``product_id`` field.

    Returns:
        DataFrame with columns inventory_id (1-based running number),
        store_id, product_id, stock_level (always 100), restock_date
        (now plus a random 10-40 days) and damaged_units (random 0-5).
    """
    # Lazily enumerate combinations: stores in the outer loop, products inside.
    combinations = (
        (store.store_id, product.product_id)
        for store in stores_df.itertuples(index=False)
        for product in products_df.itertuples(index=False)
    )

    rows = []
    for inventory_id, (store_id, product_id) in enumerate(combinations, start=1):
        restock_on = datetime.now() + timedelta(days=random.randint(10, 40))
        damaged = random.randint(0, 5)
        rows.append({
            "inventory_id": inventory_id,
            "store_id": store_id,
            "product_id": product_id,
            "stock_level": 100,
            "restock_date": restock_on,
            "damaged_units": damaged,
        })
    return pd.DataFrame(rows)

# Sale Processing

def create_sale(sale_id, customer, products_df, stores_df, inventory_df, customers_df):
    """Simulate one sale of a random product at a random store.

    Args:
        sale_id: Identifier reused as sale_id, payment_id and shipping_id.
        customer: Mapping with at least a "customer_id" key.
        products_df: Product table; reads "product_id" and "base_price".
        stores_df: Store table; reads "store_id".
        inventory_df: Inventory table; its "stock_level" is decremented IN PLACE
            for the chosen (product, store) pair.
        customers_df: Customer table; "purchase_count" and "total_spent" of the
            matching row are updated IN PLACE.

    Returns:
        A dict with "sale", "payment" and "shipping" record dicts, or None when
        the chosen store has no inventory row for the product or not enough
        stock for the requested quantity.

    Raises:
        IndexError: if ``customer["customer_id"]`` is not present in
            ``customers_df``. This is checked before any table is modified, so
            a failed call never leaves stock decremented without a sale.
    """
    product = products_df.sample(1).iloc[0]
    store = stores_df.sample(1).iloc[0]
    quantity = random.randint(1, 3)

    inv_filter = (inventory_df["product_id"] == product["product_id"]) & \
                 (inventory_df["store_id"] == store["store_id"])

    available = inventory_df.loc[inv_filter, "stock_level"]
    if available.empty or available.values[0] < quantity:
        return None

    # Resolve the customer row before touching inventory so that an unknown
    # customer cannot leave the inventory table mutated.
    customer_rows = customers_df.index[customers_df["customer_id"] == customer["customer_id"]]
    if len(customer_rows) == 0:
        raise IndexError(f"customer_id {customer['customer_id']!r} not found in customers_df")
    customer_idx = customer_rows[0]

    inventory_df.loc[inv_filter, "stock_level"] -= quantity
    now = datetime.now()
    # Unit price fluctuates +/-5% around the catalogue price.
    unit_price = round(product["base_price"] * random.uniform(0.95, 1.05), 2)
    discount = random.choice([0, 500, 1000])
    # The flat discount applies to the whole order; shipping is not included.
    total_price = round(unit_price * quantity - discount, 2)
    payment_method = random.choice(["JazzCash", "Bank Transfer", "Credit Card"])
    shipping_cost = random.choice([300, 500, 800])
    shipping_method = random.choice(["Standard", "Express"])

    customers_df.at[customer_idx, "purchase_count"] += 1
    customers_df.at[customer_idx, "total_spent"] += total_price

    return {
        "sale": {
            "sale_id": sale_id,
            "product_id": product["product_id"],
            "customer_id": customer["customer_id"],
            "store_id": store["store_id"],
            "quantity": quantity,
            "unit_price": unit_price,
            "discount_applied": discount,
            "total_price": total_price,
            "sale_date": now,
            "salesperson": names.get_full_name(),
            "payment_method": payment_method
        },
        "payment": {
            "payment_id": sale_id,
            "sale_id": sale_id,
            "payment_method": payment_method,
            "amount_paid": total_price,
            "payment_status": "Completed",
            "payment_time": now,
            # First 8 characters of a random UUID.
            "transaction_id": str(uuid.uuid4())[:8]
        },
        "shipping": {
            "shipping_id": sale_id,
            "sale_id": sale_id,
            "shipping_method": shipping_method,
            "carrier_name": random.choice(["TCS", "BlueEx", "Pakistan Post"]),
            "shipping_cost": shipping_cost,
            "delivery_status": "Delivered",
            "estimated_delivery": now + timedelta(days=3),
            "actual_delivery": now + timedelta(days=3)
        }
    }

# CSV Writing Utility


def save_row_to_csv(row, file_path):
    """Append one record to a CSV file, writing the header when it is needed.

    Args:
        row: Mapping of column name to value for the single row to append.
        file_path: Destination CSV; created if it does not exist.

    The header is written when the file is missing or exists but is empty
    (zero bytes); otherwise only the data row is appended. Each call is
    logged through write_log.
    """
    # An existing zero-byte file would otherwise receive data rows with no header.
    needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0
    df = pd.DataFrame([row])
    df.to_csv(file_path, mode='a', header=needs_header, index=False)
    write_log(f"💾 Saved row to {file_path}")

# Main Simulation Loop

def main():
    """Run the sales simulation and publish the resulting CSVs to GCS.

    Flow:
      1. Generate the product, store and inventory tables, write them to CSV
         and upload them.
      2. Until SIMULATION_DURATION seconds have elapsed, run batches of
         SALES_PER_BATCH attempts. Each attempt creates a new customer and
         tries to create a sale; successful sales are appended to
         sales.csv / payments.csv / shipping.csv and inventory.csv is rewritten.
         All CSVs are uploaded when the just-recorded sale_id is a multiple
         of 100.
      3. Always (including on Ctrl-C or an unexpected error) upload whatever
         CSVs exist before returning.
    """
    import traceback  # local import: only needed for the error path below

    write_log("🚀 Simulation Started")

    products_df = generate_products()
    stores_df = generate_stores()
    inventory_df = generate_inventory(stores_df, products_df)
    customers_df = pd.DataFrame()

    sale_id = 1
    customer_id = 1
    batch = 1
    start_time = time.time()

    # Files produced during the run, in upload order.
    output_files = ["sales.csv", "payments.csv", "shipping.csv", "inventory.csv", "customers.csv"]

    # sale_id / customer_id restart at 1 on every run, and the sale files are
    # opened in append mode, so leftovers from a previous run would end up
    # holding duplicate ids. Start from a clean slate instead.
    for stale_file in ["sales.csv", "payments.csv", "shipping.csv", "customers.csv"]:
        if os.path.exists(stale_file):
            os.remove(stale_file)
            write_log(f"🧹 Removed stale {stale_file} from a previous run")

    for name, df in {
        "products.csv": products_df,
        "stores.csv": stores_df,
        "inventory.csv": inventory_df
    }.items():
        df.to_csv(name, index=False)
        upload_to_gcs(name)

    try:
        while time.time() - start_time < SIMULATION_DURATION:
            write_log(f"📦 Starting batch #{batch}")
            for _ in range(SALES_PER_BATCH):
                new_customer = generate_new_customer(customer_id)
                new_customer_row = pd.DataFrame([new_customer])
                # Do not concatenate onto the empty seed frame: newer pandas
                # releases may warn about empty entries in concat, and the
                # first row alone already is the full table.
                if customers_df.empty:
                    customers_df = new_customer_row
                else:
                    customers_df = pd.concat([customers_df, new_customer_row], ignore_index=True)

                # create_sale updates inventory_df and customers_df in place.
                result = create_sale(sale_id, new_customer, products_df, stores_df, inventory_df, customers_df)

                customers_df.to_csv("customers.csv", index=False)

                if result:
                    save_row_to_csv(result["sale"], "sales.csv")
                    save_row_to_csv(result["payment"], "payments.csv")
                    save_row_to_csv(result["shipping"], "shipping.csv")
                    inventory_df.to_csv("inventory.csv", index=False)

                    write_log(f" Sale {sale_id} | Customer {new_customer['full_name']}")

                    if sale_id % 100 == 0:
                        write_log(" Uploading CSVs to GCS (after 100 sales)...")
                        for file in output_files:
                            if os.path.exists(file):
                                upload_to_gcs(file)

                    # Only successful sales consume a sale_id.
                    sale_id += 1
                customer_id += 1

            write_log(f"✅ Batch #{batch} complete.")
            batch += 1

            # Do not sleep past the end of the simulation window.
            remaining = SIMULATION_DURATION - (time.time() - start_time)
            if remaining <= 0:
                break
            time.sleep(min(SLEEP_BETWEEN_BATCHES, remaining))

    except KeyboardInterrupt:
        write_log("🛑 Simulation interrupted manually.")
    except Exception as e:
        write_log(f"❌ Unexpected error: {e}")
        # Keep the stack trace so the failure can be diagnosed from the log.
        write_log(traceback.format_exc())
    finally:
        write_log("📤 Final upload of all CSVs to GCS...")
        for file in output_files:
            if os.path.exists(file):
                upload_to_gcs(file)

        write_log("🏁 Simulation complete.")

# Run the simulation only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


[2025-05-24 05:23:37] 🚀 Simulation Started
[2025-05-24 05:23:39] ☁️ Uploaded products.csv to GCS.
[2025-05-24 05:23:41] ☁️ Uploaded stores.csv to GCS.
[2025-05-24 05:23:43] ☁️ Uploaded inventory.csv to GCS.
[2025-05-24 05:23:43] 📦 Starting batch #1
[2025-05-24 05:23:43] 💾 Saved row to sales.csv
[2025-05-24 05:23:43] 💾 Saved row to payments.csv
[2025-05-24 05:23:43] 💾 Saved row to shipping.csv
[2025-05-24 05:23:43]  Sale 1 | Customer Mindy Wright
[2025-05-24 05:23:43] 💾 Saved row to sales.csv
[2025-05-24 05:23:43] 💾 Saved row to payments.csv
[2025-05-24 05:23:43] 💾 Saved row to shipping.csv
[2025-05-24 05:23:43]  Sale 2 | Customer Heather Crandall
[2025-05-24 05:23:43] 💾 Saved row to sales.csv
[2025-05-24 05:23:43] 💾 Saved row to payments.csv
[2025-05-24 05:23:43] 💾 Saved row to shipping.csv
[2025-05-24 05:23:43]  Sale 3 | Customer Ann Saad
[2025-05-24 05:23:43] 💾 Saved row to sales.csv
[2025-05-24 05:23:43] 💾 Saved row to payments.csv
[2025-05-24 05:23:43] 💾 Saved row to shipping.csv
[