In [1]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Daily calendar: one entry per day from 1 January 2023 up to the moment this cell runs
start_date = datetime(2023, 1, 1)
end_date = datetime.now()
date_range = pd.date_range(start=start_date, end=end_date)

# Pools of identifiers the generator samples from
store_ids = ['ST001', 'ST002']
product_ids = ['P' + str(n).zfill(3) for n in range(1, 51)]   # P001 .. P050
user_ids = ['U' + str(n).zfill(3) for n in range(1, 101)]     # U001 .. U100
# A blank ID is one more possible pick
# NOTE(review): presumably stands for a sale with no known user -- confirm.
user_ids.append('')

# Build the synthetic rows: every calendar day receives between 1 and 20
# records, each with independently sampled product, store, quantity and user.
records = [
    {
        'Date': day.strftime('%Y-%m-%d'),
        'ProductID': random.choice(product_ids),
        'StoreID': random.choice(store_ids),
        'UnitsSold': random.randint(1, 10),
        'UserID': random.choice(user_ids),
    }
    for day in date_range
    for _ in range(random.randint(1, 20))
]

# Tabulate the rows and persist them (no index column in the CSV)
df = pd.DataFrame(records)
df.to_csv("sales_data_2023_to_present.csv", index=False)


In [3]:
import csv
import json
from datetime import datetime

# Convert the sales CSV into a JSON array of typed records.
sales_csv_file = 'sales_data_new.csv'
sales_json_file = 'sales_output_new.json'

# Date layouts accepted in the CSV's "Date" column, tried in this order.
# "%m/%d/%Y" is the layout this cell originally required; "%Y-%m-%d" is the
# layout the sales generator in this notebook writes, so both must parse.
DATE_FORMATS = ("%m/%d/%Y", "%Y-%m-%d")


def normalize_date(raw):
    """Return the date string ``raw`` re-formatted as YYYY-MM-DD.

    Args:
        raw: Date text taken from the CSV, in one of ``DATE_FORMATS``.

    Returns:
        The same calendar date formatted as ``%Y-%m-%d``.

    Raises:
        ValueError: If ``raw`` matches none of the accepted layouts.
    """
    for fmt in DATE_FORMATS:
        try:
            return datetime.strptime(raw, fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue
    raise ValueError(
        f"Unrecognised date {raw!r}; expected one of {DATE_FORMATS}"
    )


sales_json = []

with open(sales_csv_file, newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        sales_json.append({
            "Date": normalize_date(row["Date"]),
            "ProductID": row["ProductID"],
            "StoreID": row["StoreID"],
            "UserID": row["UserID"],
            # CSV values arrive as text; cast the numeric columns explicitly
            "UnitsSold": int(row["UnitsSold"]),
            # The CSV column is named "Price"; the JSON key is deliberately different
            "Price(OfProduct)": float(row["Price"]),
            "TotalEarn": float(row["TotalEarn"])
        })

# Save to JSON
with open(sales_json_file, 'w') as jsonfile:
    json.dump(sales_json, jsonfile, indent=2)

# Report the file that was actually written
print(f"✅ {sales_json_file} has been created.")


✅ sales_output.json has been created.


In [3]:
import random
import csv
from datetime import datetime, timedelta
from collections import defaultdict

# --- Simulation settings -------------------------------------------------
start_date = datetime(2024, 1, 1)
end_date = datetime.today()

store_ids = ["ST001", "ST002"]
product_ids = ["P" + str(n).zfill(3) for n in range(1, 51)]  # P001 to P050

# Running stock level for every (product, store) pair; unseen pairs start at 0
stock_balance = defaultdict(int)

# One list per simulated transaction, in CSV column order
rows = []

current_date = start_date
while current_date <= end_date:
    day_label = current_date.strftime("%Y-%m-%d")

    # Each day gets between 5 and 15 simulated transactions
    for _ in range(random.randint(5, 15)):
        product = random.choice(product_ids)
        store = random.choice(store_ids)
        slot = (product, store)

        # Roughly one transaction in ten also carries a restock of 10-100 units
        if random.random() < 0.1:
            received = random.randint(10, 100)
        else:
            received = 0

        # Sales are capped by the stock held *before* this transaction's
        # restock is added, and by 10 units per transaction
        on_hand = stock_balance[slot]
        if on_hand > 0:
            sold = random.randint(1, min(10, on_hand))
        else:
            sold = 0

        # Apply both movements and record the resulting level
        stock_balance[slot] = on_hand + received - sold
        rows.append(
            [day_label, product, store, received, sold, stock_balance[slot]]
        )

    current_date += timedelta(days=1)

# Dump header plus all simulated transactions to disk
with open("inventory_data.csv", "w", newline="") as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerow(["Date", "ProductID", "StoreID", "QtyIn", "QtyOut", "Balance"])
    csv_out.writerows(rows)

print("✅ inventory_data.csv generated.")


✅ inventory_data.csv generated.


In [4]:
import csv
import json

# Convert the inventory CSV into a JSON array of records.
csv_file = "inventory_data.csv"
json_file = "inventory_data.json"

# Columns whose CSV text must be stored as integers in the JSON output
int_fields = ("QtyIn", "QtyOut", "Balance")

# newline='' lets the csv module do its own line-ending handling, as its
# documentation recommends for any file object passed to a reader.
with open(csv_file, mode='r', newline='') as f:
    reader = csv.DictReader(f)
    data = []

    for row in reader:
        # Copy the row, then overwrite the numeric fields with typed values;
        # the remaining columns stay as the strings read from the file.
        record = dict(row)
        for field in int_fields:
            record[field] = int(row[field])
        data.append(record)

# Write to JSON
with open(json_file, mode='w') as f:
    json.dump(data, f, indent=2)

print("✅ inventory_data.json generated.")


✅ inventory_data.json generated.


In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime

# Configurable: adjust as needed
INVENTORY_CSV = "inventory_data.csv"
SALES_CSV = "sales_data_new.csv"
USER_IDS = [f"U{str(i).zfill(3)}" for i in range(1, 11)]  # U001 - U010
PRICE_RANGE = (3.00, 10.00)  # RM3.00 - RM10.00

# Column order of the sales CSV. Passing it to the DataFrame constructor means
# the header row is written even when no sale is found; without it an empty
# record list would produce a CSV with no header.
SALES_COLUMNS = [
    "Date", "ProductID", "StoreID", "UserID", "UnitsSold", "Price", "TotalEarn",
]

# Read inventory data; "Date" is parsed so it can be compared and formatted
df_inventory = pd.read_csv(INVENTORY_CSV, parse_dates=["Date"])

# Keep only rows dated from 1 Jan 2024 through today (inclusive)
start_date = pd.Timestamp("2024-01-01")
today = pd.Timestamp(datetime.today().date())
df_inventory = df_inventory[
    (df_inventory["Date"] >= start_date) & (df_inventory["Date"] <= today)
]

# One sales record per inventory row that moved stock out (QtyOut > 0).
# Each record gets its own random unit price and a random user.
sales_records = []

for item in df_inventory.itertuples(index=False):
    qty_out = int(item.QtyOut)
    if qty_out <= 0:
        continue  # nothing was sold in this inventory movement

    price = round(random.uniform(*PRICE_RANGE), 2)
    sales_records.append({
        "Date": item.Date.strftime("%Y-%m-%d"),
        "ProductID": item.ProductID,
        "StoreID": item.StoreID,
        "UserID": random.choice(USER_IDS),
        "UnitsSold": qty_out,
        "Price": price,
        # Rounded to 2 decimals to avoid floating-point noise in the CSV
        "TotalEarn": round(qty_out * price, 2),
    })

# Create DataFrame with a fixed schema and save to CSV
df_sales = pd.DataFrame(sales_records, columns=SALES_COLUMNS)
df_sales.to_csv(SALES_CSV, index=False)

print(f"[✓] Sales data generated and saved to '{SALES_CSV}'")


[✓] Sales data generated and saved to 'sales_data_new.csv'
