# Simulating real-time sales data and continuously appending it to a CSV file

## 1. Importing Required Libraries 

random: Used to generate random values (e.g., customer IDs, product names, etc.).

pandas: Imported for data manipulation. It is not used directly in this script, but it is often used for handling CSV files.

datetime: Used to generate timestamps for sales transactions.

time: Used to introduce delays between generating records (to simulate real-time data).

In [15]:
import csv
import datetime
import random
import time

import pandas as pd

## 2. Defining Values for Fields

These lists define the possible values for various fields in the sales data:

products: Types of products sold.

categories: Categories of products.

brands: Brands of products.

regions: Regions where sales occur.

payment_methods: Payment methods used by customers.

genders: Genders of customers.

In [16]:
# Pools of allowed values; each generated record draws one entry per field.

# Product types on sale.
products = [
    'Smartphone',
    'Laptop',
    'Headphones',
    'Smartwatch',
    'TV',
    'Refrigerator',
    'Washing Machine',
]

# Product categories.
categories = [
    'Electronics',
    'Home Appliances',
    'Wearables',
]

# Product brands.
brands = [
    'Samsung',
    'Apple',
    'Xiaomi',
    'LG',
    'Sony',
    'Boat',
    'OnePlus',
]

# Sales regions.
regions = [
    'North India',
    'South India',
    'East India',
    'West India',
]

# Payment options offered to customers.
payment_methods = [
    'UPI',
    'Credit/Debit Card',
    'Cash',
    'Net Banking',
]

# Customer genders.
genders = [
    'Male',
    'Female',
]

## 3. Defining Price Ranges for Products

In [17]:
# Inclusive (minimum, maximum) unit price in rupees for each product name.
product_prices = {
    'Smartphone': (8_000, 150_000),
    'Laptop': (25_000, 200_000),
    'Headphones': (500, 30_000),
    'Smartwatch': (2_000, 50_000),
    'TV': (10_000, 300_000),
    'Refrigerator': (15_000, 100_000),
    'Washing Machine': (12_000, 80_000),
}

## 4. Function to Generate a Single Sales Record

This function generates a single sales record with the following fields:

customer_id: A random 4-digit number.

customer_name: A string like Customer_1234.

customer_age: A random age between 18 and 60.

customer_gender: Randomly selected from genders.

customer_location: Randomly selected from regions.

product_name: Randomly selected from products.

product_id: A random number between 1 and 100.

product_category: Randomly selected from categories.

product_brand: Randomly selected from brands.

product_rating: A random rating between 3.5 and 5.0.

price: Randomly generated within the product’s price range.

transaction_id: A random 6-digit number.

quantity: A random number between 1 and 5.

discount_applied: A random discount between 0% and 30%.

total_amount: Calculated as quantity * price * (1 - discount_applied).

payment_method: Randomly selected from payment_methods.

order_date: Current date in YYYY-MM-DD format.

order_time: Current time in HH:MM:SS format.

delivery_time: A random date and time between 1 and 7 days from now.



In [18]:
def generate_sales_record():
    """Build one simulated sales transaction as a flat list of field values.

    Field values are drawn with ``random`` from the module-level ``genders``,
    ``regions``, ``products``, ``categories``, ``brands`` and
    ``payment_methods`` lists. The unit price is drawn from the
    ``(min, max)`` range stored in ``product_prices`` for the chosen product.

    The order date, order time and delivery time are all derived from a
    single clock reading, so the three fields always agree with each other
    (separate readings could straddle a second or a midnight boundary).

    NOTE(review): category and brand are picked independently of the product,
    so combinations such as a 'Washing Machine' in 'Wearables' can occur.

    Returns:
        list: 19 values in this order: customer_id, customer_name,
        customer_age, customer_gender, customer_location, product_id,
        product_name, product_category, product_brand, product_rating,
        transaction_id, quantity, price, discount_applied, total_amount,
        payment_method, order_date ('YYYY-MM-DD'), order_time ('HH:MM:SS'),
        delivery_time ('YYYY-MM-DD HH:MM:SS').
    """
    # Customer details.
    customer_id = random.randint(1000, 9999)
    customer_name = f"Customer_{customer_id}"
    customer_age = random.randint(18, 60)
    customer_gender = random.choice(genders)
    customer_location = random.choice(regions)

    # Product details.
    product_name = random.choice(products)
    product_id = random.randint(1, 100)
    product_category = random.choice(categories)
    product_brand = random.choice(brands)
    product_rating = round(random.uniform(3.5, 5.0), 1)

    # Unit price within the range configured for the selected product.
    min_price, max_price = product_prices[product_name]
    price = round(random.uniform(min_price, max_price), 2)

    # Transaction details.
    transaction_id = random.randint(100000, 999999)
    quantity = random.randint(1, 5)
    discount_applied = round(random.uniform(0, 0.3), 2)  # fraction: 0% to 30%
    total_amount = round(quantity * price * (1 - discount_applied), 2)
    payment_method = random.choice(payment_methods)

    # Read the clock once so every time-based field shares the same instant.
    now = datetime.datetime.now()
    order_date = now.strftime('%Y-%m-%d')
    order_time = now.strftime('%H:%M:%S')
    delivery_delay = datetime.timedelta(days=random.randint(1, 7))
    delivery_time = (now + delivery_delay).strftime('%Y-%m-%d %H:%M:%S')

    return [
        customer_id, customer_name, customer_age, customer_gender, customer_location,
        product_id, product_name, product_category, product_brand, product_rating,
        transaction_id, quantity, price, discount_applied, total_amount, payment_method,
        order_date, order_time, delivery_time,
    ]

## 5. Function to Continuously Generate and Append Data


This function continuously generates sales records and appends them to a CSV file.

Parameters:

file_path: Path to the CSV file where data will be saved.

interval: Time delay (in seconds) between generating records (default is 5 seconds).

Steps:

Define the column names for the CSV file and write them as the header row, overwriting any existing file at file_path.

Enter an infinite loop (while True).

Generate a new sales record using generate_sales_record().

Append the record to the CSV file in comma-separated format.

Print the new record to the console.

Wait for the specified interval before generating the next record.

In [19]:
def generate_real_time_data(file_path, interval=5, *, max_records=None):
    """Write simulated sales records to a CSV file, one per ``interval`` seconds.

    The file at ``file_path`` is truncated and given a header row at the start
    of every call. Each record from ``generate_sales_record()`` is then
    appended as one CSV row and echoed to the console. The file is reopened
    and closed for every record, so each row is written out before the
    function sleeps.

    Rows are written with the ``csv`` module, so a value that contains a
    comma, quote or newline is quoted instead of corrupting the row.

    Args:
        file_path: Path of the CSV file to (over)write.
        interval: Seconds to wait between consecutive records. Defaults to 5.
        max_records: Keyword-only. Stop after this many records have been
            written. ``None`` (the default) keeps generating until the
            process is interrupted; ``0`` writes only the header row.
    """
    columns = [
        'customer_id', 'customer_name', 'customer_age', 'customer_gender', 'customer_location',
        'product_id', 'product_name', 'product_category', 'product_brand', 'product_rating',
        'transaction_id', 'quantity', 'price', 'discount_applied', 'total_amount', 'payment_method',
        'order_date', 'order_time', 'delivery_time',
    ]

    # Clear the file and write the column titles at the beginning of every run.
    # newline='' leaves line endings to the csv module, and lineterminator='\n'
    # keeps the same '\n'-terminated rows the file has always had.
    with open(file_path, 'w', newline='', encoding='utf-8') as file:
        csv.writer(file, lineterminator='\n').writerow(columns)

    records_written = 0
    while max_records is None or records_written < max_records:
        new_record = generate_sales_record()

        # Append the record to the CSV file.
        with open(file_path, 'a', newline='', encoding='utf-8') as file:
            csv.writer(file, lineterminator='\n').writerow(new_record)
        records_written += 1

        print(f"New record added: {new_record}")

        # Do not sleep after the final record of a bounded run.
        if max_records is not None and records_written >= max_records:
            break

        # Wait for the specified interval before generating the next record.
        time.sleep(interval)

## 6. Starting the Real-Time Data Generation

This call starts the real-time data generation process. Because the function loops forever, the cell keeps running until it is interrupted.

sales_data_india.csv: The CSV file where data will be saved.

interval=5: A new record will be added every 5 seconds.

In [None]:
# Kick off the generator: one new record lands in the CSV every 5 seconds.
generate_real_time_data(
    file_path='sales_data_india.csv',
    interval=5,
)

New record added: [8689, 'Customer_8689', 44, 'Male', 'East India', 71, 'Washing Machine', 'Electronics', 'Boat', 3.7, 127680, 3, 59414.47, 0.02, 174678.54, 'UPI', '2025-05-28', '11:59:46', '2025-06-04 11:59:46']
New record added: [5121, 'Customer_5121', 36, 'Female', 'West India', 90, 'Laptop', 'Electronics', 'Xiaomi', 4.4, 938359, 5, 181076.99, 0.08, 832954.15, 'UPI', '2025-05-28', '11:59:51', '2025-06-03 11:59:51']
New record added: [9584, 'Customer_9584', 60, 'Male', 'East India', 88, 'TV', 'Home Appliances', 'Sony', 5.0, 414082, 1, 216325.34, 0.14, 186039.79, 'Credit/Debit Card', '2025-05-28', '11:59:56', '2025-06-02 11:59:56']
New record added: [3200, 'Customer_3200', 37, 'Male', 'North India', 40, 'Laptop', 'Wearables', 'OnePlus', 4.6, 217261, 5, 69128.87, 0.29, 245407.49, 'UPI', '2025-05-28', '12:00:01', '2025-06-02 12:00:01']
New record added: [1240, 'Customer_1240', 21, 'Female', 'North India', 57, 'TV', 'Home Appliances', 'OnePlus', 4.8, 436950, 3, 52176.47, 0.15, 133050.0, 