In [4]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Build a synthetic sales dataset (one row per simulated sale), save it as CSV
# and preview the first rows.

# Set the random seed for reproducibility
np.random.seed(42)

# Define the parameters
num_records = 1000
start_date = '2023-01-01'
end_date = '2023-12-31'
categories = ['Electronics', 'Clothing', 'Groceries', 'Furniture']
product_names = {
    'Electronics': ['Laptop', 'Smartphone', 'Tablet', 'Headphones'],
    'Clothing': ['Jeans', 'T-shirt', 'Jacket', 'Shoes'],
    'Groceries': ['Milk', 'Bread', 'Eggs', 'Cheese'],
    'Furniture': ['Chair', 'Table', 'Sofa', 'Bed']
}
# (low, high) bounds handed to np.random.uniform for each category's unit price.
price_range = {
    'Electronics': (100, 1000),
    'Clothing': (10, 100),
    'Groceries': (1, 20),
    'Furniture': (50, 500)
}

# Output location. '/content' stays the first choice so the file lands exactly
# where it did before (presumably the Colab working directory -- confirm); when
# that directory does not exist, fall back to the current working directory
# instead of failing inside to_csv.
output_name = 'business_sales_data.csv'
output_dir = Path('/content')
if not output_dir.is_dir():
    output_dir = Path.cwd()
output_path = output_dir / output_name

# Generate random data
dates = pd.date_range(start_date, end_date).to_pydatetime().tolist()
data = []

# NOTE: the order of the random draws inside the loop determines which records
# seed 42 produces; keep it unchanged so the dataset stays reproducible.
for _ in range(num_records):
    date = np.random.choice(dates)
    category = np.random.choice(categories)
    product_name = np.random.choice(product_names[category])
    price = np.round(np.random.uniform(*price_range[category]), 2)
    # randint's upper bound is exclusive, so quantities fall in 1..19.
    quantity_sold = np.random.randint(1, 20)
    # Round to cents: price * quantity is a float product and would otherwise
    # carry binary noise (e.g. 1.1 * 3 -> 3.3000000000000003) into the CSV.
    revenue = np.round(price * quantity_sold, 2)
    data.append([date, product_name, category, price, quantity_sold, revenue])

# Create DataFrame
columns = ['Date', 'Product Name', 'Category', 'Price', 'Quantity Sold', 'Revenue']
df = pd.DataFrame(data, columns=columns)

# Save to CSV
df.to_csv(output_path, index=False)

# Display first few rows of the dataset
df.head()


Unnamed: 0,Date,Product Name,Category,Price,Quantity Sold,Revenue
0,2023-04-13,Chair,Furniture,132.55,8,1060.4
1,2023-07-08,Tablet,Electronics,501.25,11,5513.75
2,2023-03-29,Headphones,Electronics,228.58,3,685.74
3,2023-05-30,Smartphone,Electronics,749.8,6,4498.8
4,2023-07-11,Chair,Furniture,327.87,12,3934.44


In [5]:
# Total revenue per calendar date; preview the first five dates.
# Relies on `df` built in the data-generation cell.
df.groupby('Date')[['Revenue']].sum().head()


Unnamed: 0_level_0,Revenue
Date,Unnamed: 1_level_1
2023-01-01,5955.44
2023-01-02,23923.06
2023-01-03,8016.16
2023-01-04,8808.42
2023-01-05,8114.13
