In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Build a reproducible synthetic retail dataset: one row per sale with a
# date, a product category, a store and an integer sales amount.

# Setting the random seed for reproducibility
np.random.seed(42)

# Number of rows to generate (shared by every column below)
N_ROWS = 1000

# Bounds applied to the generated sales values
SALES_MIN = 10
SALES_MAX = 1000

# Normal-distribution parameters (mean, standard deviation) per category.
# This table is the single source of truth for both the category list and
# the sales distributions, so the two cannot drift apart.
SALES_PARAMS = {
    'Electronics': (800, 100),
    'Furniture': (500, 150),
    'Clothing': (200, 50),
    'Toys': (300, 80),
    'Groceries': (100, 30),
}

# Generating N_ROWS evenly spaced timestamps spanning the year 2023
date_range = pd.date_range(start='2023-01-01', end='2023-12-31', periods=N_ROWS)

# Randomly choosing categories and stores
categories = list(SALES_PARAMS)
stores = ['Target', 'Walmart', 'Kroger']

category_choices = np.random.choice(categories, size=N_ROWS)
store_choices = np.random.choice(stores, size=N_ROWS)

# Generating varying distributions of sales values for each category.
# Categories are processed in table order so the sequence of random draws
# (and therefore the generated data) is stable for a given seed.
sales = np.zeros(N_ROWS)
for category, (mean, std) in SALES_PARAMS.items():
    indices = np.where(category_choices == category)[0]
    sales[indices] = np.random.normal(loc=mean, scale=std, size=len(indices))

# Clipping the sales to [SALES_MIN, SALES_MAX]: the lower bound keeps every
# value positive, the upper bound caps unusually large draws
sales = np.clip(sales, SALES_MIN, SALES_MAX)

# Creating the DataFrame
data = {
    'Date': date_range,
    'Category': category_choices,
    'Store': store_choices,
    'Sales': sales,
}

df = pd.DataFrame(data)

# Adjusting the Date column to show only the date part
# (this turns the column into plain datetime.date objects)
df['Date'] = df['Date'].dt.date

# Rounding the Sales column to the nearest integer
df['Sales'] = df['Sales'].round().astype(int)