In [1]:
## Importing necessary libraries
import random
from faker import Faker
import pandas as pd
from datetime import datetime, timedelta

In [2]:

# Faker instance. NOTE(review): `fake` is not referenced in the visible part of
# this script; it is kept in case later cells rely on it.
fake = Faker()

# Accumulator for the generated rows (one dict per row).
data = []

# Product categories and the competitor labels a row can be tagged with.
categories = [
    'Clothing',
    'Electronics',
    'Home Goods',
    'Beauty',
    'Sports & Outdoors',
]
competitors = [
    'Competitor A',
    'Competitor B',
    'Competitor C',
]

# Candidate values for the descriptive (categorical) columns.
locations = [
    'North America',
    'Europe',
    'Asia',
    'Africa',
    'South America',
]
age_groups = ['18-25', '26-35', '36-50', '51-65', '65+']
seasonality = ['Seasonal', 'Year-Round']
sales_channels = ['Online', 'In-Store', 'Both']
discounts = ['Yes', 'No']

# Bounds of the sampling window: 2017-01-01 through 2024-12-31,
# i.e. eight calendar years.
start_date = datetime(2017, 1, 1)
end_date = datetime(2024, 12, 31)

# Generate 1000 synthetic rows, each stamped with a randomly sampled date.
#
# The width of the sampling window never changes, so compute it once instead
# of on every iteration.
date_span_days = (end_date - start_date).days

for _ in range(1000):  # Generate 1000 data points for the sake of the example
    # Pick a day uniformly from the window. random.randint includes both
    # endpoints, so start_date and end_date can both be drawn.
    random_date = start_date + timedelta(days=random.randint(0, date_span_days))

    # Core sales figures
    category = random.choice(categories)
    sales_revenue = round(random.uniform(100000, 1000000), 2)

    # COGS is modelled as a random 40%-60% share of sales revenue
    cogs_percentage = random.uniform(0.4, 0.6)
    cogs = round(sales_revenue * cogs_percentage, 2)

    # Units sold: integer between 500 and 5000 (inclusive)
    quantity_sold = random.randint(500, 5000)

    # Profit = Sales Revenue - COGS
    profit = round(sales_revenue - cogs, 2)

    # Market and marketing metrics, each drawn independently of the others
    market_share = round(random.uniform(10, 50), 2)
    consumer_preference = random.randint(60, 95)
    growth_rate = round(random.uniform(5, 20), 2)
    marketing_spend = round(random.uniform(10000, 100000), 2)
    customer_satisfaction = round(random.uniform(3.5, 5), 1)

    # Competitor figures. These are independent random draws; they share only
    # the row (and therefore its date and category) with the values above.
    competitor = random.choice(competitors)
    competitor_sales = round(random.uniform(500000, 800000), 2)
    competitor_market_share = round(random.uniform(5, 30), 2)

    # Descriptive attributes
    location = random.choice(locations)
    age_group = random.choice(age_groups)
    seasonal = random.choice(seasonality)
    sales_channel = random.choice(sales_channels)
    discount = random.choice(discounts)
    return_rate = round(random.uniform(1, 10), 2)  # Percentage of returns

    # Store the row; the keys become the DataFrame/CSV column headers
    data.append({
        'Date': random_date.strftime('%Y-%m-%d'),
        'Category': category,
        'Sales Revenue': sales_revenue,
        'Cost of Goods Sold (COGS)': cogs,
        'Quantity Sold': quantity_sold,
        'Profit': profit,
        'Market Share': market_share,
        'Consumer Preference Index': consumer_preference,
        'Growth Rate (%)': growth_rate,
        'Marketing Spend': marketing_spend,
        'Customer Satisfaction': customer_satisfaction,
        'Competitor': competitor,
        'Competitor Sales Revenue': competitor_sales,
        'Competitor Market Share': competitor_market_share,
        'Location': location,
        'Age Group': age_group,
        'Seasonality': seasonal,
        'Sales Channel': sales_channel,
        'Discount/Promotion': discount,
        'Return Rate (%)': return_rate,
    })

# Turn the accumulated row dictionaries into a DataFrame
df = pd.DataFrame(data=data)

# Show a preview: the first five rows of the dataset
print(df.head(n=5))

# Write the dataset to CSV for further analysis; index=False leaves the
# DataFrame's row index out of the file
df.to_csv(path_or_buf='fake_market_research_with_all_attributes.csv', index=False)

print("Fake market research data with all attributes generated and saved!")


         Date     Category  Sales Revenue  Cost of Goods Sold (COGS)  \
0  2019-08-12   Home Goods      859827.76                  396176.55   
1  2017-05-09  Electronics      288677.67                  122067.20   
2  2022-06-14  Electronics      220300.16                  118282.00   
3  2017-02-04     Clothing      555889.94                  301948.21   
4  2017-01-20  Electronics      746241.23                  360765.10   

   Quantity Sold     Profit  Market Share  Consumer Preference Index  \
0            835  463651.21         19.96                         80   
1            900  166610.47         32.12                         75   
2           3788  102018.16         15.43                         66   
3           2530  253941.73         36.09                         91   
4           3412  385476.13         21.39                         66   

   Growth Rate (%)  Marketing Spend  Customer Satisfaction    Competitor  \
0             7.36         31466.90                    4.5