# In [2]:  (Jupyter cell prompt left over from the notebook export)
import random
import pandas as pd
from datetime import datetime, timedelta

# Course catalogue: maps each top-level category name to the list of its
# subcategory names. The generation loop further down iterates over every
# (category, subcategory) pair, so adding an entry here adds rows to the output.
#
# NOTE(review): a few names (e.g. "E‑Commerce", "Self‑Esteem", "Non‑Digital
# Marketing", "Test‑Taking Skills") appear to contain a non-ASCII hyphen rather
# than a plain "-". They are emitted as-is into the "Subcategory" column --
# confirm this is intended, since exact-match filters on "-" would miss them.
categories = {
    "Development": [
        "Web Development", "Mobile Apps", "Programming Languages", "Game Development", 
        "Databases", "Software Testing", "Software Engineering", "Development Tools", "E‑Commerce"
    ],
    "IT & Software": [
        "IT Certification", "Network & Security", "Hardware", "Operating Systems", "Other IT & Software"
    ],
    "Office Productivity": [
        "Microsoft", "Apple", "Google", "SAP", "Intuit", "Salesforce", "Oracle", "Other Office Tools"
    ],
    "Personal Development": [
        "Personal Transformation", "Leadership", "Productivity", "Personal Finance", "Career Development",
        "Parenting & Relationships", "Self‑Esteem", "Stress Management", "Motivation", "Creativity",
        "Influence", "Memory & Study Skills", "Happiness", "Religion & Spirituality", "Personal Branding"
    ],
    "Design": [
        "Web Design", "Graphic Design", "Design Tools", "UX Design", "Game Design", "Design Thinking", 
        "3D & Animation", "Fashion", "Architectural Design", "Interior Design", "Other Design"
    ],
    "Marketing": [
        "Digital Marketing", "SEO", "Social Media Marketing", "Branding", "Marketing Fundamentals", 
        "Analytics & Automation", "Public Relations", "Advertising", "Video Marketing", 
        "Content Marketing", "Product Marketing", "Affiliate Marketing", "Growth Hacking", 
        "Non‑Digital Marketing", "Other Marketing"
    ],
    "Lifestyle": [
        "Arts & Crafts", "Food & Beverage", "Beauty & Makeup", "Travel", "Gaming", 
        "Home Improvement", "Pet Care & Training", "Other Lifestyle"
    ],
    "Photography": [
        "Digital Photography", "Photography Fundamentals", "Portraits", "Landscape", 
        "Black & White Photography", "Photography Tools", "Mobile Photography", 
        "Travel Photography", "Commercial Photography", "Wedding Photography", 
        "Wildlife Photography", "Video Design", "Other Photography"
    ],
    "Health & Fitness": [
        "Fitness", "General Health", "Sports", "Nutrition", "Yoga", "Mental Health", "Dieting",
        "Self Defense", "Safety & First Aid", "Dance", "Meditation", "Other Health"
    ],
    "Teaching & Academics": [
        "Instructional Design", "Educational Development", "Teaching Tools", 
        "Social Science", "Math & Science", "Humanities"
    ],
    "Music": [
        "Instruments", "Music Production", "Music Fundamentals", "Vocal Training", 
        "Music Techniques", "Music Software", "Other Music"
    ],
    "Language": [
        "English", "Spanish", "French", "German", "Japanese", "Portuguese", 
        "Chinese", "Russian", "Latin", "Arabic", "Hebrew", "Italian", "Other Languages"
    ],
    "Test Prep": [
        "Graduate Exam Prep", "International High School", "College Entrance Prep", 
        "Test‑Taking Skills", "Other Test Prep"
    ],
    "Business": [
        "Entrepreneurship", "Communication", "Management", "Sales", "Business Strategy", 
        "Operations", "Project Management", "Business Law", "Analytics & Intelligence"
    ]
}

# Setup: the markets a subcategory can be sold in, and the simulated calendar.
countries = [
    "USA",
    "India",
    "UK",
    "Canada",
    "Australia",
    "Germany",
    "Iceland",
    "France",
]

# Inclusive window from 2024-01-01 to 2024-12-31, one entry per calendar day.
start_date, end_date = datetime(2024, 1, 1), datetime(2024, 12, 31)
date_range = pd.date_range(start_date, end_date, freq="D")

# Generate Data
# Builds one row per (subcategory, country, calendar day):
#   [category, subcategory, price label, yearly sales, country, date, daily sales]
# Each subcategory gets a random average price, a random yearly sales total and
# two randomly chosen countries; the yearly total is spread evenly across those
# countries and every day in `date_range`, with Gaussian noise per day.
# Output differs on every run; call random.seed(...) beforehand if a
# reproducible dataset is needed.
data = []

# Convert the timestamps to plain dates once instead of once per generated row.
calendar_days = [stamp.date() for stamp in date_range]

countries_per_subcat = 2  # 2 random countries per subcat

# Number of (country, day) cells one subcategory's yearly total is spread over.
# Derived from the actual calendar length so leap years (366 days) are handled.
cells_per_subcat = countries_per_subcat * len(calendar_days)

for category, subcats in categories.items():
    for subcat in subcats:
        avg_price = round(random.uniform(15, 25), 2)
        yearly_sales = random.randint(5700, 50300)
        selected_countries = random.sample(countries, k=countries_per_subcat)

        # Fixed two-decimal formatting so values such as 15.5 render as
        # "$15.50" and the price column has a uniform shape.
        price_label = f"${avg_price:.2f}"

        # True division keeps the fractional part of the mean; floor division
        # here would systematically push the daily values below the yearly total.
        daily_sales_avg = yearly_sales / cells_per_subcat if cells_per_subcat else 0.0
        daily_sales_sd = daily_sales_avg * 0.2  # noise: 20% of the daily mean

        for country in selected_countries:
            for day in calendar_days:
                # Round to the nearest whole unit (truncation would bias low)
                # and clamp at zero because a sale count cannot be negative.
                daily_sales = max(0, round(random.gauss(daily_sales_avg, daily_sales_sd)))
                data.append([
                    category, subcat, price_label, yearly_sales,
                    country, day, daily_sales
                ])

# Final DataFrame: label the generated rows, one column name per field in the
# order the fields were appended to each row.
column_names = [
    "Category",
    "Subcategory",
    "Avg. Paid Course Price (USD)",
    "Yearly Course Sales Quantity",
    "Country",
    "Date",
    "Daily Sales Quantity",
]
df = pd.DataFrame(data, columns=column_names)

# Save to CSV in the current working directory, without the row index column.
output_path = "udemy_full_sales_2024.csv"
df.to_csv(output_path, index=False)
