# In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Synthetic influencer-marketing dataset generator.
# Builds four DataFrames (influencers, posts, tracking_data, payouts) from
# seeded random draws and writes each one to a CSV file in the current
# working directory.
#
# NOTE: the generated values depend on the ORDER of the random draws below.
# Reordering columns or statements changes the output even with the same seed.

# Set seed for reproducibility (stdlib `random` drives the captions,
# NumPy's global generator drives everything else)
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Dataset sizing and date window; change these to scale the dataset
N_INFLUENCERS = 1000
N_POSTS = 1000
N_TRACKING_ROWS = 1000
N_PAYOUTS = 1000
USER_ID_START = 1000  # user ids run 'U1000', 'U1001', ...
START_DATE = datetime(2025, 5, 1)
DATE_SPAN_DAYS = 60  # day offsets are drawn from [0, DATE_SPAN_DAYS)

# Sample data for fields
platforms = ['Instagram', 'YouTube', 'Twitter']
categories = ['Fitness', 'Wellness', 'Nutrition', 'Lifestyle']
genders = ['Male', 'Female', 'Other']
products = ['MuscleBlaze Whey', 'HKVitals Omega-3', 'Gritzo SuperMilk', 'MB Fat Burner']
campaigns = ['MB_Shred_June', 'HK_HeartHealth_July', 'Gritzo_Growth_May']


def _random_dates(count):
    """Return a list of `count` random datetimes inside the date window.

    Each value is START_DATE plus a whole number of days in
    [0, DATE_SPAN_DAYS). Makes exactly one `np.random.randint` call, so it
    advances NumPy's global random state.
    """
    day_offsets = np.random.randint(0, DATE_SPAN_DAYS, count)
    # int() turns each NumPy integer into a plain Python int for timedelta
    return [START_DATE + timedelta(days=int(offset)) for offset in day_offsets]


# Generate Influencers: one row per id, ids are 1..N_INFLUENCERS
influencer_ids = range(1, N_INFLUENCERS + 1)
influencers = pd.DataFrame({
    'influencer_id': influencer_ids,
    'name': [f'Influencer_{i}' for i in influencer_ids],
    'category': np.random.choice(categories, N_INFLUENCERS),
    'gender': np.random.choice(genders, N_INFLUENCERS),
    'follower_count': np.random.randint(5000, 500000, N_INFLUENCERS),
    'platform': np.random.choice(platforms, N_INFLUENCERS),
})

# Generate Posts: dates are drawn before the other post columns
post_dates = _random_dates(N_POSTS)
post_ids = range(1, N_POSTS + 1)
posts = pd.DataFrame({
    'post_id': post_ids,
    # Sampled with replacement, so an influencer may have several posts or none
    'influencer_id': np.random.choice(influencers['influencer_id'], N_POSTS),
    'platform': np.random.choice(platforms, N_POSTS),
    'date': post_dates,
    'url': [f'https://socialmedia.com/post/{i}' for i in post_ids],
    # Hashtag is the first word of a randomly chosen product name
    'caption': [f'Great product! #{random.choice(products).split()[0]}' for _ in range(N_POSTS)],
    'reach': np.random.randint(1000, 100000, N_POSTS),
    'likes': np.random.randint(100, 5000, N_POSTS),
    'comments': np.random.randint(10, 1000, N_POSTS),
})

# Generate Tracking Data: one row per synthetic user
tracking_data = pd.DataFrame({
    'source': np.random.choice(platforms, N_TRACKING_ROWS),
    'campaign': np.random.choice(campaigns, N_TRACKING_ROWS),
    'influencer_id': np.random.choice(influencers['influencer_id'], N_TRACKING_ROWS),
    'user_id': [f'U{USER_ID_START + i}' for i in range(N_TRACKING_ROWS)],
    'product': np.random.choice(products, N_TRACKING_ROWS),
    'date': _random_dates(N_TRACKING_ROWS),
    'orders': np.random.randint(1, 5, N_TRACKING_ROWS),
    'revenue': np.random.randint(500, 5000, N_TRACKING_ROWS),
})

# Generate Payouts
payouts = pd.DataFrame({
    'influencer_id': np.random.choice(influencers['influencer_id'], N_PAYOUTS),
    'basis': np.random.choice(['post', 'order'], N_PAYOUTS),
    'rate': np.random.randint(100, 1000, N_PAYOUTS),
    'orders': np.random.randint(1, 20, N_PAYOUTS),
})
# NOTE(review): total_payout is rate * orders for every row, including rows
# whose basis is 'post' -- confirm whether per-post payouts should use a
# post count instead.
payouts['total_payout'] = payouts['rate'] * payouts['orders']

# Export to CSV (row index is not written)
influencers.to_csv("influencers.csv", index=False)
posts.to_csv("posts.csv", index=False)
tracking_data.to_csv("tracking_data.csv", index=False)
payouts.to_csv("payouts.csv", index=False)
