In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Generate 365 days of synthetic user metrics and write them to users_data.csv.
np.random.seed(42)  # for reproducibility

start_date = datetime(2023, 8, 18)
dates = [start_date + timedelta(days=x) for x in range(365)]

# State before the first simulated day.
initial_total_users = 5000000
total_users = initial_total_users
new_users = 10000
dau_18_25 = 2000000
avg_trust_score = 6.5

data = []
for date in dates:
    total_users += new_users
    dau_18_25 += np.random.randint(3000, 7000)
    # The trust score drifts upward by a small random step and is capped at 10.
    avg_trust_score = min(10, avg_trust_score + np.random.uniform(0.001, 0.005))

    # Record the row BEFORE drawing the next day's sign-ups, so that the
    # NewUsers value in a row is the amount that was just added to TotalUsers
    # for that same date (previously the row carried the next day's value).
    data.append([date, int(total_users), int(new_users), int(dau_18_25), round(avg_trust_score, 2)])

    # Sign-ups for the next day: random walk with a floor of 5000.
    # The draw stays in the same position of the random stream as before,
    # so every other column is unchanged.
    new_users = max(5000, new_users + np.random.randint(-500, 1000))

df = pd.DataFrame(data, columns=['Date', 'TotalUsers', 'NewUsers', 'DAU_18_25', 'AvgTrustScore'])

# Verification
assert df['TotalUsers'].is_monotonic_increasing, "Total Users should always increase"
assert (df['DAU_18_25'] <= df['TotalUsers']).all(), "DAU should not exceed Total Users"
assert (df['AvgTrustScore'] <= 10).all(), "Trust Score should not exceed 10"
# Day-over-day growth of TotalUsers must match the NewUsers reported for that day.
daily_growth = df['TotalUsers'].diff().fillna(df['TotalUsers'].iloc[0] - initial_total_users)
assert (daily_growth == df['NewUsers']).all(), "Total Users growth should equal New Users"

df.to_csv('users_data.csv', index=False)
print("Users data generated and verified successfully.")

Users data generated and verified successfully.


In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Generate 365 days of synthetic engagement metrics and write them to engagement_data.csv.
np.random.seed(42)

start_date = datetime(2023, 8, 18)
dates = [start_date + timedelta(days=x) for x in range(365)]

total_interactions = 950000000
unique_users_engaged = 1900000

data = []
for date in dates:
    # Cast every draw to a built-in int so the running totals are
    # arbitrary-precision Python integers. np.random.randint returns a
    # platform-default NumPy integer, which is 32-bit on some platforms
    # (e.g. Windows with NumPy 1.x); the cumulative interaction count ends
    # well above 2**31 - 1 and would overflow there.
    total_interactions += int(np.random.randint(3000000, 7000000))
    unique_users_engaged += int(np.random.randint(5000, 15000))
    # Ratio of the two cumulative counters (not a per-day average).
    avg_interactions = total_interactions / unique_users_engaged

    data.append([date, total_interactions, unique_users_engaged, round(avg_interactions, 2)])

df = pd.DataFrame(data, columns=['Date', 'TotalInteractions', 'UniqueUsersEngaged', 'AvgInteractionsPerUser'])

# Verification
assert df['TotalInteractions'].is_monotonic_increasing, "Total Interactions should always increase"
assert df['UniqueUsersEngaged'].is_monotonic_increasing, "Unique Users Engaged should always increase"
assert (df['AvgInteractionsPerUser'] > 0).all(), "Average Interactions should be positive"

df.to_csv('engagement_data.csv', index=False)
print("Engagement data generated and verified successfully.")

Engagement data generated and verified successfully.


In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Generate 13 monthly rows of synthetic creator metrics and write them to creators_data.csv.
np.random.seed(42)

start_date = datetime(2023, 8, 1)
# 13 consecutive calendar months (2023-08 .. 2024-08) for year-over-year comparison.
# 'MS' (month start) steps by real calendar months. Stepping by a fixed 30 days
# lands on 2023-08-31 for the second row, which duplicated the '2023-08' label
# and never reached '2024-08'.
dates = list(pd.date_range(start=start_date, periods=13, freq='MS').to_pydatetime())

active_creators = 10000000
total_content = 500000000
new_creators = 100000

data = []
for date in dates:
    active_creators += new_creators
    total_content += np.random.randint(8000000, 12000000)

    # Record the row BEFORE growing new_creators, so that NewCreators in a row
    # is the amount just added to ActiveCreators for that same month.
    data.append([date.strftime('%Y-%m'), int(active_creators), int(total_content), int(new_creators)])

    # Next month's new creators: 3-7% growth, truncated to an integer.
    # The draw keeps its position in the random stream.
    new_creators = int(new_creators * (1 + np.random.uniform(0.03, 0.07)))

df = pd.DataFrame(data, columns=['Month', 'ActiveCreators', 'TotalContent', 'NewCreators'])

# Verification
assert df['Month'].is_unique, "Each month should appear exactly once"
assert df['ActiveCreators'].is_monotonic_increasing, "Active Creators should always increase"
assert df['TotalContent'].is_monotonic_increasing, "Total Content should always increase"
assert (df['NewCreators'] > 0).all(), "New Creators should be positive"

df.to_csv('creators_data.csv', index=False)
print("Creators data generated and verified successfully.")

Creators data generated and verified successfully.


In [4]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Simulate 365 days of cumulative revenue figures and save them as revenue_data.csv.
np.random.seed(42)

start_date = datetime(2023, 8, 18)
dates = [start_date + timedelta(days=offset) for offset in range(365)]

# Running totals; each simulated day adds a random increment to both.
ad_revenue = 311000000
other_revenue = 5000000

data = []
for date in dates:
    # Draw order is significant for reproducibility: ad increment first, then other.
    ad_increment = np.random.randint(1000000, 2000000)
    other_increment = np.random.randint(50000, 150000)

    ad_revenue = ad_revenue + ad_increment
    other_revenue = other_revenue + other_increment
    total_revenue = ad_revenue + other_revenue

    row = [date, int(ad_revenue), int(other_revenue), int(total_revenue)]
    data.append(row)

df = pd.DataFrame(
    data,
    columns=['Date', 'AdRevenue', 'OtherRevenue', 'TotalRevenue'],
)

# Verification
assert df['AdRevenue'].is_monotonic_increasing, "Ad Revenue should always increase"
assert df['OtherRevenue'].is_monotonic_increasing, "Other Revenue should always increase"
assert df['TotalRevenue'].eq(df['AdRevenue'] + df['OtherRevenue']).all(), "Total Revenue should equal Ad Revenue plus Other Revenue"

df.to_csv('revenue_data.csv', index=False)
print("Revenue data generated and verified successfully.")

Revenue data generated and verified successfully.


In [5]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Generate 13 monthly rows of synthetic per-age-group counts and write them to
# age_distribution_data.csv.
np.random.seed(42)

start_date = datetime(2023, 8, 1)
# 13 consecutive calendar months (2023-08 .. 2024-08) for year-over-year comparison.
# 'MS' (month start) steps by real calendar months. Stepping by a fixed 30 days
# lands on 2023-08-31 for the second row, which duplicated the '2023-08' label
# and never reached '2024-08'.
dates = list(pd.date_range(start=start_date, periods=13, freq='MS').to_pydatetime())

age_18_20 = 1500000
age_21_23 = 1600000
age_24_25 = 900000
age_other = 1000000

data = []
for date in dates:
    # Each group grows by an independent random amount per month; the draw
    # order (18-20, 21-23, 24-25, other) fixes the values for a given seed.
    age_18_20 += np.random.randint(15000, 25000)
    age_21_23 += np.random.randint(15000, 25000)
    age_24_25 += np.random.randint(8000, 12000)
    age_other += np.random.randint(8000, 12000)

    data.append([date.strftime('%Y-%m'), int(age_18_20), int(age_21_23), int(age_24_25), int(age_other)])

df = pd.DataFrame(data, columns=['Month', 'Age_18_20', 'Age_21_23', 'Age_24_25', 'Age_Other'])

# Verification
assert df['Month'].is_unique, "Each month should appear exactly once"
assert df['Age_18_20'].is_monotonic_increasing, "Age 18-20 group should always increase"
assert df['Age_21_23'].is_monotonic_increasing, "Age 21-23 group should always increase"
assert df['Age_24_25'].is_monotonic_increasing, "Age 24-25 group should always increase"
assert df['Age_Other'].is_monotonic_increasing, "Other age group should always increase"

df.to_csv('age_distribution_data.csv', index=False)
print("Age Distribution data generated and verified successfully.")

Age Distribution data generated and verified successfully.
