In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import random

def generate_temporal_usage_patterns(days=30):
    """Generate synthetic per-session temporal usage data for one user.

    Two user-level traits are drawn once per call (a base number of sessions
    per day and a weekend multiplier). Every simulated day then receives that
    many sessions, each with a uniformly random start hour.

    Args:
        days: Number of consecutive days to simulate. Day ``d`` is flagged as
            a weekend when ``d % 7`` is 5 or 6.

    Returns:
        A ``pandas.DataFrame`` with one row per session and the columns
        ``day``, ``session_start_hour``, ``session_duration_minutes``,
        ``time_since_last_session_hours`` and ``is_weekend``. The columns are
        present even when no session was generated (``days <= 0`` or a base
        rate that truncates to zero sessions per day).
    """
    columns = [
        'day',
        'session_start_hour',
        'session_duration_minutes',
        'time_since_last_session_hours',
        'is_weekend',
    ]
    data = []

    # User-specific patterns, drawn once per call.
    # Gamma(shape=2, scale=3) has mean 6 and no upper bound; a draw below 1
    # truncates to zero sessions on weekdays.
    base_sessions_per_day = np.random.gamma(2, 3)
    weekend_multiplier = np.random.uniform(1.2, 2.0)  # More usage on weekends

    for day in range(days):
        is_weekend = day % 7 in (5, 6)
        sessions_today = int(base_sessions_per_day * (weekend_multiplier if is_weekend else 1))

        # Fractional start hours in [0, 24); they are not sorted within a day.
        session_times = np.random.uniform(0, 24, sessions_today)

        for session_time in session_times:
            # Log-normal duration, clipped to 1-120 minutes *before* the
            # evening boost below is applied.
            duration = np.clip(np.random.lognormal(2.5, 1.2), 1, 120)

            # NOTE: drawn independently of session_start_hour, so it is not
            # the actual gap between consecutive rows of the same day.
            time_since_last = np.random.exponential(2)  # Hours

            # Peak usage hours (evening bias). The start hour is fractional,
            # so the window must run up to 24 to include 23:00-23:59; an
            # upper bound of `<= 23` would silently skip that last hour.
            # Boosted sessions can last up to 120 * 2.5 = 300 minutes.
            if 18 <= session_time < 24:
                duration *= np.random.uniform(1.5, 2.5)

            data.append({
                'day': day,
                'session_start_hour': session_time,
                'session_duration_minutes': duration,
                'time_since_last_session_hours': time_since_last,
                'is_weekend': is_weekend
            })

    # Passing the column list keeps the schema stable when `data` is empty.
    return pd.DataFrame(data, columns=columns)

def generate_engagement_depth_indicators(n_sessions=300):
    """Simulate engagement-depth metrics, one row per session.

    Each session is assigned a user type (casual / moderate / heavy with
    probabilities 0.4 / 0.4 / 0.2) which selects the gamma distribution used
    for scroll velocity. The remaining metrics are drawn independently of the
    user type.

    Args:
        n_sessions: Number of sessions (rows) to simulate.

    Returns:
        A ``pandas.DataFrame`` built from one dict per session with the keys
        ``session_id``, ``user_type``, ``scroll_velocity_posts_per_min``,
        ``scroll_depth_percentage``, ``interaction_rate_percentage``,
        ``time_per_post_seconds`` and ``return_to_feed_frequency``.
    """
    user_types = ['casual', 'moderate', 'heavy']
    # Gamma (shape, scale) for scroll velocity in posts per minute; heavier
    # users scroll faster on average.
    velocity_params = {
        'casual': (2, 2),
        'moderate': (3, 3),
        'heavy': (5, 4),
    }

    def simulate_session(session_id):
        # The order of the random draws below is significant for seeded runs.
        kind = np.random.choice(user_types, p=[0.4, 0.4, 0.2])
        shape, scale = velocity_params[kind]
        velocity = np.random.gamma(shape, scale)

        # Fraction of the feed scrolled; Beta(2, 3) keeps most values below 1.
        depth_fraction = np.random.beta(2, 3)

        # Interactions per 100 posts viewed, scaled into the 0-20 range.
        interaction_rate = np.random.beta(2, 8) * 20

        # Seconds spent per post, limited to the 1-60 second range.
        seconds_per_post = np.clip(np.random.lognormal(1.5, 0.8), 1, 60)

        # How many times the user returned to the feed in this session.
        returns = np.random.poisson(3)

        return {
            'session_id': session_id,
            'user_type': kind,
            'scroll_velocity_posts_per_min': velocity,
            'scroll_depth_percentage': depth_fraction * 100,
            'interaction_rate_percentage': interaction_rate,
            'time_per_post_seconds': seconds_per_post,
            'return_to_feed_frequency': returns
        }

    return pd.DataFrame([simulate_session(sid) for sid in range(n_sessions)])

def generate_content_consumption_patterns(n_sessions=300):
    """Simulate content-consumption metrics, one row per session.

    For every session a preferred content type and two distinct preferred
    categories are drawn. The consumed content type is then sampled with the
    preferred type weighted twice as heavily as each other type.

    Args:
        n_sessions: Number of sessions (rows) to simulate.

    Returns:
        A ``pandas.DataFrame`` built from one dict per session with the keys
        ``session_id``, ``primary_content_type``, ``preferred_categories``
        (comma-joined string), ``negative_content_minutes``,
        ``algorithm_interactions`` and ``search_vs_feed_ratio``.
    """
    content_types = ['photo', 'video', 'story', 'reel', 'text']
    content_categories = ['lifestyle', 'news', 'entertainment', 'sports', 'politics', 'wellness']

    def sessions():
        for sid in range(n_sessions):
            # Per-session preferences (drawn first; draw order matters for
            # seeded runs).
            favourite = np.random.choice(content_types)
            favourite_categories = np.random.choice(content_categories, size=2, replace=False)

            # Weight 0.4 for the preferred type, 0.2 for every other type,
            # normalised so the weights sum to 1.
            weights = [0.4 if kind == favourite else 0.2 for kind in content_types]
            consumed = np.random.choice(content_types, p=np.array(weights) / sum(weights))

            # Minutes spent on negative/controversial content.
            negative_minutes = np.random.exponential(5)

            # Interactions with recommended content.
            recommended_clicks = np.random.poisson(8)

            # Share of time spent searching; Beta(1, 4) skews towards the feed.
            search_share = np.random.beta(1, 4)

            yield {
                'session_id': sid,
                'primary_content_type': consumed,
                'preferred_categories': ','.join(favourite_categories),
                'negative_content_minutes': negative_minutes,
                'algorithm_interactions': recommended_clicks,
                'search_vs_feed_ratio': search_share
            }

    return pd.DataFrame(list(sessions()))

def generate_behavioral_triggers(n_sessions=300):
    """Simulate behavioural-trigger data, one row per session.

    Args:
        n_sessions: Number of sessions (rows) to simulate.

    Returns:
        A ``pandas.DataFrame`` built from one dict per session with the keys
        ``session_id``, ``notification_response_minutes``,
        ``opening_trigger``, ``background_usage_score`` and ``exit_behavior``.
    """
    # Reasons for opening the app and their sampling probabilities.
    trigger_types = ['notification', 'boredom', 'habit', 'social', 'news_check']
    trigger_weights = [0.3, 0.25, 0.2, 0.15, 0.1]

    # Ways a session can end and their sampling probabilities.
    exit_types = ['intentional', 'distracted', 'forced']
    exit_weights = [0.4, 0.4, 0.2]

    # The dict values are evaluated top to bottom, so the random draws happen
    # in key order: response delay, opening trigger, background usage, exit.
    rows = [
        {
            'session_id': sid,
            # Minutes taken to respond to a notification, limited to 0.1-120.
            'notification_response_minutes': np.clip(np.random.lognormal(1, 1.5), 0.1, 120),
            'opening_trigger': np.random.choice(trigger_types, p=trigger_weights),
            # Background app usage on a 0-1 scale.
            'background_usage_score': np.random.beta(2, 5),
            'exit_behavior': np.random.choice(exit_types, p=exit_weights),
        }
        for sid in range(n_sessions)
    ]

    return pd.DataFrame(rows)

def generate_wellbeing_indicators(days=30):
    """Simulate daily wellbeing indicators for one user.

    Two baseline traits are drawn once per call (a compulsiveness score and a
    baseline number of sleep hours) and reused for every simulated day.

    Args:
        days: Number of days (rows) to simulate.

    Returns:
        A ``pandas.DataFrame`` built from one dict per day with the keys
        ``day``, ``planned_usage_minutes``, ``actual_usage_minutes``,
        ``usage_overage_minutes``, ``late_night_usage_minutes``,
        ``compulsive_checks_count``, ``sleep_hours`` and
        ``baseline_compulsiveness``.
    """
    # User baselines. Beta(2, 5) skews the compulsiveness score towards low
    # values; the sleep baseline is in hours.
    compulsiveness = np.random.beta(2, 5)
    baseline_sleep = np.random.normal(7, 1.5)

    def simulate_day(day):
        # Planned minutes, and actual minutes as planned * lognormal(0, 0.5).
        # That factor has median 1, so actual usage may fall on either side
        # of the plan.
        planned = np.random.uniform(30, 120)
        actual = planned * np.random.lognormal(0, 0.5)
        excess = actual - planned
        overage = excess if excess > 0 else 0

        # Late-night usage occurs on roughly 30% of days; the gate is drawn
        # first and the duration only when the gate passes.
        if np.random.random() < 0.3:
            late_minutes = np.random.exponential(15)
        else:
            late_minutes = 0

        # Compulsive checks scale with the user's baseline score.
        checks = np.random.poisson(compulsiveness * 20)

        # Each late-night hour costs half an hour of sleep; the result is
        # kept within 4-10 hours.
        sleep = np.clip(baseline_sleep - (late_minutes / 60) * 0.5, 4, 10)

        return {
            'day': day,
            'planned_usage_minutes': planned,
            'actual_usage_minutes': actual,
            'usage_overage_minutes': overage,
            'late_night_usage_minutes': late_minutes,
            'compulsive_checks_count': checks,
            'sleep_hours': sleep,
            'baseline_compulsiveness': compulsiveness
        }

    return pd.DataFrame([simulate_day(d) for d in range(days)])

def generate_single_user_social_media_dataset():
    """Build every synthetic dataset for one user and return them by name.

    The five generators are run in a fixed order with their default
    arguments, and a progress message is printed before each one.

    Returns:
        A dict mapping ``temporal_patterns``, ``engagement_depth``,
        ``content_consumption``, ``behavioral_triggers`` and
        ``wellbeing_indicators`` to the corresponding DataFrame.
    """
    # (progress message, result key, generator) in execution order.
    pipeline = (
        ("Generating temporal usage patterns for 1 user...",
         'temporal_patterns', generate_temporal_usage_patterns),
        ("Generating engagement depth indicators for 1 user...",
         'engagement_depth', generate_engagement_depth_indicators),
        ("Generating content consumption patterns for 1 user...",
         'content_consumption', generate_content_consumption_patterns),
        ("Generating behavioral triggers for 1 user...",
         'behavioral_triggers', generate_behavioral_triggers),
        ("Generating wellbeing indicators for 1 user...",
         'wellbeing_indicators', generate_wellbeing_indicators),
    )

    results = {}
    for message, key, build in pipeline:
        print(message)
        results[key] = build()
    return results

# Generate the datasets for single user
# (runs at module level: every generator is executed and its progress
# message is printed as soon as this cell/script is run)
datasets = generate_single_user_social_media_dataset()

# Save to CSV files
# Each DataFrame is written to 'synthetic_<name>_single_user.csv' (a relative
# path, so it resolves against the current working directory); index=False
# leaves the row index out of the file.
for name, df in datasets.items():
    df.to_csv(f'synthetic_{name}_single_user.csv', index=False)
    print(f"Saved {name} with {len(df)} records for single user")

Generating temporal usage patterns for 1 user...
Generating engagement depth indicators for 1 user...
Generating content consumption patterns for 1 user...
Generating behavioral triggers for 1 user...
Generating wellbeing indicators for 1 user...
Saved temporal_patterns with 204 records for single user
Saved engagement_depth with 300 records for single user
Saved content_consumption with 300 records for single user
Saved behavioral_triggers with 300 records for single user
Saved wellbeing_indicators with 30 records for single user
