# In [None]:
import pyro
import pyro.distributions as dist
import torch
import torch.distributions.constraints as constraints
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
"""
To install pyro, run the following command:
pip install pyro-ppl
"""


def prepare_data_for_model(df):
    """
    Turn the raw advertising DataFrame into tensors for the Pyro model

    Input:
    - df: pandas DataFrame providing the columns read below
      ('user_segment', 'region', 'ad_campaign', 'ad_intensity',
      'click_behavior', 'purchase_decision', 'session_duration_sec')

    Output:
    - encoded_data: Dictionary mapping column name -> torch tensor
      (torch.long for label-encoded columns, torch.float32 for
      'session_duration')
    - encoders: Dictionary mapping each label-encoded column name to its
      fitted LabelEncoder, for reverse transformation

    Function:
    - Label-encode every categorical column, including the target
    - Copy the session duration through as a float feature
    """
    # The target is encoded exactly like the predictors, so it shares the
    # same pass; it is listed last so both dictionaries keep the order
    # "predictors first, then target".
    label_columns = (
        'user_segment',
        'region',
        'ad_campaign',
        'ad_intensity',
        'click_behavior',
        'purchase_decision',
    )

    # One independent encoder per column
    encoders = {name: LabelEncoder() for name in label_columns}

    # Fit each encoder on its column and keep the integer codes as a tensor
    encoded_data = {
        name: torch.tensor(
            encoders[name].fit_transform(df[name]), dtype=torch.long
        )
        for name in label_columns
    }

    # Numerical feature: stored under a shorter key than the source column
    duration_values = df['session_duration_sec'].values
    encoded_data['session_duration'] = torch.tensor(
        duration_values, dtype=torch.float32
    )

    return encoded_data, encoders

def hierarchical_bayesian_model(encoded_data):
    """
    Logistic model of purchase decisions with user-segment and ad-campaign
    random effects

    Input:
    - encoded_data: Dictionary of tensors with the keys 'purchase_decision',
      'user_segment', 'ad_campaign', 'click_behavior' (integer codes) and
      'session_duration' (float)

    Output:
    - None. The function registers the Pyro sample sites 'user_intercept',
      'user_effect', 'ad_intercept', 'ad_effect', 'click_coef',
      'session_coef' and the observed site 'obs'.

    Raises:
    - ValueError: if 'purchase_decision' contains a code greater than 1

    Function:
    - Build random effects at user segment level
    - Build random effects at ad campaign level
    - Combine them with click-behavior and session-duration terms into one
      logit per observation and score the observed purchase decisions

    Notes:
    - Assumes 'purchase_decision' is a binary outcome encoded as 0/1
      (TODO confirm against the dataset); a single linear predictor cannot
      describe more than two classes.
    - Parameters are sampled OUTSIDE the "data" plate. Sampling them inside
      it would create one independent copy of every coefficient per
      observation instead of one shared coefficient.
    """
    purchase = encoded_data['purchase_decision']
    user_idx = encoded_data['user_segment']
    ad_idx = encoded_data['ad_campaign']

    n_obs = len(purchase)
    n_user_segments = len(torch.unique(user_idx))
    n_ad_campaigns = len(torch.unique(ad_idx))

    # Fail early with a clear message rather than deep inside the
    # likelihood's support check.
    if n_obs > 0 and int(purchase.max()) > 1:
        raise ValueError(
            "hierarchical_bayesian_model expects a binary purchase_decision "
            "encoded as 0/1, but found a class code greater than 1"
        )

    # Level 1: Random effects at user segment level (one value per segment).
    # user_intercept is drawn per segment, so it acts as a segment-specific
    # prior mean for user_effect rather than a pooled hyperparameter.
    with pyro.plate("user_segments", n_user_segments):
        user_intercept = pyro.sample(
            "user_intercept",
            dist.Normal(0.0, 1.0)
        )
        user_effect = pyro.sample(
            "user_effect",
            dist.Normal(user_intercept, 0.5)
        )

    # Level 2: Random effects at ad campaign level (one value per campaign)
    with pyro.plate("ad_campaigns", n_ad_campaigns):
        ad_intercept = pyro.sample(
            "ad_intercept",
            dist.Normal(0.0, 1.0)
        )
        ad_effect = pyro.sample(
            "ad_effect",
            dist.Normal(ad_intercept, 0.5)
        )

    # Global (scalar) coefficients shared by all observations
    click_coef = pyro.sample("click_coef", dist.Normal(0.0, 1.0))
    session_coef = pyro.sample("session_coef", dist.Normal(0.0, 1.0))

    # Linear combination: each term has shape (n_obs,)
    user_contrib = user_effect[user_idx]
    ad_contrib = ad_effect[ad_idx]
    click_contrib = click_coef * encoded_data['click_behavior'].float()
    # Duration is divided by 100 to keep the term on a scale comparable to
    # the other contributions.
    session_contrib = session_coef * encoded_data['session_duration'] / 100.0

    logits = user_contrib + ad_contrib + click_contrib + session_contrib

    # Observation model (purchase decision): one Bernoulli draw per row.
    # Categorical(logits=logits) would instead read the last dimension of
    # `logits` as class scores, which is not what a per-row logit means.
    with pyro.plate("data", n_obs):
        pyro.sample(
            "obs",
            dist.Bernoulli(logits=logits),
            obs=purchase.float()
        )

def run_bayesian_inference(df, num_samples=1000, warmup_steps=200,
                           num_chains=2, seed=42):
    """
    Run hierarchical Bayesian model inference and analyze results

    Input:
    - df: Raw data DataFrame
    - num_samples: Number of MCMC samples passed to pyro.infer.MCMC
    - warmup_steps: Number of warmup steps passed to pyro.infer.MCMC
      (default 200, the previously hard-coded value)
    - num_chains: Number of chains passed to pyro.infer.MCMC
      (default 2, the previously hard-coded value)
    - seed: Value passed to pyro.set_rng_seed before sampling
      (default 42, the previously hard-coded value)

    Output:
    - results: Dictionary with the keys
        'mcmc'              -> the fitted pyro.infer.MCMC object
        'posterior_samples' -> dict returned by mcmc.get_samples()
        'encoders'          -> fitted LabelEncoders from data preparation
        'user_effects'      -> mean of the 'user_effect' samples over dim 0
        'ad_effects'        -> mean of the 'ad_effect' samples over dim 0

    Function:
    - Perform Bayesian inference using NUTS sampler
    - Print the MCMC summary
    - Print the posterior-mean effect of every user segment and ad campaign
    """
    # Prepare data
    encoded_data, encoders = prepare_data_for_model(df)

    # Set Pyro random seed so repeated runs draw the same samples
    pyro.set_rng_seed(seed)

    # Use NUTS sampler
    nuts_kernel = pyro.infer.NUTS(hierarchical_bayesian_model)
    mcmc = pyro.infer.MCMC(
        nuts_kernel,
        num_samples=num_samples,
        warmup_steps=warmup_steps,
        num_chains=num_chains
    )

    print("Starting hierarchical Bayesian model inference...")
    mcmc.run(encoded_data)

    print("\nInference completed! Model summary:")
    mcmc.summary()

    # Extract posterior samples
    posterior_samples = mcmc.get_samples()

    # Analyze results: averaging over dim 0 collapses the sample dimension
    results = {
        'mcmc': mcmc,
        'posterior_samples': posterior_samples,
        'encoders': encoders,
        'user_effects': posterior_samples['user_effect'].mean(dim=0),
        'ad_effects': posterior_samples['ad_effect'].mean(dim=0)
    }

    # (heading, encoder key, results key) for each reported level
    report_levels = (
        ("User Segment Level Effects:", 'user_segment', 'user_effects'),
        ("Ad Campaign Level Effects:", 'ad_campaign', 'ad_effects'),
    )
    for heading, encoder_key, effects_key in report_levels:
        print(f"\n{heading}")
        # classes_[i] is the original label of code i, so a single lookup
        # replaces one inverse_transform call per row.
        labels = encoders[encoder_key].classes_
        for label, effect in zip(labels, results[effects_key]):
            print(f"  {label}: {effect:.3f}")

    return results

# Script entry point: fit the hierarchical model on the advertising dataset
if __name__ == "__main__":
    # Read the raw observations from the CSV in the working directory
    df = pd.read_csv(filepath_or_buffer='causal_advertising_data.csv')

    # Fit the model, requesting num_samples=500 instead of the default
    bayesian_results = run_bayesian_inference(df=df, num_samples=500)