In [5]:
import pandas as pd
import numpy as np

# Build a reproducible synthetic sample of 1000 (x, y) pairs.
np.random.seed(42)  # fixed seed -> identical draws on every run
x = np.random.uniform(low=0, high=100, size=1000)  # predictor, uniform between 0 and 100
# Response: half of x perturbed by Gaussian noise (mean 0, standard deviation 5).
y = 0.5 * x + np.random.normal(loc=0, scale=5, size=1000)

# Assemble both columns into a table and write it out without the index column.
csv_path = 'sample2.csv'
df = pd.DataFrame({'x': x, 'y': y})
df.to_csv(csv_path, index=False)

# Echo the output location as the cell result.
csv_path


'sample2.csv'

In [6]:

def generate_multilevel_data(n_groups, n_obs_per_group, mu_alpha, sigma_alpha, mu_beta, sigma_beta, sigma_noise, seed=42):
    """
    Generate synthetic multilevel (random-intercept, random-slope) regression data.

    Each group g receives its own intercept alpha_g ~ Normal(mu_alpha, sigma_alpha)
    and slope beta_g ~ Normal(mu_beta, sigma_beta). Within a group the predictor is
    x ~ Uniform(0, 10) and the response is
    y = alpha_g + beta_g * x + Normal(0, sigma_noise).

    Args:
        n_groups (int): Number of groups.
        n_obs_per_group (int): Number of observations per group.
        mu_alpha (float): Mean of group-level intercept (alpha) distribution.
        sigma_alpha (float): Standard deviation of group-level intercept (alpha) distribution.
        mu_beta (float): Mean of group-level slope (beta) distribution.
        sigma_beta (float): Standard deviation of group-level slope (beta) distribution.
        sigma_noise (float): Standard deviation of noise added to y.
        seed (int or None): Seed for the function's private random generator.
            The default (42) yields the same values as seeding the global NumPy
            generator with 42 before drawing. Pass a different seed for an
            independent replicate, or None for non-reproducible draws.

    Returns:
        pd.DataFrame: A dataframe with columns 'x', 'y', and 'group' (0-based
        group index), holding n_groups * n_obs_per_group rows ordered by group.

    Raises:
        ValueError: If n_groups or n_obs_per_group is negative.
    """
    if n_groups < 0 or n_obs_per_group < 0:
        raise ValueError("n_groups and n_obs_per_group must be non-negative")

    # Use a private legacy generator instead of np.random.seed(): it produces the
    # same stream for a given seed but does not reset the process-wide generator
    # that other notebook cells may be drawing from.
    rng = np.random.RandomState(seed)

    # Group-specific intercepts (alpha) and slopes (beta): these are the random effects.
    alpha_group = rng.normal(mu_alpha, sigma_alpha, n_groups)  # random intercept
    beta_group = rng.normal(mu_beta, sigma_beta, n_groups)  # random slope

    x_data, y_data, group_data = [], [], []

    # Draw order inside the loop (x first, then noise) is kept fixed so the
    # generated values for a given seed stay stable.
    for group in range(n_groups):
        # Predictor values between 0 and 10
        x = rng.uniform(0, 10, n_obs_per_group)

        # Response from the group's own line plus observation noise
        noise = rng.normal(0, sigma_noise, n_obs_per_group)
        y = alpha_group[group] + beta_group[group] * x + noise

        x_data.extend(x)
        y_data.extend(y)
        group_data.extend([group] * n_obs_per_group)

    return pd.DataFrame({
        'x': x_data,
        'y': y_data,
        'group': group_data
    })

# Simulation settings: 5 groups with 50 observations in each.
simulation_settings = {
    'n_groups': 5,
    'n_obs_per_group': 50,
    'mu_alpha': 2.0,     # mean of the group-level intercepts
    'sigma_alpha': 1.0,  # spread of the group-level intercepts
    'mu_beta': 0.5,      # mean of the group-level slopes
    'sigma_beta': 0.1,   # spread of the group-level slopes
    'sigma_noise': 1.0,  # observation noise level
}
data = generate_multilevel_data(**simulation_settings)

# Write the simulated table to disk without the index column.
csv_path = 'sample3.csv'
data.to_csv(csv_path, index=False)

# Echo the output location as the cell result.
csv_path

'sample3.csv'