# Ejemplo logística

In [3]:
import numpy as np
import pandas as pd

# Set the random seed for reproducibility
np.random.seed(42)

# Number of observations
n = 1000

# Generate 5 explanatory variables from a normal distribution
X1 = np.random.normal(size=n)
X2 = np.random.normal(size=n)
X3 = np.random.normal(size=n)
X4 = np.random.normal(size=n)
X5 = np.random.normal(size=n)

# Combine into a single array
X = np.column_stack([X1, X2, X3, X4, X5])

# Coefficients for the logistic regression
beta = np.array([4, -3, 1, -2, 0.5])

# Linear predictor (logit function)
eta = X.dot(beta)

# Simulate the binary response variable
p = 1 / (1 + np.exp(-eta))  # logistic function
y = np.random.binomial(1, p, size=n)

# Combine into a DataFrame
data = pd.DataFrame(X, columns=['X1', 'X2', 'X3', 'X4', 'X5'])
data['y'] = y

# Display the first few rows of the DataFrame
print(data.head())


         X1        X2        X3        X4        X5  y
0  0.496714  1.399355 -0.675178 -1.907808 -0.863494  1
1 -0.138264  0.924634 -0.144519 -0.860385 -0.031203  0
2  0.647689  0.059630 -0.792420 -0.413606  0.018017  1
3  1.523030 -0.646937 -0.307962  1.887688  0.472630  1
4 -0.234153  0.698223 -1.893615  0.556553 -1.366858  0


In [5]:
import pymc as pm
import arviz as az

# Define the model
with pm.Model() as logistic_model:
    # Priors for the regression coefficients
    beta = pm.Normal('beta', mu=0, sigma=5, shape=5)
    
    # Prior for the intercept
    intercept = pm.Normal('intercept', mu=0, sigma=5)
    
    # Linear predictor
    eta = intercept + pm.math.dot(data[['X1', 'X2', 'X3', 'X4', 'X5']], beta)
    
    # Likelihood (Bernoulli distribution)
    p = pm.Deterministic('p', pm.math.sigmoid(eta))
    y_obs = pm.Bernoulli('y_obs', p=p, observed=data['y'])
    
    # Inference
    trace = pm.sample(4000, return_inferencedata=True)

# Print the summary of the posterior distributions
print(az.summary(trace))


Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [beta, intercept]


Output()

Sampling 4 chains for 1_000 tune and 4_000 draw iterations (4_000 + 16_000 draws total) took 17 seconds.


          mean     sd  hdi_3%  hdi_97%  mcse_mean  mcse_sd  ess_bulk  \
beta[0]  4.194  0.314   3.618    4.792      0.003    0.002    9728.0   
beta[1] -3.347  0.263  -3.837   -2.854      0.003    0.002   10081.0   
beta[2]  0.777  0.143   0.505    1.046      0.001    0.001   12828.0   
beta[3] -2.031  0.185  -2.372   -1.679      0.002    0.001   10481.0   
beta[4]  0.785  0.142   0.516    1.050      0.001    0.001   13992.0   
...        ...    ...     ...      ...        ...      ...       ...   
p[995]   0.010  0.004   0.004    0.017      0.000    0.000   10226.0   
p[996]   1.000  0.000   1.000    1.000      0.000    0.000    8928.0   
p[997]   1.000  0.000   0.999    1.000      0.000    0.000   10811.0   
p[998]   0.017  0.007   0.006    0.029      0.000    0.000   11685.0   
p[999]   0.970  0.009   0.953    0.986      0.000    0.000   11974.0   

         ess_tail  r_hat  
beta[0]    9543.0    1.0  
beta[1]   10622.0    1.0  
beta[2]   10218.0    1.0  
beta[3]   10773.0    1.0  
