In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm

# Seed NumPy's global random state before anything random is drawn.
np.random.seed(37)

# Probability table built column by column: every (gender, drug) pair has one
# row per recovery outcome, and `__p__` holds the probability for that row.
df = pd.DataFrame({
    'gender': ['female'] * 4 + ['male'] * 4,
    'drug': ['no', 'no', 'yes', 'yes'] * 2,
    'recovery': ['no', 'yes'] * 4,
    '__p__': [0.90, 0.10, 0.27, 0.73, 0.99, 0.01, 0.07, 0.93],
})
df

Unnamed: 0,gender,drug,recovery,__p__
0,female,no,no,0.9
1,female,no,yes,0.1
2,female,yes,no,0.27
3,female,yes,yes,0.73
4,male,no,no,0.99
5,male,no,yes,0.01
6,male,yes,no,0.07
7,male,yes,yes,0.93


In [2]:
def get_cpt(df, y):
    """Return the probabilities in ``df`` as a 2-D table with one row per group.

    The rows of ``df`` are sorted by every column other than ``__p__``, with
    ``y`` as the last (fastest-varying) sort key. The sorted ``__p__`` values
    are then cut into consecutive groups of ``n`` values, where ``n`` is the
    number of distinct values found in column ``y``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a ``__p__`` column, the column named by ``y`` and any
        number of further columns. Assumed to hold exactly one row per
        combination of the other columns and ``y`` -- this is not checked
        beyond the row-count test below.
    y : str
        Name of the column whose values index the columns of the result.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df) // n, n)`` holding the ``__p__`` values.

    Raises
    ------
    ValueError
        If ``df`` has no rows or its row count is not a multiple of ``n``.
    """
    n = df[y].unique().shape[0]
    fields = df.columns.drop([y, '__p__']).tolist() + [y]
    ordered = df.sort_values(fields).reset_index(drop=True)

    # Copy so the result never aliases the frame's underlying buffer.
    probs = np.array(ordered['__p__'].values)
    if n == 0 or probs.size % n != 0:
        raise ValueError(
            f'cannot split {probs.size} rows into groups of {n} values of {y!r}')

    # Group width must be n (the number of outcomes), not a fixed 2, so that
    # outcome columns with more than two values keep all their probabilities.
    return probs.reshape(-1, n)

def softmax(x):
    """Exponentiate ``x`` and normalise so that all entries together sum to 1.

    The maximum of ``x`` is subtracted before exponentiating; the normalising
    sum is taken over every element of ``x``, not along a single axis.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    total = weights.sum()
    return weights / total

def add_noise(p, u):
    """Perturb ``p`` in log space with a draw from ``u`` and renormalise.

    ``p`` needs a ``shape`` attribute; ``u`` needs an ``rvs(size=...)`` method
    (the notebook passes frozen scipy distributions). One noise value is drawn
    per element of ``p``, added to ``log(p)``, and the sum is passed through
    ``softmax``.
    """
    log_p = np.log(p)
    # Draw after taking the log, matching the order of the two operations.
    noisy = log_p + u.rvs(size=p.shape)
    return softmax(noisy)

def wiggle(p, u, max_samples=100):
    """Average ``max_samples`` independent ``add_noise(p, u)`` results.

    The mean is taken element-wise across the draws (``axis=0``), so the
    result has the same shape as a single ``add_noise`` output.
    """
    draws = []
    for _ in range(max_samples):
        draws.append(add_noise(p, u))
    return np.mean(draws, axis=0)

def wiggle_cpt(cpt, u, max_samples=100):
    """Apply ``wiggle`` to each entry of ``cpt`` and flatten the results.

    ``cpt`` is iterated in order; the per-entry outputs are stacked into an
    array and returned as a single 1-D array.
    """
    perturbed = [wiggle(row, u, max_samples) for row in cpt]
    return np.array(perturbed).ravel()

def create_potential(df, u_i, wpt):
    """Return a copy of ``df`` tagged with ``u{u_i}`` and carrying ``wpt``.

    A ``__u__`` column holding the string ``'u' + str(u_i)`` is added and the
    ``__p__`` column is set to ``wpt``. The result lists ``__u__`` first,
    followed by the original columns of ``df`` in their original order. The
    input frame itself is not modified.
    """
    label = f'u{u_i}'
    tagged = df.assign(__u__=label, __p__=wpt)

    ordered = ['__u__', *df.columns.tolist()]
    return tagged[ordered]
    
# Number of noisy draws averaged for every row of the table.
N = 100
# Two separately constructed, identically parameterised frozen normals
# (scipy's positional arguments are loc=0.05, scale=2).
u = [norm(0.05, 2) for _ in range(2)]

cpt = get_cpt(df, 'recovery')
# One flattened, perturbed copy of the table per noise distribution.
wpt = [wiggle_cpt(cpt, noise, N) for noise in u]

# Stack the copies, labelled u0, u1, ..., under a fresh 0..n-1 row index.
pd.concat(
    [create_potential(df, idx, perturbed) for idx, perturbed in enumerate(wpt)],
    ignore_index=True,
)

Unnamed: 0,__u__,gender,drug,recovery,__p__
0,u0,female,no,no,0.754121
1,u0,female,no,yes,0.245879
2,u0,female,yes,no,0.429747
3,u0,female,yes,yes,0.570253
4,u0,male,no,no,0.918125
5,u0,male,no,yes,0.081875
6,u0,male,yes,no,0.231932
7,u0,male,yes,yes,0.768068
8,u1,female,no,no,0.732675
9,u1,female,no,yes,0.267325
