In [1]:
import numpy as np
import gymnasium as gym
# FrozenLake environment with `is_slippery` turned off. The dimensions below are
# hard-coded to small demo values rather than read from the environment.
env = gym.make('FrozenLake-v1', is_slippery = False)
stateDim = 3  # demo size of the state space; env.observation_space.n would give 16
actionDim = 2  # demo size of the action space; env.action_space.n would give 4

## Notes

- Step 3.4 Fusion 
    - Need to choose an appropriate fusion operator (belief constraint fusion, BCF, is used as a placeholder for now)
    - A separate fusionOperators notebook is being written as a reference for choosing the fusion operator going forward (next main step for implementation)
- Step 3.5 Transformation of opinions to probabilities 
    - May want to normalize the values $p_i$ within each row to ensure that $0 \le p_i \le 1$ and $\sum_i p_i = 1$

# 3.1 Gathering human opinion

## 3.1.1 Initializing $\kappa$

In [2]:
def initializeKappa(stateDim, actionDim):
    """Create the starting matrix of opinions, one per (state, action) pair.

    Args:
        stateDim: number of rows (states).
        actionDim: number of columns (actions).

    Returns:
        A (stateDim, actionDim) structured array with four float32 fields per
        cell, every cell set to (0, 0, 1, 1/actionDim). The fusion step in this
        notebook reads the four fields as (b, d, u, a).
    """
    opinions = np.zeros((stateDim, actionDim), dtype = "f, f, f, f")
    # Walk every (state, action) index pair in row-major order.
    for cell in np.ndindex(stateDim, actionDim):
        opinions[cell] = (0, 0, 1, 1/actionDim)
    return opinions

In [7]:
# Build the initial opinion matrix for the demo dimensions and display it.
kappa = initializeKappa(stateDim, actionDim)
print(f"Initialized Kappa Matrix of Opinions is \n{kappa}")

Initialized Kappa Matrix of Opinions is 
[[(0., 0., 1., 0.5) (0., 0., 1., 0.5)]
 [(0., 0., 1., 0.5) (0., 0., 1., 0.5)]
 [(0., 0., 1., 0.5) (0., 0., 1., 0.5)]]


## 3.1.2 Inserting an opinion into $\kappa$

In [4]:
def insertOpinion(kappa, opinion):
    """Store one opinion in the opinion matrix.

    Args:
        kappa: opinion matrix indexed by [state, action]; modified in place.
        opinion: sequence whose first three items are the state index, the
            action index, and the value to store at that position.

    Returns:
        The same `kappa` object that was passed in, after the update.
    """
    state = opinion[0]
    action = opinion[1]
    kappa[state, action] = opinion[2]
    return kappa

In [8]:
# Overwrite the opinion at state 1, action 1. The tuple follows the
# (b, d, u, a) field order that beliefConstraintFusion reads.
kappa = insertOpinion(kappa, [1, 1, (0.8, 0.0, 0.2, 1/actionDim)])
print(f"Kappa Matrix of with Opinions is \n{kappa}")

Kappa Matrix of with Opinions is 
[[(0. , 0., 1. , 0.5) (0. , 0., 1. , 0.5)]
 [(0. , 0., 1. , 0.5) (0.8, 0., 0.2, 0.5)]
 [(0. , 0., 1. , 0.5) (0. , 0., 1. , 0.5)]]


# 3.2 Transformation of numerical preferences to probabilities 

In [10]:
# NOTE: for this stand-alone implementation theta is a matrix of zeros; once merged
# with the RL algorithm, theta will be pulled from the agent instead.
theta = np.zeros((stateDim, actionDim))
print(f"Theta Matrix of Numerical Preferences is \n{theta}")

Theta Matrix of Numerical Preferences is 
[[0. 0.]
 [0. 0.]
 [0. 0.]]


## 3.2.1 Converting from numerical preferences to probabilities

In [11]:
#https://pytorch.org/docs/stable/generated/torch.nn.Softmax.html
def pi(state, actionDim, theta):
    """Softmax over the numerical preferences of one state.

    Args:
        state: row index into `theta`.
        actionDim: number of actions (columns of `theta`) to include.
        theta: matrix of numerical preferences indexed by [state, action].

    Returns:
        A float array of length `actionDim` whose entries are
        exp(theta[state, a]) / sum_a' exp(theta[state, a']).
    """
    prefs = np.array([theta[state, action] for action in range(actionDim)], dtype=float)
    if prefs.size == 0:
        # No actions: nothing to normalize.
        return prefs
    # Subtracting the row maximum leaves the softmax unchanged mathematically
    # but keeps exp() from overflowing to inf (inf / inf = nan) for large
    # preferences.
    weights = np.exp(prefs - prefs.max())
    return weights / np.sum(weights)

In [12]:
def numToProb(stateDim, actionDim, theta):
    """Turn a matrix of numerical preferences into a matrix of probabilities.

    Each of the first `stateDim` rows of `theta` is passed through `pi`, and
    the result is stored as the matching row of the output.

    Returns:
        A (stateDim, actionDim) float array of per-state action probabilities.
    """
    probabilities = np.zeros((stateDim, actionDim))
    row = 0
    while row < stateDim:
        probabilities[row] = pi(row, actionDim, theta)
        row += 1
    return probabilities

In [13]:
# Apply the softmax row by row to theta and display the resulting probability matrix.
thetaProb = numToProb(stateDim, actionDim, theta)
print(f"Pi Theta Matrix of Probabilities is \n{thetaProb}")

Pi Theta Matrix of Probabilities is 
[[0.5 0.5]
 [0.5 0.5]
 [0.5 0.5]]


# 3.3 Transformation of probabilities to opinions

## 3.3.1 Converting probabilities to opinions

In [14]:
def probToOpinion(stateDim, actionDim, thetaProb):
    """Convert a matrix of probabilities into a matrix of opinion tuples.

    For a probability p, the stored tuple is (p, 1 - p, 0, p).

    Args:
        stateDim: number of rows to convert.
        actionDim: number of columns to convert.
        thetaProb: matrix of probabilities indexed by [state, action].

    Returns:
        A (stateDim, actionDim) structured array with four float32 fields.
    """
    opinions = np.zeros((stateDim, actionDim), dtype = "f, f, f, f")
    for cell in np.ndindex(stateDim, actionDim):
        prob = thetaProb[cell]
        opinions[cell] = (prob, 1-prob, 0, prob)
    return opinions


In [15]:
# Re-express every policy probability as an opinion tuple and display the result.
thetaOpinion = probToOpinion(stateDim, actionDim, thetaProb)
print(f"Pi Theta Matrix of Opinions is \n{thetaOpinion}")

Pi Theta Matrix of Opinions is 
[[(0.5, 0.5, 0., 0.5) (0.5, 0.5, 0., 0.5)]
 [(0.5, 0.5, 0., 0.5) (0.5, 0.5, 0., 0.5)]
 [(0.5, 0.5, 0., 0.5) (0.5, 0.5, 0., 0.5)]]


# 3.4 Fusion

In [17]:
def beliefConstraintFusion(matrix1, matrix2, stateDim, actionDim):
    """Fuse two opinion matrices cell by cell with belief constraint fusion.

    Each cell of both inputs holds an opinion (b, d, u, a). For every
    (state, action) pair the fused opinion is

        harmony  = b1*b2 + b1*u2 + b2*u1
        conflict = b1*d2 + b2*d1
        b = harmony / (1 - conflict)
        u = u1*u2 / (1 - conflict)
        a = (a1*(1 - u1) + a2*(1 - u2)) / (2 - u1 - u2)
        d = 1 - (b + u)

    When both inputs are vacuous (u1 = u2 = 1) the base-rate formula is 0/0,
    so the average (a1 + a2) / 2 is used instead.

    Args:
        matrix1, matrix2: opinion matrices indexed by [state, action]; each
            cell is indexable at positions 0..3 as (b, d, u, a).
        stateDim: number of rows to fuse.
        actionDim: number of columns to fuse.

    Returns:
        A (stateDim, actionDim) structured array with four float32 fields
        holding the fused (b, d, u, a) opinions.
    """
    iota = np.zeros((stateDim, actionDim), dtype = "f, f, f, f")
    for state in range(stateDim):
        for action in range(actionDim):
            op1 = matrix1[state, action]
            op2 = matrix2[state, action]
            b1, d1, u1, a1 = op1[0], op1[1], op1[2], op1[3]
            b2, d2, u2, a2 = op2[0], op2[1], op2[2], op2[3]

            harmony = b1*b2 + b1*u2 + b2*u1
            conflict = b1*d2 + b2*d1
            # NOTE: the division by (1 - conflict) is left unguarded; two fully
            # conflicting opinions (conflict == 1) have no defined fusion.
            b = harmony / (1 - conflict)
            u = u1 * u2 / (1 - conflict)

            certainty = 2 - u1 - u2
            if certainty == 0:
                # Both opinions are vacuous: the weighted formula would be 0/0
                # (nan), so fall back to the plain average of the base rates.
                a = (a1 + a2) / 2
            else:
                a = (a1 * (1 - u1) + a2 * (1 - u2)) / certainty
            d = 1 - (b + u)

            iota[state, action] = (b, d, u, a)
    return iota

In [18]:
# Fuse the inserted opinions (kappa) with the policy opinions (thetaOpinion) and display the result.
iota = beliefConstraintFusion(kappa, thetaOpinion, stateDim, actionDim) 
print(f"Iota Matrix of Fused Opinions is \n{iota}")

Iota Matrix of Fused Opinions is 
[[(0.5      , 0.5       , 0., 0.5) (0.5      , 0.5       , 0., 0.5)]
 [(0.5      , 0.5       , 0., 0.5) (0.8333333, 0.16666666, 0., 0.5)]
 [(0.5      , 0.5       , 0., 0.5) (0.5      , 0.5       , 0., 0.5)]]


# 3.5 Transformation of opinions to probabilities

## 3.5.1 Converting from fused opinions to probabilities 

In [19]:
def opinionToProb(stateDim, actionDim, iota):
    """Convert a matrix of opinion tuples into a matrix of probabilities.

    For each cell, fields 0, 2 and 3 of the opinion are read as b, u and a,
    and the stored probability is b + a * u.

    Args:
        stateDim: number of rows to convert.
        actionDim: number of columns to convert.
        iota: opinion matrix indexed by [state, action].

    Returns:
        A (stateDim, actionDim) float array of probabilities.
    """
    projected = np.zeros((stateDim, actionDim))
    for cell in np.ndindex(stateDim, actionDim):
        opinion = iota[cell]
        belief, uncertainty, baseRate = opinion[0], opinion[2], opinion[3]
        projected[cell] = belief + baseRate * uncertainty
    return projected


In [20]:
# Turn the fused opinions back into a probability matrix and display it.
thetaProb2 = opinionToProb(stateDim, actionDim, iota)
print(f"Pi Theta Matrix of Probabilities is \n{thetaProb2}")

Pi Theta Matrix of Probabilities is 
[[0.5        0.5       ]
 [0.5        0.83333331]
 [0.5        0.5       ]]


# 3.6 Transformation of probabilities to numerical preferences

In [24]:
#https://search.r-project.org/CRAN/refmans/ohenery/html/inv_smax.html
def piInverse(stateDim, actionDim, thetaProb):
    """Inverse softmax: map a matrix of probabilities to numerical preferences.

    For each state the preferences are log(p) shifted by a constant so that
    they sum to zero across the row:

        theta[state, a] = log(p_a) - (1 / actionDim) * sum_a' log(p_a')

    Args:
        stateDim: number of rows to convert.
        actionDim: number of actions per row.
        thetaProb: matrix of probabilities indexed by [state][action].

    Returns:
        A (stateDim, actionDim) float array of numerical preferences.
    """
    # Smallest positive normal float; stands in for an exact zero probability
    # so that log() stays finite. With a true zero, log gives -inf and the
    # centering constant becomes +inf, leaving nan/inf in the whole row.
    tiny = np.finfo(float).tiny
    theta = np.zeros((stateDim, actionDim))
    for state in range(stateDim):
        mu = thetaProb[state]
        probs = np.array([mu[action] for action in range(actionDim)], dtype=float)
        logProbs = np.log(np.where(probs == 0, tiny, probs))
        # Centering constant: minus the mean log-probability of the row.
        c = (-1 / actionDim) * np.sum(logProbs)
        theta[state] = logProbs + c

    return theta

In [23]:
# Map the fused probabilities back to numerical preferences and display them.
# NOTE(review): the recorded execution counts show this call (23) ran before the
# current piInverse definition (24); re-run top to bottom to confirm the output.
theta2 = piInverse(stateDim, actionDim, thetaProb2)
print(f"Theta Matrix of Numerical Preferences is \n{theta2}")

Theta Matrix of Numerical Preferences is 
[[ 0.         0.       ]
 [-0.2554128  0.2554128]
 [ 0.         0.       ]]
