In [1]:
# Standard Library Imports
import random
import math
import os
import sys
import math
import copy
import random
sys.path.append('..')

# External Library Imports
import numpy as np
from tqdm import trange
import matplotlib
import matplotlib.pyplot as plt

# Deep Learning and Bayesian Deep Learning Libraries
import deepbayes_prealpha
import deepbayes_prealpha.optimizers as optimizers
from deepbayes_prealpha import PosteriorModel
from deepbayes_prealpha.analyzers import propagate
from deepbayes_prealpha.analyzers import prob_veri

import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

In [2]:
# Index of the first position component inside a state vector; position is
# read as state[pos_ind:pos_ind+2] and velocity as state[pos_ind+2:pos_ind+4].
pos_ind = 0
# Trajectory length; P2D_env stores it as self.T.
HORIZON = 10
# Initial velocity given to both dx and dy by P2D_env; also the centre of the
# velocity constraint checked in predicate().
momentum = -0.45
# predicate() accepts velocities within 2*momentum_eps of `momentum`.
momentum_eps = 0.08
# Obstacle position used by P2D_env.step, reward_function and label.
OBSTACLE = [0.4,0.4]
# NOTE(review): not read anywhere in the visible notebook -- confirm it is still needed.
GOAL_STATE = [-0.2,-0.2]
# Passed as w_marg to propagate() and prob_veri().
GLOB_MARGIN = 2.3
# Passed as samples to prob_veri().
GLOB_SAMPLES = 3
# Read by argmax_action() when deciding whether to return the "furthest" action.
stepnum = 0

In [3]:
class P2D_env:
    """
    The handcrafted 2D point environment we consider.

    A point with position (x, y) and velocity (dx, dy) that is advanced by
    `step` using a damped point-mass update. The state returned to callers is
    the array [x, y, dx, dy].
    """

    def __init__(self):
        """Draw a random start position and set the initial velocity and dynamics constants."""
        self.x = 0.90  + random.uniform(-0.1, 0)
        self.y = 0.90  + random.uniform(-0.1, 0)
        self.dx = momentum
        self.dy = momentum
        # Point Mass Dynamics Control:
        self.h = 0.35      # time resolution
        self.T = HORIZON   # Time for each trajectory
        self.eta = 0.75    # friction coef
        self.m = 1.0       # mass of the car
        self.t = 0         # current time

    def step(self, act):
        """
        Advance the point by one time step under the 2D control `act`.

        Returns a 4-tuple (state, reward, done, 0) where `state` is
        [x, y, dx, dy] after the update, `reward` is the decrease in the
        position's distance from the origin, and `done` is `self.complete()`.

        Calls sys.exit(-1) when stepped after the step counter exceeds self.T.
        """
        if(self.t > self.T):
            sys.exit(-1)
        pt = np.asarray([self.x, self.y])
        vt = np.asarray([self.dx, self.dy])
        ut = np.asarray(act)
        vt_1 = ((1-((self.h*self.eta)/self.m))*vt) + ((self.h/self.m)*ut)
        pt_1 = pt + (self.h*vt_1)
        if(np.linalg.norm(pt_1 - OBSTACLE) < 0.05):
            print("COLLIDED")
            # Stop the point on collision. Previously the zeroed velocity was
            # immediately overwritten by the unconditional assignment below,
            # so the collision had no effect on the dynamics.
            vt_1 = np.zeros_like(vt_1)
        self.x, self.y = pt_1[0], pt_1[1]
        self.dx, self.dy = vt_1[0], vt_1[1]
        self.t += 1
        state = np.asarray([self.x, self.y, self.dx, self.dy])
        return state, np.linalg.norm(pt) - np.linalg.norm(pt_1), self.complete(), 0

    def complete(self):
        """Return True when the episode is over, otherwise False."""
        # Both coordinates are inside the small corner region.
        if(self.x < 0.05 and self.y < 0.05):
            return True
        # Either coordinate has gone negative.
        if(self.x < 0.0 or self.y < 0.0):
            return True
        # The time horizon has been reached.
        if(self.t==self.T):
            return True
        # Explicit False (the method used to fall through and return None).
        return False

    def reset(self):
        """Re-draw the start position, restore the initial velocity and time, and return the state."""
        self.x = 0.90  + random.uniform(-0.3, 0)
        self.y = 0.90  + random.uniform(-0.3, 0)
        self.dx = momentum
        self.dy = momentum
        self.t = 0
        return np.asarray([self.x, self.y, self.dx, self.dy])

    def action_space_sample(self):
        """Return a random 2-element action (used to infer the action dimension)."""
        return np.random.rand(2)

    def observation_space_sample(self):
        """Return a random 4-element observation (used to infer the state dimension)."""
        return np.random.rand(4)

In [4]:
# Build the environment and take one reset state as the reference state.
env = P2D_env()
state_0 = env.reset()
# NOTE: this is an alias, not a copy -- verify_state() writes position values
# into `initial_state` in place, which therefore also changes `state_0`.
initial_state = state_0
# Dimensions are inferred from the length of the environment's sample vectors.
action_dims = len(env.action_space_sample())
observe_dims = len(env.observation_space_sample())
# Input/output sizes derived from the state and action dimensions.
model_input_dims = observe_dims + action_dims
model_output_dims = observe_dims
control_input_dims = observe_dims
control_output_dims = action_dims

In [5]:
# Load the posterior model from the "MinimalModel" path (relative to the working directory).
#bayes_model = PosteriorModel("LearnedSystem/BayesDyn")
bayes_model = PosteriorModel("MinimalModel")
# Adds a tiny constant to the posterior variance.
# NOTE(review): presumably to keep every variance strictly positive -- confirm.
bayes_model.posterior_var += 0.0000001

# Action lookup table consumed by return_AM_action(); C_STATES is its length
# along the first axis.
ACTION_MATRIX = np.load("MinimalAM.npy")
C_STATES = len(ACTION_MATRIX)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, None, 36)          252       
                                                                 
 dense_1 (Dense)             (None, None, 4)           148       
                                                                 
Total params: 400
Trainable params: 400
Non-trainable params: 0
_________________________________________________________________
deepbayes: detected the above model 
 None


2023-03-02 21:21:32.583766: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
def return_AM_action(state_0, ACTION_MATRIX):
    """
    Look up the action stored in ACTION_MATRIX for the grid cell containing state_0.

    The first two entries of `state_0` are scaled by the number of grid cells
    (the length of ACTION_MATRIX) and rounded to obtain the cell indices.

    Returns:
        The stored action for an in-range cell. When the lookup raises
        IndexError:
          * both indices near/over the upper edge -> [0, 0];
          * only the second index over the edge   -> a copy of the last
            column's action with its second component zeroed;
          * only the first index over the edge    -> a copy of the last
            row's action with its first component zeroed;
          * anything else -> a random action drawn from [-0.4, 0]^2.
    """
    # Size the grid from the matrix actually passed in rather than from a global.
    n_states = len(ACTION_MATRIX)
    ind_i = round(state_0[0]*n_states)
    ind_j = round(state_0[1]*n_states)
    # Drawn unconditionally (as before) so the random stream is unchanged.
    a, b = random.uniform(-0.4, 0), random.uniform(-0.4, 0)
    retval = [a, b]
    try:
        retval = ACTION_MATRIX[ind_i][ind_j]
    except IndexError:
        edge = n_states*0.9
        if(ind_i >= edge and ind_j >= edge):
            retval = [-0, -0]
        elif(ind_j >= edge):
            # Copy before editing so the lookup table itself is never modified.
            retval = np.array(ACTION_MATRIX[ind_i][n_states-1])
            retval[1] = -0
        elif(ind_i >= edge and ind_j >= 0):
            # This branch used to repeat the `ind_j` test and was unreachable.
            retval = np.array(ACTION_MATRIX[n_states-1][ind_j])
            retval[0] = -0
    return retval


def reward_function(state_0, dyn_pred, goal):
    """
    Score a predicted transition by goal progress plus obstacle clearance.

    `dyn_pred` is added to `state_0` to form the predicted next state. The
    result is the reduction in distance to `goal` plus a term that rewards an
    increase in distance to OBSTACLE, scaled by 0.1 over the new obstacle
    distance. Only the position slice [pos_ind:pos_ind+2] of each state is used.
    """
    current = tf.cast(tf.squeeze(state_0), dtype=tf.float32)
    delta = tf.cast(tf.squeeze(dyn_pred), dtype=tf.float32)
    predicted = current + delta

    def _position_distance(state_vec, target):
        # Euclidean distance between the position slice and `target`.
        return tf.norm(state_vec[pos_ind:pos_ind+2] - target, axis=0)

    goal_before = _position_distance(current, goal)
    goal_after = _position_distance(predicted, goal)

    obstacle_before = _position_distance(current, OBSTACLE)
    obstacle_after = _position_distance(predicted, OBSTACLE)

    goal_progress = (-1*(goal_after-goal_before))
    obstacle_clearance = ((0.1/obstacle_after)*(obstacle_after-obstacle_before))
    return goal_progress + obstacle_clearance

# Verification of state
def verify_state(state, eps, predicate, p_arr, refine=2.5, s=False):
    """
    Estimate a lower bound on the probability that the cell starting at `state`
    satisfies `predicate`, chaining through already-verified cells if needed.

    Args:
        state: 2-element position giving the lower corner of the cell.
        eps: cell width; added to the position for the upper corner and
            subtracted from the lower-bound velocity.
        predicate: callable forwarded to prob_veri.
        p_arr: 2D array of previously computed probabilities, sliced as
            p_arr[i0:i1, j0:j1] to find the worst reachable cell.
        refine: unused; kept for interface compatibility.
        s: unused; kept for interface compatibility.

    Returns:
        (probability, action). The probability is 1e-20 rather than 0 when the
        cell is found unreachable, so callers can tell "verified as unreachable"
        apart from "not yet computed".

    Side effects:
        Writes the cell corners into the position entries of the module-level
        `initial_state` array in place, and prints progress messages.
    """
    s0 = np.asarray(state)                              # Lower bound of the states
    s1 = s0 + eps                                       # Add epsilon to the postion to get the upper bound

    # Making copies of the state with velocity to pass to bounds.
    # NOTE: `initial_state` is a module-level array and is modified in place here.
    initial_state[pos_ind:pos_ind+2] = s0
    s0_pre = copy.deepcopy(initial_state)
    initial_state[pos_ind:pos_ind+2] = s1
    s1_pre = copy.deepcopy(initial_state)
    s0_pre[pos_ind+2:pos_ind+4] -= eps                # Subtracting epsilon from momentum to get interval over velocity

    # Choose the action for this interval. The same action is used for both the
    # lower and the upper bound (a point, not an interval).
    act =  argmax_action(bayes_model, s0_pre, s1_pre)
    act_l = np.squeeze(act)
    act_u = np.squeeze(act)

    # Adding the action to the state intervals to get the full model input
    s0 = np.concatenate((s0_pre,act_l))
    s1 = np.concatenate((s1_pre,act_u))

    s0 = np.asarray([s0])
    s1 = np.asarray([s1])

    # Computing the probability by propagating intervals through the BNN
    p, outs = prob_veri(bayes_model, s0, s1, w_marg=GLOB_MARGIN, samples=GLOB_SAMPLES, predicate=predicate, depth=3, y_scaler=None)

    if(p == 0):
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~")
        print("~~~~~NOT REACHABLE~~~~~~~~")
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~")
        return 0.00000000000000000001, act

    mins = [i[0] for i in outs]
    maxs = [i[1] for i in outs]
    try:
        mins = np.min(mins, axis=0)[pos_ind:pos_ind+2]
        maxs = np.max(maxs, axis=0)[pos_ind:pos_ind+2]
        maxs[0] = min(maxs[0], np.squeeze(s1)[0])
        maxs[1] = min(maxs[1], np.squeeze(s1)[1])
    except Exception:
        # Best-effort fallback kept from the original; `Exception` (rather than
        # a bare except) lets Ctrl-C / kernel interrupts through.
        mins = np.asarray(mins)
        maxs = np.asarray(maxs)
    # Account for first iteration here:
    maxs-=2*eps 
    if((maxs < INIT_SAFE_ZONE).all()):
        print("One step reachable. (p=%s)"%(p))
        return p, act
    else:
        print("CHECK THIS INTERVAL: ", mins, maxs)
        # Convert the reachable interval to grid indices and clamp them to p_arr.
        mi_i, ma_i = math.floor(mins[0]/eps), math.ceil(maxs[0]/eps)
        mi_j, ma_j = math.floor((mins[1])/eps), math.ceil((maxs[1])/eps)
        mi_i, ma_i = max(0, mi_i), max(0, ma_i)
        mi_j, ma_j = max(0, mi_j), max(0, ma_j)
        mi_i, ma_i = min(len(p_arr), mi_i), min(len(p_arr), ma_i)-1
        mi_j, ma_j = min(len(p_arr), mi_j), min(len(p_arr), ma_j)-1
        mi_i, ma_i = max(0, mi_i), max(0, ma_i)
        mi_j, ma_j = max(0, mi_j), max(0, ma_j)
        # Make sure a range pinned at the origin still selects one cell.
        if(mi_i == 0 and ma_i == 0):
            ma_i += 1
        if(mi_j == 0 and ma_j == 0):
            ma_j += 1
        try:
            worst_future = np.min(np.asarray(p_arr[mi_i:ma_i,mi_j:ma_j]).flatten())
        except Exception:
            # e.g. an empty slice: treat as no verified successor.
            worst_future = 0.0 #p_arr[mi_i,mi_j]
        print("Multi-step reachable. Worst case: %s (p=%s)"%(worst_future, p))
        print(mins, maxs)
        if(worst_future == 0.0):
            return 0.00000000000000000001, act
        # p == 0 has already been handled above, so no second check is needed.
        return p * worst_future, act



# Upper bound on position that predicate() treats as having reached the goal.
# NOTE: the main verification loop increments this by eps after every sweep,
# so re-running that cell without re-running this one keeps the enlarged value.
SAFE_REGION = 0.2
# Threshold verify_state() uses to decide a cell is one-step reachable.
INIT_SAFE_ZONE = 0.2
def label(state_l, state_u, eps=0.025):
    """
    Return True when the box [state_l, state_u] is considered safe.

    The box is unsafe (False) when any of the following holds:
      * it overlaps, lies inside, or covers the square of half-width `eps`
        centred on OBSTACLE;
      * either upper coordinate is below -0.175;
      * any of its four corners lies strictly inside the square spanning
        OBSTACLE to OBSTACLE + 0.025.
    Only the first two entries of `state_l` / `state_u` are used.
    """
    obs_x, obs_y = OBSTACLE[0], OBSTACLE[1]
    low_x, low_y = state_l[0], state_l[1]
    high_x, high_y = state_u[0], state_u[1]

    def corner_is_outside(box_low, box_high, corner):
        # True unless `corner` is strictly inside the box.
        strictly_inside = (corner[0] > box_low[0] and corner[0] < box_high[0]
                           and corner[1] > box_low[1] and corner[1] < box_high[1])
        return not strictly_inside

    # Box and obstacle square are disjoint along at least one axis.
    separated = (high_x <= (obs_x-eps) or low_x >= (obs_x+eps)
                 or high_y <= (obs_y - eps) or low_y >= (obs_y +eps))
    # Box lies strictly inside the obstacle square.
    inside_obstacle = (high_x < (obs_x+eps) and low_x > (obs_x-eps) and
                       high_y < (obs_y+eps) and low_y > (obs_y-eps))
    # Box strictly contains the obstacle square.
    covers_obstacle = (high_x > (obs_x+eps) and low_x < (obs_x-eps) and
                       high_y > (obs_y+eps) and low_y < (obs_y-eps))
    safe = not ((not separated) or inside_obstacle or covers_obstacle)

    if(high_x < -0.175 or high_y < -0.175):
        return False

    corners = [state_l, state_u, [low_x, high_y], [high_x, low_y]]
    return safe and all(
        corner_is_outside(OBSTACLE, np.asarray(OBSTACLE)+0.025, corner)
        for corner in corners
    )


def predicate(source_l, source_u, state_l, state_u):
    """
    Decide whether a transition from the source box to the next-state box is acceptable.

    True only when all of the following hold:
      * label() reports both the source box (truncated to the first
        `observe_dims` entries) and the next-state box as safe;
      * the upper bound of the next position is within SAFE_REGION;
      * the upper bound of the next velocity is within 2*momentum_eps of
        `momentum`.
    """
    src_low = source_l[0:observe_dims]
    src_high = source_u[0:observe_dims]

    # Safe with respect to the obstacle, both before and after the step.
    obstacle_free = label(src_low, src_high) and label(state_l, state_u)

    next_pos_upper = state_u[pos_ind:pos_ind+2]
    next_vel_upper = state_u[pos_ind+2:pos_ind+4]

    # Worst case still lands inside the safe region.
    reaches_goal = (next_pos_upper <= SAFE_REGION).all()

    # Velocity constraint.
    velocity_ok = (abs(next_vel_upper - momentum) <= 2*momentum_eps).all()

    return obstacle_free and reaches_goal and velocity_ok



In [7]:
def argmax_action(bayes_model, state_l, state_u, resolution=25, n=10):
    """
    Grid-search the action that maximises the fraction of propagated bounds
    satisfying `predicate`.

    Every (x, y) action on an 11 x 11 grid over [-1, 1]^2 is appended to the
    state interval, propagated through `bayes_model` `n` times, and scored by
    the fraction of propagations for which `predicate` holds. The action whose
    propagated upper bound lies furthest from OBSTACLE is tracked as well.

    Args:
        bayes_model: model forwarded to propagate().
        state_l, state_u: lower / upper bound of the state interval.
        resolution: unused; kept for interface compatibility.
        n: number of propagations per candidate action.

    Returns:
        A 2-element list [x_action, y_action]: the furthest-from-obstacle
        action when the fallback condition holds, otherwise the best-scoring one.
    """
    # Imported locally so the function does not rely on a later notebook cell
    # having imported tqdm before it is called.
    from tqdm import tqdm

    best_p = -float('inf')
    best_upper = [1000, 1000]
    best_action = [-1, -1]
    furthest_p = -float('inf')
    furthest_action = [-1, -1]
    for x_action in tqdm(np.linspace(-1, 1, 11), desc="Computing Optimal Action"):
        for y_action in np.linspace(-1, 1, 11):
            s_l = np.concatenate((np.squeeze(state_l), [x_action,y_action] ))
            s_u = np.concatenate((np.squeeze(state_u), [x_action,y_action] ))
            p = 0
            for i in range(n):
                # propagate() takes a batch axis; add it, then strip it again.
                s_l = np.asarray([s_l])
                s_u = np.asarray([s_u])
                lower, upper = propagate(bayes_model, s_l, s_u, w_marg=GLOB_MARGIN)
                lower = np.squeeze(lower); upper = np.squeeze(upper)
                s_l = np.squeeze(s_l); s_u = np.squeeze(s_u)
                p += int(predicate(s_l, s_u, lower, upper))
            p = p/n
            if(p > best_p ):
                best_upper = upper
                best_action = [x_action, y_action]
                best_p = p
            dist = np.linalg.norm(np.asarray(upper[pos_ind:pos_ind+2]) - np.asarray(OBSTACLE))
            if(dist > furthest_p):
                furthest_action = [x_action, y_action]
                furthest_p = dist
    # Parentheses make the existing precedence explicit (`and` binds before `or`).
    # NOTE(review): while the module-level `stepnum` is 0 this condition is
    # always true, so the best-scoring action is never returned -- confirm
    # that this is intended.
    if((best_p == 0 and (stepnum in [0,1,2])) or stepnum == 0):
        print("FURTHEST ACTION: %s"%(furthest_action))
        return furthest_action
    print("BEST ACTION: %s"%(best_action))
    print("PREDICTED: %s with PROB: %s"%(best_upper, best_p))
    print(" ")
    return best_action


In [8]:
from tqdm import tqdm

# Grid definition: `states` points per axis on [0, end_point], spaced by `eps`.
INIT_SAFE_REGION = 0.2
states = 21
end_point = 0.21
eps = end_point/(states-1)
non_zero = []
ind_i, ind_j = 0,0
multiplier = 1.0
# Best known probability and chosen action for every grid cell.
global_probas = np.zeros((states,states))
global_actions = np.zeros((states,states,2))
grid_points = np.linspace(0, end_point, num=states)

# Each sweep verifies the cells just outside the current safe region, then the
# region is grown by one cell width.
for k in trange(states-int(SAFE_REGION/eps) + 1):
    probas = []
    for ind_i, i in enumerate(grid_points):
        for ind_j, j in enumerate(tqdm(grid_points)):
            if(global_probas[ind_i][ind_j] != 0):
                # Already verified in an earlier sweep: reuse the stored value.
                probas.append(global_probas[ind_i][ind_j])
                continue
            elif((i < INIT_SAFE_REGION and j < INIT_SAFE_REGION)):
                # Inside the initial safe region.
                probas.append(1.0)
                continue
            if(i > (SAFE_REGION + eps/1.5) or j > (SAFE_REGION + eps/1.5)):
                # Too far from the current safe region to be verified this sweep.
                probas.append(0.0)
                continue
            print("State: (%s, %s) (eps: %s)"%(i,j,eps))
            p, act = verify_state([i, j], eps, predicate, global_probas, refine=2.0, s=False)
            global_actions[ind_i][ind_j] = act
            print(" ")
            print(" ")
            print("Probability: %s"%(p))
            print(" ")
            print(" ")

            probas.append(p)
            non_zero.append(p)
    # predicate() reads this module-level value, so it is updated in place.
    # NOTE: this makes the cell non-idempotent; re-run the cell defining
    # SAFE_REGION before re-running this one.
    SAFE_REGION += eps
    probas = np.reshape(probas, (states,states))
    # Copy only the cells with a positive probability. A boolean mask selects
    # individual cells; indexing with np.argwhere's (N, 2) output would instead
    # select whole rows.
    positive = probas > 0.00
    global_probas[positive] = probas[positive]
    print("======================================")
    print("==========EXPANDING GOAL==============")
    print("======================================")

  0%|                                                                                                                                   | 0/3 [00:00<?, ?it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 214307.50it/s][A

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 262144.00it/s][A

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 227597.89it/s][A

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 276982.34it/s][A

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 279620.27it/s][A

100%|█████████████████████




  0%|                                                                                                                                  | 0/21 [00:00<?, ?it/s][A

State: (0.0, 0.21) (eps: 0.010499999999999999)




Computing Optimal Action:   0%|                                                                                                        | 0/11 [00:00<?, ?it/s][A[A

Computing Optimal Action:   9%|████████▋                                                                                       | 1/11 [00:00<00:03,  3.25it/s][A[A

Computing Optimal Action:  18%|█████████████████▍                                                                              | 2/11 [00:00<00:02,  3.61it/s][A[A

Computing Optimal Action:  27%|██████████████████████████▏                                                                     | 3/11 [00:00<00:02,  3.74it/s][A[A

Computing Optimal Action:  36%|██████████████████████████████████▉                                                             | 4/11 [00:01<00:01,  3.82it/s][A[A

Computing Optimal Action:  45%|███████████████████████████████████████████▋                                                    | 5/11 [00:01<00:01,  3.86it/s][A[A

Co

FURTHEST ACTION: [-1.0, -1.0]




Checking Samples: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 308.19it/s][A[A


Found 0 safe intervals
About to compute intersection for this many intervals:  0




Computing intersection weights: 0it [00:00, ?it/s][A[A

Depth 1 has 0 intersections





0it [00:00, ?it/s][A[A

Depth 1 prob:  0
Depth 2 has 0 intersections





0it [00:00, ?it/s][A[A

Depth 2 prob: 0
Current approximation: 0
Depth 2 prob::  0
Depth 3 has 0 intersections





0it [00:00, ?it/s][A[A

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:04<00:00,  4.88it/s][A


Depth 3 prob: 0
Current approximation: 0
Got this approximation:  0
~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~NOT REACHABLE~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 
Probability: 1e-20
 
 



  0%|                                                                                                                                  | 0/21 [00:00<?, ?it/s][A

State: (0.010499999999999999, 0.21) (eps: 0.010499999999999999)




Computing Optimal Action:   0%|                                                                                                        | 0/11 [00:00<?, ?it/s][A[A

Computing Optimal Action:   9%|████████▋                                                                                       | 1/11 [00:00<00:02,  3.83it/s][A[A

Computing Optimal Action:  18%|█████████████████▍                                                                              | 2/11 [00:00<00:02,  3.89it/s][A[A

Computing Optimal Action:  27%|██████████████████████████▏                                                                     | 3/11 [00:00<00:02,  3.94it/s][A[A

Computing Optimal Action:  36%|██████████████████████████████████▉                                                             | 4/11 [00:01<00:01,  3.94it/s][A[A

Computing Optimal Action:  45%|███████████████████████████████████████████▋                                                    | 5/11 [00:01<00:01,  3.91it/s][A[A

Co

FURTHEST ACTION: [-1.0, -1.0]




Checking Samples: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 286.64it/s][A[A


Found 3 safe intervals
About to compute intersection for this many intervals:  3




  stage1_args.append((model.posterior_mean, model.posterior_var, np.swapaxes(np.asarray([weight_intervals[wi]]),1,0), margin, verbose, n_proc, False))
Computing intersection weights: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 4293.04it/s]

Depth 1 has 3 intersections





  0%|                                                                                                                                   | 0/3 [00:00<?, ?it/s][A[A2023-03-02 21:21:42.448350: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-02 21:21:42.448350: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-02 21:21:42.448350: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neura

Depth 1 prob:  1.2887951529543908
Depth 2 has 3 intersections




  0%|                                                                                                                                   | 0/3 [00:00<?, ?it/s][A[A2023-03-02 21:21:45.637158: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-02 21:21:45.637158: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-02 21:21:45.637158: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural

Depth 2 prob: -0.5405143638352874
Current approximation: 0.7482807891191035
Depth 2 prob::  0.7482807891191035
Depth 3 has 1 intersections




  0%|                                                                                                                                   | 0/1 [00:00<?, ?it/s][A[AProcess SpawnPoolWorker-176:
Process SpawnPoolWorker-178:
Process SpawnPoolWorker-180:
Process SpawnPoolWorker-152:
Process SpawnPoolWorker-172:
Traceback (most recent call last):
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/queues.py", line 365, in get
    with self._rlock:
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/mult

  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/queues.py", line 368, in get
    return _ForkingPickler.loads(res)
  File "/Users/matthewwicker/Desktop/Development/BNNReachAvoid/minimalPuck2Dv1/../deepbayes_prealpha/__init__.py", line 4, in <module>
    from .posteriormodel import PosteriorModel
  File "/Users/matthewwicker/Desktop/Development/BNNReachAvoid/minimalPuck2Dv1/../deepbayes_prealpha/posteriormodel.py", line 6, in <module>
    import tensorflow as tf
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/site-packages/tensorflow/__init__.py", line 51, in <module>
    from ._api.v2 import compat
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/site-packages/tensorflow/_api/v2/compat/__init__.py", line 37, in <module>
    from . import v1
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/site-packages/tensorflow/_api/v2/compat/v1/__init__.py", line 30, in <module>
    from . i

Process SpawnPoolWorker-184:
Traceback (most recent call last):
  File "<string>", line 1, in <module>
Process SpawnPoolWorker-183:
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/__init__.py", line 16, in <module>
Process SpawnPoolWorker-182:
Traceback (most recent call last):
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/queues.py", line 365, in get
    with self._rlock:
  File "/Users/matthewwicker/opt/anaconda3/envs/bayesdev/lib/python3.10/multiprocessing/synchronize.py"

In [None]:
# Dump the action stored for every grid cell (a states x states x 2 array
# filled in by the verification loop).
print(global_actions)

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

# Heat map of the verified probabilities. The grid is flipped vertically so
# that the first state index increases from the bottom of the image upwards.
probas = np.asarray(global_probas).reshape(states,states)
fig = plt.figure(num=None, figsize=(6, 6), dpi=100)
# Vertical flip: the same array as fliplr followed by two 90-degree rotations.
arr = np.flipud(probas)
plt.imshow(arr, cmap='YlGnBu')
plt.colorbar()
plt.show()
