# Reinforcement Learning- and FEM-based Inverse Design

## Experiment Logger

In [1]:
import os
import neptune.new as neptune
from neptune.new.types import File
from neptune.new.utils import stringify_unsupported

# Export the Neptune project / notebook identifiers as environment variables
# before the run is created.
os.environ.update({
    'NEPTUNE_PROJECT': "pil-clemson/metamtl-rl",
    'NEPTUNE_NOTEBOOK_ID': "45d03d69-6ac7-41ca-8af8-80caaa73aad5",
    'NEPTUNE_NOTEBOOK_PATH': "metamaterial-rl/RemoteFEM-DQN.ipynb",
})

# `exp` is the run handle that every later cell logs into (exp['field'] = ...).
# Hardware metrics, stdout and stderr capture are all switched on.
exp = neptune.init_run(
    project="pil-clemson/metamtl-rl",
    capture_hardware_metrics=True,
    capture_stderr=True,
    capture_stdout=True,
)

https://app.neptune.ai/pil-clemson/metamtl-rl/e/METAMTLRL-297
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


## Import

In [2]:
from __future__ import annotations
from typing import Union, Optional, Callable, Any
from typing import Tuple, List, Set, Dict
from typing import NamedTuple
from typing import Generator

In [3]:
from collections import defaultdict, deque
from types import SimpleNamespace
import queue
from queue import PriorityQueue
from enum import Enum

In [4]:
from dataclasses import dataclass, field

In [5]:
import traceback
import tracemalloc

In [6]:
import ipywidgets as widgets

In [7]:
import os
import sys
import copy
import time
from datetime import datetime, timedelta
from pprint import pformat, pprint
import multiprocessing
import random
import math
import itertools
import uuid

In [8]:
from tqdm.notebook import trange, tqdm

In [9]:
import matplotlib.pyplot as plt

In [10]:
import plotly.express as px

In [11]:
import torch
from torch import nn

from torch import Tensor, BoolTensor

from torch.nn.modules.loss import _Loss
from torch.optim import Optimizer

from torchvision.transforms import PILToTensor

# Record the installed PyTorch version in the cell output.
print('PyTorch version:', torch.__version__)

PyTorch version: 1.13.0


In [12]:
import torchinfo

In [13]:
import numpy as np

In [14]:
from SimHubClient import SimHubClient

## Computing Devices

In [15]:
# Report the number of CPU cores as seen by the multiprocessing module.
print('CPU Cores:', multiprocessing.cpu_count())

CPU Cores: 56


In [16]:
# Total physical memory = page size * number of physical pages, both read via os.sysconf()
mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')  # e.g. 4015976448
mem_gib = mem_bytes/(1024.**3)  # bytes -> GiB
print('Memory size:', int(mem_gib), 'GiB')  # int() truncates the fractional GiB

Memory size: 376 GiB


In [17]:
# Collect the device name of every CUDA device PyTorch can see (empty list if none).
available_gpus = [torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]
print('GPUs:', available_gpus)

GPUs: ['Tesla V100S-PCIE-32GB', 'Tesla V100S-PCIE-32GB']


In [18]:
# Device on which the Q-networks and replay batches are allocated.
# NOTE(review): both branches of this conditional select the CPU, so `cuda` is always
# the CPU device even when a GPU is available. Confirm this is intentional before
# changing it: TurnableGridState.to_tensor() builds tensors without a device argument,
# so moving only the networks to a GPU would presumably cause device mismatches.
cuda = torch.device('cpu') if torch.cuda.is_available() else torch.device('cpu')
print('Current computing device:', cuda)

Current computing device: cpu


## DEBUG FLAG

In [19]:
# NOTE(review): this boolean is rebound by the `class DEBUG` definition in the
# following cell, so after both cells run `DEBUG` is a (always truthy) class and
# this value is never observed.
DEBUG = False

In [20]:
class DEBUG:
    """Namespace of switches that control logging, visualisation and diagnostics.

    The class is never instantiated; its attributes are read (and, for the
    buffers and `in_generation_mode`, written) directly on the class.
    """
    # --- per-episode result generation -------------------------------------
    result_generation = True       # run Agent.generate() after every training episode
    result_visualization = False   # also upload an image of the generated result

    # --- transition logging -------------------------------------------------
    transition_log = True
    transition_log_buffer = ''      # text accumulated during training
    transition_log_buffer_gen = ''  # text accumulated during generation

    # --- fine-grained logging toggles --------------------------------------
    action_log = False
    prediction_log = False
    state_log = True
    epsilon_log = True
    reward_log = True
    state_visualization = False
    state_target_diff_visualization = False

    # Probability with which a state is visualised when visualisation is enabled
    visualization_sampling_rate = 0.0

    optimizer_sample_log = False

    # Initialise the grid state with the [[45, -45], [-45, 45]] angle matrix
    start_from_goal = False

    # Enable tracemalloc snapshots
    trace_memory = False

    # Set by Agent.generate() while it runs; suppresses training-only logging
    in_generation_mode = False

In [21]:
if DEBUG.trace_memory:
    # PYTHONTRACEMALLOC is only consulted when an interpreter starts, so setting it
    # here cannot change the traceback depth of the already-running kernel. It is
    # still exported so that child interpreters inherit the setting.
    os.environ['PYTHONTRACEMALLOC'] = '3'
    # Pass the frame depth explicitly so this process really stores 3 frames per
    # allocation (a bare start() stores only 1).
    tracemalloc.start(3)

## Helper Functions

In [22]:
def clip(x, l, u):
    """Clamp `x` to the range bounded below by `l` and above by `u`.

    The lower bound is checked first, so `l` is returned whenever `x < l`.
    """
    if x < l:
        return l
    if x > u:
        return u
    return x

## Hyperparameters

In [23]:
# Geometry of the design grid and of the simulated result field.
environment_configuration = {
    'grid_size': (2, 2),               # shape of the angle matrix
    'result_size': (20, 20),           # shape simulation results are reshaped to for plotting
    'result_range': (293.15, 353.15),  # value range of the simulated field
}

# Knobs of the DQN training loop and of the reward function.
hyperparameters = {
    # --- optimisation ---
    'target_update_interval': 1000,
    'optimization_iterations': 1,
    'experience_replay_capacity': 10000,
    'replay_batch_size': 32,
    'lr': 0.001,  # NOTE(review): QNet() hard-codes its own 0.001 and does not read this key
    'discount_factor': 0.9,

    # --- epsilon-greedy exploration ---
    'epsilon_initial': 1.0,
    'epsilon_minimal': 0.1,
    'epsilon_halflife': 2000,

    # --- episode limits ---
    'max_episode': 300,
    'max_step_per_episode': 1000,

    # --- reward / termination ---
    'goal_reward': 10000.0,
    'terminal_error_threshold': 0.01,
    'reward_error_func': 'MSE',
    'reward_state_normalization': '0-1',
}

# Attach both configurations to the experiment run.
exp['EnvConfig'] = stringify_unsupported(environment_configuration)
exp['Hyperparameters'] = stringify_unsupported(hyperparameters)


  exp['EnvConfig'] = stringify_unsupported(environment_configuration)


## Reinforcement Learning Environment

## Interfaces and Dataclasses

In [24]:
class State(dict):
    """Dictionary-backed environment state.

    Subclasses populate the dictionary and implement `to_tensor`.
    """

    def __init__(self) -> None:
        # States always start out empty; subclasses fill in their own keys.
        super().__init__()

    def step(self, action: 'Action') -> 'State':
        """Apply `action` to a deep copy of this state and return the result.

        The receiver itself is left untouched.
        """
        successor = copy.deepcopy(self)
        return action(successor)

    def to_tensor(self) -> Tensor:
        """Return the tensor encoding of the state (must be provided by subclasses)."""
        raise NotImplementedError

In [25]:
class Action:
    """A named, callable state transformation."""

    def __init__(self, name: str, action: Callable[[State], State]) -> None:
        self.name, self.action = name, action

    def __call__(self, state: State) -> State:
        """Run the wrapped callable on `state` and return whatever it returns."""
        transform = self.action
        return transform(state)

    def __repr__(self) -> str:
        # The name alone identifies the action in logs.
        return self.name
# Action = Callable[[State], State]

In [26]:
class Environment:
    """Abstract environment: holds the current state and the list of available actions.

    Concrete environments must implement `reset` and `step`.
    """

    def __init__(self) -> None:
        self._state: State = None
        self._action_space: List[Action] = list()
        self._valid_actions: BoolTensor = None

    @property
    def state(self) -> State:
        """The current state (None until a subclass sets one)."""
        return self._state

    @property
    def action_space(self) -> List[Action]:
        """The list of actions; an action is addressed by its index in this list."""
        return self._action_space

    def action_count(self) -> int:
        """Number of actions in the action space."""
        return len(self._action_space)

    def reset(self) -> None:
        """Put the environment back into its initial state (subclass responsibility)."""
        raise NotImplementedError

    def step(self, action_index: int) -> None:
        """Apply the action at `action_index` to the current state (subclass responsibility)."""
        raise NotImplementedError

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f'''{cls_name}(
    Action space size: {self.action_count()}
    Current state: {self.state}
)'''


In [27]:
class ReplayTransition(NamedTuple):
    """One record stored in the experience-replay buffer."""
    # NOTE(review): Agent.compute_reward pushes `state.to_tensor()` results here, so the
    # stored values are tensors even though the fields are annotated as State.
    state: State
    # Index of the chosen action within the environment's action space
    action_index: int
    reward: float
    # None when the transition's state was terminal (see Agent.compute_reward)
    next_state: State
    # Free-form label; the agent stores "<episode>-<step>"
    note: str

In [28]:
@dataclass
class SimulationTransition:
    """A single environment transition, enriched step by step.

    The stepping fields are set on construction; the simulator fields are filled in
    once the FEM tasks are submitted / finished; the reward fields are filled in by
    the reward function.
    """
    # From stepping
    episode: int
    step: int

    state: 'State'
    action_index: int
    next_state: 'State'

    action_name: Optional[str] = None
    action_type: Optional[str] = None

    # From FEM simulator
    state_id: Optional[str] = None
    next_state_id: Optional[str] = None

    state_sim: Optional[Dict[str, Any]] = None
    next_state_sim: Optional[Dict[str, Any]] = None

    # From reward function
    state_error: Optional[float] = None
    is_state_terminal: Optional[bool] = None

    next_state_error: Optional[float] = None
    is_next_state_terminal: Optional[bool] = None

    reward: Optional[float] = None

    def __repr__(self) -> str:
        """One-line log representation: ``state(error, terminal) ==action==> next_state(...)``."""
        # Equal simulator ids mean the action led back to the same simulated state.
        loop_marker = "Loop!" if self.state_id == self.next_state_id else ""
        return (f'[{self.episode}-{self.step}] '
                f'{self.state}({self.state_error}, {self.is_state_terminal})'
                f' =={self.action_name}({self.action_type})==> '
                # The next state is annotated with its OWN error / terminal flag.
                f'{self.next_state}({self.next_state_error}, {self.is_next_state_terminal})'
                f'  R:{self.reward} {loop_marker}')

In [29]:
# Type alias for reward functions: they take a SimulationTransition and return a
# SimulationTransition (FEMReward below fills in the error/reward fields in place).
RewardFunc = Callable[[SimulationTransition], SimulationTransition]

## State and Environment

In [30]:
class TurnableGridState(State):
    """State holding one angle per grid cell under the 'angle_matrix' key."""

    def __init__(self) -> None:
        super().__init__()

        # Default start: every cell at 10.
        grid_shape = environment_configuration['grid_size']
        angles = np.zeros(grid_shape) + 10

        # Debug override: start from the hard-coded goal configuration instead.
        if DEBUG.start_from_goal:
            angles = np.array([[45, -45], [-45, 45]])

        self['angle_matrix'] = angles

    def to_tensor(self) -> torch.Tensor:
        """Return the angle matrix flattened into a 1-D float tensor."""
        flat_angles = self['angle_matrix'].flatten()
        return torch.tensor(flat_angles).float()

In [31]:
class TurnableGridEnvironment(Environment):
    """Environment whose state is a grid of angles; each action turns one cell.

    For every cell (i, j) and every modifier in `angle_modifiers` one Action is
    registered, in row-major cell order with the modifiers innermost.
    """

    def __init__(self) -> None:
        super().__init__()

        self.grid_size = environment_configuration['grid_size']
        self.angle_range = [-80, 80]
        self.angle_modifiers = [-1, 1]

        self.reset()

        def angle_matrix_action(i, j, mod):
            """Build the Action that turns cell (i, j) in direction `mod`."""
            def action(state):
                old_value = state['angle_matrix'][i, j]
                signed_value = old_value * mod

                if signed_value == 80:
                    # Jump between -80, 80
                    modifier = mod * -160
                elif signed_value == -10:
                    # Jump between -10, 10
                    modifier = mod * 20
                else:
                    modifier = mod * 35

                state['angle_matrix'][i, j] = old_value + modifier

                if DEBUG.action_log:
                    exp['action_log'].append(f'{old_value}, {modifier}({mod}), {state["angle_matrix"][i, j]}')
                return state
            return Action(f'({i}, {j})->{mod}', action)

        row_count = self.grid_size[0]
        col_count = self.grid_size[1]
        self._action_space.extend(
            angle_matrix_action(i, j, mod)
            for i in range(row_count)
            for j in range(col_count)
            for mod in self.angle_modifiers
        )

    def reset(self) -> None:
        """Replace the current state with a freshly constructed TurnableGridState."""
        self._state = TurnableGridState()

    def step(self, action_index: int) -> None:
        """Apply the action at `action_index`; the result becomes the current state."""
        chosen = self._action_space[action_index]
        self._state = self._state.step(chosen)

## Visualization Functions

In [32]:
def log_vis_sim(sim_arr, field, append=False, step=None, desc=None):
    """Log a simulation result to the experiment as a grayscale-style image.

    The array is reshaped to the configured result size, rescaled so that the
    configured result range maps onto [0, 1], and clipped to that interval.
    With `append` the image is appended to the series at `field`; otherwise it
    is uploaded as a single file.

    NOTE(review): a second `log_vis_sim` is defined in the next cell and rebinds
    this name, so this version is not the one in effect after both cells run.
    """
    bounds = environment_configuration['result_range']
    shape = environment_configuration['result_size']
    low, high = bounds[0], bounds[1]

    scaled = (sim_arr.reshape(shape) - low) / (high - low)
    arr = np.clip(scaled, 0, 1)

    if not append:
        exp[field].upload(File.as_image(arr))
        return
    exp[field].append(File.as_image(arr), step=step, description=desc)

In [33]:
def log_vis_sim(sim_arr, field, append=False, step=None, desc=None, vrange=(0, 1)):
    """Render a simulation result as a heat map and log the figure to the experiment.

    Parameters
    ----------
    sim_arr
        Values to plot. Anything `np.asarray` accepts (array, tensor, nested list);
        it is reshaped to `environment_configuration['result_size']`.
    field : str
        Experiment field the figure is written to.
    append : bool
        If True, append the figure to the series at `field` (with `step` and `desc`);
        otherwise upload it as a single file.
    step, desc
        Passed on as `step` / `description` when appending.
    vrange : tuple
        (vmin, vmax) of the colour scale.
    """
    # asarray lets callers pass raw (list-like) simulator output as well as arrays.
    grid = np.asarray(sim_arr).reshape(environment_configuration['result_size'])

    # Use a dedicated figure instead of whatever pyplot considers "current".
    fig, ax = plt.subplots()
    try:
        ax.imshow(grid, vmin=vrange[0], vmax=vrange[1], cmap='coolwarm')

        if append:
            exp[field].append(fig, step=step, description=desc, wait=True)
        else:
            exp[field].upload(fig, wait=True)
    finally:
        # Always release the figure, even if the upload fails, so repeated calls
        # do not accumulate open figures.
        fig.clear()
        plt.close(fig)
    


In [34]:
def log_vis_tran(transition, field, step=None):
    """Placeholder for transition visualisation: not implemented, does nothing."""
    return None

## DQN

### FEM-based Reward & Terminal Function

In [35]:
class FEMReward():
    """Reward function that scores a transition by how close its simulated result
    is to a target distribution.

    The error of a state is the square root of the configured error function
    (mean squared error) between the normalised simulation result and the
    normalised target.
    """
    def __init__(self,
                 target: Tensor,
                 hyperparameters: Dict[str, Any]) -> None:
        """
        Parameters
        ----------
        target : Tensor
            Target distribution in raw (un-normalised) units.
        hyperparameters : Dict[str, Any]
            Must provide 'reward_state_normalization' (only '0-1' is supported),
            'reward_error_func' (only 'MSE' is supported), 'goal_reward' and
            'terminal_error_threshold'.

        Raises
        ------
        NotImplementedError
            For an unsupported normalisation or error function.
        """
        if hyperparameters['reward_state_normalization'] == '0-1':
            # Offset 293.15 and span 60 match the (293.15, 353.15) result range
            # used for the raw-value visualisations below.
            self.normalize_func = lambda x: (x - 293.15) / 60
        else:
            raise NotImplementedError

        if hyperparameters['reward_error_func'] == 'MSE':
            self.error_func = nn.MSELoss(reduction='mean')
        else:
            raise NotImplementedError

        self.target = self.normalize_func(target)

        self.goal_reward = hyperparameters['goal_reward']
        self.terminal_error_threshold = hyperparameters['terminal_error_threshold']

    def __call__(self, transition: SimulationTransition) -> SimulationTransition:
        """
        Fill in the error, terminal flag and reward of a transition.

        Parameters
        ----------
        transition : SimulationTransition
            A transition with completed simulation data

        Returns
        -------
        SimulationTransition
            The same object, updated in place: `state_error` / `is_state_terminal`
            and `next_state_error` / `is_next_state_terminal` are set for every
            simulation whose status is 'done', and `reward` is set to
            `0.1 - state_error`, or to the goal reward if the state is terminal.

        Raises
        ------
        TypeError
            If the simulation of `transition.state` is not done, because the reward
            is computed from `state_error`, which is then still None.
        """
        state_raw = None          # raw simulation result, kept for visualisation
        state_normalized = None

        if transition.state_sim and transition.state_sim['status'] == 'done':
            state_raw = torch.tensor(transition.state_sim['output']['temperature_distribution'][2])
            state_normalized = self.normalize_func(state_raw)
            transition.state_error = float(torch.sqrt(self.error_func(state_normalized, self.target)))
            transition.is_state_terminal = transition.state_error <= self.terminal_error_threshold

        if transition.next_state_sim and transition.next_state_sim['status'] == 'done':
            next_state_raw = torch.tensor(transition.next_state_sim['output']['temperature_distribution'][2])
            next_state_normalized = self.normalize_func(next_state_raw)
            transition.next_state_error = float(torch.sqrt(self.error_func(next_state_normalized, self.target)))
            transition.is_next_state_terminal = transition.next_state_error <= self.terminal_error_threshold

        # Reward based on state_error
        transition.reward = .1 - transition.state_error

        # Reward extra if the (current) state is terminal
        if transition.is_state_terminal:
            transition.reward = self.goal_reward

        if state_raw is not None and not DEBUG.in_generation_mode:
            if DEBUG.state_log:
                exp['state_values'].append(transition.state_error)

            # Draw the sampling decision once so both visualisations cover the same states.
            debug_sampling = ((DEBUG.state_visualization or DEBUG.state_target_diff_visualization)
                              and random.random() <= DEBUG.visualization_sampling_rate)

            if DEBUG.state_visualization and debug_sampling:
                # Plot the RAW result: the colour range is given in raw units.
                log_vis_sim(state_raw, 'state_visualization', vrange=(293.15, 353.15),
                            append=True, desc=f'E{transition.episode}'
                            + f' S{transition.step}'
                            + f' E {transition.state_error}'
                            + f' R {transition.reward}'
                            + f' {transition.state}')

            if DEBUG.state_target_diff_visualization and debug_sampling:
                # Difference of the normalised result and the normalised target
                # (the result must be normalised exactly once).
                # NOTE(review): this difference can be negative, but the colour range
                # starts at 0 — confirm whether (-1, 1) was intended.
                log_vis_sim(state_normalized - self.target, 'state_target_diff',
                            vrange=(0, 1),
                            append=True, desc=f'E{transition.episode}'
                            + f' S{transition.step}'
                            + f' E {transition.state_error}'
                            + f' R {transition.reward}'
                            + f' {transition.state}')

        if DEBUG.reward_log and not DEBUG.in_generation_mode:
            exp['reward'].append(transition.reward)

        return transition

### Network Definition

In [36]:
# Network Container
class Model():
    """Container bundling a network with its loss function and optimizer."""

    def __init__(self, network: nn.Module, loss_func: _Loss, optimizer: Optimizer):
        self.network, self.loss_func, self.optimizer = network, loss_func, optimizer

    def __call__(self, network_input: Tensor) -> Tensor:
        """Forward `network_input` through the wrapped network."""
        output = self.network(network_input)
        return output

In [37]:
def QNet(state_size: int = 4, action_number: int = 8, target_network: bool = False,
         lr: float = 0.001):
    """Build a Q-network (state -> one value per action) wrapped in a Model.

    Parameters
    ----------
    state_size : int
        Length of the flattened state vector fed to the network.
    action_number : int
        Number of outputs, one per action.
    target_network : bool
        If True the Model carries no loss function and no optimizer (the target
        network is only ever overwritten with the policy network's weights).
    lr : float
        Learning rate of the Adam optimizer of a policy network. Ignored for
        target networks. Defaults to the previously hard-coded 0.001.

    Returns
    -------
    Model
        The network, with a Huber loss and an Adam optimizer unless
        `target_network` is set.
    """
    net = nn.Sequential(
        nn.Linear(state_size, 100, device=cuda),
        nn.ReLU(),
        nn.Linear(100, 200, device=cuda),
        nn.ReLU(),
        nn.Linear(200, action_number, device=cuda),
    )
    if target_network:
        return Model(network=net, loss_func=None, optimizer=None)

    return Model(network=net, loss_func=nn.HuberLoss(),
                 optimizer=torch.optim.Adam(net.parameters(), lr=lr))

### Replay Memory Class

In [38]:
class ReplayMemory():
    """Bounded FIFO buffer of ReplayTransition records.

    Once `capacity` records are stored, pushing a new one drops the oldest.
    """

    def __init__(self, capacity):
        self.memory: deque = deque(maxlen=capacity)

    def __len__(self):
        return len(self.memory)

    def push(self, *args):
        """Store one record; `args` are the ReplayTransition fields in order."""
        record = ReplayTransition(*args)
        self.memory.append(record)

    def sample(self, batch_size):
        """Return `batch_size` distinct records drawn at random."""
        return random.sample(self.memory, batch_size)

### Agent Class

In [39]:
class Agent():
    """
    DQN agent that explores an Environment, has states evaluated by an FEM
    simulator client, and trains a policy network from an experience replay.

    Stepping and reward computation are decoupled: `step` only submits the two
    states of a transition to the simulator and queues the transition; the rewards
    are filled in afterwards by `compute_pending_rewards`.
    """
    def __init__(self, environment: Environment, simulator: SimHubClient, reward_func: RewardFunc, 
                 policy_network: nn.Module, target_network: nn.Module, hyperparameters: Dict[str, Any]) -> None:
        """
        Parameters
        ----------
        environment : Environment
            Environment to act in.
        simulator : SimHubClient
            Client used to submit states for simulation and to wait for the results.
        reward_func : RewardFunc
            Callable that fills in error, terminal flags and reward of a transition.
        policy_network, target_network
            Model wrappers (objects exposing `.network`, and for the policy network
            also `.loss_func` and `.optimizer`).
        hyperparameters : Dict[str, Any]
            Training settings; see the keys read below.
        """
        self.environment: Environment = environment
        self.fem_simulator: SimHubClient = simulator
        self.fem_reward_func: RewardFunc = reward_func

        self.policy_network: Model = policy_network
        self.target_network: Model = target_network

        self.target_update_interval: int = hyperparameters['target_update_interval']

        self.optimization_iterations: int = hyperparameters['optimization_iterations']
        self.max_step_per_episode: int = hyperparameters['max_step_per_episode']
        self.experience_replay: ReplayMemory = ReplayMemory(hyperparameters['experience_replay_capacity'])
        self.replay_batch_size: int = hyperparameters['replay_batch_size']

        self.discount_factor: float = hyperparameters['discount_factor']
        self.epsilon_initial: float = hyperparameters['epsilon_initial']
        self.epsilon_minimal: float = hyperparameters['epsilon_minimal']
        self.epsilon_halflife: float = hyperparameters['epsilon_halflife']

        # Transitions submitted to the simulator whose rewards are not yet computed
        self.pending_transitions: List[SimulationTransition] = list()

        # Set to true when generating result
        self.generation_mode: bool = False
        # Number of randomly chosen (exploration) actions so far
        self.explored_step: int = 0

        # Number of training-mode action selections so far (drives epsilon decay)
        self.total_steps: int = 0

        self.episode: int = 0
        self.step_num: int = 0

        # Best (smallest) final step seen so far and the episode it occurred in;
        # 100000 / 0 are the "nothing recorded" values.
        self.convergence_episode: int = 0
        self.convergence_step: int = 100000
        self.convergence_episode_gen: int = 0
        self.convergence_step_gen: int = 100000

    def select_action(self) -> Tuple[State, int, str]:
        """
        Decide an action based on epsilon greedy algorithm

        Epsilon starts at `epsilon_initial`, halves every `epsilon_halflife`
        training steps and never drops below `epsilon_minimal`. In generation
        mode the network prediction is always used.

        Returns
        -------
        State
            Current state instance
        int
            Index number of an action in the action space
        str
            Action type, literal string of "Prediction" or "Random"
        """
        state = self.environment.state

        epsilon = max(self.epsilon_initial
                      * (0.5 ** (self.total_steps / self.epsilon_halflife)),
                      self.epsilon_minimal)

        if DEBUG.epsilon_log and not self.generation_mode:
            exp['epsilon'].append(epsilon, step=self.episode + self.step_num / self.max_step_per_episode)

        if random.random() > epsilon or self.generation_mode:
            prediction = self.policy_network(state.to_tensor().flatten()).flatten()

            if DEBUG.prediction_log:
                if not self.generation_mode:
                    log_target = 'prediction'
                else:
                    log_target = 'prediction_gen'
                exp[f'{log_target}/{self.episode}'].append(f'Step {self.step_num}', step=self.step_num)
                exp[f'{log_target}/{self.episode}'].append(str(state), step=self.step_num+0.1)
                # (predicted value, action name) pairs, best first
                preds = [(prediction[i].item(), str(self.environment.action_space[i]))
                         for i in range(len(prediction))]
                preds.sort(reverse=True)
                exp[f'{log_target}/{self.episode}'].append(pformat(preds), step=self.step_num+0.2)

            action_index = prediction.argmax().item()
            action_type = 'Prediction'

        else:
            action_index = random.randrange(len(self.environment.action_space))
            action_type = 'Random'
            self.explored_step += 1

        if not self.generation_mode:
            self.total_steps += 1
        return state, action_index, action_type

    def step(self) -> SimulationTransition:
        """
        Perform an action in the environment and submit both states of the
        transition as FEM tasks to the simulator

        The transition is appended to `pending_transitions`; its reward is not
        computed here.

        Returns
        -------
        SimulationTransition
            Return the transition
        """
        state, action_index, action_type = self.select_action()
        self.environment.step(action_index)
        next_state = self.environment.state

        transition = SimulationTransition(self.episode, self.step_num, state, action_index, next_state)
        # submit_task returns a pair; only the first element (stored as the task id)
        # is used, the results are fetched later via wait_for_task.
        transition.state_id, _ = self.fem_simulator.submit_task(state)
        transition.next_state_id, _ = self.fem_simulator.submit_task(next_state)

        transition.action_name = self.environment.action_space[action_index].name
        transition.action_type = action_type

        self.pending_transitions.append(transition)
        return transition

    def compute_reward(self, transition: SimulationTransition) -> None:
        """
        Compute reward value and terminal status for a COMPLETED transition.
        In training mode the state, action, reward and next state are pushed
        into the experience replay.

        If the current state is terminal, None is stored as the next state of
        the replay record (the transition object itself is not modified for that).
        """
        self.fem_reward_func(transition)

        if DEBUG.transition_log:
            if not self.generation_mode:
                DEBUG.transition_log_buffer += str(transition) + '\n'
            else:
                DEBUG.transition_log_buffer_gen += str(transition) + '\n'

        if not self.generation_mode:
            self.experience_replay.push(transition.state.to_tensor(), 
                                        transition.action_index, 
                                        transition.reward, 
                                        None if transition.is_state_terminal else transition.next_state.to_tensor(),
                                        f'{transition.episode}-{transition.step}')

    def compute_pending_rewards(self) -> SimulationTransition:
        """
        Wait for the simulations of the queued transitions and compute their rewards.

        Transitions are processed in submission order; processing stops at the first
        transition whose state is terminal, and all remaining queued transitions are
        discarded.

        Returns
        -------
        SimulationTransition
            The last processed transition (the terminal one, if any was found)

        Raises
        ------
        RuntimeError
            If no transition is pending.
        """
        if not self.pending_transitions:
            raise RuntimeError('compute_pending_rewards() called without pending transitions')

        last_transition = None
        for transition in tqdm(self.pending_transitions):
            last_transition = transition

            transition.state_sim = self.fem_simulator.wait_for_task(transition.state_id)
            transition.next_state_sim = self.fem_simulator.wait_for_task(transition.next_state_id)

            self.compute_reward(transition)

            # Skip all remaining transitions beyond a terminal state
            if transition.is_state_terminal:
                break

        self.pending_transitions.clear()
        return last_transition

    def optimize(self) -> None:
        """
        Run `optimization_iterations` gradient steps on batches sampled from the
        experience replay. Does nothing until the replay holds at least one batch.
        """
        if len(self.experience_replay) < self.replay_batch_size: return

        for i in range(self.optimization_iterations):
            samples = self.experience_replay.sample(self.replay_batch_size)
            # Transpose the list of records into one record of tuples (one per field)
            batch = ReplayTransition(*zip(*samples))

            if DEBUG.optimizer_sample_log:
                os.makedirs('logs', exist_ok=True)
                filename = f'logs/sampled_transition-{self.total_steps + i / self.optimization_iterations}.log'
                with open(filename, 'w') as fp:
                    pprint(samples, stream=fp)
                exp['sampled_transition'].upload_files(filename)

            # True where the record has a next state (i.e. its state was not terminal)
            non_final_mask = torch.tensor([s is not None for s in batch.next_state],
                                          device=cuda, dtype=torch.bool)

            state_batch = torch.stack([s.flatten() for s in batch.state])
            action_batch = torch.tensor(batch.action_index, device=cuda).unsqueeze(1)
            reward_batch = torch.tensor(batch.reward, device=cuda)

            # Q(s, a) of the actions that were actually taken
            state_action_values = self.policy_network(state_batch).gather(1, action_batch)

            # max_a' Q_target(s', a'); stays 0 for records without a next state.
            # The target network is only evaluated when at least one next state
            # exists (torch.stack cannot stack an empty list), but the batch is
            # still trained on: terminal records carry valid rewards.
            next_state_values = torch.zeros(self.replay_batch_size, device=cuda)
            if non_final_mask.any():
                non_final_next_states = torch.stack([s.flatten() for s in batch.next_state
                                                     if s is not None])
                next_state_values[non_final_mask] = self.target_network(non_final_next_states).max(1)[0].detach()

            expected_state_action_values = (next_state_values * self.discount_factor) + reward_batch

            loss = self.policy_network.loss_func(state_action_values, expected_state_action_values.unsqueeze(1))
            optimization_loss = float(loss)
            self.policy_network.optimizer.zero_grad()
            loss.backward()
            # Clamp every gradient element to [-1, 1] before the update
            for param in self.policy_network.network.parameters():
                param.grad.data.clamp_(-1, 1)
            self.policy_network.optimizer.step()

            exp['optimization_loss'].append(optimization_loss, step=self.total_steps + i / self.optimization_iterations)

    def update_target_network(self) -> None:
        """Copy the policy network's weights into the target network."""
        self.target_network.network.load_state_dict(self.policy_network.network.state_dict())

    def train(self, episodes: int) -> None:
        """
        Train for `episodes` episodes.

        Each episode: reset the environment, take `max_step_per_episode` steps
        (optimizing after every step), then compute the rewards of the queued
        transitions, optionally run a generation pass, and write the logs.
        """
        for episode in range(episodes):
            print('')
            print(f'Episode: {episode}')
            self.episode = episode

            self.environment.reset()

            print('Stepping...')
            for self.step_num in trange(self.max_step_per_episode):
                self.step()

                self.optimize()

                if self.total_steps % self.target_update_interval == 0:
                    self.update_target_network()

            exp['total_explored'].append(self.explored_step, step=self.episode)

            print('Processing rewards...')
            transition = self.compute_pending_rewards()
            if transition.is_state_terminal: 
                print(f'Terminal state found in episode {transition.episode} step {transition.step}:')
                print(transition.state)

            exp['terminal_step'].append(transition.step, step=self.episode)

            # Track the smallest final step; a larger one resets the record.
            if transition.step < self.convergence_step:
                self.convergence_step = transition.step
                self.convergence_episode = transition.episode
            elif transition.step > self.convergence_step:
                self.convergence_step = 100000
                self.convergence_episode = 0

            self.fem_simulator.clear_tasks()

            if DEBUG.result_generation:
                generated_result = self.generate()

                exp['generated_result'].append(str(generated_result), step=self.episode)

                if DEBUG.result_visualization:
                    print('Visualizing result...')
                    # NOTE(review): the raw simulator output is presumably a plain
                    # sequence; convert it so the visualiser can reshape it.
                    log_vis_sim(np.asarray(generated_result.state_sim['output']['temperature_distribution'][2]),
                                'generated_state_vis',
                                append=True, step=self.episode, vrange=(293.15, 353.15))

            if DEBUG.transition_log:
                os.makedirs('logs', exist_ok=True)

                log_file = f'logs/transition-{self.episode}.log'
                with open(log_file, 'w') as fp:
                    fp.write(DEBUG.transition_log_buffer)
                DEBUG.transition_log_buffer = ''
                exp['transition_log'].upload_files(log_file)

                log_file = f'logs/transition-gen-{self.episode}.log'
                with open(log_file, 'w') as fp:
                    fp.write(DEBUG.transition_log_buffer_gen)
                DEBUG.transition_log_buffer_gen = ''
                exp['transition_log'].upload_files(log_file)

            if DEBUG.trace_memory:
                os.makedirs('logs', exist_ok=True)
                snapshot = tracemalloc.take_snapshot()
                with open(f'logs/mem{self.episode}.log', 'w') as fp:
                    for line in snapshot.statistics('lineno')[:30]:
                        print(line, file=fp)

    def generate(self) -> SimulationTransition:
        """
        Roll out the current policy without exploration and without training.

        Generation mode is switched on for the duration of the rollout (on the
        agent and on DEBUG) and is always switched off again, even on error.

        Returns
        -------
        SimulationTransition
            The last processed transition of the rollout (the terminal one, if a
            terminal state was reached)
        """
        self.generation_mode = True
        DEBUG.in_generation_mode = True
        try:
            print('Generating...')

            self.environment.reset()

            for self.step_num in trange(self.max_step_per_episode):
                self.step()

            print('Evaluating states...')
            transition = self.compute_pending_rewards()
            if transition.is_state_terminal: 
                print(f'Terminal state reached in step {transition.step}:')
                print(transition.state)

            exp['terminal_step_gen'].append(transition.step, step=self.episode)

            # Same bookkeeping as in train(), but on the generation-specific record.
            if transition.step < self.convergence_step_gen:
                self.convergence_step_gen = transition.step
                self.convergence_episode_gen = transition.episode
            elif transition.step > self.convergence_step_gen:
                self.convergence_step_gen = 100000
                self.convergence_episode_gen = 0

            return transition
        finally:
            self.generation_mode = False
            DEBUG.in_generation_mode = False
        

## Training

In [40]:
# Environment the agent acts in
env = TurnableGridEnvironment()

# Client of the remote FEM simulation service.
# NOTE(review): host, port and database address are hard-coded — consider moving
# them to configuration.
fem = SimHubClient('10.128.97.115', 44444, database_ip='10.125.9.39')
fem.set_experiment('./elmer_task/elmer_task.yml')

# Target distribution the reward function compares against; entry [2] is taken,
# the same index the reward function reads from the simulator output.
target_arr = np.load('target.npy')[2]
log_vis_sim(target_arr, 'target', vrange=(293.15, 353.15))

reward_func = FEMReward(torch.tensor(target_arr), hyperparameters)

# Policy network and target network are built with QNet's default sizes
agent = Agent(env, fem, reward_func, QNet(), QNet(target_network=True), hyperparameters)

/home/nwen/metamaterial-rl/elmer_task/elmer_script.py
/home/nwen/metamaterial-rl/elmer_task/data
Establishing working directory structure...
Working directory structure established
Copying script files...
Copying /home/nwen/metamaterial-rl/elmer_task/elmer_script.py
/home/nwen/metamaterial-rl/elmer_task/elmer_script.py copied
Copying data files...
Copying /home/nwen/metamaterial-rl/elmer_task/data
/home/nwen/metamaterial-rl/elmer_task/data copied
Entry script set to /scratch1/nwen/simhub/workspaces/scripts/elmer_script.py


In [41]:
# Run the training loop for the configured number of episodes.
agent.train(hyperparameters['max_episode'])


Episode: 0
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 0 step 388:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 1
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 2
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 3
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 3 step 692:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 4
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 5
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 6
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 7
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 8
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 9
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 10
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 11
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 12
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 13
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 14
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 15
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 16
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 17
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 18
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 19
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 20
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 21
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 22
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 22 step 296:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 23
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 23 step 78:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 24
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 25
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 26
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 27
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 28
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 29
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 30
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 31
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 32
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 33
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 34
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 35
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 36
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 36 step 206:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 37
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 37 step 46:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 38
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 38 step 166:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 39
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 39 step 580:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 40
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 40 step 86:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 41
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 41 step 332:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 42
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 42 step 350:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 43
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 43 step 186:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 44
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 44 step 298:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 45
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 45 step 128:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 46
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 46 step 130:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 47
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 47 step 666:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 48
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 48 step 164:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 49
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 49 step 168:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 50
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 51
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 51 step 404:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 52
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 53
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 54
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 55
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 56
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 57
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 57 step 62:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 58
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 58 step 208:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 59
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 59 step 218:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 60
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 60 step 120:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 61
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 62
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 62 step 182:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 63
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 63 step 18:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 64
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 64 step 560:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 65
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 65 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 66
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 67
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 68
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 69
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 69 step 664:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 70
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 70 step 356:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 71
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 71 step 20:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 72
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 72 step 162:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 73
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 73 step 46:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 74
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 74 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 75
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 75 step 56:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 76
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 76 step 62:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 77
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 77 step 28:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 78
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 78 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 79
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 79 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 80
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 80 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 81
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 81 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 82
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 82 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 83
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 83 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 84
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 84 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 85
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 85 step 16:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 86
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 86 step 56:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 87
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 87 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 88
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 88 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 89
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 89 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 90
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 90 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 91
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 91 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 92
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 92 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 93
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 93 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 94
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 94 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 95
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 95 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 96
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 96 step 118:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 97
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 97 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 98
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 98 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 99
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 99 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 100
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 100 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 101
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 101 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 102
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 102 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 103
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 103 step 242:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 104
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 104 step 24:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 105
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 106
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 107
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 108
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 108 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 109
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 109 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 110
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 110 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 111
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 111 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 112
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 112 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 113
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 113 step 18:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 114
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 114 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 115
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 115 step 14:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 116
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 116 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 117
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 117 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 118
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 118 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 119
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 119 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 120
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 120 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 121
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 121 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 122
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 122 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 123
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 123 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 124
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 124 step 42:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 125
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 125 step 40:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 126
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 126 step 46:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 127
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 127 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 128
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 128 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 129
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 129 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 130
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 130 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 131
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 131 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 132
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 132 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 133
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 133 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 134
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 134 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 135
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 135 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 136
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 136 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 137
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 137 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 138
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 138 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 139
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 139 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 140
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 140 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 141
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 141 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 142
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 142 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 143
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 143 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 144
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 144 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 145
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 145 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 146
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 146 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 147
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 147 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 148
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 148 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 149
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 149 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 150
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 150 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 151
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 151 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 152
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 152 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 153
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 153 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 154
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 154 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 155
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 155 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 156
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 156 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 157
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 157 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 158
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 158 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 159
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 159 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 160
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 160 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 161
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 161 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 162
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 162 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 163
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 163 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 164
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 164 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 165
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 165 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 166
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 166 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 167
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 167 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 168
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 168 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 169
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 169 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 170
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 170 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 171
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 171 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 172
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 172 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 173
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 173 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 174
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 174 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 175
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 175 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 176
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 176 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 177
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 177 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 178
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 178 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 179
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 179 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 180
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 180 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 181
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 181 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 182
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 182 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 183
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 183 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 184
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 184 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 185
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 185 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 186
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 186 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 187
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 187 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 188
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 188 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 189
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 189 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 190
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 190 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 191
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 191 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 192
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 192 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 193
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 193 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 194
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 194 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 195
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 195 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 196
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 196 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 197
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 197 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 198
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 198 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 199
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 199 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 200
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 200 step 14:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 201
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 201 step 12:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 202
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 202 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 203
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 203 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 204
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 204 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 205
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 205 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 206
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 206 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 207
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 207 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 208
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 208 step 170:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 209
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 209 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 210
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 210 step 62:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]


Episode: 211
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 211 step 20:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 212
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 212 step 70:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 213
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 213 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 214
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 214 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 215
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 215 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 216
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 216 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 217
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 217 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 218
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 218 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 219
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 219 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 220
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 220 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 221
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 221 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 222
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 222 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 223
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 223 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 224
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 224 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 225
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 225 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 226
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 226 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 227
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 227 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 228
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 228 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 229
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 229 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 230
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 230 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 231
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 231 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 232
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 232 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 233
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 233 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 234
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 234 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 235
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 235 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 236
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 236 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 237
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 237 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 238
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 238 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 239
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 239 step 14:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 240
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 240 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 241
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 241 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 242
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 242 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 243
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 243 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 244
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 244 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 245
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 245 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 246
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 246 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 247
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 247 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 248
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 248 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 249
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 249 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 250
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 250 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 251
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 251 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 252
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 252 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 253
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 253 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 254
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 254 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 255
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 255 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 256
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 256 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 257
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 257 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 258
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 258 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 259
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 259 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 260
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 260 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 261
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 261 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 262
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 262 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 263
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 263 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 264
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 264 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 265
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 265 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 266
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 266 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 267
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 267 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 268
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 268 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 269
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 269 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 270
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 270 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 271
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 271 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 272
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 272 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 273
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 273 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 274
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 274 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 275
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 275 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 276
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 276 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 277
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 277 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 278
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 278 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 279
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 279 step 12:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 280
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 280 step 10:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 281
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 281 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 282
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 282 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 283
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 283 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 284
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 284 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 285
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 285 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 286
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 286 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 287
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 287 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 288
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 288 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 289
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 289 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 290
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 290 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 291
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 291 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 292
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 292 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 293
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 293 step 8:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 294
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 294 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 295
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 295 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 296
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 296 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 297
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 297 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 298
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 298 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}

Episode: 299
Stepping...


  0%|          | 0/1000 [00:00<?, ?it/s]

Processing rewards...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state found in episode 299 step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}


In [42]:
# Log a one-line convergence summary to the run in the form
# "<episode>(<step>)/<episode_gen>(<step_gen>)".
# NOTE(review): the `_gen` attributes are presumably the counterparts recorded
# during the "Generating..." phase -- confirm against the agent class.
exp['convergence'] = (
    f'{agent.convergence_episode}({agent.convergence_step})'
    f'/{agent.convergence_episode_gen}({agent.convergence_step_gen})'
)

In [43]:
# Final evaluation: bump the episode counter, run one more generation pass and
# log its outcome (string dump, visualisation, tags) to the Neptune run.
# NOTE: `agent.episode += 1` makes this cell non-idempotent -- every re-run
# advances the counter again.
agent.episode += 1
generated_result = agent.generate()

exp['generated_result_final'] = str(generated_result)
print('Visualizing result...')
# Entry [2] of the simulated temperature distribution is the one visualised.
# NOTE(review): vrange looks like 20-80 degC expressed in kelvin -- confirm.
log_vis_sim(generated_result.state_sim['output']['temperature_distribution'][2], 'generated_state_vis_final',
            append=False, vrange=(293.15, 353.15))

exp['sys/tags'].add(['Done'])
if generated_result.state_error <= hyperparameters['terminal_error_threshold']:
    # Tag spelling corrected (was 'Sucessful'). Runs logged before this change
    # may still carry the misspelled tag, so filters should match both until
    # the old runs are retagged.
    exp['sys/tags'].add(['Successful'])

print('Done')

Generating...


  0%|          | 0/1000 [00:00<?, ?it/s]

Evaluating states...


  0%|          | 0/1000 [00:00<?, ?it/s]

Terminal state reached in step 6:
{'angle_matrix': array([[ 45., -45.],
       [-45.,  45.]])}
Visualizing result...
Done


In [44]:
# Close the simulation client now that the final generation pass is done.
# NOTE(review): presumably releases the connection to the SimHub server --
# confirm against SimHubClient.close.
fem.close()

In [45]:
# Stop the Neptune run explicitly: as the notice printed by init_run says, it
# is otherwise only stopped when the notebook kernel is terminated.
exp.stop()

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/pil-clemson/metamtl-rl/e/METAMTLRL-297


In [46]:
# Barrier for "Run All": raising here stops execution before the cells below,
# none of which have been executed in this saved notebook.
# NOTE(review): presumably intentional, to keep the one-off scratch cells
# (memory snapshot, target.npy generation) out of a normal run -- confirm.
raise NotImplementedError

NotImplementedError: 

In [None]:
# Capture the current memory-allocation snapshot for the inspection cell below.
# NOTE(review): take_snapshot() requires tracemalloc to be tracing already
# (tracemalloc.start()); that call is not visible in this part of the
# notebook -- confirm it happens earlier.
snapshot = tracemalloc.take_snapshot()

In [None]:
# Show the allocation statistics grouped by traceback. statistics() already
# returns a list, so it is displayed directly without an extra copy.
snapshot.statistics('traceback')

In [None]:
# Create a fresh SimHub client (rebinding `fem`) and point it at the Elmer
# task definition file.
# NOTE(review): the first two positional arguments are presumably the server
# host and port -- confirm against SimHubClient. The addresses are hard-coded,
# so this cell only works from a machine that can reach them.
fem = SimHubClient('10.128.97.115', 44444, database_ip='10.125.9.35')
fem.set_experiment('./elmer_task/elmer_task.yml')



In [None]:
# Build a state whose 2x2 angle matrix has the same values that the training
# output above reports as the terminal state, then display it.
state = TurnableGridState()
state['angle_matrix'] = np.array(
    [[45, -45], 
     [-45, 45]]
)
state

In [None]:
# Submit the state to the simulation client. The returned value is indexed as
# task[0] when the result is fetched further down.
task = fem.submit_task(state)

In [None]:
# Display the value returned by submit_task for inspection.
task

In [None]:
# NOTE(review): presumably blocks until the submitted task(s) have finished --
# confirm against SimHubClient.wait.
fem.wait()

In [None]:
# Fetch the result for task[0] and write its output temperature distribution
# to target.npy (the file the plotting cells below load).
# NOTE(review): presumably this file is the target the agent is trained
# against -- confirm where target.npy is consumed.
np.save('target.npy', fem.get_result(task[0])['output']['temperature_distribution'])

In [None]:
# Close the scratch simulation client once the target has been saved.
fem.close()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Render entry [2] of the saved target as a 20x20 image and write it to
# target.png. Explicit figure/axes handles replace the implicit pyplot state
# (plt.imshow + plt.gcf()), so the figure that is saved is always the one that
# was just drawn on.
target_slice = np.load('target.npy')[2]
fig, ax = plt.subplots()
# reshape(20, 20) requires exactly 400 values in the slice and raises otherwise.
ax.imshow(target_slice.reshape(20, 20))
fig.savefig('target.png')

In [None]:
# Show any open figures.
# NOTE(review): with the inline notebook backend a figure is typically already
# rendered at the end of the cell that created it, so this separate cell may
# display nothing -- confirm whether it is still needed.
plt.show()

In [None]:
# Count the distinct values in entry [0] of the saved target. np.unique
# returns a flat array, so its size is the number of unique values.
np.unique(np.load('target.npy')[0]).size