In [1]:
import numpy as np

In [2]:
# Load the simulation results. The returned object is queried through `.files`
# and by key below, i.e. it is used as a multi-array archive.
# NOTE(review): the ".npy" extension suggests a single array, but the usage
# here matches an .npz archive - confirm the file format.
results_path = "results.npy"
data = np.load(results_path)
labels = data.files
labels

['timestamps', 'states', 'controls']

In [3]:
# `timestamps` is taken as stored; for `states` and `controls` only the first
# entry along the leading axis is kept.
timestamps = data['timestamps']
states, controls = (data[key][0] for key in ('states', 'controls'))

# Show the three shapes on one line to confirm the sample axes line up.
print(*(arr.shape for arr in (timestamps, states, controls)))

(1, 100352) (16, 100352) (12, 100352)


In [4]:
# Put the timestamp rows on top of the state rows, keep the first four rows of
# the stack, and transpose so that each row of `data_table` is one sample.
stacked = np.vstack((timestamps, states))
data_table = stacked[:4].T
data_table[0]

array([ 0.03333333,  0.011109  ,  0.        , -3.        ])

In [5]:
def reward(row, threshold=0.9, bonus_scale=1000):
    """Compute the reward for a single time step.

    Args:
        row: Mapping-like object (e.g. a DataFrame row or a dict) that
            provides a numeric value under the key "concentration".
        threshold: Concentration that must be strictly exceeded to earn the
            positive reward. Defaults to 0.9.
        bonus_scale: Multiplier applied to the concentration when the
            threshold is exceeded. Defaults to 1000.

    Returns:
        ``concentration * bonus_scale`` when ``concentration > threshold``;
        otherwise the negative shortfall ``-(1 - concentration)``.
    """
    concentration = row["concentration"]
    if concentration > threshold:
        return concentration * bonus_scale
    # Same value as -(1 - concentration): the penalty grows as the
    # concentration falls further below 1.
    return concentration - 1

In [6]:
import pandas as pd

# Labels assigned to the four columns of `data_table`, in column order.
cols = [
    "timestamp",
    "concentration",
    "x_offset",
    "y_offset",
]
df = pd.DataFrame(data=data_table, columns=cols)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [7]:
# Score every time step: `reward` is called once per row (axis=1) and the
# result is stored as a new "reward" column next to the raw measurements.
per_row_reward = df.apply(reward, axis=1)
df["reward"] = per_row_reward

In [8]:
# Rows whose timestamp is below 0.04 are treated as the first step of an
# episode; collect their index labels.
is_first_step = df["timestamp"] < 0.04
episode_starts = df.loc[is_first_step].index
episode_starts

Index([     0,      1,      2,   2503,   5004,   5288,   5289,   5290,   5291,
         7792,
       ...
        98649,  98650,  99179,  99299,  99410,  99910,  99911,  99912, 100286,
       100287],
      dtype='int64', length=519)

In [9]:
# Cut `df` into one slice per episode, using consecutive start indices as
# boundaries. A segment whose two boundaries are adjacent (exactly one row)
# is dropped.
episodes = [
    df[start:stop]
    for start, stop in zip(episode_starts[:-1], episode_starts[1:])
    if stop - start != 1
]
# The last start has no following boundary: it runs to the end of the frame
# and is always kept.
episodes.append(df[episode_starts[-1]:])
print(episodes[77])

       timestamp  concentration  x_offset  y_offset    reward
39078   0.033333       0.606531      -1.0       0.0 -0.393469
39079   0.066667       0.606531      -1.0       0.0 -0.393469
39080   0.100000       0.606531      -1.0       0.0 -0.393469
39081   0.133333       0.606531      -1.0       0.0 -0.393469
39082   0.166667       0.606531      -1.0       0.0 -0.393469
...          ...            ...       ...       ...       ...
39597  17.333333       0.022291       1.0      -2.0 -0.977709
39598  17.366667       0.021543       1.0      -2.0 -0.978457
39599  17.400000       0.020827       1.0      -2.0 -0.979173
39600  17.433333       0.020142       1.0      -2.0 -0.979858
39601  17.466667       0.019488       1.0      -2.0 -0.980512

[524 rows x 5 columns]


In [10]:
def episodic_mean_reward(episode_num):
    """Return the mean of the 'reward' column for one episode.

    Args:
        episode_num: Position of the episode in the module-level
            `episodes` list.

    Returns:
        The arithmetic mean of that episode's 'reward' values.
    """
    episode = episodes[episode_num]
    return episode['reward'].mean()

In [14]:
# Spot-check: mean reward of the episode at position 1 in `episodes`.
print(episodic_mean_reward(1))

-0.7081739457745085


In [19]:
def simulation_average_reward():
    """Return the average of the per-episode mean rewards.

    Every entry of the module-level `episodes` list contributes its own mean
    reward once, so the result is an unweighted mean of episode means rather
    than a mean over all individual time steps.

    Returns:
        The mean of `episodic_mean_reward(i)` over all episode positions.
    """
    per_episode_means = [
        episodic_mean_reward(position) for position, _ in enumerate(episodes)
    ]
    return np.mean(per_episode_means)
	

In [20]:
# Unweighted mean of the per-episode means: each episode counts once,
# regardless of how many time steps it contains.
print(simulation_average_reward())

20.619429098710334


In [21]:
# Number of entries in `episodes` (single-row segments were dropped when the
# list was built).
print(len(episodes))

264


In [32]:
import os

import matplotlib.pyplot as plt

# savefig does not create missing directories, so make sure the output
# folder exists before the first figure is written.
os.makedirs('images', exist_ok=True)

# A single figure/axes pair is reused for every episode: draw, save, then
# clear the axes so the next episode starts from a blank plot.
fig, ax = plt.subplots()
for episode_number, episode in enumerate(episodes):
    ax.scatter(episode['timestamp'], episode['reward'], s=2.5)
    # Label the plot so each saved image can be read on its own.
    ax.set(xlabel='timestamp', ylabel='reward', title=f'Episode {episode_number}')
    fig.savefig(f'images/episode_{episode_number}_plot.png')
    ax.cla()

# Close the figure so the notebook does not render a leftover empty figure.
plt.close(fig)

<Figure size 640x480 with 0 Axes>