In [None]:
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from pathlib import Path
from cyclesgym.dummy_policies import OpenLoopPolicy
from cyclesgym.env import CornEnv
import matplotlib.pyplot as plt
from stable_baselines3.common.results_plotter import load_results, ts2xy
import pandas as pd

%matplotlib inline

### Models and baselines

In [None]:
# Shared layout of the open-loop action sequences.
# NOTE(review): 52 is presumably one decision per week of a year and 7 the
# number of discrete fertilization levels (0..6) -- confirm against CornEnv.
N_STEPS = 52
N_ACTIONS = 7
MAX_ACTION = N_ACTIONS - 1
# Step at which the expert baseline applies its single maximum-level dose.
EXPERT_STEP = 15
# Fixed seed so the random baseline is identical on every Restart & Run All.
SEED = 0

# Open Loop baseline: nothing everywhere except MAX_ACTION at EXPERT_STEP
action_sequence = [0] * N_STEPS
action_sequence[EXPERT_STEP] = MAX_ACTION
baseline_ol = OpenLoopPolicy(action_sequence)

# Max fertilization
baseline_max = OpenLoopPolicy([MAX_ACTION] * N_STEPS)

# No fertilization
baseline_min = OpenLoopPolicy([0] * N_STEPS)

# Random fertilization, drawn from a dedicated seeded generator so the
# sequence is reproducible and the global NumPy random state is left alone.
rng = np.random.default_rng(SEED)
baseline_rand = OpenLoopPolicy(list(rng.choice(N_ACTIONS, N_STEPS)))

# Untrained fertilization
env = CornEnv('ContinuousCorn.ctrl')
agents_dir = Path.cwd().parent.joinpath('agents')
untrained = DQN.load(agents_dir.joinpath('corn_untrained'), env=env)

# Trained fertilization
trained = DQN.load(agents_dir.joinpath('corn_dqn_trained'), env=env)

# `policies` and `names` are parallel lists: names[i] labels policies[i].
policies = [baseline_rand, untrained, baseline_max, baseline_min, baseline_ol, trained]
names = ['Random', 'Untrained', 'Max fertilization', 'No fertilization', 'Expert open loop', 'DQN']
assert len(policies) == len(names), 'every policy needs exactly one display name'

In [None]:
def smooth(y, box_pts):
    """Smooth a 1-D series with a moving average (box filter).

    Each output sample is the sum of the input samples covered by a window
    of ``box_pts`` points, divided by ``box_pts``. Samples outside the
    series count as zero, so values near both ends are pulled toward zero.

    Parameters
    ----------
    y : array_like
        One-dimensional sequence of values to smooth.
    box_pts : int
        Window length in samples; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Smoothed series with exactly ``len(y)`` samples, so it can always be
        plotted against the original x-axis.

    Raises
    ------
    ValueError
        If ``box_pts`` is smaller than 1.
    """
    if box_pts < 1:
        raise ValueError(f'box_pts must be a positive integer, got {box_pts}')

    values = np.asarray(y)
    if values.size == 0:
        # np.convolve rejects empty input; an empty series smooths to empty.
        return values.astype(float)

    box = np.ones(box_pts) / box_pts
    # mode='same' would return max(len(y), box_pts) samples, i.e. more points
    # than the input whenever the window is longer than the series. Slicing
    # the full convolution selects the same samples 'same' yields in the
    # usual case while always keeping the input length.
    full = np.convolve(values, box, mode='full')
    start = (box_pts - 1) // 2
    return full[start:start + values.size]


# Read the monitor logs found in agents_dir once; the same frame feeds both
# the printed preview and the learning curve.
df = load_results(agents_dir)
x, y = ts2xy(df, 'episodes')

# Preview the first rows only instead of dumping the whole log.
print(df.head())

# Learning curve: per-episode reward smoothed over a 5-episode window.
plt.plot(x, smooth(y, 5))
plt.xlabel('Episode')
plt.ylabel('Episode reward (5-episode moving average)');

# NOTE(review): a trailing `df.groupby(np.ndarray([1, 2]))` used to end this
# cell. It built an uninitialized (1, 2) array through the low-level ndarray
# constructor and its result was never used, so it was dropped as a leftover.


In [None]:
# Score every policy on the environment attached to the trained agent.
# Only the mean reward of the single evaluation episode is kept; the
# standard deviation returned alongside it is discarded.
eval_env = trained.get_env()
rewards = np.array(
    [evaluate_policy(policy, eval_env, n_eval_episodes=1)[0] for policy in policies],
    dtype=float,
)

# One line per policy, in the same order as `names`.
for name, reward in zip(names, rewards):
    print(f'Reward of {name} policy:\t{reward}')

In [None]:
# Bar chart of the evaluated rewards: one bar per policy, labelled by name.
ax = plt.gca()
positions = np.arange(len(rewards))
ax.bar(positions, rewards)
ax.set_xticks(positions)
ax.set_xticklabels(names)
# Tilt the policy names so the longer labels do not overlap.
plt.xticks(rotation=45)