# How to build a custom environment with custom observations, rewards, actions, and weather generative model.

In this notebook, we show how to build a custom environment for maize management with custom observations, rewards, actions, and weather generative model.

### Observations
We first define a new observation space. A simple and useful observation for maize fertilization is the cumulative amount of fertilizer used so far in the season.
This observation is already available in `cyclesgym.envs.observers` as `NToDateObserver`.

In [None]:
from cyclesgym.envs.observers import NToDateObserver

A new observation to combine with the previous one is a trailing window of the fertilization events in the last month. Given that the standard corn environment has a step size of 1 week,
this trailing window can be a 4-dimensional vector.

In [None]:
from cyclesgym.envs.observers import Observer
from datetime import datetime
import numpy as np

class FertilizationTrailingWindowObserver(Observer):
    """Observer that reports the last ``Nobs`` values passed to ``compute_obs``.

    The window starts filled with zeros and slides by one entry on every
    call to ``compute_obs``.
    NOTE(review): ``N`` is presumably the fertilizer amount applied at the
    current step -- confirm against the environment that calls the observer.
    """

    def __init__(self,
                 end_year: int,
                 Nobs=4):
        """Store the window length, set unbounded limits and reset the window.

        ``end_year`` is forwarded unchanged to the base ``Observer``;
        ``Nobs`` is the number of entries kept in the window.
        """
        super().__init__(end_year)
        self.Nobs = Nobs
        # Every entry of the observation is unbounded on both sides.
        shape = (self.Nobs,)
        self.lower_bound = np.full(shape, -np.inf)
        self.upper_bound = np.full(shape, np.inf)
        self.reset()

    def compute_obs(self,
                    date: datetime.date,
                    N: int):
        """Push ``N`` into the window and return the window as an array.

        ``date`` is accepted but not used by this observer.
        """
        # Add the newest value, then keep only the most recent Nobs entries.
        extended = [*self.window, N]
        self.window = extended[-self.Nobs:]
        return np.array(self.window)

    def reset(self):
        """Refill the window with zeros and rebuild the observation names."""
        self.obs_names = [f'WINDOW_{i}' for i in range(self.Nobs)]
        self.window = [0] * self.Nobs

We can now compose the trailing window observation and the cumulative nitrogen observation as

In [None]:
from cyclesgym.envs.observers import compound_observer
def trailing_and_cumulative(end_year):
    """Return a compound observer made of an NToDateObserver and a
    FertilizationTrailingWindowObserver, both built for ``end_year``."""
    cumulative = NToDateObserver(end_year)
    trailing = FertilizationTrailingWindowObserver(end_year)
    return compound_observer([cumulative, trailing])

### Action space
We can now build a new action space that uses manure fertilization instead of the classic mineral nitrogen fertilization used in the standard environments of CyclesGym. Manure fertilization is characterized by both organic C and N. The rates are taken from a standard manure fertilization given in the Cycles documentation.

In [None]:
from cyclesgym.envs.implementers import Fertilizer
from cyclesgym.managers import OperationManager
from pathlib import Path

class ManureFertilizer(Fertilizer):
    """Fertilizer implementer that turns one manure mass into per-nutrient masses.

    The nutrients handled are 'C_Organic', 'N_Organic' and 'N_NH4'; ``rate``
    holds one coefficient in [0, 1] for each of them, in that order.
    """

    def __init__(self, operation_manager: OperationManager,
                 operation_fname: Path,
                 start_year: int,
                 rate: [float] = np.array([0.26, 0.0087, 0.0027])):
        """Initialise the base ``Fertilizer`` and store the nutrient rates.

        Args:
            operation_manager: forwarded to the base ``Fertilizer``.
            operation_fname: forwarded to the base ``Fertilizer``.
            start_year: forwarded to the base ``Fertilizer``.
            rate: three coefficients (C_Organic, N_Organic, N_NH4), each in
                [0, 1]. Any sequence of numbers or a numpy array is accepted.

        Raises:
            AssertionError: if any entry of ``rate`` lies outside [0, 1].
        """
        nutrients = ['C_Organic', 'N_Organic', 'N_NH4']
        super(ManureFertilizer, self).__init__(operation_manager,
                                               operation_fname,
                                               nutrients,
                                               start_year)
        # Copy into a float array: plain lists/tuples then support the
        # element-wise comparison below, and the instance never aliases the
        # default array that is shared by every call.
        rate = np.array(rate, dtype=float)
        assert np.all(np.logical_and(0 <= rate, rate <= 1)), f'Rate must be in [0, 1]. It is {rate} instead'
        self.rate = rate

    def convert_mass(self, mass):
        """Return a dict mapping each nutrient name to its mass for ``mass`` kg of manure."""
        # mass in Kg
        # NOTE(review): the mass is first scaled by sum(rate) and then again by
        # each individual rate, so every nutrient gets
        # mass * sum(rate) * rate[i]. Confirm this double scaling is intended.
        mass = mass * sum(self.rate)
        masses = {'C_Organic': mass * self.rate[0],
                  'N_Organic': mass * self.rate[1],
                  'N_NH4': mass * self.rate[2]}
        return masses

    def implement_action(self, date: datetime.date, mass: float):
        """Convert ``mass`` (kg) to per-nutrient masses and delegate to the base class.

        Returns whatever the base ``Fertilizer.implement_action`` returns.
        """
        # mass in Kg
        masses = self.convert_mass(mass)
        return super(ManureFertilizer, self).implement_action(date, masses)

### Rewards
We can now define a new cost function for the manure fertilization. Let us assume a rough cost estimate for manure of 10$ per ton.

In [None]:
from cyclesgym.envs.utils import date2ydoy
# Price of manure in dollars per kg: 1 * 0.001, i.e. 1 dollar per 1000 kg.
# NOTE(review): the accompanying text quotes 10$ per ton -- confirm which
# of the two values is the intended one.
manure_price_dollars_per_kg = 1 * 0.001


class ManureProfitabilityRewarder(object):
    """Rewarder that charges the cost of the manure applied in one step."""

    def compute_reward(self, date, delta, action=None):
        """Return the negative manure cost in dollars per hectare.

        Args:
            date: accepted but not used by this rewarder.
            delta: accepted but not used by this rewarder.
            action: manure mass in kg per hectare; must be non-negative.

        Raises:
            AssertionError: if ``action`` is negative.
        """
        # mass in Kg
        manure_kg_per_hectare = action
        assert manure_kg_per_hectare >= 0, 'We cannot have negative fertilization'
        dollars_per_hectare = manure_kg_per_hectare * manure_price_dollars_per_kg
        # A cost is a negative reward.
        return -dollars_per_hectare

The total reward is the compound of crop profitability with the cost of manure application.

In [None]:
from cyclesgym.envs.rewarders import compound_rewarder, CropRewarder
def total_reward(season_manager):
    """Return a compound rewarder made of a CropRewarder for 'CornRM.90'
    (built on ``season_manager``) and a ManureProfitabilityRewarder."""
    crop_part = CropRewarder(season_manager, 'CornRM.90')
    manure_part = ManureProfitabilityRewarder()
    return compound_rewarder([crop_part, manure_part])

### Weather generator
To use a custom weather generative model, we must subclass the `WeatherGenerator` abstract base class in `cyclesgym.envs.weather_generator`. In particular, we must implement the abstract method `generate_weather`, which should populate the temporary weather directory automatically created by the generator with `Cycles` compatible [weather files](https://psumodeling.github.io/Cycles/#weather-file-weather). Moreover, it should append to the `weather_list` attribute the names of all such files. For an example, see the `WeatherShuffler` or `FixedWeatherGenerator` classes in the same file as the `WeatherGenerator`.

When these generators are used in the `CyclesEnv`, they sample uniformly at random a file from the list that they generated. If you want to change the distribution over such files, you can do so by overriding the method `sample_weather_path` of your generator.


### Environment
We can finally define a new environment that combines all the elements defined so far.

In [None]:
from cyclesgym.envs.corn import Corn

class OrganicCorn(Corn):
    """Corn environment wired with the custom observer, rewarder and manure implementer."""

    def _init_observer(self, *args, **kwargs):
        """Attach the compound observer built for the simulation end year."""
        self.observer = trailing_and_cumulative(
            self.ctrl_base_manager.ctrl_dict['SIMULATION_END_YEAR'])

    def _init_rewarder(self, *args, **kwargs):
        """Attach the compound rewarder built on this environment's season manager."""
        season_manager = self.season_manager
        self.rewarder = total_reward(season_manager)

    def _init_implementer(self, *args, **kwargs):
        """Attach a ManureFertilizer working on this environment's operation file."""
        first_year = self.ctrl_base_manager.ctrl_dict['SIMULATION_START_YEAR']
        self.implementer = ManureFertilizer(
            operation_manager=self.op_manager,
            operation_fname=self.op_file,
            start_year=first_year,
        )

### Training
Now we can define a configuration dictionary that is logged using wandb.

In [None]:
import wandb
from cyclesgym.utils.paths import PROJECT_PATH

# Environment settings and PPO hyperparameters; this dictionary is passed
# to wandb.init below.
config = {
    'start_year': 1980,
    'end_year': 1980,
    'maxN': 50000,
    'delta': 7,
    'n_actions': 11,
    'total_timesteps': 100000,
    'n_steps': 80,
    'batch_size': 80,
    'n_epochs': 10,
    'verbose': 1,
    'device': 'cpu',
    'n_procs': 1,
}

wandb.init(config=config,
           sync_tensorboard=True,
           project='notebook_experiments',
           monitor_gym=True,
           save_code=True,
           dir=PROJECT_PATH)

# From here on, read the settings back through the wandb run configuration.
config = wandb.config

Now we use a subset of the configuration to instantiate the custom `OrganicCorn` environment defined above, with weather sampled by a `WeatherShuffler`, and wrap it in vectorized environments for training.

In [None]:
import gym
from stable_baselines3.common.vec_env import SubprocVecEnv, VecNormalize
from stable_baselines3.common.vec_env import VecMonitor
from cyclesgym.utils.paths import CYCLES_PATH
from cyclesgym.envs.weather_generator import WeatherShuffler

# Keep only the settings that are passed on to the environment constructor.
env_conf = {name: config[name]
            for name in ('start_year', 'end_year', 'maxN', 'delta', 'n_actions')}

# Weather generator class and its keyword arguments, handed to the
# environment through env_conf.
weather_generator_class = WeatherShuffler
weather_generator_kwargs = {
    'n_weather_samples': 100,
    'sampling_start_year': 1980,
    'sampling_end_year': 2016,
    'base_weather_file': CYCLES_PATH.joinpath('input', 'RockSprings.weather'),
    'target_year_range': np.arange(1980, 2016 + 1),
}

env_conf['weather_generator_class'] = weather_generator_class
env_conf['weather_generator_kwargs'] = weather_generator_kwargs

def make_env():
    """Return a zero-argument factory that builds one wrapped OrganicCorn environment."""
    def _build():
        # Build the environment from env_conf and record per-episode statistics.
        return gym.wrappers.RecordEpisodeStatistics(OrganicCorn(**env_conf))
    return _build

# One subprocess environment per configured process, wrapped first in a
# monitor and then in observation/reward normalization.
env = VecNormalize(
    VecMonitor(
        SubprocVecEnv([make_env() for _ in range(config['n_procs'])],
                      start_method='fork')
    ),
    norm_obs=True,
    norm_reward=True,
    clip_obs=5000.,
    clip_reward=5000.,
)

We can now define the learning agent. Here we use the PPO model from the stable-baselines3 library for simplicity.

In [None]:
from stable_baselines3 import PPO

# PPO agent with an MLP policy; hyperparameters come from the configuration
# and tensorboard logs are written into the wandb run directory.
model = PPO(
    'MlpPolicy',
    env,
    n_steps=config['n_steps'],
    batch_size=config['batch_size'],
    n_epochs=config['n_epochs'],
    verbose=config['verbose'],
    tensorboard_log=wandb.run.dir,
    device=config['device'],
)

Now we train the model for the total number of steps specified in the configuration.

In [None]:
from wandb.integration.sb3 import WandbCallback
# Train for the number of environment steps given in the configuration,
# reporting to wandb through the callback.
model.learn(total_timesteps=config["total_timesteps"], callback=[WandbCallback()])
# Close the wandb run. The run belongs to wandb, not to the PPO model,
# which does not provide a `finish` method.
wandb.finish()