# Training an RL agent with a standard environment
In this notebook, we show how to train an RL agent using the stable-baselines3 library on an environment provided by CyclesGym.

In [None]:
from cyclesgym.envs import Corn
from cyclesgym.envs.crop_planning import CropPlanningFixedPlantingRandomWeather
import numpy as np
from cyclesgym.paths import PROJECT_PATH
import wandb
from wandb.integration.sb3 import WandbCallback
import gym
from stable_baselines3 import PPO

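First, we define the experiment configuration (simulation years, PPO hyperparameters, number of weather samples, and rotation crops) and initialize a Weights & Biases run to track the experiment. The environment itself is created in the following cell.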

In [None]:
# Experiment settings. The environment cell reads the simulation years,
# rotation crops and weather-sample count; the PPO cell reads the training
# hyperparameters, verbosity and device.
config = {
    'start_year': 1980,
    'end_year': 1990,
    'total_timesteps': 1000,
    'n_steps': 80,
    'batch_size': 80,
    'n_epochs': 10,
    'run_id': 0,
    'norm_reward': True,
    'verbose': 1,
    'n_process': 8,
    'device': 'cpu',
    'n_weather_samples': 50,
    'rotation_crops': ['CornRM.100', 'SoybeanMG.3'],
}

# Start a Weights & Biases run that receives the settings above as its config
# and stores its files under the project directory.
wandb.init(
    project='notebook_experiments',
    dir=PROJECT_PATH,
    config=config,
    sync_tensorboard=True,
    monitor_gym=True,
    save_code=True,
)

# From here on, `config` refers to the run's config object rather than the
# plain dict defined above; later cells index it by key.
config = wandb.config

In [None]:
# Forward only the settings that are passed to the environment constructor.
env_conf = {key: config[key] for key in ['start_year', 'end_year', 'rotation_crops', 'n_weather_samples']}

env = CropPlanningFixedPlantingRandomWeather(**env_conf)
# Wrap the environment so that per-episode statistics are recorded.
env = gym.wrappers.RecordEpisodeStatistics(env)

# `tensorboard_log` must be a directory path. The TensorBoard event files are
# written into the active W&B run directory, so `wandb.init(...)` must have
# been executed in an earlier cell before this one runs.
model = PPO('MlpPolicy', env, n_steps=config['n_steps'], batch_size=config['batch_size'],
            n_epochs=config['n_epochs'], verbose=config['verbose'],
            tensorboard_log=wandb.run.dir,
            device=config['device'])

# Train the agent; the W&B callback is attached for the duration of training.
model.learn(total_timesteps=config["total_timesteps"], callback=[WandbCallback()])