In [111]:
import os
import yaml
import pandas as pd
import numpy as np
import tensorboard as tb

import gym 
import gym_crop
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy


In [2]:
## Notes
### follow this process for tracking info logs of environment

class greedy_agent:
    def __init__(self, env):
        self.env = env
        
    def predict(self, episodes):
        
        ep_rew_mean=[]
        ep_growth_mean=[]
        ep_info=pd.DataFrame()
        
        for i in range(episodes):
            state = env.reset()
            done=False
            score=0
            growth=0
            
            while not done:
                value=[]
                for i in range(1,8):
                    _, reward, _, _ = env.step(i)
                    value.append(reward)
                
                #print(value)
                #print(np.argmax(value))
                
                best_action = np.argmax(value) + 1
                new_state, reward, done, info = env.step(best_action)
                score+=reward
            
            ep_rew_mean.append(score/30)
            
            info = pd.DataFrame(info)
            ep_info = pd.concat([info, ep_info])
            display(info['growth'])
            ep_growth_mean.append(info['growth'].sum())
        
        return ep_rew_mean, ep_growth_mean, ep_info

In [3]:
log_path = os.path.join('Training', 'Logs')
model_path = os.path.join('Training', 'Models')
env_path = os.path.join('Training', 'Env')

def save_envInfo(path, name):
    info.to_csv(os.path.join(path,name))
    
def save_modelInfo(path, model):
    # this is how it would save for stable baseline, look into how to do it for any pytorch model
    torch.save(mode.state_dict(), path)
    
def load_model(path, model):
    # model = theModelCLass()
    model.load_state_dict(torch.load(path))

In [4]:
env = gym.make('fertilization-v0')

In [5]:
# this will be used for replacing training years with testing years
data_dir = '/home/mike97vogt/Desktop/Github/crop-gym/gym-crop/gym_crop/envs/env_data/'
with open(os.path.join(data_dir, 'agro', 'agromanagement_fertilization.yaml')) as file:
    agromanagement = yaml.load(file, Loader=yaml.SafeLoader)

In [108]:
class random_agent:
    def __init__(self, env):
        self.env = env
        
    def predict(self, episodes):
        
        ep_rew_mean=[]
        ep_growth_mean=[]
        ep_info=pd.DataFrame()
        
        for i in range(episodes):
            state = env.reset()
            done=False
            score=0
            
            while not done:
                action=env.action_space.sample()
                new_state, reward, done, info = env.step(action)
                score+=reward
                
            ep_rew_mean.append(score/30)
            info = pd.DataFrame(info)
            ep_info = pd.concat([info, ep_info])
        return ep_rew_mean/episodes, info
                
class standard_practice_agent:
    def __init__(self, env):
        self.env = env
     
    def predict(self, episodes):
        
        ep_rew_mean=[]
        ep_growth_mean=[]
        ep_info=pd.DataFrame()
        
        for i in range(episodes):
            state = env.reset()
            done=False
            score=0
            count=0
            
            while not done:
                if(count == 30%10):
                    action = 3
                    new_state, reward, done, info = env.step(action)
                    score+=reward
                else:
                    continue
                    
            ep_rew_mean.append(score/30)
            info = pd.DataFrame(info)
            ep_info = pd.concat([info, ep_info])
        return ep_rew_mean/episodes, info
        
class greedy_agent:
    def __init__(self, env):
        self.env = env
        
    def predict(self, episodes):
        
        ep_rew_mean=[]
        ep_growth_mean=[]
        ep_info=pd.DataFrame()
        
        for i in range(episodes):
            state = env.reset()
            done=False
            score=0
            growth=0
            
            while not done:
                value=[]
                for i in range(1,8):
                    _, reward, _, _ = env.step(i)
                    value.append(reward)
                
                #print(value)
                #print(np.argmax(value))
                
                best_action = np.argmax(value) + 1
                new_state, reward, done, info = env.step(best_action)
                score+=reward
            
            ep_rew_mean.append(score/30)
            
            info = pd.DataFrame(info)
            ep_info = pd.concat([info, ep_info])
            display(info['growth'])
            ep_growth_mean.append(info['growth'].sum())
        
        return ep_rew_mean, ep_growth_mean, ep_info

In [109]:
n_steps = 2

In [110]:
greedy = greedy_agent(env)
rew, g, info = greedy.predict(n_steps)
save_envInfo(env_path, 'greedy_2.csv')

1996-01-01    NaN
1996-01-02    NaN
1996-01-03    NaN
1996-01-04    NaN
1996-01-05    NaN
             ... 
1996-07-28    NaN
1996-07-29    0.0
1996-07-30    NaN
1996-07-31    NaN
1996-08-01    0.0
Name: growth, Length: 214, dtype: float64

1998-01-01          NaN
1998-01-02          NaN
1998-01-03          NaN
1998-01-04          NaN
1998-01-05          NaN
                ...    
1998-07-28          NaN
1998-07-29          NaN
1998-07-30    59.622483
1998-07-31          NaN
1998-08-01    52.752245
Name: growth, Length: 213, dtype: float64

In [107]:
display(info)

Unnamed: 0,DVS,TGROWTH,LAI,NUPTT,TRAN,TIRRIG,TNSOIL,TRAIN,TRANRF,TRUNOF,...,WC,WLVD,WLVG,WRT,WSO,WST,growth,baseline_growth,fertilizer,reward
1986-01-01,0.000000,0.000000,0.052800,0.000000,0.000000,0.0,0.000000,0.00,0.00,0.0,...,0.000000,0.000000,2.400000,3.600000,0.000000,0.000000,,,,
1986-01-02,0.000000,0.000000,0.052800,0.000000,0.000762,0.0,0.000000,0.53,0.53,0.0,...,0.205300,0.000000,2.400000,3.600000,0.000000,0.000000,,,,
1986-01-03,0.000741,0.040647,0.052800,0.000000,0.003435,0.0,0.100000,8.37,1.00,0.0,...,0.269368,0.000000,2.410518,3.623055,0.000000,0.007074,,,,
1986-01-04,0.000741,0.086663,0.052800,0.013019,0.004911,0.0,0.186981,14.35,1.00,0.0,...,0.306015,0.000000,2.422452,3.649110,0.000000,0.015101,,,,
1986-01-05,0.000741,0.188068,0.052800,0.014941,0.008285,0.0,0.285059,15.10,1.00,0.0,...,0.297727,0.000000,2.448751,3.706528,0.000000,0.032789,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1986-07-28,1.631571,2047.317317,1.574973,47.333989,1.965667,0.0,116.166011,460.56,1.00,0.0,...,0.245019,200.713922,74.914468,85.502330,560.458120,996.023117,,,,
1986-07-29,1.655938,2062.318620,1.515555,47.520759,1.667254,0.0,116.079241,460.74,1.00,0.0,...,0.242956,203.540195,72.088195,83.052141,575.344541,996.023117,,,,
1986-07-30,1.677956,2074.158589,1.462399,47.690091,1.596343,0.0,116.009909,461.09,1.00,0.0,...,0.241226,206.068568,69.559822,80.645711,587.099377,996.023117,79.373944,41.024531,,-61.650586
1986-07-31,1.697797,2088.381647,1.415134,47.887192,1.939922,0.0,115.912808,461.17,1.00,0.0,...,0.239607,208.316742,67.311648,78.322598,601.226176,996.023117,,,,


In [79]:
rand = random_agent(env)
rew, info = rand.predict(n_steps)
save_envInfo(env_path, 'rando_20.csv')

In [80]:
stand_prac = standard_practice_agent(env)
rew, info = stand_prac.predict(n_steps)
save_envInfo(env_path, 'SPA_20.csv')

In [11]:
for year in test_years:
    # their whole process to replace a year,
    # model.predict(env, verbose=1, tensorboard_log=log)

SyntaxError: unexpected EOF while parsing (<ipython-input-11-210434bccdd5>, line 3)