<a href="https://colab.research.google.com/github/rosie0520486/superstore/blob/main/tomato.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import numpy as np
import gym
from gym import spaces

class TomatoGreenhouseEnv(gym.Env):
    def __init__(self):
        super().__init__()
        # state variables [Internal Temperature, external, CO2, maturity, production cost, inventory]
        self.observation_space = spaces.Box(
            low=np.array([0, -10, 400, 0, 0, 0]),
            high=np.array([40, 40, 2000, 100, np.inf, np.inf]),
            dtype=np.float32
        )
        # defining agent actions [(0=weak,1=,2=medium,3=strong), Ventilation(0/1), harvest(0/1)]
        self.action_space = spaces.MultiDiscrete([4,2,2])

    def reset(self):
        self.state = np.array([20, 10, 400, 0, 0, 0])  #initialization
        return self.state

    def step(self, action):
        temp, ext_temp, co2, maturity, cost, stock = self.state
        heating_level, ventilation, harvest = action

        # gas heating
        gas_consumption = [0, 1, 2, 3][heating_level]
        cost += gas_consumption * 10  # 비용 계산

        # CO2 control
        temp += 0.5*(ext_temp - temp) + gas_consumption*1.0
        co2 += gas_consumption*50 - ventilation*100

        # maturity
        maturity += max(0, (temp - 15)*0.5)

        reward = 0
        # harvest decision
        if harvest and maturity >= 90:
            harvested_amount = maturity * 0.5  # productivity calculation
            stock += harvested_amount
            reward += harvested_amount * 200  # revenue of sales
            maturity = 0  #initialization

        # reward から　費用マイナス
        reward -= cost

        # state update

        self.state = np.array([temp, ext_temp, co2, maturity, cost, stock])

        done = False
        return self.state, reward, done, {}

!pip install stable-baselines3
!pip install gym
!pip install 'shimmy>=2.0'

import gym
import numpy as np
from stable_baselines3 import PPO

# 이미 정의한 환경 클래스 불러오기
class TomatoGreenhouseEnv(gym.Env):
    def __init__(self):
        super().__init__()
        self.observation_space = gym.spaces.Box(
            low=np.array([0, -10, 400, 0, 0, 0]),
            high=np.array([40, 40, 2000, 100, np.inf, np.inf]),
            dtype=np.float32
        )
        self.action_space = gym.spaces.MultiDiscrete([4,2,2])

    def reset(self):
        self.state = np.array([20, 10, 400, 0, 0, 0])
        return self.state

    def step(self, action):
        temp, ext_temp, co2, maturity, cost, stock = self.state
        heating_level, ventilation, harvest = action
        gas_consumption = [0, 1, 2, 3][heating_level]
        cost += gas_consumption * 10
        temp += 0.5*(ext_temp - temp) + gas_consumption*1.0
        co2 += gas_consumption*50 - ventilation*100
        maturity += max(0, (temp - 15)*0.5)
        reward = 0
        if harvest and maturity >= 90:
            harvested_amount = maturity * 0.5
            stock += harvested_amount
            reward += harvested_amount * 200
            maturity = 0
        reward -= cost
        self.state = np.array([temp, ext_temp, co2, maturity, cost, stock])
        done = False
        return self.state, reward, done, {}

# 환경 생성
env = TomatoGreenhouseEnv()

# PPO 에이전트 모델 생성
model = PPO("MlpPolicy", env, verbose=1)

# 학습 수행 (100,000회 반복)
model.learn(total_timesteps=100000)

# 학습 모델 저장 (나중에 사용가능)
model.save("tomato_greenhouse_agent")

# 저장된 모델 로드
model = PPO.load("tomato_greenhouse_agent")

# 환경 초기화
obs = env.reset()
total_reward = 0

# 시뮬레이션 수행 (200단계 동안)
for step in range(200):
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    print(f"Step:{step+1}, Action:{action}, State:{obs}, Reward:{reward}")

print(f"Total Reward: {total_reward}")

Collecting shimmy>=2.0
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Downloading Shimmy-2.0.0-py3-none-any.whl (30 kB)
Installing collected packages: shimmy
Successfully installed shimmy-2.0.0
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




-----------------------------
| time/              |      |
|    fps             | 581  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 507           |
|    iterations           | 2             |
|    time_elapsed         | 8             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 3.4610275e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -2.77         |
|    explained_variance   | -1.67e-06     |
|    learning_rate        | 0.0003        |
|    loss                 | 3.86e+10      |
|    n_updates            | 10            |
|    policy_gradient_loss | -1.05e-05     |
|    value_loss           | 8.91e+10      |
------------------------------------------