# In [8]:
import os
import random
import numpy as np
import json
from itertools import count
import gymnasium as gym

# Java integration
import jnius_config
# Set the classpath only while the JVM is not yet running.
# NOTE(review): if a JVM was already started (e.g. by an earlier import of
# jnius in the same session), this classpath is never applied -- confirm that
# is acceptable, since the problem classes are loaded from it.
if not jnius_config.vm_running:
    jnius_config.set_classpath('/home/chaofan/Documents/pyhyflex/hhrl/hyflex/*')
from jnius import autoclass

# Configuration paths: directory holding one `<problem>.json` descriptor per
# problem domain (read by HyFlexDomain.__init__).
problemjson_path = '/home/chaofan/Documents/pyhyflex/hhrl/hyflex/problems_json/'

# 定义 Solution 类
class Solution:
    """A candidate solution together with its identifier and objective value.

    Instances are plain records: ``id`` is a caller-supplied identifier,
    ``solution`` is any sized, iterable encoding of the solution, and
    ``fitness`` is the objective value associated with it.
    """

    def __init__(self, id, solution, fitness):
        """Store the identifier, the solution encoding and its fitness."""
        self.id = id
        self.solution = solution
        self.fitness = fitness

    def __len__(self):
        """Return the number of elements in the solution encoding."""
        return len(self.solution)

    def distance(self, other):
        """Return the fraction of positions at which two solutions differ.

        Elements are compared position by position; every position that
        exists in only one of the two solutions counts as a difference. The
        result is normalised by the length of the longer solution, so it
        lies in ``[0, 1]``.

        Args:
            other: Another ``Solution`` (or any object exposing ``solution``
                and ``__len__`` the same way).

        Returns:
            The mean number of differing positions; ``0.0`` when both
            solutions are empty.
        """
        diff = [1 if a != b else 0 for a, b in zip(self.solution, other.solution)]
        # Positions present in only one solution always count as different.
        diff.extend([1] * abs(len(self) - len(other)))
        if not diff:
            # Two empty solutions are identical; np.mean([]) would yield nan
            # and emit a RuntimeWarning.
            return np.float64(0.0)
        return np.mean(diff)

# 定义 RawImprovementPenalty 类
class RawImprovementPenalty:
    """Credit assignment based on the relative change in fitness.

    The reward is positive when the new fitness is lower than the previous
    one and negative when it is higher.
    """

    def get_reward(self, action, new_fitness, past_fitness):
        """Return the relative improvement of ``new_fitness`` over ``past_fitness``.

        Args:
            action: The action that produced ``new_fitness``; not used by
                this credit-assignment scheme.
            new_fitness: Fitness obtained after applying the action.
            past_fitness: Fitness before applying the action.

        Returns:
            ``(past_fitness - new_fitness) / past_fitness``. When
            ``past_fitness`` is zero the relative change is undefined, so the
            absolute change ``past_fitness - new_fitness`` is returned
            instead of raising ``ZeroDivisionError``.
        """
        improvement = past_fitness - new_fitness
        if past_fitness == 0:
            # Avoid dividing by zero; fall back to the un-normalised change.
            return improvement
        return improvement / past_fitness

# 定义 RandomAgent 类
class RandomAgent:
    """Agent that chooses among a fixed collection of actions at random."""

    def __init__(self, actions):
        """Remember the sequence of actions that ``select`` draws from."""
        self.actions = actions

    def select(self):
        """Return one randomly chosen element of ``self.actions``."""
        candidates = self.actions
        chosen = random.choice(candidates)
        return chosen

# 定义 HyFlexDomain 类
class HyFlexDomain:
    """Thin Python wrapper around a HyFlex problem-domain Java object.

    The Java class to instantiate and the list of available actions are read
    from ``<problem_str>.json`` in ``problemjson_path`` (keys ``'class'`` and
    ``'actions'``). Solution-memory indices are converted with ``int`` before
    every Java call so that integer-like values (e.g. NumPy integers) are
    passed consistently, as ``apply_heuristic`` already did.
    """

    # Class-level counter shared by all domain instances; supplies the ids
    # handed to Solution objects created in get_solution.
    solution_indexer = count(1)

    def __init__(self, problem_str, instance_id, seed):
        """Load the problem descriptor, build the Java problem and load an instance.

        Args:
            problem_str: Base name of the JSON descriptor file (without
                ``.json``).
            instance_id: Instance identifier passed to ``loadInstance``.
            seed: Seed passed to the Java problem class constructor.
        """
        descriptor_path = os.path.join(problemjson_path, f'{problem_str}.json')
        with open(descriptor_path, 'r', encoding='utf-8') as json_file:
            self.problem_dict = json.load(json_file)
        ProblemClass = autoclass(self.problem_dict['class'])
        self.problem = ProblemClass(seed)
        self.problem.loadInstance(instance_id)
        self.actions = self.problem_dict['actions']

    def initialise_solution(self, idx=0):
        """Initialise the solution stored at memory index ``idx``."""
        self.problem.initialiseSolution(int(idx))

    def get_fitness(self, idx=0):
        """Return the objective function value of the solution at ``idx``."""
        return self.problem.getFunctionValue(int(idx))

    def apply_heuristic(self, llh, src_idx=0, dest_idx=1):
        """Apply low-level heuristic ``llh`` to ``src_idx``, writing to ``dest_idx``.

        Returns:
            Whatever the Java ``applyHeuristic`` call returns (callers in
            this file treat it as the fitness of the resulting solution).
        """
        return self.problem.applyHeuristic(int(llh), int(src_idx), int(dest_idx))

    def accept_solution(self, src_idx=1, dest_idx=0):
        """Copy the solution at ``src_idx`` over the one at ``dest_idx``."""
        self.problem.copySolution(int(src_idx), int(dest_idx))

    def get_solution(self, idx=0):
        """Return a ``Solution`` holding the string form and fitness of ``idx``.

        Each call consumes a fresh id from the shared ``solution_indexer``.
        """
        solution_str = self.problem.solutionToString(int(idx))
        solution_id = next(self.solution_indexer)
        return Solution(solution_id, solution_str, self.get_fitness(idx))

# 定义 BinPacking 类
class BinPacking(HyFlexDomain):
    """HyFlex domain for the 'BinPacking' problem descriptor.

    Overrides ``get_solution`` so that the solution is returned as a sorted
    list of sorted bins instead of the raw string.
    """

    def __init__(self, instance_id, seed):
        """Create the domain for ``instance_id`` using ``seed``."""
        super().__init__('BinPacking', instance_id, seed)

    def get_solution(self, idx=0):
        """Return the solution at ``idx`` as an order-independent ``Solution``.

        The string produced by ``solutionToString`` is split into lines and
        the last two lines are discarded (presumably a non-bin trailer --
        TODO confirm against the Java output). Every remaining line is
        parsed as a comma-separated list of floats, with surrounding
        brackets, commas and spaces stripped from each token. The items
        inside each bin are sorted, and then the bins themselves are sorted.
        """
        bin_lines = self.problem.solutionToString(idx).split('\n')[:-2]
        canonical_bins = sorted(
            sorted(float(token.strip('[, ]')) for token in line.split(','))
            for line in bin_lines
        )
        objective = self.get_fitness(idx)
        return Solution(next(self.solution_indexer), canonical_bins, objective)

# 定义 HyFlexEnv 类
class HyFlexEnv(gym.Env):
    """Gymnasium environment exposing HyFlex low-level heuristics as actions.

    Each step applies one heuristic to the current solution and keeps the
    result only if it strictly improves the fitness. The observation is a
    constant ``[0.0]`` placeholder; the reward comes from
    ``RawImprovementPenalty``.
    """

    def __init__(self, problem, instance_id, seed, run_id, iteration_limit=1000):
        """Build the underlying problem domain and declare the spaces.

        Args:
            problem: Problem name. NOTE(review): currently unused -- the
                domain is always ``BinPacking``.
            instance_id: Instance identifier forwarded to the domain.
            seed: Seed forwarded to the domain.
            run_id: Run identifier. NOTE(review): currently unused.
            iteration_limit: Number of steps after which the episode ends.
        """
        super().__init__()
        self.problem_instance = BinPacking(instance_id, seed)
        self.actions = self.problem_instance.actions
        self.agent = RandomAgent(self.actions)
        self.credit_assignment = RawImprovementPenalty()
        self.iteration_limit = iteration_limit
        # One discrete action per entry of the domain's action list.
        self.action_space = gym.spaces.Discrete(len(self.actions))
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        """Start a new episode from a freshly initialised solution.

        Args:
            seed: Optional seed for the environment's own RNG, as required
                by the Gymnasium ``reset`` signature. It does not reseed the
                Java problem, which was seeded in ``__init__``.
            options: Accepted for Gymnasium API compatibility; ignored.

        Returns:
            A ``(observation, info)`` tuple with the placeholder observation
            and an empty info dict.
        """
        super().reset(seed=seed)
        self.problem_instance.initialise_solution()
        self.current_fitness = self.problem_instance.get_fitness()
        self.iteration = 0
        self.done = False
        return np.array([0.0], dtype=np.float32), {}

    def step(self, action):
        """Apply the heuristic selected by ``action`` and accept improvements.

        Args:
            action: Index into ``self.actions`` (an element of
                ``action_space``).

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes true once ``iteration_limit`` steps have
            been taken; ``truncated`` is always ``False``.
        """
        # action_space is Discrete(len(self.actions)), so `action` is an index
        # into the action list, not necessarily the heuristic id itself. The
        # lookup is a no-op when the list is exactly 0..n-1.
        heuristic = self.actions[int(action)]
        fitness = self.problem_instance.apply_heuristic(heuristic)
        reward = self.credit_assignment.get_reward(action, fitness, self.current_fitness)
        # Only strictly improving candidates replace the current solution.
        if fitness < self.current_fitness:
            self.problem_instance.accept_solution()
            self.current_fitness = fitness
        self.iteration += 1
        if self.iteration >= self.iteration_limit:
            self.done = True
        return np.array([0.0], dtype=np.float32), reward, self.done, False, {}

    def render(self, mode='human'):
        """Rendering is not implemented; this is a no-op."""
        pass

    def close(self):
        """No resources to release; this is a no-op."""
        pass

# 主程序
if __name__ == "__main__":
    # Run one episode with uniformly sampled actions.
    env = HyFlexEnv('BinPacking', 0, 7, 0)
    try:
        obs, info = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
            # An episode is over when it is either terminated or truncated.
            done = terminated or truncated
    finally:
        # Always release the environment, even if a step raises.
        env.close()

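# Observed output when this cell was run:
# JavaException: JVM exception occurred: BinPacking/BinPacking java.lang.NoClassDefFoundError
# NOTE(review): the JVM could not load the BinPacking class -- presumably the
# HyFlex jar was not on the classpath in effect when the JVM started; confirm.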