In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTCheetah import GPTCheetahEnv
import os

In [None]:
import prompts
class DGA:
    """LLM-backed generator and refiner of reward functions and robot morphologies.

    Each public method sends a chat-completion request built from the prompt
    templates imported at the top of the notebook, extracts the useful part of
    the reply (a fenced Python block, or the ``parameters`` entry of a JSON
    object) and writes the result under ``<folder_name>/env`` (reward
    functions) or ``<folder_name>/assets`` (morphology XML files).
    """

    def __init__(self, api_key=None, model="gpt-4o-mini"):
        """Create the OpenAI client.

        Args:
            api_key: API key to use. When omitted, the ``OPENAI_API_KEY``
                environment variable is read so that no secret has to be
                stored in the notebook.
            model: Chat model name used for every request.
        """
        if api_key is None:
            api_key = os.environ.get("OPENAI_API_KEY")
        self.client = OpenAI(api_key=api_key)
        self.model = model

    @staticmethod
    def _write_text(file_path, text):
        """Write ``text`` to ``file_path``, creating the parent directory if needed."""
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(file_path, "w") as fp:
            fp.write(text)

    def _save_morphology(self, parameter, folder_name, filename):
        """Render ``parameter`` with ``cheetah_design``, save it under ``assets`` and return the path."""
        file_path = os.path.join(folder_name, "assets", filename)
        self._write_text(file_path, cheetah_design(parameter))
        return file_path

    def extract_code(self, text):
        """Return the stripped body of the first ```python fenced block in ``text``.

        Returns:
            The code as a string, or ``None`` when ``text`` is empty/``None``
            or contains no such fenced block.
        """
        if not text:
            # A reply without content would otherwise make re.search raise TypeError.
            return None
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` unchanged.

        The original implementation split the text into lines and re-joined
        them without modifying any line, i.e. it was an identity function.
        The method is kept so existing call sites keep working.
        """
        return code

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Sample ``rewardfunc_nums`` reward functions in a single request.

        Args:
            rewardfunc_nums: Number of completions requested (``n``).
            folder_name: Run folder; files go to ``<folder_name>/env``.

        Returns:
            List of paths of the files that were written. Completions that
            contain no fenced Python block are skipped.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                continue
            # NOTE(review): unlike generate_rewardfunc_div, this writes
            # GPTCheetah_{i}.py and does not prepend the numpy import;
            # confirm which naming the downstream cells expect.
            file_path = os.path.join(folder_name, "env", f"GPTCheetah_{i}.py")
            # Written once (the original wrote the identical content twice).
            self._write_text(file_path, self.indent_code(reward_code) + "\n")
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files

    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate ``rewardfunc_nums`` reward functions one at a time, asking for diversity.

        The first function is generated from the base prompt. Every further
        request is sent the conversation so far (all previously accepted
        functions as assistant turns) plus the diversity prompt.

        Args:
            rewardfunc_nums: Total number of reward functions to request.
            folder_name: Run folder; files go to ``<folder_name>/env``.

        Returns:
            List of paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # Generate the initial reward function.
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"
            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            self._write_text(file_path, reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
            # Only record a turn when code was extracted; an assistant message
            # whose content is None is not a usable conversation turn.
            messages.append({"role": "assistant", "content": initial_code})

        # Generate the additional, diversified reward functions.
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_code = self.extract_code(response.choices[0].message.content)
            if not diverse_code:
                continue
            messages.append({"role": "assistant", "content": diverse_code})

            reward_code = "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
            file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
            self._write_text(file_path, reward_code)
            rewardfunc_files.append(file_path)
            print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        """Sample ``morphology_nums`` morphologies in a single JSON-mode request.

        Args:
            morphology_nums: Number of completions requested (``n``).
            folder_name: Run folder; XML files go to ``<folder_name>/assets``.

        Returns:
            Tuple ``(xml_files, material_list, parameter_list)``.
            ``parameter_list`` and ``material_list`` have one entry per
            completion; ``material_list`` holds ``None`` for entries whose
            ``parameters`` value is not a list. ``xml_files`` only contains
            the entries that were actually written.
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # Echo every raw response for inspection.
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        # Do not hand entries to compute_cheetah_volume that the loop below
        # rejects as invalid; keep a None placeholder so indices stay aligned.
        material_list = [
            compute_cheetah_volume(parameter) if isinstance(parameter, list) else None
            for parameter in parameter_list
        ]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            filename = f"GPTCheetah_{i}.xml"
            xml_files.append(self._save_morphology(parameter, folder_name, filename))
            print(f"Successfully saved {filename}")

        return xml_files, material_list, parameter_list

    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate ``morphology_nums`` morphologies one at a time, asking for diversity.

        The first design comes from the base prompt; each later request is
        sent all previous designs as assistant turns plus the diversity prompt.

        Args:
            morphology_nums: Total number of morphologies to request.
            folder_name: Run folder; XML files go to ``<folder_name>/assets``.

        Returns:
            Tuple ``(xml_files, material_list, parameter_list)`` with one
            entry per generated design.
        """
        material_list = []
        xml_files = []
        parameter_list = []

        # Generate the initial morphology.
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_cheetah_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})
        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )
        xml_files.append(
            self._save_morphology(initial_parameter['parameters'], folder_name, "GPTCheetah_0.xml")
        )

        # Generate the additional, diversified designs.
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_cheetah_volume(diverse_parameter['parameters']))
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_files.append(
                self._save_morphology(diverse_parameter['parameters'], folder_name, f"GPTCheetah_{i}.xml")
            )

        return xml_files, material_list, parameter_list

    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for a refined reward function given past results.

        Args:
            best_rewardfunc: Path of the best reward-function file so far.
            rewardfunc_list: Paths of the evaluated reward-function files.
            fitness_list: Fitness of each entry of ``rewardfunc_list``.
            folder_name: Run folder; the file goes to ``<folder_name>/env``.
            step, rewardfunc_index, morphology_index: Used only to build the
                output file name.

        Returns:
            Path of the written file, or ``None`` when the reply contained no
            fenced Python block (the original raised ``UnboundLocalError``).
        """
        reward_improve_prompts = prompts.reward_improve_prompts

        for rewardfunc_file, fitness in zip(rewardfunc_list, fitness_list):
            with open(rewardfunc_file, "r") as fp:
                reward_content = fp.read()
            reward_improve_prompts += f"\nreward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, "r") as fp:
            best_reward_content = fp.read()
        reward_improve_prompts += f"\nbest reward function:\n{best_reward_content}\nbest fitness: {max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content":reward_improve_prompts+ zeroshot_rewardfunc_format}
        ]
        print(messages)
        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            return None

        full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
        file_name = f"GPTrewardfunc_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
        file_path = os.path.join(folder_name, "env", file_name)
        self._write_text(file_path, full_code)
        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for a refined morphology given past results.

        Args:
            best_parameter: Parameter list of the best design so far.
            parameter_list: Parameter lists of the evaluated designs.
            fitness_list: Fitness of each entry of ``parameter_list``.
            folder_name: Run folder; the XML goes to ``<folder_name>/assets``.
            rewardfunc_index, morphology_index, iteration: Used only to build
                the output file name.

        Returns:
            Tuple ``(file_path, parameter)`` for the refined design.
        """
        morphology_improve_prompts = prompts.morphology_improve_prompts
        # Each record ends with a newline so consecutive records no longer run
        # together in the prompt; "fintess" is corrected to "fitness".
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts += f"parameter:{parameter_content} \nfitness:{fitness}\n"
        morphology_improve_prompts += f"best parameter:{best_parameter} \nbest fitness:{max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        parameter = json.loads(responses.choices[0].message.content).get('parameters', [])
        print(parameter)
        filename = f"GPTCheetah_refine2_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = self._save_morphology(parameter, folder_name, filename)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

# Root folder for every artifact of this run (log, reward functions, XML files).
folder_name = "results/Div_m25_r5"

# Create the folder tree up front: logging.basicConfig(filename=...) opens the
# log file immediately and the DGA methods write into env/ and assets/, so a
# fresh checkout would otherwise fail with FileNotFoundError.
os.makedirs(os.path.join(folder_name, "env"), exist_ok=True)
os.makedirs(os.path.join(folder_name, "assets"), exist_ok=True)

log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

# Best-so-far trackers, updated by the coarse optimization loop.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Number of morphologies / reward functions to generate and evaluate.
# NOTE(review): the folder name says m25_r5 while these are 26 and 6 - confirm
# which is intended.
morphology_nums = 26
rewardfunc_nums = 6

# Result grids indexed [reward function][morphology]; None marks "not evaluated yet".
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()


# Print configuration info

In [5]:
# Mark the beginning of the run in the log file.
logging.info("start!")

In [8]:
# Generate the diversified initial morphologies (XML paths, material costs, raw parameters).
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(
    morphology_nums=morphology_nums,
    folder_name=folder_name,
)

In [9]:
# Generate the diversified initial reward functions; returns the written file paths.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(
    rewardfunc_nums=rewardfunc_nums,
    folder_name=folder_name,
)

initial Saved: results/Div_m50_r10\env\GPTrewardfunc_0.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_1.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_2.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_3.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_4.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_5.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_6.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_7.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_8.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_9.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_10.py


# Enter coarse optimization stage

In [4]:
# Paths of previously generated artifacts (26 morphology XML files, 6 reward-function modules),
# rebuilt by hand so the generation cells do not have to be re-run.
morphology_list = [
    f'results/Div_m25_r5/assets/GPTCheetah_{idx}.xml'
    for idx in range(26)
]
rewardfunc_list = [
    f'results/Div_m25_r5/env/GPTrewardfunc_{idx}.py'
    for idx in range(6)
]

# Morphology parameters pasted from an earlier generation run: 51 designs x 24 values each.
# One design per line; the values are unchanged from the original one-value-per-line layout.
parameter_list = np.array([
    [-0.4, 0.5, 0.8, 0.2, 0.3, -0.3, 0.2, -0.4, 0.15, -0.5, 0.2, -0.25, 0.15, -0.35, 0.1, -0.45, 0.08, 0.04, 0.05, 0.04, 0.03, 0.05, 0.04, 0.03],
    [-0.6, 0.6, 1.1, 0.25, 0.4, -0.35, 0.25, -0.45, 0.2, -0.55, 0.3, -0.3, 0.25, -0.4, 0.15, -0.5, 0.07, 0.03, 0.06, 0.05, 0.04, 0.06, 0.05, 0.04],
    [-0.9, 0.8, 1.4, 0.3, 0.6, -0.6, 0.5, -0.75, 0.35, -0.95, 0.4, -0.5, 0.4, -0.7, 0.25, -0.85, 0.09, 0.045, 0.065, 0.055, 0.045, 0.065, 0.055, 0.045],
    [-0.5, 0.7, 1.0, 0.35, 0.55, -0.6, 0.4, -0.8, 0.2, -1.0, -0.45, -0.6, 0.3, -0.9, 0.15, -1.1, 0.1, 0.05, 0.07, 0.06, 0.04, 0.07, 0.06, 0.04],
    [-0.8, 0.9, 1.3, 0.4, 0.7, -0.7, 0.6, -0.9, 0.3, -1.2, 0.5, -0.65, 0.5, -1.0, 0.2, -1.3, 0.12, 0.06, 0.08, 0.07, 0.05, 0.08, 0.07, 0.05],
    [-1.0, 1.0, 1.5, 0.5, 0.8, -0.8, 0.5, -1.0, 0.25, -1.25, -0.3, -0.6, 0.4, -0.8, 0.2, -1.0, 0.08, 0.045, 0.09, 0.075, 0.055, 0.09, 0.075, 0.055],
    [-0.3, 0.4, 0.9, 0.15, 0.25, -0.25, 0.15, -0.35, 0.1, -0.45, 0.35, -0.2, 0.2, -0.3, 0.05, -0.4, 0.05, 0.025, 0.035, 0.03, 0.02, 0.035, 0.03, 0.02],
    [-0.7, 0.6, 1.2, 0.25, 0.5, -0.45, 0.3, -0.6, 0, -0.7, -0.2, -0.4, 0.1, -0.55, -0.1, -0.65, 0.06, 0.03, 0.045, 0.04, 0.035, 0.045, 0.04, 0.035],
    [-0.6, 0.75, 1.25, 0.2, 0.2, -0.2, 0.05, -0.25, 0.1, -0.3, 0.15, -0.15, 0.05, -0.2, 0.02, -0.25, 0.04, 0.02, 0.03, 0.025, 0.02, 0.03, 0.025, 0.02],
    [-0.2, 0.3, 0.5, 0.1, 0.1, -0.1, 0, -0.15, -0.05, -0.2, -0.1, -0.15, 0.05, -0.25, 0.1, -0.3, 0.02, 0.01, 0.025, 0.02, 0.015, 0.025, 0.02, 0.015],
    [-1.2, 1.2, 2.0, 0.6, 0.9, -0.5, 0.7, -1.0, 0.4, -1.4, -0.6, -0.3, 0.2, -0.5, 0.0, -0.8, 0.15, 0.07, 0.1, 0.08, 0.06, 0.1, 0.08, 0.06],
    [-0.8, 1.1, 1.8, 0.3, 0.3, -0.2, -0.2, -0.6, 0.1, -1.0, -0.5, -0.7, 0.3, -0.9, 0.1, -1.1, 0.06, 0.03, 0.045, 0.04, 0.03, 0.045, 0.04, 0.03],
    [-0.5, 0.8, 1.6, 0.25, 0.2, -0.2, 0.1, -0.35, 0, -0.5, -0.3, -0.3, 0.2, -0.4, 0.15, -0.55, 0.05, 0.025, 0.035, 0.03, 0.025, 0.035, 0.03, 0.025],
    [-0.9, 0.9, 1.5, 0.2, 0.3, 0.0, -0.1, -0.2, 0.1, -0.3, 0.2, 0.1, 0.1, -0.1, 0.15, -0.25, 0.07, 0.03, 0.045, 0.035, 0.025, 0.045, 0.035, 0.025],
    [-0.6, 1.3, 2.0, 0.4, 0.8, -0.6, -0.2, -0.7, 0, -0.9, -0.7, -0.2, 0.5, -0.3, 0.8, -0.4, 0.05, 0.02, 0.04, 0.03, 0.025, 0.04, 0.03, 0.025],
    [-0.25, 0.55, 0.85, 0.2, 0.15, -0.25, 0.05, -0.45, -0.02, -0.65, 0.1, -0.15, 0.07, -0.3, 0.01, -0.5, 0.03, 0.015, 0.02, 0.017, 0.013, 0.02, 0.017, 0.013],
    [-0.1, 0.2, 0.6, 0.1, 0.8, -0.5, 0.8, -0.8, 0.9, -0.9, -0.3, -0.2, -0.3, -0.5, -0.2, -0.7, 0.035, 0.02, 0.03, 0.025, 0.02, 0.03, 0.025, 0.02],
    [-0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1, -0.1, -0.15, -0.1, -0.05, 0.1, -0.1, 0.15, -0.15, 0.01, 0.005, 0.01, 0.007, 0.005, 0.01, 0.007, 0.005],
    [-0.7, 0.7, 0.9, 0.1, -0.3, -0.6, -0.5, -0.9, -0.85, -1.2, 0.4, -0.25, 0.5, -0.55, 0.6, -0.8, 0.05, 0.02, 0.06, 0.05, 0.04, 0.06, 0.05, 0.04],
    [-1.0, 1.5, 2.2, 0.3, 1.2, -0.3, 0.6, -0.5, -0.2, -0.8, 0.5, -0.1, 0.3, -0.2, 0.05, -0.4, 0.08, 0.04, 0.07, 0.06, 0.05, 0.07, 0.06, 0.05],
    [-0.5, 0.4, 0.7, 0.2, 0.1, -0.15, -0.1, -0.35, 0.05, -0.45, -0.2, -0.25, 0.3, -0.35, 0.25, -0.4, 0.03, 0.015, 0.02, 0.017, 0.015, 0.02, 0.017, 0.015],
    [-0.8, 0.9, 1.8, 0.4, -0.2, -0.3, -0.15, -0.45, -0.1, -0.65, 0.25, -0.15, 0.2, -0.35, 0.1, -0.55, 0.04, 0.02, 0.03, 0.025, 0.02, 0.03, 0.025, 0.02],
    [-0.6, 0.8, 1.3, 0.4, 0.5, -0.4, 0.4, -0.6, 0.3, -0.7, 0.4, -0.3, 0.4, -0.4, 0.3, -0.5, 0.06, 0.03, 0.04, 0.03, 0.02, 0.04, 0.03, 0.02],
    [-0.4, 0.6, 1.0, 0.2, -0.1, -0.2, -0.15, -0.3, -0.1, -0.4, -0.2, -0.1, -0.15, -0.25, -0.1, -0.35, 0.035, 0.015, 0.02, 0.015, 0.01, 0.02, 0.015, 0.01],
    [-0.2, 0.3, 0.6, 0.1, 0.05, -0.03, -0.02, -0.05, -0.03, -0.07, -0.04, -0.02, 0.02, -0.03, 0.03, -0.05, 0.02, 0.01, 0.018, 0.015, 0.012, 0.018, 0.015, 0.012],
    [-0.3, 0.2, 0.6, -0.1, 0, -0.2, 0, -0.3, 0.2, -0.4, -0.3, -0.1, 0.1, -0.2, 0.2, -0.3, 0.025, 0.015, 0.02, 0.015, 0.01, 0.02, 0.015, 0.01],
    [-1.2, 2.3, 3.5, 0.8, 1.5, -0.8, 1.3, -1.0, 1.7, -1.3, -1.4, -1.0, -1.3, -1.4, -1.7, -1.8, 0.15, 0.07, 0.12, 0.1, 0.08, 0.12, 0.1, 0.08],
    [-0.6, 1.2, 1.8, 0.15, -0.5, -0.45, -0.6, -0.85, -0.8, -1.2, 0.2, -0.15, 0.1, -0.25, 0.05, -0.35, 0.03, 0.02, 0.04, 0.025, 0.02, 0.04, 0.025, 0.02],
    [-0.8, 0.5, 1.0, 0.3, 0.2, -0.1, 0.3, -0.6, 0.1, -0.9, 0.4, -0.2, 0.2, -0.4, 0.05, -0.6, 0.06, 0.03, 0.045, 0.04, 0.03, 0.045, 0.04, 0.03],
    [-0.2, 0.3, 0.9, 0.1, 0.6, -0.3, 0.8, -0.6, 1.0, -0.8, 1.0, -0.2, 1.2, -0.4, 1.5, -0.6, 0.02, 0.01, 0.03, 0.02, 0.015, 0.03, 0.02, 0.015],
    [-0.1, 0.2, 0.4, 0.3, 0.5, -0.2, 0.5, -0.4, 0.5, -0.6, 0.4, -0.3, 0.4, -0.5, 0.3, -0.6, 0.02, 0.01, 0.015, 0.013, 0.011, 0.015, 0.013, 0.011],
    [-0.5, 0.9, 1.7, 0.3, 0.6, -0.1, 0.6, -0.5, 1.0, -0.8, 0.3, -0.1, 0.3, -0.5, 0.6, -0.8, 0.04, 0.02, 0.035, 0.03, 0.025, 0.035, 0.03, 0.025],
    [-0.35, 0.6, 1.25, 0.4, 0.45, -0.2, 0.2, -0.4, -0.05, -0.6, 0.6, -0.35, 0.15, -0.65, -0.1, -0.85, 0.035, 0.015, 0.03, 0.025, 0.02, 0.03, 0.025, 0.02],
    [-0.9, 1.0, 2.2, 0.5, -1.1, -0.9, -1.4, -1.2, -1.8, -1.5, 0.8, -1.0, 1.3, -1.3, 1.8, -1.6, 0.09, 0.04, 0.07, 0.06, 0.05, 0.07, 0.06, 0.05],
    [-0.2, 0.3, 0.7, 0.15, 0.05, -0.05, 0.02, -0.08, 0.08, -0.13, 0.05, -0.03, 0.08, -0.07, 0.12, -0.11, 0.025, 0.012, 0.018, 0.016, 0.014, 0.018, 0.016, 0.014],
    [-0.6, 0.9, 1.4, 0.3, 0.8, -0.7, 0.3, -1.2, 0.5, -1.6, 0.7, -0.2, 0.3, -0.6, 0.1, -1.0, 0.06, 0.025, 0.045, 0.035, 0.03, 0.045, 0.035, 0.03],
    [-0.55, 0.55, 0.75, 0.21, 0.6, -0.6, 0.2, -0.8, -0.1, -1.0, 0.5, -0.5, 0.2, -0.7, 0.15, -0.9, 0.045, 0.022, 0.038, 0.032, 0.025, 0.038, 0.032, 0.025],
    [-0.3, 1.2, 2.4, 0.5, 1.0, -0.6, 0.9, -0.8, 1.2, -1.1, 0.6, -0.4, 1.1, -0.6, 1.4, -0.9, 0.07, 0.05, 0.09, 0.07, 0.06, 0.09, 0.07, 0.06],
    [-0.8, 0.4, 0.5, 0.05, -0.3, -0.6, -0.6, -0.8, -0.85, -1.1, 0.3, -0.45, 0.5, -0.6, 0.65, -0.75, 0.02, 0.01, 0.02, 0.015, 0.012, 0.02, 0.015, 0.012],
    [-0.2, 0.35, 0.6, 0.04, 0.7, -0.3, 0.5, -0.4, -0.45, -0.55, -0.4, -0.45, 0.1, -0.5, 0.05, -0.55, 0.02, 0.008, 0.015, 0.012, 0.01, 0.015, 0.012, 0.01],
    [-0.45, 0.45, 0.75, 0.12, -0.225, -0.225, -0.075, -0.225, 0.075, -0.3, 0.225, -0.075, 0.15, -0.225, 0.3, -0.3, 0.03, 0.015, 0.02, 0.018, 0.015, 0.02, 0.018, 0.015],
    [-0.5, 0.3, 0.75, 0.1, 0.6, -0.4, 0.7, -0.6, 0.9, -0.8, -0.6, -0.5, -0.7, -0.7, -0.9, -0.9, 0.025, 0.015, 0.03, 0.02, 0.015, 0.03, 0.02, 0.015],
    [-0.7, 0.8, 1.8, 0.6, 1, -0.5, 0.9, -0.8, 0.6, -1, -0.8, -0.4, -0.7, -0.7, -0.9, -0.9, 0.02, 0.01, 0.03, 0.02, 0.015, 0.03, 0.02, 0.015],
    [-0.7, 1.0, 1.2, 0.2, 0.3, -0.1, 0.1, -0.15, -0.05, -0.3, 0.2, 0, 0.2, -0.1, 0.15, -0.2, 0.03, 0.015, 0.02, 0.018, 0.015, 0.02, 0.018, 0.015],
    [-0.5, 0.9, 1.4, 0.2, 0.1, -0.25, 0.3, -0.6, 0.4, -0.9, -0.3, -0.2, 0.2, -0.5, 0.15, -0.8, 0.04, 0.02, 0.03, 0.02, 0.015, 0.03, 0.02, 0.015],
    [-0.8, 1.5, 2.5, 0.2, 0.9, -0.2, 1.2, -0.4, 1.5, -0.5, 0.7, -0.1, 1.0, -0.3, 1.3, -0.4, 0.06, 0.035, 0.07, 0.05, 0.04, 0.07, 0.05, 0.04],
    [-0.6, 0.2, 0.8, 0.25, -0.6, -0.8, -0.8, -1.2, -1.0, -1.5, -0.4, -0.5, 0.2, -0.8, 0.45, -1.2, 0.04, 0.02, 0.025, 0.02, 0.015, 0.025, 0.02, 0.015],
    [-0.3, 0.6, 1.2, 0.5, 0.4, -0.3, 0.3, -0.4, 0.1, -0.5, -0.4, -0.2, -0.2, -0.3, -0.1, -0.4, 0.03, 0.015, 0.02, 0.018, 0.015, 0.02, 0.018, 0.015],
    [-0.8, 1.4, 2.1, 0.5, 0.9, -0.4, 0.7, -0.6, 0.2, -0.7, 0.6, -0.3, 0.5, -0.4, 0.1, -0.5, 0.045, 0.02, 0.032, 0.025, 0.015, 0.032, 0.025, 0.015],
    [-0.1, 0.24, 0.25, 0.05, -0.08, -0.07, 0.12, -0.23, 0.2, -0.5, 0, -0.1, 0.15, -0.2, 0.2, -0.45, 0.02, 0.01, 0.015, 0.012, 0.01, 0.015, 0.012, 0.01],
    [-0.6, 0.4, 0.5, 0.1, 0, -0.1, -0.1, -0.25, 0.03, -0.4, -0.2, -0.2, -0.1, -0.3, 0, -0.5, 0.04, 0.02, 0.025, 0.02, 0.015, 0.025, 0.02, 0.015],
])

# Material cost of every design, obtained by applying compute_cheetah_volume to each parameter row.
material_list = list(map(compute_cheetah_volume, parameter_list))
# Display (number of designs, parameters per design).
parameter_list.shape

(51, 24)

In [11]:
# Display entry 17 of parameter_list (presumably the parameters of morphology 17 - confirm).
parameter_list[17]

array([-0.15 ,  0.15 ,  0.45 ,  0.05 ,  0.05 , -0.05 , -0.05 , -0.1  ,
       -0.1  , -0.15 , -0.1  , -0.05 ,  0.1  , -0.1  ,  0.15 , -0.15 ,
        0.01 ,  0.005,  0.01 ,  0.007,  0.005,  0.01 ,  0.007,  0.005])

In [9]:
# Record the run configuration in the log, one "name:value" line per setting.
for _label, _value in (
    ("folder_name", folder_name),
    ("morphology_nums", morphology_nums),
    ("rewardfunc_nums", rewardfunc_nums),
    ("parameter_list", parameter_list),
    ("morphology_list", morphology_list),
    ("material_list", material_list),
):
    logging.info(f'{_label}:{_value}')
# Banner marking the start of the coarse optimization stage.
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [34]:
# Coarse optimization: train and evaluate the selected (reward function, morphology) pairs.
# NOTE(review): the index filters are hard-coded (apparently to resume a partial
# run); edit them here instead of inside the loop body.
selected_rewardfuncs = [0, 1, 2, 3, 4]
selected_morphologies = [50]

for i, rewardfunc in enumerate(rewardfunc_list):
    # Filter on the reward function once per row rather than once per pair.
    if i not in selected_rewardfuncs:
        continue
    for j, morphology in enumerate(morphology_list):
        if j not in selected_morphologies:
            continue
        print(i, rewardfunc)
        print(j, morphology)

        # Stage the pair under the fixed file names used by the import below
        # and, presumably, by the environment (confirm).
        shutil.copy(morphology, "GPTCheetah.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # The module file was just overwritten, so it must be reloaded before
        # its _get_rew is patched onto the environment class.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTCheetahEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = f"results/Div_m50_r10/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        # Track the best pair seen so far (selected by fitness).
        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Previously never assigned, so the stage summary always logged "Reward: None".
            best_reward = reward

0 results/Div_m25_r5/env/GPTrewardfunc_0.py
50 results/Div_m25_r5/assets/GPTCheetah_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
1 results/Div_m25_r5/env/GPTrewardfunc_1.py
50 results/Div_m25_r5/assets/GPTCheetah_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
2 results/Div_m25_r5/env/GPTrewardfunc_2.py
50 results/Div_m25_r5/assets/GPTCheetah_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
5

In [35]:
# Display the fitness grid filled in by the coarse optimization loop
# (rows: reward functions, columns: morphologies; None = not evaluated).
fitness_matrix

array([[None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, 80.18560408659073],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, 142.23841964168602],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, N

In [5]:
# Fitness results pasted from a finished coarse run: 5 reward functions (rows) x 25 morphologies (columns).
# Kept as dtype=object, exactly like the grid the training loop fills in.
fitness_matrix = np.array([
    [61.130376113245546, 69.13500935074738, 53.738925662756635, 54.052825155044765, 57.2120730444868,
     64.47912499697043, 70.3120784841065, 85.20261983252783, 56.91522907153349, 29.721927445327438,
     55.87626658590905, 6.446482892420793, 32.16432739550953, 218.27235483633405, 113.10753708344508,
     28.364383339526707, 113.59105282833292, 24.967713123138623, 4.419828310390766, 11.41473110127369,
     91.30389354420298, 50.458170099176705, 37.69628666540019, 35.69674715742348, 86.79309079918623],
    [13.77590920053392, 78.112288482573, 44.89133472806593, 46.080923070941054, 59.16311851105932,
     5.811381406855803, 74.01937760338694, 35.09980448034824, 63.664373097458686, 10.595142744697485,
     58.504974081220524, 31.4950968874108, 184.79349285514456, 210.90418784166806, 218.186796388248,
     16.405462944347875, 106.51206755729005, 35.46140062498762, 5.100484870945221, 13.593095735999034,
     42.29856872724528, 32.659595944986926, 32.710782273008306, 75.3164945027813, 85.36374941958339],
    [50.63944289532069, 68.40169679535067, 114.16960551251796, 60.70235453444291, 60.43345873617575,
     92.42293349949581, 69.14416303032715, 43.11152042039319, 33.35108915286392, 33.23365735450172,
     56.89037415753991, 13.811869134978997, 39.81339656647702, 226.99471895059168, 207.02322105130312,
     23.229479366813372, 98.8634275841195, 14.038628642588794, 5.29756759273172, 14.53423708836796,
     45.95359991303092, 50.88450585929344, 33.17836414365374, 80.44905367103134, 86.4555334032978],
    [38.494846254393394, 73.7078833140323, 34.92647775016722, 15.521268333762352, 62.18719872517682,
     76.77891522060017, 46.27078915882296, 27.845771569827942, 22.968698339020204, 43.89775710519532,
     56.13490517466529, 17.086841439563003, 215.0717772981808, 205.83296984022456, 192.54788543661394,
     22.285789032289866, 113.16054992427806, 28.31268882070819, 14.940100961615249, 13.249064976762911,
     51.26670419585611, 2.9561521060538043, 34.814434832993825, 55.32472176007084, 80.58138743800484],
    [63.167854915994475, 73.24513405730413, 42.31394672683599, 45.69852368683235, 66.46284510430696,
     100.84208677275872, 51.23364480060153, 56.731945169412086, 59.78141265507969, 24.996618439268467,
     56.11464407726764, 12.364644578037009, 79.7155876797246, 203.05798885980886, 167.66555623163137,
     15.258433374404085, 68.74754683700681, 17.517908177336302, 28.788136492843282, 19.35701747486473,
     70.19998025197651, 69.07688219236738, 45.7683863069565, 71.40011929312966, 86.99677276461763],
], dtype=object)

In [6]:
efficiency_matrix = np.array([[1557.4271117182284, 1362.8625160056695, 496.0734725263708,
        484.43791872323783, 287.5825029894196, 385.95898770393666,
        4807.761383252813, 2163.8101878652215, 4696.069707294241,
        11600.23005794048, 161.42493091129646, 126.72404583479235,
        1474.6658493987932, 5360.3128171236585, 3245.336826155721,
        5641.877054690387, 9369.409849195483, 91136.99070435333,
        79.85597450780647, 95.91145449444589, 17052.34855749708,
        3024.431065455483, 1156.8054936870435, 5836.850981194829,
        73679.72488611392],
       [350.9707585920008, 1539.8321488933059, 414.3998048016239,
        412.99129881371005, 297.3896382471614, 34.78575252156308,
        5061.257083083001, 891.3964696853099, 5252.940887949217,
        4135.19929221588, 169.0191914398564, 619.1261449284162,
        8472.387740420978, 5179.3660361635575, 6260.322376017509,
        3263.162955082037, 8785.508981395134, 129440.98336854727,
        92.15384879862981, 114.21500616658795, 7899.881477354894,
        1957.5956949507777, 1003.8127355113864, 12315.160059263064,
        72466.33936604827],
       [1290.1481440483194, 1348.4066823599958, 1053.9197046660013,
        544.0330307974865, 303.7751370089884, 553.2249678600585,
        4727.902290211583, 1094.862426570513, 2751.794941898386,
        12970.830094654328, 164.3546585879916, 271.5117633187356,
        1825.3593660830459, 5574.515848892534, 5940.011607285778,
        4620.508229043523, 8154.620888422908, 51243.714704322454,
        95.71467327000397, 122.12287847559935, 8582.512451228578,
        3049.9853634943784, 1018.1616628079197, 13154.395715249459,
        73393.16825086427],
       [980.7385628865245, 1453.007850091922, 322.41245776612544,
        139.1063446912353, 312.5904955302117, 459.58304175097845,
        3163.879067245993, 707.17267063141, 1895.1449417920467,
        17132.94275361377, 162.17213037310697, 335.8907041085607,
        9860.58254075005, 5054.827521554374, 5524.678191382143,
        4432.801527249592, 9333.900378608441, 103346.80013111926,
        269.9327299049138, 111.32431253426198, 9574.813027207883,
        177.18990296690637, 1068.3686123222897, 9046.262816763281,
        68406.53331623513],
       [1609.3362431306361, 1443.885646707169, 390.6074829408978,
        409.56411880549956, 334.0824817872345, 603.6203148010338,
        3503.226533849399, 1440.7674456052346, 4932.558220506031,
        9755.979826660156, 162.11359664407598, 243.06242836389112,
        3654.7897728605008, 4986.679788747119, 4810.742220608724,
        3035.0106369352457, 5670.551741571816, 63943.759159826426,
        520.1343881379173, 162.64594270481956, 13110.88153547992,
        4140.424989585846, 1404.5182006129587, 11674.78522666635,
        73852.63301784496]], dtype=object)
# Mean and standard deviation over every entry of the efficiency matrix.
mean = np.mean(efficiency_matrix)
std = np.std(efficiency_matrix)

# The labels are the Chinese words for "mean" and "standard deviation".
for label, statistic in (("平均值：", mean), ("标准差：", std)):
    print(label, statistic)


平均值： 9557.069239403296
标准差： 21978.06357882543


In [26]:
# Elementwise comparison against None, then the (row, column) index of every match.
is_none_mask = efficiency_matrix == None
none_coords = np.argwhere(is_none_mask)
print(none_coords)

[]


In [9]:
# Keep the leading 5 x 25 window of the efficiency matrix. Rows are indexed by
# reward function and columns by morphology in the fine-stage loop below.
SELECT_REWARDFUNC_ROWS = 5
SELECT_MORPHOLOGY_COLS = 25
efficiency_matrix_select = efficiency_matrix[:SELECT_REWARDFUNC_ROWS, :SELECT_MORPHOLOGY_COLS]
efficiency_matrix_select.shape

(5, 25)

# print coarse optimization info

In [None]:
# Close out the coarse stage in the log: banner, best-design summary, the raw
# search data, then the banner that opens the fine stage.
logging.info('_________________________________end coarse optimization stage_________________________________')

stage1_summary = (
    f"Stage1: Final best morphology: {best_morphology}, "
    f"Fitness: {best_fitness}, "
    f"best_efficiency: {best_efficiency}, "
    f"best reward function: {best_rewardfunc}, "
    f"Material cost: {best_material}, "
    f"Reward: {best_reward}"
)
logging.info(stage1_summary)

for label, payload in (
    ("folder_name", folder_name),
    ("parameter_list", parameter_list),
    ("fitness_matrix", fitness_matrix),
    ("efficiency_matrix", efficiency_matrix),
):
    logging.info(f'{label}:{payload}')

logging.info('_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [10]:
# Gather every non-None entry of the matrix together with its (row, column) coordinates.
all_values_with_coords = []
for i, row in enumerate(efficiency_matrix_select):
    for j, value in enumerate(row):
        if value is None:
            continue
        all_values_with_coords.append(((i, j), value))

# Order the entries from highest to lowest value (stable for ties).
sorted_values = list(all_values_with_coords)
sorted_values.sort(key=lambda entry: entry[1], reverse=True)

# Keep the top 10% of entries, but never fewer than one.
top_k = int(len(sorted_values) * 0.1)
if top_k < 1:
    top_k = 1

top_entries = sorted_values[:top_k]
efficiency_coarse_best = [entry[0] for entry in top_entries]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {top_entries}")


In [11]:
# Alias the selected top-efficiency coordinates under the name the fine-stage loop iterates over,
# then display them. Each entry is a (row, column) pair into efficiency_matrix_select.
coarse_best = efficiency_coarse_best
coarse_best

[(1, 17),
 (3, 17),
 (0, 17),
 (4, 24),
 (0, 24),
 (2, 24),
 (1, 24),
 (3, 24),
 (4, 17),
 (2, 17),
 (3, 9),
 (0, 20)]

# enter fine optimization stage

In [18]:
def _train_and_score(morphology_path, rewardfunc_path, parameter, morphology_index, rewardfunc_index):
    """Train and evaluate one (morphology, reward function) candidate for the fine stage.

    Copies the two candidate files to ``GPTCheetah.xml`` / ``GPTrewardfunc.py``,
    reloads the reward module and patches ``GPTCheetahEnv._get_rew`` with it,
    then trains with ``stage='fine'`` for 5e5 timesteps and evaluates the model.

    Args:
        morphology_path: path of the morphology XML to install.
        rewardfunc_path: path of the reward-function source file to install.
        parameter: design parameters passed to ``compute_cheetah_volume``.
        morphology_index, rewardfunc_index: forwarded to ``Train``.

    Returns:
        ``(fitness, material, efficiency)`` with ``efficiency = fitness / material``.
    """
    shutil.copy(morphology_path, "GPTCheetah.xml")
    shutil.copy(rewardfunc_path, "GPTrewardfunc.py")

    # Import after the copy: GPTrewardfunc.py has just been rewritten, and the
    # reload makes sure an already-imported module picks up the new source.
    import GPTrewardfunc
    importlib.reload(GPTrewardfunc)
    GPTCheetahEnv._get_rew = GPTrewardfunc._get_rew

    model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
    fitness, _ = Eva(model_path)
    material = compute_cheetah_volume(parameter)
    return fitness, material, fitness / material


final_optimized_results = []  # best result reached from each coarse_best starting point

# DGA.__init__ only builds the API client and stores the model name, so a
# single instance is reused instead of being rebuilt on every iteration.
designer = DGA()

for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    # Start from the coarse-stage scores of this (reward function, morphology) pair.
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_cheetah_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    # Alternate morphology and reward-function proposals until a full round
    # (both steps) fails to raise the efficiency.
    while True:

        # -------- morphology step --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            improved_morphology, best_rewardfunc, improved_parameter,
            morphology_index, rewardfunc_index,
        )

        morphology_improved = improved_efficiency > best_efficiency
        if morphology_improved:
            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # No break here: the reward-function step still gets its turn.
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")

        # -------- reward-function step --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        # The morphology is unchanged in this step, so the material cost is
        # that of the current best parameters.
        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            best_morphology, improved_rewardfunc, best_parameter,
            morphology_index, rewardfunc_index,
        )

        reward_improved = improved_efficiency > best_efficiency
        if reward_improved:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # Logged on every failed reward step, not only on the final round.
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")

        # Stop once neither step improved in this round.
        improved = morphology_improved or reward_improved
        if not improved:
            break

    # Record the best design reached from this coarse_best starting point.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

[-0.9, 0.8, 1.4, 0.4, 0.7, -0.6, 0.6, -0.8, 0.3, -1.1, 0.5, -0.5, 0.4, -0.8, 0.2, -1.2, 0.1, 0.05, 0.07, 0.06, 0.04, 0.07, 0.06, 0.04]
Successfully saved GPTCheetah_refine2_1_17_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[{'role': 'system', 'content': 'You are a reinforcement learning reward function designer'}, {'role': 'user', 'content': '\nYou are a reward engineer trying to write reward functions to solve reinforcement learning tasks as effectively as possible.\nYour goal is to write a reward function for the enviroment that will help the agent learn the task described in text.\nTask Description: The HalfCheetah is a 2-dimensional robot consisting of 9 body parts and 8 joints connecting them (including two paws). The goal is to apply

In [15]:

# Write the complete list of fine-stage results to the log as one entry.
logging.info(str(final_optimized_results))

In [19]:
# Display the list of per-starting-point result dicts built by the fine optimization loop.
final_optimized_results

[{'best_morphology': 'results/Div_m25_r5/assets/GPTCheetah_17.xml',
  'best_parameter': array([-0.15 ,  0.15 ,  0.45 ,  0.05 ,  0.05 , -0.05 , -0.05 , -0.1  ,
         -0.1  , -0.15 , -0.1  , -0.05 ,  0.1  , -0.1  ,  0.15 , -0.15 ,
          0.01 ,  0.005,  0.01 ,  0.007,  0.005,  0.01 ,  0.007,  0.005]),
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_1.py',
  'best_fitness': 35.46140062498762,
  'best_material': 0.0002739580595121185,
  'best_efficiency': 129440.98336854727,
  'best_iteration': 0},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTCheetah_17.xml',
  'best_parameter': array([-0.15 ,  0.15 ,  0.45 ,  0.05 ,  0.05 , -0.05 , -0.05 , -0.1  ,
         -0.1  , -0.15 , -0.1  , -0.05 ,  0.1  , -0.1  ,  0.15 , -0.15 ,
          0.01 ,  0.005,  0.01 ,  0.007,  0.005,  0.01 ,  0.007,  0.005]),
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 28.31268882070819,
  'best_material': 0.0002739580595121185,
  'best_efficiency': 103346.800

In [2]:
# robodesign best
# Rebuild the morphology XML from its design parameters, write it to the assets
# folder, then evaluate a saved checkpoint with that morphology and reward function.
parameter = [
    -0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1,
    -0.1, -0.15, -0.1, -0.05, 0.1, -0.1, 0.15, -0.15,
    0.01, 0.005, 0.01, 0.007, 0.005, 0.01, 0.007, 0.005,
]
morphology = "results/Div_m25_r5/assets/GPTCheetah_refine_2_17_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine_4_17_1.py"

# The generated XML is written to the same path that is used as the morphology below.
filename = morphology
xml_file = cheetah_design(parameter)
with open(filename, "w") as xml_out:
    xml_out.write(xml_file)

morphology_index = rewardfunc_index = 9998

# Install the candidate files under the fixed names used by the rest of this cell.
for source_path, target_name in ((morphology, "GPTCheetah.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(source_path, target_name)

# Reload so the freshly copied reward source replaces any previously imported version.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
_get_rew = GPTrewardfunc._get_rew
GPTCheetahEnv._get_rew = _get_rew

# Uses a checkpoint from disk; the alternatives are kept for reference:
#   model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
#   fitness, _ = Eva(model_path)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva_with_qpos_logging(
    model_path,
    run_steps=100,
    video = True,
    rewardfunc_index=rewardfunc_index,
    morphology_index=morphology_index,
)

material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# Same two result lines go to the log first and then to stdout.
result_lines = (f"fitness:{fitness}", f"efficiency:{efficiency}")
logging.info(" robodesign best 3e6 steps train\n")
for line in result_lines:
    logging.info(line)
print("robodesign best 3e6 steps train\n")
for line in result_lines:
    print(line)


Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved qpos log to /root/autodl-tmp/Cheetah/qpos.txt
Average Fitness: 136.3898, Average Reward: 3745160488064074545979940186820776325620395613421293517422247912744499100172419072.0000
robodesign best 3e6 steps train

fitness:136.38977202537532
efficiency:497849.0951069907


In [32]:
# robodesign best
# Evaluate the saved checkpoint for the refined morphology / reward-function pair
# and report its fitness and material efficiency.

morphology = "results/Div_m25_r5/assets/GPTCheetah_refine_2_17_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTSwimmer_refine_2_17_1.py"

morphology_index=999
rewardfunc_index=999

parameter = [-0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1, -0.1, -0.15, -0.1, -0.05, 0.1, -0.1, 0.15, -0.15, 0.01, 0.005, 0.01, 0.007, 0.005, 0.01, 0.007, 0.005]

# Install the candidate files under the fixed names used by the rest of this cell.
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # pick up the reward source that was just copied
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

# Uses a checkpoint from disk; the training call is kept for reference:
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# Evaluate inside this cell. With this call commented out, `fitness` was whatever
# an earlier cell had left in the kernel, so the reported efficiency depended on
# execution order.
fitness, _ = Eva(model_path)

material = compute_cheetah_volume(parameter)
efficiency = fitness / material

logging.info(" robodesign best 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("robodesign best 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
robodesign best 3e6 steps train

fitness:123.73312278819954
efficiency:451649.8730081208


2025-04-07 00:50:01,521 - Initial morphology:results/Div_m25_r5/assets/GPTCheetah_17.xml
2025-04-07 00:50:01,522 - Initial parameter:[-0.15   0.15   0.45   0.05   0.05  -0.05  -0.05  -0.1   -0.1   -0.15
 -0.1   -0.05   0.1   -0.1    0.15  -0.15   0.01   0.005  0.01   0.007
  0.005  0.01   0.007  0.005]
2025-04-07 00:50:01,522 - Initial rewardfunc:results/Div_m25_r5/env/GPTrewardfunc_1.py
2025-04-07 00:50:01,522 - Initial fitness:35.46140062498762
2025-04-07 00:50:01,522 - Initial efficiency:129440.98336854727


In [19]:
# robodesign coarse best
# Retrain the best coarse-stage (morphology 17, reward function 1) pair and
# report its fitness and material efficiency.
morphology = "results/Div_m25_r5/assets/GPTCheetah_17.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_1.py"

morphology_index = rewardfunc_index = 888

parameter = [
    -0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1,
    -0.1, -0.15, -0.1, -0.05, 0.1, -0.1, 0.15, -0.15,
    0.01, 0.005, 0.01, 0.007, 0.005, 0.01, 0.007, 0.005,
]

# Install the candidate files under the fixed names used by the rest of this cell.
for source_path, target_name in ((morphology, "GPTCheetah.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(source_path, target_name)

# Reload so the freshly copied reward source replaces any previously imported version.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
_get_rew = GPTrewardfunc._get_rew
GPTCheetahEnv._get_rew = _get_rew

# Alternative kept for reference (load a saved checkpoint instead of training):
#   model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
model_path = Train(
    morphology_index,
    rewardfunc_index,
    folder_name,
    stage='fine',
    total_timesteps=1e6,
)
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# Same two result lines go to the log first and then to stdout.
result_lines = (f"fitness:{fitness}", f"efficiency:{efficiency}")
logging.info(" robodesign coarse best 3e6 steps train\n")
for line in result_lines:
    logging.info(line)
print("robodesign coarse best 3e6 steps train\n")
for line in result_lines:
    print(line)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
robodesign coarse best 3e6 steps train

fitness:95.46820595833779
efficiency:348477.44990000833


In [3]:
# human
# Evaluate the saved checkpoint for the baseline labelled "human": morphology 50
# with reward function 0 from the CheetahEureka_morphology run.
morphology = "results/Div_m25_r5/assets/GPTCheetah_50.xml"
rewardfunc = "results/CheetahEureka_morphology/env/GPTrewardfunc_0.py"

morphology_index = rewardfunc_index = 777

# Material cost is a fixed constant here rather than computed from parameters.
material_list = [0.021184]

# Install the candidate files under the fixed names used by the rest of this cell.
for source_path, target_name in ((morphology, "GPTCheetah.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(source_path, target_name)

# Reload so the freshly copied reward source replaces any previously imported version.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
_get_rew = GPTrewardfunc._get_rew
GPTCheetahEnv._get_rew = _get_rew

# Uses a checkpoint from disk; the alternatives are kept for reference:
#   model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
#   fitness, _ = Eva(model_path)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva_with_qpos_logging(
    model_path,
    run_steps=100,
    video = True,
    rewardfunc_index=rewardfunc_index,
    morphology_index=morphology_index,
)

material = material_list[0]
efficiency = fitness / material

# Same two result lines go to the log first and then to stdout.
result_lines = (f"fitness:{fitness}", f"efficiency:{efficiency}")
logging.info(" human 3e6 steps train\n")
for line in result_lines:
    logging.info(line)
print("human 3e6 steps train\n")
for line in result_lines:
    print(line)

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved qpos log to /root/autodl-tmp/Cheetah/qpos.txt
Average Fitness: 251.5038, Average Reward: 4555.9488
human 3e6 steps train

fitness:251.50381170673592
efficiency:11872.34760700226


2025-04-09 15:03:53,835 - morphology: 50, rewardfunc: 4, material cost: 0.008319517436858105 reward: 2049.3560760933597 fitness: 168.32404032342103 efficiency: 20232.428335048862

In [21]:
# Robodesign (w/o Morphology Design)
# Ablation: morphology 50 paired with generated reward function 4; train, evaluate,
# and report fitness and material efficiency.
morphology = "results/Div_m25_r5/assets/GPTCheetah_50.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_4.py"

morphology_index = rewardfunc_index = 666

# Material cost is a fixed constant here rather than computed from parameters.
material_list = [0.021184]

# Install the candidate files under the fixed names used by the rest of this cell.
for source_path, target_name in ((morphology, "GPTCheetah.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(source_path, target_name)

# Reload so the freshly copied reward source replaces any previously imported version.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
_get_rew = GPTrewardfunc._get_rew
GPTCheetahEnv._get_rew = _get_rew

# Alternative kept for reference (load a saved checkpoint instead of training):
#   model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
model_path = Train(
    morphology_index,
    rewardfunc_index,
    folder_name,
    stage='fine',
    total_timesteps=1e6,
)
fitness, _ = Eva(model_path)
material = material_list[0]
efficiency = fitness / material

# Same two result lines go to the log first and then to stdout.
result_lines = (f"fitness:{fitness}", f"efficiency:{efficiency}")
logging.info(" Robodesign (w/o Morphology Design) 3e6 steps train\n")
for line in result_lines:
    logging.info(line)
print("Robodesign (w/o Morphology Design) 3e6 steps train\n")
for line in result_lines:
    print(line)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Morphology Design) 3e6 steps train

fitness:250.14266670867656
efficiency:11808.094161096891


2025-04-06 15:46:10,818 - morphology: 17, rewardfunc: 0, material cost: 0.0002739580595121185 reward: 222.37654595342295 fitness: 24.967713123138623 efficiency: 91136.99070435333

In [30]:
# Robodesign (w/o Reward Shaping)
# Ablation: morphology 17 paired with reward function 0. Evaluates the saved
# coarse-stage checkpoint and reports fitness and material efficiency.

morphology = "results/Div_m25_r5/assets/GPTCheetah_17.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_0.py"

# These indices select the coarse-stage checkpoint loaded below. The earlier
# placeholder value 555 was overwritten before use and has been removed.
morphology_index=17
rewardfunc_index=0

parameter = [
    -0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1,
    -0.1, -0.15, -0.1, -0.05, 0.1, -0.1, 0.15, -0.15,
    0.01, 0.005, 0.01, 0.007, 0.005, 0.01, 0.007, 0.005,
]

# Install the candidate files under the fixed names used by the rest of this cell.
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # pick up the reward source that was just copied
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

# Uses a checkpoint from disk; the training call is kept for reference:
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/coarse/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# NOTE(review): the log header says "1e6 steps", the printed header says "3e6 steps",
# and the checkpoint name says 500000.0 steps — confirm which is intended before
# relying on these labels. The strings are left unchanged here.
logging.info(" Robodesign (w/o Reward Shaping) 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign (w/o Reward Shaping) 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Reward Shaping) 3e6 steps train

fitness:26.528840834301896
efficiency:96835.40933800633


2025-04-07 15:03:56,961 - Final optimized result: rewardfunc_index4 morphology_index3
2025-04-07 15:03:56,961 -   Morphology: results/noDiv_m25_r5/assets/GPTCheetah_refine_4_3_0.xml
2025-04-07 15:03:56,961 -   Parameter: [-0.5, 0.5, 0.8, 0.2, 0.3, -0.4, 0.4, -0.5, 0.5, -0.6, -0.3, -0.5, 0.4, -0.6, 0.6, -0.7, 0.05, 0.04, 0.04, 0.03, 0.03, 0.04, 0.03, 0.03]
2025-04-07 15:03:56,961 -   Rewardfunc: results/noDiv_m25_r5/env/GPTSwimmer_refine_4_3_1.py
2025-04-07 15:03:56,961 -   Fitness: 180.8154850762752
2025-04-07 15:03:56,961 -   Material: 0.02788783835590477
2025-04-07 15:03:56,961 -   Efficiency: 6483.667997809899

In [25]:
# Robodesign (w/o Diversity Reflection)
# Ablation: refined morphology and reward function from the noDiv_m25_r5 run;
# train, evaluate, and report fitness and material efficiency.
morphology = "results/noDiv_m25_r5/assets/GPTCheetah_refine_4_3_0.xml"
rewardfunc = "results/noDiv_m25_r5/env/GPTSwimmer_refine_4_3_1.py"

morphology_index = rewardfunc_index = 333

parameter = [
    -0.5, 0.5, 0.8, 0.2, 0.3, -0.4, 0.4, -0.5,
    0.5, -0.6, -0.3, -0.5, 0.4, -0.6, 0.6, -0.7,
    0.05, 0.04, 0.04, 0.03, 0.03, 0.04, 0.03, 0.03,
]

# Install the candidate files under the fixed names used by the rest of this cell.
for source_path, target_name in ((morphology, "GPTCheetah.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(source_path, target_name)

# Reload so the freshly copied reward source replaces any previously imported version.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
_get_rew = GPTrewardfunc._get_rew
GPTCheetahEnv._get_rew = _get_rew

# Alternative kept for reference (load a saved checkpoint instead of training):
#   model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
model_path = Train(
    morphology_index,
    rewardfunc_index,
    folder_name,
    stage='fine',
    total_timesteps=1e6,
)
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# Same two result lines go to the log first and then to stdout.
result_lines = (f"fitness:{fitness}", f"efficiency:{efficiency}")
logging.info(" Robodesign (w/o Diversity Reflection) 3e6 steps train\n")
for line in result_lines:
    logging.info(line)
print("Robodesign (w/o Diversity Reflection) 3e6 steps train\n")
for line in result_lines:
    print(line)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Diversity Reflection) 3e6 steps train

fitness:207.54673485196096
efficiency:7442.195131915505


2025-04-08 19:41:04,459 - iteration:2, morphology: 0, rewardfunc: 5, material cost: 0.021184 reward: 2999.6270393357754 fitness: 173.16370628185507 efficiency: 8174.26861224769

In [4]:
# eureka reward
# Evaluate the saved checkpoint for morphology 0 with reward function 5_2 from the
# eureka run, and report fitness and material efficiency.
morphology = "results/eureka/assets/GPTCheetah_0.xml"
rewardfunc = "results/eureka/env/GPTrewardfunc_5_2.py"

morphology_index = rewardfunc_index = 222

# Material cost is a fixed constant here rather than computed from parameters.
material_list = [0.021184]

# Install the candidate files under the fixed names used by the rest of this cell.
for source_path, target_name in ((morphology, "GPTCheetah.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(source_path, target_name)

# Reload so the freshly copied reward source replaces any previously imported version.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
_get_rew = GPTrewardfunc._get_rew
GPTCheetahEnv._get_rew = _get_rew

# Uses a checkpoint from disk; the alternatives are kept for reference:
#   model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
#   fitness, _ = Eva(model_path)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva_with_qpos_logging(
    model_path,
    run_steps=100,
    video = True,
    rewardfunc_index=rewardfunc_index,
    morphology_index=morphology_index,
)

material = material_list[0]
efficiency = fitness / material

# Same two result lines go to the log first and then to stdout.
result_lines = (f"fitness:{fitness}", f"efficiency:{efficiency}")
logging.info(" eureka reward 3e6 steps train\n")
for line in result_lines:
    logging.info(line)
print(" eureka reward 3e6 steps train\n")
for line in result_lines:
    print(line)

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved qpos log to /root/autodl-tmp/Cheetah/qpos.txt
Average Fitness: 253.9435, Average Reward: 4644.6285
 eureka reward 3e6 steps train

fitness:253.94347808925835
efficiency:11987.513127325261


2025-04-08 07:55:18,132 - morphology: 15, rewardfunc: 0, material cost: 0.009341494464891529 reward: 1228.9937270418675 fitness: 78.32647964015682 efficiency: 8384.79109895467

In [None]:
# eureka morphology
# Evaluate the saved checkpoint for morphology 15 with reward function 0 from the
# CheetahEureka_morphology run, and report fitness and material efficiency.
morphology = "results/CheetahEureka_morphology/assets/GPTCheetah_15.xml"
rewardfunc = "results/CheetahEureka_morphology/env/GPTrewardfunc_0.py"

morphology_index = rewardfunc_index = 111

parameter = [
    -0.63, 0.61, 0.94, 0.51, 0.41, -0.71, 0.65, -1.1,
    0.81, -1.5, -0.56, -0.6, -0.68, -1.2, 0.53, -1.6,
    0.029, 0.02, 0.018, 0.013, 0.012, 0.018, 0.012, 0.012,
]

# Install the candidate files under the fixed names used by the rest of this cell.
for source_path, target_name in ((morphology, "GPTCheetah.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(source_path, target_name)

# Reload so the freshly copied reward source replaces any previously imported version.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
_get_rew = GPTrewardfunc._get_rew
GPTCheetahEnv._get_rew = _get_rew

# Uses a checkpoint from disk; the alternatives are kept for reference:
#   model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
#   fitness, _ = Eva(model_path)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva_with_qpos_logging(
    model_path,
    run_steps=100,
    video = True,
    rewardfunc_index=rewardfunc_index,
    morphology_index=morphology_index,
)

material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# Same two result lines go to the log first and then to stdout.
result_lines = (f"fitness:{fitness}", f"efficiency:{efficiency}")
logging.info("eureka morphology 3e6 steps train\n")
for line in result_lines:
    logging.info(line)
print("eureka morphology 3e6 steps train\n")
for line in result_lines:
    print(line)

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
