In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import logging
import numpy as np
from gymnasium.envs.robodesign.GPTCheetah import GPTCheetahEnv
import os
import re

In [None]:
import prompts
class DGA:
    """Ask an OpenAI chat model for reward functions and cheetah morphologies.

    Reward functions are saved as Python files under ``<folder_name>/env`` and
    morphologies as XML files under ``<folder_name>/assets``.  The prompt texts
    (``rewardfunc_prompts``, ``morphology_prompts``, ...) and the helpers
    ``cheetah_design`` / ``compute_cheetah_volume`` are not defined in this class;
    they are expected to come from the notebook's imports.
    """

    def __init__(self, api_key=None, model="gpt-4o-mini"):
        """Create the OpenAI client.

        Args:
            api_key: API key to use.  When omitted, the ``OPENAI_API_KEY``
                environment variable is used so that no secret lives in the notebook.
            model: Name of the chat model used for every request.
        """
        if api_key is None:
            api_key = os.environ.get("OPENAI_API_KEY")
        self.client = OpenAI(api_key=api_key)
        self.model = model

    @staticmethod
    def _write_text(file_path, text):
        """Write ``text`` to ``file_path``, creating the parent directory if it is missing."""
        os.makedirs(os.path.dirname(file_path) or ".", exist_ok=True)
        with open(file_path, "w") as fp:
            fp.write(text)

    def _save_morphology(self, parameter, folder_name, filename):
        """Render ``parameter`` with ``cheetah_design``, save it under ``assets`` and return the path."""
        file_path = os.path.join(folder_name, "assets", filename)
        self._write_text(file_path, cheetah_design(parameter))
        return file_path

    def extract_code(self, text):
        """Return the body of the first ```` ```python ```` fenced block in ``text``.

        Returns ``None`` when ``text`` is empty/``None`` or contains no such block.
        """
        if not text:
            return None
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` unchanged.

        The original implementation split and re-joined the lines without altering
        them, so this is kept as an identity hook for callers.
        """
        return "\n".join(code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Request ``rewardfunc_nums`` reward functions in one call and save those that contain code.

        Args:
            rewardfunc_nums: Number of completions requested (``n``).
            folder_name: Result folder; files go to ``<folder_name>/env/GPTCheetah_<i>.py``.

        Returns:
            List of the file paths that were written.  Responses without a
            ```` ```python ```` block are skipped.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                continue
            file_path = os.path.join(folder_name, "env", f"GPTCheetah_{i}.py")
            # Written once (the original wrote the identical content twice).
            self._write_text(file_path, self.indent_code(reward_code) + "\n")
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files

    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate reward functions one at a time, asking each time for one that differs from the earlier ones.

        Every extracted function is fed back into the conversation as an assistant
        message before the next request.

        Args:
            rewardfunc_nums: Total number of reward functions to request.
            folder_name: Result folder; files go to ``<folder_name>/env/GPTrewardfunc_<i>.py``.

        Returns:
            List of the file paths that were written.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # Generate the initial reward function.
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            self._write_text(file_path, reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
            # Only record real code in the history; a None content would be an invalid message.
            messages.append({"role": "assistant", "content": initial_code})

        # Generate the remaining, diversified reward functions.
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_code = self.extract_code(response.choices[0].message.content)
            if not diverse_code:
                continue

            messages.append({"role": "assistant", "content": diverse_code})
            reward_code = "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
            file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
            self._write_text(file_path, reward_code)
            rewardfunc_files.append(file_path)
            print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        """Request ``morphology_nums`` morphologies in one call and save the valid ones.

        A response is valid when its JSON has a non-empty list under ``"parameters"``.
        Invalid responses are skipped in all three returned lists, so the lists stay
        aligned and file ``GPTCheetah_<k>.xml`` always corresponds to position ``k``.

        Args:
            morphology_nums: Number of completions requested (``n``).
            folder_name: Result folder; files go to ``<folder_name>/assets``.

        Returns:
            ``(xml_files, material_list, parameter_list)`` with one entry per valid response.
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # Show every raw response before parsing.
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        candidates = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]

        xml_files = []
        material_list = []
        parameter_list = []
        for i, parameter in enumerate(candidates):
            if not isinstance(parameter, list) or not parameter:
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            filename = f"GPTCheetah_{len(xml_files)}.xml"
            file_path = self._save_morphology(parameter, folder_name, filename)
            xml_files.append(file_path)
            # Material cost is computed only for parameters that passed validation.
            material_list.append(compute_cheetah_volume(parameter))
            parameter_list.append(parameter)
            print(f"Successfully saved {filename}")

        return xml_files, material_list, parameter_list

    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate morphologies one at a time, asking each time for a design that differs from the earlier ones.

        Every returned design is fed back into the conversation as an assistant
        message before the next request.

        Args:
            morphology_nums: Total number of morphologies to request.
            folder_name: Result folder; files go to ``<folder_name>/assets/GPTCheetah_<i>.xml``.

        Returns:
            ``(xml_files, material_list, parameter_list)``.

        Raises:
            KeyError: If a response's JSON has no ``"parameters"`` entry.
        """
        material_list = []
        xml_files = []
        parameter_list = []

        # Generate the initial morphology.
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_cheetah_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})
        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )
        xml_files.append(
            self._save_morphology(initial_parameter['parameters'], folder_name, "GPTCheetah_0.xml")
        )

        # Generate the remaining, diversified designs.
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_cheetah_volume(diverse_parameter['parameters']))
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_files.append(
                self._save_morphology(diverse_parameter['parameters'], folder_name, f"GPTCheetah_{i}.xml")
            )

        return xml_files, material_list, parameter_list

    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for an improved reward function given earlier ones and their fitness.

        Args:
            best_rewardfunc: Path of the best reward-function file so far.
            rewardfunc_list: Paths of reward-function files to show to the model.
            fitness_list: Fitness value for each entry of ``rewardfunc_list``; its
                maximum is reported to the model as the best fitness.
            folder_name: Result folder; the new file goes to ``<folder_name>/env``.
            step, rewardfunc_index, morphology_index: Used, in this order, to build
                the file name ``GPTrewardfunc_refine_<step>_<rewardfunc_index>_<morphology_index>.py``.

        Returns:
            Path of the newly written reward-function file.

        Raises:
            ValueError: If the response contains no ```` ```python ```` block
                (previously this surfaced as an ``UnboundLocalError``).
        """
        reward_improve_prompts = prompts.reward_improve_prompts

        for rewardfunc_file, fitness in zip(rewardfunc_list, fitness_list):
            with open(rewardfunc_file, "r") as fp:
                reward_content = fp.read()
            reward_improve_prompts += f"\nreward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, "r") as fp:
            best_reward_content = fp.read()
        reward_improve_prompts += f"\nbest reward function:\n{best_reward_content}\nbest fitness: {max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content":reward_improve_prompts+ zeroshot_rewardfunc_format}
        ]
        print(messages)
        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            raise ValueError("The model response did not contain a ```python code block.")

        full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
        file_name = f"GPTrewardfunc_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
        file_path = os.path.join(folder_name, "env", file_name)
        self._write_text(file_path, full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for an improved morphology given earlier designs and their fitness.

        Args:
            best_parameter: Parameter vector of the best design so far.
            parameter_list: Sequence of parameter vectors to show to the model
                (one full vector per entry).
            fitness_list: Fitness value for each entry of ``parameter_list``; its
                maximum is reported to the model as the best fitness.
            folder_name: Result folder; the XML goes to ``<folder_name>/assets``.
            rewardfunc_index, morphology_index, iteration: Used to build the file
                name ``GPTCheetah_refine2_<rewardfunc_index>_<morphology_index>_<iteration>.xml``.

        Returns:
            ``(file_path, parameter)`` for the newly generated design.

        Raises:
            ValueError: If the response has no non-empty ``"parameters"`` list.
        """
        morphology_improve_prompts = prompts.morphology_improve_prompts
        # One "parameter / fitness" record per line so consecutive records do not run together.
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts += f"parameter:{parameter_content} \nfitness:{fitness}\n"
        morphology_improve_prompts += f"best parameter:{best_parameter} \nbest fitness:{max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        parameter = json.loads(responses.choices[0].message.content).get('parameters', [])
        print(parameter)
        if not isinstance(parameter, list) or not parameter:
            raise ValueError(f"The model response did not contain a usable 'parameters' list: {parameter!r}")

        filename = f"GPTCheetah_refine2_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = self._save_morphology(parameter, folder_name, filename)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [12]:

# Run configuration: result folder, log file, problem size and the
# bookkeeping variables shared by the coarse and fine stages.
folder_name = "results/Div_m25_r5"
log_file = os.path.join(folder_name, "parameters_fineonly.log")
logging.basicConfig(
    filename=log_file,
    level=logging.INFO,
    format="%(asctime)s - %(message)s",
)

# Number of candidate morphologies / reward functions evaluated in the coarse stage.
morphology_nums = 51
rewardfunc_nums = 11

# Best result seen so far (nothing evaluated yet).
best_fitness = float('-inf')
best_morphology = best_rewardfunc = best_reward = None
best_material = best_efficiency = None

# One cell per (reward function, morphology) pair; None marks "not evaluated".
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()


# print configuration info

In [6]:
# Mark the beginning of the run in the log file.
logging.info("start!")

# enter coarse optimization stage

In [8]:
# Paths of the previously generated morphology XML files (0..50) and
# reward-function modules (0..10) inside the result folder.
morphology_list = [
    f'results/Div_m25_r5/assets/GPTCheetah_{index}.xml'
    for index in range(51)
]
rewardfunc_list = [
    f'results/Div_m25_r5/env/GPTrewardfunc_{index}.py'
    for index in range(11)
]

parameter_list = np.array([[-0.4,
  0.5,
  0.8,
  0.2,
  0.3,
  -0.3,
  0.2,
  -0.4,
  0.15,
  -0.5,
  0.2,
  -0.25,
  0.15,
  -0.35,
  0.1,
  -0.45,
  0.08,
  0.04,
  0.05,
  0.04,
  0.03,
  0.05,
  0.04,
  0.03],
 [-0.6,
  0.6,
  1.1,
  0.25,
  0.4,
  -0.35,
  0.25,
  -0.45,
  0.2,
  -0.55,
  0.3,
  -0.3,
  0.25,
  -0.4,
  0.15,
  -0.5,
  0.07,
  0.03,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.9,
  0.8,
  1.4,
  0.3,
  0.6,
  -0.6,
  0.5,
  -0.75,
  0.35,
  -0.95,
  0.4,
  -0.5,
  0.4,
  -0.7,
  0.25,
  -0.85,
  0.09,
  0.045,
  0.065,
  0.055,
  0.045,
  0.065,
  0.055,
  0.045],
 [-0.5,
  0.7,
  1.0,
  0.35,
  0.55,
  -0.6,
  0.4,
  -0.8,
  0.2,
  -1.0,
  -0.45,
  -0.6,
  0.3,
  -0.9,
  0.15,
  -1.1,
  0.1,
  0.05,
  0.07,
  0.06,
  0.04,
  0.07,
  0.06,
  0.04],
 [-0.8,
  0.9,
  1.3,
  0.4,
  0.7,
  -0.7,
  0.6,
  -0.9,
  0.3,
  -1.2,
  0.5,
  -0.65,
  0.5,
  -1.0,
  0.2,
  -1.3,
  0.12,
  0.06,
  0.08,
  0.07,
  0.05,
  0.08,
  0.07,
  0.05],
 [-1.0,
  1.0,
  1.5,
  0.5,
  0.8,
  -0.8,
  0.5,
  -1.0,
  0.25,
  -1.25,
  -0.3,
  -0.6,
  0.4,
  -0.8,
  0.2,
  -1.0,
  0.08,
  0.045,
  0.09,
  0.075,
  0.055,
  0.09,
  0.075,
  0.055],
 [-0.3,
  0.4,
  0.9,
  0.15,
  0.25,
  -0.25,
  0.15,
  -0.35,
  0.1,
  -0.45,
  0.35,
  -0.2,
  0.2,
  -0.3,
  0.05,
  -0.4,
  0.05,
  0.025,
  0.035,
  0.03,
  0.02,
  0.035,
  0.03,
  0.02],
 [-0.7,
  0.6,
  1.2,
  0.25,
  0.5,
  -0.45,
  0.3,
  -0.6,
  0,
  -0.7,
  -0.2,
  -0.4,
  0.1,
  -0.55,
  -0.1,
  -0.65,
  0.06,
  0.03,
  0.045,
  0.04,
  0.035,
  0.045,
  0.04,
  0.035],
 [-0.6,
  0.75,
  1.25,
  0.2,
  0.2,
  -0.2,
  0.05,
  -0.25,
  0.1,
  -0.3,
  0.15,
  -0.15,
  0.05,
  -0.2,
  0.02,
  -0.25,
  0.04,
  0.02,
  0.03,
  0.025,
  0.02,
  0.03,
  0.025,
  0.02],
 [-0.2,
  0.3,
  0.5,
  0.1,
  0.1,
  -0.1,
  0,
  -0.15,
  -0.05,
  -0.2,
  -0.1,
  -0.15,
  0.05,
  -0.25,
  0.1,
  -0.3,
  0.02,
  0.01,
  0.025,
  0.02,
  0.015,
  0.025,
  0.02,
  0.015],
 [-1.2,
  1.2,
  2.0,
  0.6,
  0.9,
  -0.5,
  0.7,
  -1.0,
  0.4,
  -1.4,
  -0.6,
  -0.3,
  0.2,
  -0.5,
  0.0,
  -0.8,
  0.15,
  0.07,
  0.1,
  0.08,
  0.06,
  0.1,
  0.08,
  0.06],
 [-0.8,
  1.1,
  1.8,
  0.3,
  0.3,
  -0.2,
  -0.2,
  -0.6,
  0.1,
  -1.0,
  -0.5,
  -0.7,
  0.3,
  -0.9,
  0.1,
  -1.1,
  0.06,
  0.03,
  0.045,
  0.04,
  0.03,
  0.045,
  0.04,
  0.03],
 [-0.5,
  0.8,
  1.6,
  0.25,
  0.2,
  -0.2,
  0.1,
  -0.35,
  0,
  -0.5,
  -0.3,
  -0.3,
  0.2,
  -0.4,
  0.15,
  -0.55,
  0.05,
  0.025,
  0.035,
  0.03,
  0.025,
  0.035,
  0.03,
  0.025],
 [-0.9,
  0.9,
  1.5,
  0.2,
  0.3,
  0.0,
  -0.1,
  -0.2,
  0.1,
  -0.3,
  0.2,
  0.1,
  0.1,
  -0.1,
  0.15,
  -0.25,
  0.07,
  0.03,
  0.045,
  0.035,
  0.025,
  0.045,
  0.035,
  0.025],
 [-0.6,
  1.3,
  2.0,
  0.4,
  0.8,
  -0.6,
  -0.2,
  -0.7,
  0,
  -0.9,
  -0.7,
  -0.2,
  0.5,
  -0.3,
  0.8,
  -0.4,
  0.05,
  0.02,
  0.04,
  0.03,
  0.025,
  0.04,
  0.03,
  0.025],
 [-0.25,
  0.55,
  0.85,
  0.2,
  0.15,
  -0.25,
  0.05,
  -0.45,
  -0.02,
  -0.65,
  0.1,
  -0.15,
  0.07,
  -0.3,
  0.01,
  -0.5,
  0.03,
  0.015,
  0.02,
  0.017,
  0.013,
  0.02,
  0.017,
  0.013],
 [-0.1,
  0.2,
  0.6,
  0.1,
  0.8,
  -0.5,
  0.8,
  -0.8,
  0.9,
  -0.9,
  -0.3,
  -0.2,
  -0.3,
  -0.5,
  -0.2,
  -0.7,
  0.035,
  0.02,
  0.03,
  0.025,
  0.02,
  0.03,
  0.025,
  0.02],
 [-0.15,
  0.15,
  0.45,
  0.05,
  0.05,
  -0.05,
  -0.05,
  -0.1,
  -0.1,
  -0.15,
  -0.1,
  -0.05,
  0.1,
  -0.1,
  0.15,
  -0.15,
  0.01,
  0.005,
  0.01,
  0.007,
  0.005,
  0.01,
  0.007,
  0.005],
 [-0.7,
  0.7,
  0.9,
  0.1,
  -0.3,
  -0.6,
  -0.5,
  -0.9,
  -0.85,
  -1.2,
  0.4,
  -0.25,
  0.5,
  -0.55,
  0.6,
  -0.8,
  0.05,
  0.02,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-1.0,
  1.5,
  2.2,
  0.3,
  1.2,
  -0.3,
  0.6,
  -0.5,
  -0.2,
  -0.8,
  0.5,
  -0.1,
  0.3,
  -0.2,
  0.05,
  -0.4,
  0.08,
  0.04,
  0.07,
  0.06,
  0.05,
  0.07,
  0.06,
  0.05],
 [-0.5,
  0.4,
  0.7,
  0.2,
  0.1,
  -0.15,
  -0.1,
  -0.35,
  0.05,
  -0.45,
  -0.2,
  -0.25,
  0.3,
  -0.35,
  0.25,
  -0.4,
  0.03,
  0.015,
  0.02,
  0.017,
  0.015,
  0.02,
  0.017,
  0.015],
 [-0.8,
  0.9,
  1.8,
  0.4,
  -0.2,
  -0.3,
  -0.15,
  -0.45,
  -0.1,
  -0.65,
  0.25,
  -0.15,
  0.2,
  -0.35,
  0.1,
  -0.55,
  0.04,
  0.02,
  0.03,
  0.025,
  0.02,
  0.03,
  0.025,
  0.02],
 [-0.6,
  0.8,
  1.3,
  0.4,
  0.5,
  -0.4,
  0.4,
  -0.6,
  0.3,
  -0.7,
  0.4,
  -0.3,
  0.4,
  -0.4,
  0.3,
  -0.5,
  0.06,
  0.03,
  0.04,
  0.03,
  0.02,
  0.04,
  0.03,
  0.02],
 [-0.4,
  0.6,
  1.0,
  0.2,
  -0.1,
  -0.2,
  -0.15,
  -0.3,
  -0.1,
  -0.4,
  -0.2,
  -0.1,
  -0.15,
  -0.25,
  -0.1,
  -0.35,
  0.035,
  0.015,
  0.02,
  0.015,
  0.01,
  0.02,
  0.015,
  0.01],
 [-0.2,
  0.3,
  0.6,
  0.1,
  0.05,
  -0.03,
  -0.02,
  -0.05,
  -0.03,
  -0.07,
  -0.04,
  -0.02,
  0.02,
  -0.03,
  0.03,
  -0.05,
  0.02,
  0.01,
  0.018,
  0.015,
  0.012,
  0.018,
  0.015,
  0.012],
 [-0.3,
  0.2,
  0.6,
  -0.1,
  0,
  -0.2,
  0,
  -0.3,
  0.2,
  -0.4,
  -0.3,
  -0.1,
  0.1,
  -0.2,
  0.2,
  -0.3,
  0.025,
  0.015,
  0.02,
  0.015,
  0.01,
  0.02,
  0.015,
  0.01],
 [-1.2,
  2.3,
  3.5,
  0.8,
  1.5,
  -0.8,
  1.3,
  -1.0,
  1.7,
  -1.3,
  -1.4,
  -1.0,
  -1.3,
  -1.4,
  -1.7,
  -1.8,
  0.15,
  0.07,
  0.12,
  0.1,
  0.08,
  0.12,
  0.1,
  0.08],
 [-0.6,
  1.2,
  1.8,
  0.15,
  -0.5,
  -0.45,
  -0.6,
  -0.85,
  -0.8,
  -1.2,
  0.2,
  -0.15,
  0.1,
  -0.25,
  0.05,
  -0.35,
  0.03,
  0.02,
  0.04,
  0.025,
  0.02,
  0.04,
  0.025,
  0.02],
 [-0.8,
  0.5,
  1.0,
  0.3,
  0.2,
  -0.1,
  0.3,
  -0.6,
  0.1,
  -0.9,
  0.4,
  -0.2,
  0.2,
  -0.4,
  0.05,
  -0.6,
  0.06,
  0.03,
  0.045,
  0.04,
  0.03,
  0.045,
  0.04,
  0.03],
 [-0.2,
  0.3,
  0.9,
  0.1,
  0.6,
  -0.3,
  0.8,
  -0.6,
  1.0,
  -0.8,
  1.0,
  -0.2,
  1.2,
  -0.4,
  1.5,
  -0.6,
  0.02,
  0.01,
  0.03,
  0.02,
  0.015,
  0.03,
  0.02,
  0.015],
 [-0.1,
  0.2,
  0.4,
  0.3,
  0.5,
  -0.2,
  0.5,
  -0.4,
  0.5,
  -0.6,
  0.4,
  -0.3,
  0.4,
  -0.5,
  0.3,
  -0.6,
  0.02,
  0.01,
  0.015,
  0.013,
  0.011,
  0.015,
  0.013,
  0.011],
 [-0.5,
  0.9,
  1.7,
  0.3,
  0.6,
  -0.1,
  0.6,
  -0.5,
  1.0,
  -0.8,
  0.3,
  -0.1,
  0.3,
  -0.5,
  0.6,
  -0.8,
  0.04,
  0.02,
  0.035,
  0.03,
  0.025,
  0.035,
  0.03,
  0.025],
 [-0.35,
  0.6,
  1.25,
  0.4,
  0.45,
  -0.2,
  0.2,
  -0.4,
  -0.05,
  -0.6,
  0.6,
  -0.35,
  0.15,
  -0.65,
  -0.1,
  -0.85,
  0.035,
  0.015,
  0.03,
  0.025,
  0.02,
  0.03,
  0.025,
  0.02],
 [-0.9,
  1.0,
  2.2,
  0.5,
  -1.1,
  -0.9,
  -1.4,
  -1.2,
  -1.8,
  -1.5,
  0.8,
  -1.0,
  1.3,
  -1.3,
  1.8,
  -1.6,
  0.09,
  0.04,
  0.07,
  0.06,
  0.05,
  0.07,
  0.06,
  0.05],
 [-0.2,
  0.3,
  0.7,
  0.15,
  0.05,
  -0.05,
  0.02,
  -0.08,
  0.08,
  -0.13,
  0.05,
  -0.03,
  0.08,
  -0.07,
  0.12,
  -0.11,
  0.025,
  0.012,
  0.018,
  0.016,
  0.014,
  0.018,
  0.016,
  0.014],
 [-0.6,
  0.9,
  1.4,
  0.3,
  0.8,
  -0.7,
  0.3,
  -1.2,
  0.5,
  -1.6,
  0.7,
  -0.2,
  0.3,
  -0.6,
  0.1,
  -1.0,
  0.06,
  0.025,
  0.045,
  0.035,
  0.03,
  0.045,
  0.035,
  0.03],
 [-0.55,
  0.55,
  0.75,
  0.21,
  0.6,
  -0.6,
  0.2,
  -0.8,
  -0.1,
  -1.0,
  0.5,
  -0.5,
  0.2,
  -0.7,
  0.15,
  -0.9,
  0.045,
  0.022,
  0.038,
  0.032,
  0.025,
  0.038,
  0.032,
  0.025],
 [-0.3,
  1.2,
  2.4,
  0.5,
  1.0,
  -0.6,
  0.9,
  -0.8,
  1.2,
  -1.1,
  0.6,
  -0.4,
  1.1,
  -0.6,
  1.4,
  -0.9,
  0.07,
  0.05,
  0.09,
  0.07,
  0.06,
  0.09,
  0.07,
  0.06],
 [-0.8,
  0.4,
  0.5,
  0.05,
  -0.3,
  -0.6,
  -0.6,
  -0.8,
  -0.85,
  -1.1,
  0.3,
  -0.45,
  0.5,
  -0.6,
  0.65,
  -0.75,
  0.02,
  0.01,
  0.02,
  0.015,
  0.012,
  0.02,
  0.015,
  0.012],
 [-0.2,
  0.35,
  0.6,
  0.04,
  0.7,
  -0.3,
  0.5,
  -0.4,
  -0.45,
  -0.55,
  -0.4,
  -0.45,
  0.1,
  -0.5,
  0.05,
  -0.55,
  0.02,
  0.008,
  0.015,
  0.012,
  0.01,
  0.015,
  0.012,
  0.01],
 [-0.45,
  0.45,
  0.75,
  0.12,
  -0.225,
  -0.225,
  -0.075,
  -0.225,
  0.075,
  -0.3,
  0.225,
  -0.075,
  0.15,
  -0.225,
  0.3,
  -0.3,
  0.03,
  0.015,
  0.02,
  0.018,
  0.015,
  0.02,
  0.018,
  0.015],
 [-0.5,
  0.3,
  0.75,
  0.1,
  0.6,
  -0.4,
  0.7,
  -0.6,
  0.9,
  -0.8,
  -0.6,
  -0.5,
  -0.7,
  -0.7,
  -0.9,
  -0.9,
  0.025,
  0.015,
  0.03,
  0.02,
  0.015,
  0.03,
  0.02,
  0.015],
 [-0.7,
  0.8,
  1.8,
  0.6,
  1,
  -0.5,
  0.9,
  -0.8,
  0.6,
  -1,
  -0.8,
  -0.4,
  -0.7,
  -0.7,
  -0.9,
  -0.9,
  0.02,
  0.01,
  0.03,
  0.02,
  0.015,
  0.03,
  0.02,
  0.015],
 [-0.7,
  1.0,
  1.2,
  0.2,
  0.3,
  -0.1,
  0.1,
  -0.15,
  -0.05,
  -0.3,
  0.2,
  0,
  0.2,
  -0.1,
  0.15,
  -0.2,
  0.03,
  0.015,
  0.02,
  0.018,
  0.015,
  0.02,
  0.018,
  0.015],
 [-0.5,
  0.9,
  1.4,
  0.2,
  0.1,
  -0.25,
  0.3,
  -0.6,
  0.4,
  -0.9,
  -0.3,
  -0.2,
  0.2,
  -0.5,
  0.15,
  -0.8,
  0.04,
  0.02,
  0.03,
  0.02,
  0.015,
  0.03,
  0.02,
  0.015],
 [-0.8,
  1.5,
  2.5,
  0.2,
  0.9,
  -0.2,
  1.2,
  -0.4,
  1.5,
  -0.5,
  0.7,
  -0.1,
  1.0,
  -0.3,
  1.3,
  -0.4,
  0.06,
  0.035,
  0.07,
  0.05,
  0.04,
  0.07,
  0.05,
  0.04],
 [-0.6,
  0.2,
  0.8,
  0.25,
  -0.6,
  -0.8,
  -0.8,
  -1.2,
  -1.0,
  -1.5,
  -0.4,
  -0.5,
  0.2,
  -0.8,
  0.45,
  -1.2,
  0.04,
  0.02,
  0.025,
  0.02,
  0.015,
  0.025,
  0.02,
  0.015],
 [-0.3,
  0.6,
  1.2,
  0.5,
  0.4,
  -0.3,
  0.3,
  -0.4,
  0.1,
  -0.5,
  -0.4,
  -0.2,
  -0.2,
  -0.3,
  -0.1,
  -0.4,
  0.03,
  0.015,
  0.02,
  0.018,
  0.015,
  0.02,
  0.018,
  0.015],
 [-0.8,
  1.4,
  2.1,
  0.5,
  0.9,
  -0.4,
  0.7,
  -0.6,
  0.2,
  -0.7,
  0.6,
  -0.3,
  0.5,
  -0.4,
  0.1,
  -0.5,
  0.045,
  0.02,
  0.032,
  0.025,
  0.015,
  0.032,
  0.025,
  0.015],
 [-0.1,
  0.24,
  0.25,
  0.05,
  -0.08,
  -0.07,
  0.12,
  -0.23,
  0.2,
  -0.5,
  0,
  -0.1,
  0.15,
  -0.2,
  0.2,
  -0.45,
  0.02,
  0.01,
  0.015,
  0.012,
  0.01,
  0.015,
  0.012,
  0.01],
 [-0.6,
  0.4,
  0.5,
  0.1,
  0,
  -0.1,
  -0.1,
  -0.25,
  0.03,
  -0.4,
  -0.2,
  -0.2,
  -0.1,
  -0.3,
  0,
  -0.5,
  0.04,
  0.02,
  0.025,
  0.02,
  0.015,
  0.025,
  0.02,
  0.015]])

# Material cost of every morphology: one compute_cheetah_volume result per parameter row.
material_list = list(map(compute_cheetah_volume, parameter_list))
# Display the array shape as the cell output.
parameter_list.shape

(51, 24)

In [9]:
# Inspect the parameter row of morphology 17 (the same values are reused by the
# "robodesign best" cells further down).
parameter_list[17]

array([-0.15 ,  0.15 ,  0.45 ,  0.05 ,  0.05 , -0.05 , -0.05 , -0.1  ,
       -0.1  , -0.15 , -0.1  , -0.05 ,  0.1  , -0.1  ,  0.15 , -0.15 ,
        0.01 ,  0.005,  0.01 ,  0.007,  0.005,  0.01 ,  0.007,  0.005])

In [9]:
# Record the run configuration, then mark the start of the coarse stage.
for message in (
    f'folder_name:{folder_name}',
    f'morphology_nums:{morphology_nums}',
    f'rewardfunc_nums:{rewardfunc_nums}',
    f'parameter_list:{parameter_list}',
    f'morphology_list:{morphology_list}',
    f'material_list:{material_list}',
    f'_________________________________enter coarse optimization stage_________________________________',
):
    logging.info(message)

In [14]:
# Coarse stage: evaluate the selected (reward function, morphology) pairs and
# remember the pair with the highest fitness.
for i, rewardfunc in enumerate(rewardfunc_list):
    # Only reward function 5 is (re-)evaluated in this run; checked once per
    # reward function instead of once per inner iteration.
    if i not in [5]:
        continue
    for j, morphology in enumerate(morphology_list):
        # Only morphology 50 is (re-)evaluated in this run.
        if j not in [50]:
            continue
        print(i, rewardfunc)
        print(j, morphology)
        # The environment reads these fixed file names, so copy the candidates over them.
        shutil.copy(morphology, "GPTCheetah.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # Reload the module so the freshly copied reward function replaces the
        # previously imported one, then patch it into the environment class.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTCheetahEnv._get_rew = _get_rew

        # Training is skipped here; the previously trained coarse model is evaluated instead.
        # model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        model_path = f"results/Div_m25_r5/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Keep the reward of the best pair as well; the stage summary logs it.
            best_reward = reward

5 results/Div_m25_r5/env/GPTrewardfunc_5.py
50 results/Div_m25_r5/assets/GPTCheetah_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [None]:
# Display the fitness matrix (rows: reward functions, columns: morphologies;
# cells that were not evaluated remain None).
fitness_matrix

In [15]:
# Matrix consulted by the fine stage. Note this is an alias, not a copy:
# later writes to efficiency_matrix are visible through this name too.
efficiency_matrix_select = efficiency_matrix

# print coarse optimization info

In [None]:
# Close the coarse stage in the log: best pair found, the full result
# matrices, and the marker that opens the fine stage.
for message in (
    f'_________________________________end coarse optimization stage_________________________________',
    f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}",
    f'folder_name:{folder_name}',
    f'parameter_list:{parameter_list}',
    f'fitness_matrix:{fitness_matrix}',
    f'efficiency_matrix:{efficiency_matrix}',
    f'_________________________________enter fine optimization stage_________________________________',
):
    logging.info(message)

# configuration of fine optimization

# enter fine optimization stage

In [17]:
# Fine stage: starting from each selected coarse result, alternately ask the
# model for a better morphology and a better reward function, retrain, and keep
# a candidate only when it raises the efficiency (fitness / material).
final_optimized_results = []  # best result found for each coarse starting point
coarse_best = [(5,50)]        # (rewardfunc_index, morphology_index) pairs to refine
max_failed_evaluations = 5    # stop refining a design after this many Train/Eva failures
designer = DGA()              # one client is enough for the whole stage

for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    initial_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    initial_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    if initial_efficiency is None or initial_fitness is None:
        # Fail before any training instead of with a TypeError at the first comparison.
        raise ValueError(
            f"No coarse result for rewardfunc {rewardfunc_index} / morphology {morphology_index}; "
            "run the coarse stage for this pair first."
        )

    best_efficiency = initial_efficiency
    best_fitness = initial_fitness
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_cheetah_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0          # incremented once per generated candidate; used in file names
    failed_evaluations = 0
    print(f"Initial parameter:{parameter}")
    while True:
        improved = False  # set when either candidate of this round is accepted

        iteration +=1
        # -------- optimize morphology --------
        # The history arguments are lists with one entry per design, so the single
        # coarse parameter row has to be wrapped; passing the bare row would pair
        # only its first number with the efficiency.
        # NOTE(review): the history still holds only the coarse design, so the
        # "best fitness" shown to the model is the coarse efficiency even after
        # best_parameter has been replaced - confirm whether that is intended.
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            [parameter],
            [initial_efficiency],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        print("improved parameter", improved_parameter)
        shutil.copy(improved_morphology, "GPTCheetah.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")

        # Reload so the freshly copied reward function is the one patched into the environment.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTCheetahEnv._get_rew = _get_rew
        try:
            model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
            improved_fitness, _ = Eva(model_path)
        except Exception as e:
            print(f"Error evaluating design: {e}")
            failed_evaluations += 1
            if failed_evaluations >= max_failed_evaluations:
                # Every retry costs an LLM call and a training run; do not loop forever.
                logging.info("Too many failed evaluations, stop refining this design.")
                break
            continue
        improved_material = compute_cheetah_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            # Logged with the same iteration number that is in the saved XML file name.
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- morphology not improved: only log, the reward step still runs --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")

        iteration +=1
        # -------- optimize reward function --------
        # The indices are passed positionally: improve_rewardfunc uses its last three
        # arguments in order for the file name, which yields
        # GPTrewardfunc_refine_<rewardfunc_index>_<morphology_index>_<iteration>.py.
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            [rewardfunc],
            [initial_efficiency],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTCheetah.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTCheetahEnv._get_rew = _get_rew
        try:
            model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
            improved_fitness, _ = Eva(model_path)
            improved_material = compute_cheetah_volume(best_parameter)
            improved_efficiency = improved_fitness / improved_material
            print("improved_fitness", improved_fitness)
        except Exception as e:
            print(f"Error evaluating design: {e}")
            failed_evaluations += 1
            if failed_evaluations >= max_failed_evaluations:
                logging.info("Too many failed evaluations, stop refining this design.")
                break
            continue

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True

            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # Stop once a full round produced neither a better morphology nor a better reward.
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break

    # Save the final best result for this coarse starting point.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })
    logging.info("____________________________________________")
    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

Initial parameter:[-0.6    0.4    0.5    0.1    0.    -0.1   -0.1   -0.25   0.03  -0.4
 -0.2   -0.2   -0.1   -0.3    0.    -0.5    0.04   0.02   0.025  0.02
  0.015  0.025  0.02   0.015]
[-0.65, 0.45, 0.55, 0.12, 0.05, -0.15, -0.15, -0.2, 0.04, -0.35, -0.25, -0.25, -0.15, -0.35, 0.05, -0.55, 0.05, 0.03, 0.022, 0.019, 0.013, 0.022, 0.018, 0.014]
Successfully saved GPTCheetah_refine2_5_50_1.xml
improved parameter [-0.65, 0.45, 0.55, 0.12, 0.05, -0.15, -0.15, -0.2, 0.04, -0.35, -0.25, -0.25, -0.15, -0.35, 0.05, -0.55, 0.05, 0.03, 0.022, 0.019, 0.013, 0.022, 0.018, 0.014]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[{'role': 'system', 'content': 'You are a reinforcement learning reward function designer'}, {'role': 'user', 'content': '\nYou are a r

In [15]:

# Write the collected fine-stage results (one dict per coarse starting point) to the log.
logging.info(f"{final_optimized_results}")

# Disabled summary line kept for reference; it refers to improved_material_efficiency,
# a name that is not assigned anywhere in the visible cells.
# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [18]:
# Display the fine-stage results as the cell output.
final_optimized_results

[{'best_morphology': 'results/Div_m25_r5/assets/GPTCheetah_50.xml',
  'best_parameter': array([-0.6  ,  0.4  ,  0.5  ,  0.1  ,  0.   , -0.1  , -0.1  , -0.25 ,
          0.03 , -0.4  , -0.2  , -0.2  , -0.1  , -0.3  ,  0.   , -0.5  ,
          0.04 ,  0.02 ,  0.025,  0.02 ,  0.015,  0.025,  0.02 ,  0.015]),
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_refine_5_50_2.py',
  'best_fitness': 138.77250750838263,
  'best_material': 0.008319517436858105,
  'best_efficiency': 16680.355388592172,
  'best_iteration': 4}]

In [33]:
# robodesign best: rebuild the best morphology, train it with the refined
# reward function and report fitness and efficiency.
parameter =  [-0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1, -0.1, -0.15, -0.1, -0.05, 0.1, -0.1, 0.15, -0.15, 0.01, 0.005, 0.01, 0.007, 0.005, 0.01, 0.007, 0.005]
total_timesteps = 1e6  # single source for the training budget and the report text

# The XML path is defined once and used both for writing and for the copy below.
morphology = "results/Div_m25_r5/assets/GPTCheetah_refine_2_17_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine_4_17_1.py"

xml_file = cheetah_design(parameter)
filename = morphology
with open(filename, "w") as fp:
    fp.write(xml_file)

# Indices only label the saved model of this run.
morphology_index=9998
rewardfunc_index=9998

shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# Reload so the freshly copied reward function is the one patched into the environment.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# To evaluate an already trained model instead, point model_path at its checkpoint, e.g.
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# The report states the step count that was actually used (the old text said 3e6
# while the run trained for 1e6 steps).
run_label = f"robodesign best {total_timesteps:.0e} steps train\n"
logging.info(" " + run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
robodesign best 3e6 steps train

fitness:135.71162111252883
efficiency:495373.7128749287


In [32]:
# robodesign best
# Evaluate a previously trained checkpoint for the refined morphology and
# reward function, then report fitness and efficiency (fitness / material volume).

morphology = "results/Div_m25_r5/assets/GPTCheetah_refine_2_17_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTCheetah_refine_2_17_1.py"

# Indices that identify the saved checkpoint.
morphology_index = 999
rewardfunc_index = 999

# Step budget of the checkpoint being evaluated; drives both the checkpoint
# file name and the report label so they always agree.
total_timesteps = 1e6

parameter = [-0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1, -0.1, -0.15, -0.1, -0.05, 0.1, -0.1, 0.15, -0.15, 0.01, 0.005, 0.01, 0.007, 0.005, 0.01, 0.007, 0.005]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

# To retrain instead of loading the checkpoint:
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# str(1e6) renders as "1000000.0", which reproduces the "..._1000000.0steps" checkpoint name.
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_{total_timesteps}steps"
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# The label now reports the checkpoint's step count (it previously said "3e6").
run_label = f"robodesign best {int(total_timesteps)} steps train\n"
logging.info(" " + run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
robodesign best 3e6 steps train

fitness:123.73312278819954
efficiency:451649.8730081208


2025-04-07 00:50:01,521 - Initial morphology:results/Div_m25_r5/assets/GPTCheetah_17.xml
2025-04-07 00:50:01,522 - Initial parameter:[-0.15   0.15   0.45   0.05   0.05  -0.05  -0.05  -0.1   -0.1   -0.15
 -0.1   -0.05   0.1   -0.1    0.15  -0.15   0.01   0.005  0.01   0.007
  0.005  0.01   0.007  0.005]
2025-04-07 00:50:01,522 - Initial rewardfunc:results/Div_m25_r5/env/GPTrewardfunc_1.py
2025-04-07 00:50:01,522 - Initial fitness:35.46140062498762
2025-04-07 00:50:01,522 - Initial efficiency:129440.98336854727


In [19]:
# robodesign coarse best
# Train the coarse-stage best morphology / reward-function pair with
# stage='fine', then report fitness and efficiency (fitness / material volume).

morphology = "results/Div_m25_r5/assets/GPTCheetah_17.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_1.py"

# Indices passed to Train for this run.
morphology_index = 888
rewardfunc_index = 888

# Single source of truth for the training budget: used by Train and by the
# report label so the two cannot drift apart.
total_timesteps = 1e6

parameter = [-0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1, -0.1, -0.15,
             -0.1, -0.05, 0.1, -0.1, 0.15, -0.15, 0.01, 0.005, 0.01, 0.007,
             0.005, 0.01, 0.007, 0.005]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# Alternative: evaluate an existing checkpoint instead of retraining
# (path kept from an earlier run; adjust the step suffix as needed).
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# The label now reports the step budget actually used (it previously said "3e6").
run_label = f"robodesign coarse best {int(total_timesteps)} steps train\n"
logging.info(" " + run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
robodesign coarse best 3e6 steps train

fitness:95.46820595833779
efficiency:348477.44990000833


In [None]:
# human
# Evaluate a previously trained checkpoint for the "human" baseline and report
# fitness and efficiency (fitness / material cost).

morphology = "results/Div_m25_r5/assets/GPTCheetah_50.xml"
rewardfunc = "results/CheetahEureka_morphology/env/GPTrewardfunc_0.py"

# Indices that identify the saved checkpoint.
morphology_index = 777
rewardfunc_index = 777

# Step budget of the checkpoint being evaluated; drives both the checkpoint
# file name and the report label so they always agree.
total_timesteps = 1e6

# Hard-coded material cost used for the efficiency ratio.
# NOTE(review): results recorded elsewhere in this notebook list a material cost of
# 0.008319517436858105 for GPTCheetah_50.xml — confirm 0.021184 is the intended
# value for this morphology file.
material_list = [0.021184]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

# To retrain instead of loading the checkpoint:
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# str(1e6) renders as "1000000.0", which reproduces the "..._1000000.0steps" checkpoint name.
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_{total_timesteps}steps"
fitness, _ = Eva(model_path)
material = material_list[0]
efficiency = fitness / material

# The label now reports the checkpoint's step count (it previously said "3e6").
run_label = f"human {int(total_timesteps)} steps train\n"
logging.info(" " + run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17


2025-04-09 15:03:53,835 - morphology: 50, rewardfunc: 4, material cost: 0.008319517436858105 reward: 2049.3560760933597 fitness: 168.32404032342103 efficiency: 20232.428335048862

In [21]:
# Robodesign (w/o Morphology Design)
# Ablation run: train the selected morphology / reward-function pair with
# stage='fine', then report fitness and efficiency (fitness / material cost).

morphology = "results/Div_m25_r5/assets/GPTCheetah_50.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_4.py"

# Indices passed to Train for this run.
morphology_index = 666
rewardfunc_index = 666

# Single source of truth for the training budget: used by Train and by the
# report label so the two cannot drift apart.
total_timesteps = 1e6

# Hard-coded material cost used for the efficiency ratio.
# NOTE(review): results recorded elsewhere in this notebook list a material cost of
# 0.008319517436858105 for GPTCheetah_50.xml — confirm 0.021184 is the intended
# value for this morphology file.
material_list = [0.021184]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# Alternative: evaluate an existing checkpoint instead of retraining
# (path kept from an earlier run; adjust the step suffix as needed).
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = material_list[0]
efficiency = fitness / material

# The label now reports the step budget actually used (it previously said "3e6").
run_label = f"Robodesign (w/o Morphology Design) {int(total_timesteps)} steps train\n"
logging.info(" " + run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Morphology Design) 3e6 steps train

fitness:250.14266670867656
efficiency:11808.094161096891


2025-04-06 15:46:10,818 - morphology: 17, rewardfunc: 0, material cost: 0.0002739580595121185 reward: 222.37654595342295 fitness: 24.967713123138623 efficiency: 91136.99070435333

In [30]:
# Robodesign (w/o Reward Shaping)
# Ablation run: evaluate the existing coarse-stage checkpoint for morphology 17
# with reward function 0, then report fitness and efficiency
# (fitness / material volume).

morphology = "results/Div_m25_r5/assets/GPTCheetah_17.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_0.py"

# Indices of the coarse-stage checkpoint to load. The earlier placeholder
# assignments (555) were immediately overwritten by these values and have
# been removed.
morphology_index = 17
rewardfunc_index = 0

# Step count of the checkpoint being evaluated; drives both the checkpoint
# file name and the report label so they always agree.
total_timesteps = 5e5

parameter = [-0.15, 0.15, 0.45, 0.05, 0.05, -0.05, -0.05, -0.1, -0.1, -0.15,
             -0.1, -0.05, 0.1, -0.1, 0.15, -0.15, 0.01, 0.005, 0.01, 0.007,
             0.005, 0.01, 0.007, 0.005]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

# To retrain instead of loading the checkpoint:
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# str(5e5) renders as "500000.0", which reproduces the "..._500000.0steps" checkpoint name.
model_path = f"results/Div_m25_r5/coarse/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_{total_timesteps}steps"
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# One label for both sinks: the log previously said "1e6" and the console said
# "3e6", while the evaluated checkpoint was trained for 500000 steps.
run_label = f"Robodesign (w/o Reward Shaping) {int(total_timesteps)} steps train\n"
logging.info(" " + run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Reward Shaping) 3e6 steps train

fitness:26.528840834301896
efficiency:96835.40933800633


2025-04-07 15:03:56,961 - Final optimized result: rewardfunc_index4 morphology_index3
2025-04-07 15:03:56,961 -   Morphology: results/noDiv_m25_r5/assets/GPTCheetah_refine_4_3_0.xml
2025-04-07 15:03:56,961 -   Parameter: [-0.5, 0.5, 0.8, 0.2, 0.3, -0.4, 0.4, -0.5, 0.5, -0.6, -0.3, -0.5, 0.4, -0.6, 0.6, -0.7, 0.05, 0.04, 0.04, 0.03, 0.03, 0.04, 0.03, 0.03]
2025-04-07 15:03:56,961 -   Rewardfunc: results/noDiv_m25_r5/env/GPTCheetah_refine_4_3_1.py
2025-04-07 15:03:56,961 -   Fitness: 180.8154850762752
2025-04-07 15:03:56,961 -   Material: 0.02788783835590477
2025-04-07 15:03:56,961 -   Efficiency: 6483.667997809899

In [25]:
# Robodesign (w/o Diversity Reflection)
# Ablation run: train the final morphology / reward-function pair from the
# noDiv_m25_r5 results with stage='fine', then report fitness and efficiency
# (fitness / material volume).

morphology = "results/noDiv_m25_r5/assets/GPTCheetah_refine_4_3_0.xml"
rewardfunc = "results/noDiv_m25_r5/env/GPTCheetah_refine_4_3_1.py"

# Indices passed to Train for this run.
morphology_index = 333
rewardfunc_index = 333

# Single source of truth for the training budget: used by Train and by the
# report label so the two cannot drift apart.
total_timesteps = 1e6

parameter = [-0.5, 0.5, 0.8, 0.2, 0.3, -0.4, 0.4, -0.5, 0.5, -0.6, -0.3, -0.5, 0.4, -0.6, 0.6, -0.7, 0.05, 0.04, 0.04, 0.03, 0.03, 0.04, 0.03, 0.03]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# Alternative: evaluate an existing checkpoint instead of retraining
# (path kept from an earlier run; adjust the step suffix as needed).
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# The label now reports the step budget actually used (it previously said "3e6").
run_label = f"Robodesign (w/o Diversity Reflection) {int(total_timesteps)} steps train\n"
logging.info(" " + run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Diversity Reflection) 3e6 steps train

fitness:207.54673485196096
efficiency:7442.195131915505


2025-04-08 19:41:04,459 - iteration:2, morphology: 0, rewardfunc: 5, material cost: 0.021184 reward: 2999.6270393357754 fitness: 173.16370628185507 efficiency: 8174.26861224769

In [26]:
# eureka reward
# Baseline run: train the eureka morphology / reward-function pair with
# stage='fine', then report fitness and efficiency (fitness / material cost).

morphology = "results/eureka/assets/GPTCheetah_0.xml"
rewardfunc = "results/eureka/env/GPTrewardfunc_5_2.py"

# Indices passed to Train for this run.
morphology_index = 222
rewardfunc_index = 222

# Single source of truth for the training budget: used by Train and by the
# report label so the two cannot drift apart.
total_timesteps = 1e6

# Hard-coded material cost used for the efficiency ratio.
material_list = [0.021184]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# Alternative: evaluate an existing checkpoint instead of retraining
# (path kept from an earlier run; adjust the step suffix as needed).
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = material_list[0]
efficiency = fitness / material

# The label now reports the step budget actually used (it previously said "3e6").
# The leading space is kept from the original messages.
run_label = f" eureka reward {int(total_timesteps)} steps train\n"
logging.info(run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
 eureka reward 3e6 steps train

fitness:257.5534101158287
efficiency:12157.921550029676


2025-04-08 07:55:18,132 - morphology: 15, rewardfunc: 0, material cost: 0.009341494464891529 reward: 1228.9937270418675 fitness: 78.32647964015682 efficiency: 8384.79109895467

In [27]:
# eureka morphology
# Baseline run: train the CheetahEureka_morphology morphology / reward-function
# pair with stage='fine', then report fitness and efficiency
# (fitness / material volume).

morphology = "results/CheetahEureka_morphology/assets/GPTCheetah_15.xml"
rewardfunc = "results/CheetahEureka_morphology/env/GPTrewardfunc_0.py"

# Indices passed to Train for this run.
morphology_index = 111
rewardfunc_index = 111

# Single source of truth for the training budget: used by Train and by the
# report label so the two cannot drift apart.
total_timesteps = 1e6

parameter = [-0.63, 0.61, 0.94, 0.51, 0.41, -0.71, 0.65, -1.1, 0.81, -1.5,
             -0.56, -0.6, -0.68, -1.2, 0.53, -1.6, 0.029, 0.02, 0.018, 0.013,
             0.012, 0.018, 0.012, 0.012]

# Copy the selected files to the fixed working names; GPTrewardfunc.py is
# imported right below (GPTCheetah.xml is presumably read by the environment —
# TODO confirm).
shutil.copy(morphology, "GPTCheetah.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

# The import must come after the copy; reload so a module imported by an
# earlier cell picks up the new file contents.
import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload the module
from GPTrewardfunc import _get_rew
GPTCheetahEnv._get_rew = _get_rew

model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=total_timesteps)
# Alternative: evaluate an existing checkpoint instead of retraining
# (path kept from an earlier run; adjust the step suffix as needed).
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = compute_cheetah_volume(parameter)
efficiency = fitness / material

# The label now reports the step budget actually used (it previously said "3e6").
run_label = f"eureka morphology {int(total_timesteps)} steps train\n"
logging.info(run_label)
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(run_label)
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
eureka morphology 3e6 steps train

fitness:145.0851809530564
efficiency:15531.260174518671
