In [1]:
# Star imports come first so that the explicit imports below always win a name clash.
from design import *
from utils import *
from prompts import *

import importlib
import json
import logging
import os
import re
import shutil
import time

import numpy as np
from gymnasium.envs.robodesign.GPTCheetah import GPTCheetahEnv
from openai import OpenAI

In [None]:
import prompts
class DGA:
    """LLM-driven designer for cheetah morphologies and reward functions.

    Every ``generate_*`` / ``improve_*`` method sends a prompt (taken from the
    ``prompts`` module) to the OpenAI chat-completions API and stores the answer
    below ``folder_name``: reward functions as Python modules in ``env/`` and
    morphologies as XML files in ``assets/``.
    """

    _REWARD_SYSTEM_PROMPT = "You are a reinforcement learning reward function designer"
    _MORPHOLOGY_SYSTEM_PROMPT = "You are a helpful mujoco robot designer"

    def __init__(self):
        """Create the OpenAI client and select the chat model."""
        # Read the key from the environment so the secret does not have to be
        # pasted into the notebook; the literal placeholder remains the fallback.
        api_key = os.environ.get("OPENAI_API_KEY", "<api_key>")
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4o-mini"

    # ------------------------------------------------------------------ helpers

    def _chat(self, messages, **kwargs):
        """Send ``messages`` to the configured model and return the raw response."""
        return self.client.chat.completions.create(
            model=self.model, messages=messages, **kwargs
        )

    def _save_text(self, folder_name, subdir, filename, content):
        """Write ``content`` to ``folder_name/subdir/filename`` and return that path.

        The directory is created on demand and the file is always written as
        UTF-8, which is also the encoding Python assumes when importing modules.
        """
        directory = os.path.join(folder_name, subdir)
        os.makedirs(directory, exist_ok=True)
        file_path = os.path.join(directory, filename)
        with open(file_path, "w", encoding="utf-8") as fp:
            fp.write(content)
        return file_path

    def _reward_module_source(self, code):
        """Turn extracted reward code into the text of an importable module."""
        return "import numpy as np\n" + self.indent_code(code) + "\n"

    def _request_morphology(self, messages):
        """Ask for one JSON morphology answer and return it parsed."""
        response = self._chat(messages, response_format={'type': 'json_object'}, n=1)
        return json.loads(response.choices[0].message.content)

    def _store_morphology(self, index, parameters, folder_name):
        """Render ``parameters`` to ``assets/GPTCheetah_<index>.xml``; return the path."""
        xml_file = cheetah_design(parameters)
        return self._save_text(folder_name, "assets", f"GPTCheetah_{index}.xml", xml_file)

    # --------------------------------------------------------------- public API

    def extract_code(self, text):
        """Return the body of the first ```python fenced block in ``text``.

        Returns ``None`` when ``text`` is empty/``None`` (the API can return a
        message without content) or when it contains no such block.
        """
        if not text:
            return None
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` unchanged.

        The previous implementation split the text into lines and joined them
        again without altering any line, i.e. it was an identity. The method is
        kept as a hook so existing callers keep working.
        """
        return code

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Sample ``rewardfunc_nums`` reward functions in a single API call.

        Each answer that contains a python code block is saved as
        ``env/GPTCheetah_<i>.py``; answers without one are skipped.

        Returns:
            list[str]: paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": self._REWARD_SYSTEM_PROMPT},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format},
        ]
        responses = self._chat(messages, n=rewardfunc_nums)

        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                continue
            # Same module layout as the other reward writers (numpy import first),
            # and the file is written exactly once.
            file_path = self._save_text(
                folder_name, "env", f"GPTCheetah_{i}.py", self._reward_module_source(reward_code)
            )
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files

    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate reward functions one at a time, asking for diversity.

        The first request uses the plain prompt; each later request also sees
        every previously accepted reward function as assistant history plus the
        diversity prompt. Files are named ``env/GPTrewardfunc_<i>.py``.

        Returns:
            list[str]: paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": self._REWARD_SYSTEM_PROMPT},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format},
        ]

        # Initial reward function.
        response = self._chat(messages, n=1, timeout=10)

        rewardfunc_files = []
        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            file_path = self._save_text(
                folder_name, "env", "GPTrewardfunc_0.py", self._reward_module_source(initial_code)
            )
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
            # Only real code goes into the history: a ``None`` assistant message
            # would be rejected by the API on the next request.
            messages.append({"role": "assistant", "content": initial_code})

        # Diversified reward functions.
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            response = self._chat(diverse_messages, n=1)
            diverse_code = self.extract_code(response.choices[0].message.content)
            if not diverse_code:
                continue
            messages.append({"role": "assistant", "content": diverse_code})

            file_path = self._save_text(
                folder_name, "env", f"GPTrewardfunc_{i}.py", self._reward_module_source(diverse_code)
            )
            rewardfunc_files.append(file_path)
            print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        """Sample ``morphology_nums`` morphologies in a single API call.

        Answers whose ``parameters`` entry is missing, empty or not a list are
        skipped. The three returned lists only contain the accepted designs, so
        position ``k`` refers to the same design in all of them.

        Returns:
            tuple[list[str], list, list]: XML paths, material volumes
            (``compute_cheetah_volume``) and the raw parameter lists.
        """
        messages = [
            {"role": "system", "content": self._MORPHOLOGY_SYSTEM_PROMPT},
            {"role": "user", "content": morphology_prompts + morphology_format},
        ]
        responses = self._chat(
            messages, response_format={'type': 'json_object'}, n=morphology_nums
        )

        xml_files, material_list, parameter_list = [], [], []
        for i, choice in enumerate(responses.choices):
            payload = json.loads(choice.message.content)
            print(f"Response {i}:")
            # Dump the parsed object; dumping the raw string only re-quotes it.
            print(json.dumps(payload, indent=4))

            parameter = payload.get('parameters', [])
            if not isinstance(parameter, list) or not parameter:
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            file_path = self._store_morphology(i, parameter, folder_name)
            print(f"Successfully saved GPTCheetah_{i}.xml")
            xml_files.append(file_path)
            material_list.append(compute_cheetah_volume(parameter))
            parameter_list.append(parameter)

        return xml_files, material_list, parameter_list

    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate morphologies one at a time, asking for diversity.

        An initial design is always requested; ``morphology_nums - 1`` further
        designs are requested with all earlier answers as assistant history plus
        the diversity prompt. Files are named ``assets/GPTCheetah_<i>.xml``.

        Returns:
            tuple[list[str], list, list]: XML paths, material volumes and
            parameter lists, index-aligned.
        """
        material_list = []
        xml_files = []
        parameter_list = []

        messages = [
            {"role": "system", "content": self._MORPHOLOGY_SYSTEM_PROMPT},
            {"role": "user", "content": morphology_prompts + morphology_format},
        ]

        # Initial morphology.
        initial_parameter = self._request_morphology(messages)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_cheetah_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})
        logging.info(f"generate initial_parameter{initial_parameter['parameters']}")
        xml_files.append(self._store_morphology(0, initial_parameter['parameters'], folder_name))

        # Diversified morphologies.
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            diverse_parameter = self._request_morphology(diverse_messages)
            material_list.append(compute_cheetah_volume(diverse_parameter['parameters']))
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{diverse_parameter['parameters']}")
            xml_files.append(self._store_morphology(i, diverse_parameter['parameters'], folder_name))

        return xml_files, material_list, parameter_list

    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for a refined reward function based on measured fitness.

        The prompt lists every reward function in ``rewardfunc_list`` with its
        fitness, followed by the content of ``best_rewardfunc`` and the best
        fitness value.

        Returns:
            str: path of the written module
            (``env/GPTSwimmer_refine_<rewardfunc_index>_<morphology_index>_<iteration>.py``).

        Raises:
            ValueError: if the answer contains no python code block.
        """
        reward_improve_prompts = prompts.reward_improve_prompts

        for reward_filename, fitness in zip(rewardfunc_list, fitness_list):
            with open(reward_filename, 'r', encoding="utf-8", errors="replace") as f:
                reward_content = f.read()
            reward_improve_prompts += f"reward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, 'r', encoding="utf-8", errors="replace") as f:
            best_reward_content = f.read()

        reward_improve_prompts += f"This is best reward function, please carefully review it :\n{best_reward_content}\nbest fitness: {max(fitness_list)}"

        # Send the prompt assembled above. Previously the generic
        # ``rewardfunc_prompts`` was sent instead, so the fitness feedback never
        # reached the model. This mirrors ``improve_morphology``.
        messages = [
            {"role": "system", "content": self._REWARD_SYSTEM_PROMPT},
            {"role": "user", "content": reward_improve_prompts + rewardfunc_format},
        ]
        response = self._chat(messages)

        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            # Previously this fell through to an UnboundLocalError on ``file_path``.
            raise ValueError("improve_rewardfunc: the model answer contains no python code block")

        # NOTE(review): the "GPTSwimmer" prefix looks copied from another
        # environment; kept because other code may rebuild this file name.
        file_name = f"GPTSwimmer_refine_{rewardfunc_index}_{morphology_index}_{iteration}.py"
        return self._save_text(folder_name, "env", file_name, self._reward_module_source(reward_code))

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for a refined morphology based on measured fitness.

        Returns:
            tuple[str, list]: path of the written XML file
            (``assets/GPTCheetah_refine_<rewardfunc_index>_<morphology_index>_<iteration>.xml``)
            and the parameter list returned by the model.

        Raises:
            ValueError: if the answer has no non-empty ``parameters`` list.
        """
        morphology_improve_prompts = prompts.morphology_improve_prompts
        # One line per value: without the trailing newline a fitness number ran
        # straight into the next "parameter:" label. "fintess" typo fixed too.
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts += f"parameter:{parameter_content} \nfitness:{fitness}\n"
        morphology_improve_prompts += f"best parameter:{best_parameter} \nbest fitness:{max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": self._MORPHOLOGY_SYSTEM_PROMPT},
            {"role": "user", "content": morphology_improve_prompts + morphology_format},
        ]
        responses = self._chat(messages, response_format={'type': 'json_object'})

        parameter = json.loads(responses.choices[0].message.content).get('parameters', [])
        print(parameter)
        if not isinstance(parameter, list) or not parameter:
            raise ValueError(f"improve_morphology: invalid parameters in model answer: {parameter!r}")

        filename = f"GPTCheetah_refine_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = self._save_text(folder_name, "assets", filename, cheetah_design(parameter))

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

# Root folder of this run; the log file and all generated artefacts live below it.
folder_name = "results/noDiv_m25_r5"
# basicConfig opens the log file right away, so the folder has to exist first.
os.makedirs(folder_name, exist_ok=True)
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

# Best candidate found so far; filled in by the coarse optimization loop.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Size of the search grid: number of morphologies and reward functions.
morphology_nums = 25
rewardfunc_nums = 5

# One cell per (reward function, morphology) pair; None marks "not evaluated yet".
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()


# print configuration info

In [4]:
# Mark the beginning of the run in the log file.
logging.info("start!")

In [8]:
# Build a fresh designer and generate `morphology_nums` diversified morphologies.
# The call returns the XML file paths, the material volumes and the raw parameter lists.
# NOTE(review): the output recorded for this cell is a NameError for `param`, a name that
# does not occur in the visible DGA code — presumably captured from an earlier revision; confirm.
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

NameError: name 'param' is not defined

In [9]:
# Build a fresh designer and generate `rewardfunc_nums` diversified reward functions;
# the call returns the paths of the reward modules it wrote.
# NOTE(review): the recorded output lists files under results/Div_m50_r10, not the
# folder set in the configuration cell — presumably from a run with other settings; confirm.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)

initial Saved: results/Div_m50_r10\env\GPTrewardfunc_0.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_1.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_2.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_3.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_4.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_5.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_6.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_7.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_8.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_9.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_10.py


# enter coarse optimization stage

In [5]:
# Paths of the 25 morphology XML files and 5 reward modules of the noDiv_m25_r5 run.
morphology_list = [
    f'results/noDiv_m25_r5/assets/GPTCheetah_{idx}.xml' for idx in range(25)
]
rewardfunc_list = [
    f'results/noDiv_m25_r5/env/GPTrewardfunc_{idx}.py' for idx in range(5)
]

parameter_list = np.array([[-0.5,
  0.5,
  0.8,
  0.2,
  0.15,
  -0.3,
  0.15,
  -0.4,
  0.1,
  -0.45,
  0.2,
  -0.35,
  0.25,
  -0.4,
  0.15,
  -0.45,
  0.08,
  0.06,
  0.07,
  0.06,
  0.05,
  0.07,
  0.075,
  0.05],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.45,
  -0.3,
  0.5,
  -0.5,
  0.1,
  -0.3,
  0.4,
  -0.35,
  0.5,
  -0.6,
  0.2,
  -0.4,
  0.08,
  0.06,
  0.05,
  0.045,
  0.04,
  0.05,
  0.045,
  0.04],
 [-0.6,
  0.6,
  0.9,
  0.3,
  0.4,
  -0.2,
  0.3,
  -0.4,
  0.5,
  -0.6,
  0.3,
  -0.2,
  0.4,
  -0.4,
  0.5,
  -0.6,
  0.08,
  0.05,
  0.06,
  0.05,
  0.05,
  0.06,
  0.05,
  0.05],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.3,
  -0.4,
  0.4,
  -0.5,
  0.5,
  -0.6,
  -0.3,
  -0.5,
  0.4,
  -0.6,
  0.6,
  -0.7,
  0.05,
  0.04,
  0.04,
  0.03,
  0.03,
  0.04,
  0.03,
  0.03],
 [-0.6,
  0.6,
  1.2,
  0.4,
  0.2,
  -0.3,
  0.2,
  -0.5,
  0.1,
  -0.6,
  0.1,
  -0.2,
  0.1,
  -0.4,
  0.2,
  -0.5,
  0.07,
  0.05,
  0.05,
  0.04,
  0.04,
  0.05,
  0.04,
  0.03],
 [-0.6,
  0.5,
  0.9,
  0.1,
  0.55,
  -0.3,
  0.4,
  -0.45,
  0.2,
  -0.5,
  -0.55,
  -0.35,
  -0.35,
  -0.5,
  -0.15,
  -0.6,
  0.08,
  0.06,
  0.075,
  0.07,
  0.065,
  0.07,
  0.065,
  0.06],
 [-0.5,
  0.5,
  0.75,
  0.2,
  0.3,
  -0.4,
  0.45,
  -0.5,
  0.55,
  -0.6,
  -0.35,
  -0.45,
  -0.4,
  -0.55,
  -0.45,
  -0.65,
  0.08,
  0.05,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.5,
  0.5,
  0.6,
  0.1,
  0.4,
  -0.6,
  0.6,
  -1.2,
  0.7,
  -1.4,
  0.6,
  -0.6,
  0.7,
  -1.2,
  0.8,
  -1.4,
  0.07,
  0.05,
  0.06,
  0.05,
  0.045,
  0.06,
  0.05,
  0.045],
 [-0.6,
  0.5,
  0.8,
  0.2,
  0.3,
  -0.2,
  0.45,
  -0.45,
  0.55,
  -0.5,
  0.25,
  -0.25,
  0.4,
  -0.4,
  0.5,
  -0.6,
  0.08,
  0.07,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.6,
  0.5,
  0.9,
  0.2,
  0.3,
  -0.4,
  0.45,
  -0.5,
  0.6,
  -0.65,
  0.2,
  -0.45,
  0.4,
  -0.55,
  0.5,
  -0.6,
  0.07,
  0.05,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.5,
  0.5,
  1.0,
  0.3,
  0.6,
  -0.4,
  0.3,
  -0.6,
  0.15,
  -0.8,
  -0.6,
  -0.4,
  0.4,
  -0.6,
  0.2,
  -0.8,
  0.08,
  0.05,
  0.07,
  0.07,
  0.06,
  0.07,
  0.07,
  0.06],
 [-0.5,
  0.5,
  0.8,
  0.3,
  0.4,
  -0.3,
  0.4,
  -0.6,
  0.5,
  -0.7,
  0.6,
  -0.4,
  0.6,
  -0.6,
  0.7,
  -0.7,
  0.05,
  0.04,
  0.04,
  0.03,
  0.03,
  0.04,
  0.03,
  0.03],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.4,
  -0.3,
  0.35,
  -0.35,
  0.35,
  -0.4,
  -0.4,
  -0.25,
  0.3,
  -0.35,
  0.3,
  -0.4,
  0.09,
  0.07,
  0.08,
  0.07,
  0.06,
  0.08,
  0.07,
  0.06],
 [-0.6,
  0.5,
  1.0,
  0.3,
  0.45,
  -0.3,
  0.25,
  -0.5,
  0.0,
  -0.7,
  -0.2,
  -0.45,
  0.1,
  -0.6,
  0.4,
  -0.8,
  0.08,
  0.05,
  0.04,
  0.03,
  0.03,
  0.04,
  0.03,
  0.03],
 [-0.5,
  0.5,
  1.0,
  0.2,
  0.15,
  -0.35,
  0.25,
  -0.55,
  0.4,
  -0.75,
  -0.1,
  -0.35,
  0.1,
  -0.55,
  0.2,
  -0.75,
  0.12,
  0.1,
  0.11,
  0.11,
  0.1,
  0.12,
  0.12,
  0.09],
 [-0.5,
  0.5,
  0.8,
  0.1,
  0.4,
  -0.3,
  0.4,
  -0.3,
  0.3,
  -0.2,
  -0.4,
  -0.3,
  0.3,
  -0.3,
  0.2,
  -0.2,
  0.06,
  0.05,
  0.05,
  0.04,
  0.03,
  0.05,
  0.04,
  0.03],
 [-0.6,
  0.6,
  0.9,
  0.2,
  0.35,
  -0.5,
  0.35,
  -0.8,
  0.2,
  -0.85,
  -0.4,
  -0.4,
  -0.2,
  -0.75,
  -0.05,
  -0.8,
  0.08,
  0.05,
  0.07,
  0.06,
  0.05,
  0.07,
  0.06,
  0.05],
 [-0.5,
  0.5,
  0.8,
  0.25,
  0.4,
  -0.6,
  0.7,
  -1.0,
  0.9,
  -1.2,
  -0.4,
  -0.65,
  0.65,
  -0.85,
  0.8,
  -1.1,
  0.08,
  0.05,
  0.06,
  0.05,
  0.04,
  0.05,
  0.04,
  0.03],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.45,
  -0.35,
  0.35,
  -0.45,
  0.15,
  -0.25,
  -0.3,
  -0.25,
  0.2,
  -0.35,
  0.1,
  -0.2,
  0.07,
  0.05,
  0.06,
  0.04,
  0.03,
  0.05,
  0.04,
  0.03],
 [-0.6,
  0.6,
  0.9,
  0.2,
  0.4,
  -0.3,
  0.5,
  -0.4,
  0.6,
  -0.3,
  0.3,
  -0.3,
  0.4,
  -0.4,
  0.5,
  -0.3,
  0.14,
  0.12,
  0.1,
  0.08,
  0.07,
  0.1,
  0.08,
  0.07],
 [-0.3,
  0.3,
  0.6,
  0.2,
  0.2,
  -0.4,
  0.4,
  -0.6,
  0.5,
  -0.8,
  0.2,
  -0.4,
  0.4,
  -0.6,
  0.5,
  -0.8,
  0.15,
  0.12,
  0.1,
  0.08,
  0.06,
  0.1,
  0.08,
  0.06],
 [-0.5,
  0.5,
  0.8,
  0.1,
  0.4,
  -0.35,
  0.35,
  -0.35,
  0.15,
  -0.2,
  0.3,
  -0.3,
  0.2,
  -0.25,
  0.1,
  -0.15,
  0.06,
  0.04,
  0.05,
  0.04,
  0.03,
  0.05,
  0.04,
  0.03],
 [-0.5,
  0.5,
  1.0,
  0.2,
  0.2,
  -0.4,
  0.35,
  -0.8,
  0.4,
  -0.9,
  -0.4,
  -0.3,
  0.35,
  -0.55,
  0.5,
  -0.7,
  0.08,
  0.05,
  0.06,
  0.05,
  0.04,
  0.035,
  0.07,
  0.06,
  0.055,
  0.05],
 [-0.5,
  0.5,
  0.8,
  0.3,
  0.4,
  -0.6,
  0.5,
  -1.2,
  0.7,
  -1.5,
  -0.3,
  -0.7,
  0.45,
  -1.1,
  0.6,
  -1.4,
  0.07,
  0.05,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.5,
  0.5,
  1.0,
  0.2,
  0.6,
  -0.4,
  0.6,
  -0.8,
  0.6,
  -1.0,
  0.7,
  -0.5,
  0.7,
  -0.7,
  0.7,
  -0.9,
  0.07,
  0.05,
  0.045,
  0.04,
  0.035,
  0.045,
  0.04,
  0.035],
 [-0.5,
  0.5,
  0.8,
  0.1,
  0.3,
  -0.4,
  0.3,
  -0.6,
  0.4,
  -0.8,
  -0.3,
  -0.5,
  0.3,
  -0.6,
  0.4,
  -0.8,
  0.05,
  0.04,
  0.04,
  0.03,
  0.03,
  0.04,
  0.03,
  0.03],
 [-0.6,
  0.6,
  0.9,
  0.5,
  0.6,
  -0.4,
  0.75,
  -0.8,
  0.9,
  -1.0,
  0.45,
  -0.5,
  0.75,
  -0.7,
  0.85,
  -0.9,
  0.08,
  0.05,
  0.04,
  0.03,
  0.03,
  0.04,
  0.03,
  0.03],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.4,
  -0.8,
  0.45,
  -1.2,
  0.6,
  -1.5,
  0.35,
  -0.85,
  0.5,
  -1.1,
  0.7,
  -1.3,
  0.09,
  0.07,
  0.08,
  0.075,
  0.07,
  0.08,
  0.075,
  0.07],
 [-0.6,
  0.5,
  1.0,
  0.3,
  0.35,
  -0.5,
  0.3,
  -0.6,
  0.1,
  -0.7,
  0.45,
  -0.4,
  0.3,
  -0.5,
  0.2,
  -0.6,
  0.07,
  0.05,
  0.08,
  0.07,
  0.06,
  0.08,
  0.07,
  0.06],
 [-0.5,
  0.5,
  0.7,
  0.2,
  0.3,
  -0.2,
  0.45,
  -0.35,
  0.55,
  -0.45,
  -0.25,
  -0.2,
  0.3,
  -0.35,
  0.45,
  -0.4,
  0.06,
  0.04,
  0.05,
  0.05,
  0.045,
  0.05,
  0.045,
  0.04],
 [-0.5,
  0.5,
  0.8,
  0.1,
  0.3,
  -0.4,
  0.15,
  -0.6,
  0.1,
  -0.7,
  -0.3,
  -0.4,
  0.2,
  -0.6,
  0.1,
  -0.7,
  0.07,
  0.05,
  0.06,
  0.05,
  0.04,
  0.05,
  0.04,
  0.03],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.6,
  -0.4,
  1.0,
  -0.6,
  1.2,
  -0.7,
  0.3,
  -0.5,
  0.7,
  -0.6,
  0.9,
  -0.7,
  0.08,
  0.05,
  0.04,
  0.03,
  0.04,
  0.04,
  0.03,
  0.03],
 [-0.5,
  0.5,
  1.0,
  1.2,
  0.6,
  -0.6,
  0.7,
  -1.2,
  0.75,
  -1.5,
  -0.6,
  -0.65,
  0.7,
  -1.25,
  0.8,
  -1.6,
  0.08,
  0.05,
  0.06,
  0.045,
  0.04,
  0.05,
  0.045,
  0.035],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.3,
  -0.4,
  0.4,
  -0.5,
  0.5,
  -0.6,
  -0.3,
  -0.4,
  0.5,
  -0.7,
  0.6,
  -0.8,
  0.05,
  0.03,
  0.04,
  0.03,
  0.03,
  0.04,
  0.035,
  0.03],
 [-0.5,
  0.5,
  0.8,
  0.1,
  0.25,
  -0.15,
  0.25,
  -0.3,
  0.3,
  -0.35,
  -0.25,
  -0.15,
  0.35,
  -0.25,
  0.3,
  -0.3,
  0.06,
  0.04,
  0.05,
  0.04,
  0.03,
  0.05,
  0.04,
  0.03],
 [-0.5,
  0.5,
  0.9,
  0.2,
  0.3,
  -0.6,
  0.1,
  -0.8,
  0.05,
  -0.9,
  -0.3,
  -0.6,
  -0.1,
  -0.8,
  -0.05,
  -0.9,
  0.08,
  0.05,
  0.07,
  0.07,
  0.05,
  0.06,
  0.06,
  0.04],
 [-0.4,
  0.3,
  0.6,
  0.15,
  0.2,
  -0.3,
  0.1,
  -0.35,
  0.15,
  -0.4,
  -0.25,
  -0.3,
  0.2,
  -0.35,
  0.25,
  -0.4,
  0.08,
  0.05,
  0.06,
  0.04,
  0.042,
  0.06,
  0.045,
  0.04],
 [-0.5,
  0.5,
  1.0,
  0.5,
  0.2,
  -0.6,
  0.4,
  -1.2,
  0.45,
  -1.4,
  0.2,
  -0.6,
  0.45,
  -1.2,
  0.5,
  -1.4,
  0.08,
  0.05,
  0.05,
  0.045,
  0.04,
  0.05,
  0.045,
  0.04],
 [-0.6,
  0.6,
  1.2,
  0.2,
  0.4,
  -0.4,
  0.3,
  -0.6,
  0.2,
  -0.8,
  0.5,
  -0.5,
  0.3,
  -0.7,
  0.1,
  -0.9,
  0.08,
  0.05,
  0.04,
  0.04,
  0.03,
  0.04,
  0.03,
  0.02],
 [-0.5,
  0.5,
  0.9,
  0.3,
  0.2,
  -0.3,
  0.1,
  -0.4,
  0.15,
  -0.5,
  -0.2,
  -0.25,
  0.1,
  -0.35,
  0.15,
  -0.6,
  0.06,
  0.04,
  0.05,
  0.04,
  0.04,
  0.05,
  0.04,
  0.04],
 [-0.5,
  0.5,
  1.0,
  0.2,
  0.25,
  -0.4,
  0.4,
  -0.5,
  0.1,
  -0.6,
  0.23,
  -0.3,
  0.25,
  -0.35,
  0.1,
  -0.4,
  0.08,
  0.07,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.6,
  0.5,
  1.0,
  0.4,
  0.3,
  -0.25,
  0.3,
  -0.5,
  0.2,
  -0.3,
  0.4,
  -0.25,
  0.5,
  -0.4,
  0.3,
  -0.2,
  0.12,
  0.08,
  0.1,
  0.08,
  0.07,
  0.1,
  0.08,
  0.07],
 [-0.6,
  0.3,
  0.5,
  0.2,
  0.3,
  -0.45,
  0.2,
  -0.5,
  0.2,
  -0.5,
  -0.2,
  -0.4,
  0.1,
  -0.45,
  0.1,
  -0.4,
  0.07,
  0.05,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.4,
  -0.3,
  0.35,
  -0.4,
  0.2,
  -0.25,
  0.3,
  -0.35,
  0.25,
  -0.3,
  0.15,
  -0.2,
  0.06,
  0.04,
  0.05,
  0.045,
  0.035,
  0.05,
  0.045,
  0.035],
 [-0.5,
  0.5,
  1.0,
  0.4,
  0.3,
  -0.7,
  0.3,
  -1.4,
  0.3,
  -1.8,
  -0.3,
  -0.6,
  -0.3,
  -1.4,
  -0.3,
  -1.7,
  0.35,
  0.2,
  0.15,
  0.15,
  0.15,
  0.15,
  0.15,
  0.1],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.3,
  -0.4,
  0.2,
  -0.6,
  0.1,
  -0.8,
  -0.3,
  -0.4,
  0.2,
  -0.6,
  0.1,
  -0.9,
  0.06,
  0.04,
  0.05,
  0.04,
  0.03,
  0.05,
  0.04,
  0.03],
 [-0.5,
  0.5,
  1.0,
  0.2,
  0.35,
  -0.1,
  0.3,
  -0.15,
  0.4,
  -0.2,
  -0.25,
  -0.15,
  -0.2,
  -0.25,
  -0.3,
  -0.3,
  0.08,
  0.05,
  0.05,
  0.04,
  0.03,
  0.05,
  0.04,
  0.03],
 [-0.5,
  0.5,
  0.8,
  0.2,
  0.5,
  -0.4,
  0.6,
  -0.7,
  0.65,
  -0.75,
  -0.5,
  -0.45,
  -0.65,
  -0.7,
  -0.7,
  -0.75,
  0.08,
  0.05,
  0.07,
  0.06,
  0.05,
  0.07,
  0.06,
  0.05],
 [-0.6,
  0.5,
  0.8,
  0.2,
  0.35,
  -0.4,
  0.45,
  -0.5,
  0.55,
  -0.6,
  -0.45,
  -0.4,
  0.5,
  -0.5,
  0.55,
  -0.6,
  0.07,
  0.05,
  0.06,
  0.05,
  0.04,
  0.06,
  0.05,
  0.04],
 [-0.5,
  0.5,
  0.8,
  1.2,
  0.3,
  -0.6,
  0.5,
  -1.0,
  0.6,
  -1.3,
  -0.3,
  -0.5,
  0.5,
  -0.9,
  0.6,
  -1.1,
  0.07,
  0.05,
  0.07,
  0.06,
  0.05,
  0.07,
  0.06,
  0.05]],dtype=object)
# Material volume of every parameter row, in the same order as parameter_list.
# NOTE(review): the shape shown below is (50,) rather than (50, 24) because the rows are
# not all the same length — the row at index 22 lists 26 values instead of 24; confirm
# that compute_cheetah_volume / cheetah_design handle that row as intended.
material_list = list(map(compute_cheetah_volume, parameter_list))
parameter_list.shape

(50,)

In [6]:
# Record the run configuration before the coarse stage starts.
logging.info('folder_name:%s', folder_name)
logging.info('morphology_nums:%s', morphology_nums)
logging.info('rewardfunc_nums:%s', rewardfunc_nums)
logging.info('parameter_list:%s', parameter_list)
logging.info('morphology_list:%s', morphology_list)
logging.info('material_list:%s', material_list)
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [7]:
# Coarse stage: train and evaluate every (reward function, morphology) pair and
# remember the pair with the highest fitness.
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        print(i, rewardfunc)
        print(j, morphology)
        # Stage the candidate under fixed file names in the working directory
        # (presumably GPTCheetahEnv loads GPTCheetah.xml from there — confirm).
        shutil.copy(morphology, "GPTCheetah.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # The module file was just (re)written: drop the finders' directory caches,
        # then reload so the new reward function replaces the previous one.
        importlib.invalidate_caches()
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTCheetahEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # To re-evaluate an existing checkpoint instead of training again:
        # model_path = f"results/Div_m50_r10/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Track the reward of the best pair as well; it was never assigned, so
            # the stage summary always logged "Reward: None".
            best_reward = reward

0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
0 results/noDiv_m25_r5/assets/GPTCheetah_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
1 results/noDiv_m25_r5/assets/GPTCheetah_1.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
2 results/noDiv_m25_r5/assets/GPTCheetah_2.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
8

In [16]:
# Fitness values entered as a literal; this assignment replaces whatever fitness_matrix
# held before (presumably the numbers were copied from a finished coarse run — confirm).
# Layout: 5 rows x 25 columns, indexed like the coarse loop writes it:
# fitness_matrix[reward function index][morphology index].
fitness_matrix = np.array([[13.64835462166776, 23.816542025827633, 9.719735693876222,
        170.838052435609, 19.860928825226598, 46.450322891924564,
        124.13986581092472, 124.14019935483357, 78.9331824229044,
        170.9534111307339, 183.84591839423396, 90.03683663969154,
        130.7502731189885, 20.29092374108581, 46.04899099414209,
        100.28791375656515, 10.499833937512848, 208.3136403059711,
        118.4893074023224, 97.40614054114535, 158.34022515904763,
        83.22651096191899, 0.10011092260757369, 172.1905606869142,
        63.793308105494496],
       [4.82755281297902, 28.669615943870834, 13.44507725326504,
        177.81334526143814, 23.172028191303983, 47.67406259659447,
        97.06825620263376, 130.18066987038625, 82.38538498698591,
        169.06968291467885, 159.40377166681301, 71.12614955471501,
        128.0181987901255, 24.49918841260679, 38.201558142922146,
        113.4879171484064, 14.656210513721284, 218.14876530077615,
        121.7287127967212, 98.1365393924705, 156.4750260890247,
        81.68115797260988, 23.364611227477965, 79.28701045546785,
        73.53820226673254],
       [0.3042161392141907, 20.03870697003718, 17.606110344888112,
        172.65749396963446, 22.248832517855213, 76.64728652244968,
        111.89713217187239, 130.8313494447441, 80.35861842539043,
        166.5914265185078, 161.68354338234604, 75.44992117025579,
        109.55233545394434, 8.817982502121252, 86.79515533966543,
        105.35768853283327, 58.816656008133776, 211.29340658076467,
        121.36353233969761, 94.43109127684689, 144.27997146614058,
        82.5379942583087, 57.6024849630065, 205.20804096973083,
        76.99276945752548],
       [14.552334465403726, 18.072999641029323, 17.2688674555898,
        172.96809894261395, 25.01821343073793, 112.98047539583388,
        106.11429392438131, 123.17797581837384, 81.82500235230087,
        162.8118938347244, 107.35390032665582, 80.53073308650322,
        88.91295740520977, 39.36573610296652, 29.464297818244226,
        97.17552526673653, 31.346047013211322, 213.20207440718926,
        122.9125876319281, 98.07839849907063, 155.94406802124934,
        81.3789018541996, 60.35346808161573, 153.13806122257003,
        69.26923432231224],
       [9.474326530422193, 25.203614242125617, 16.445011790629415,
        170.46254888973834, 21.100583483155972, 63.05648501909296,
        105.02911482962426, 127.37410754771149, 81.84639609494938,
        164.4395847966947, 143.2918862861484, 82.98741818518415,
        131.27025424727765, 47.997269815333496, 43.812272021448194,
        95.7055473556953, 47.63877311480782, 219.63731423414237,
        119.86577404753162, 94.25654752898117, 158.39687711891452,
        82.93804996065943, 83.7412460429174, 195.89045925232148,
        50.128923370147675]], dtype=object)

In [25]:
# Efficiency values entered as a literal; this assignment replaces whatever
# efficiency_matrix held before (presumably copied from a finished coarse run — confirm).
# Layout: 5 rows x 25 columns, indexed like the coarse loop writes it:
# efficiency_matrix[reward function index][morphology index], where the loop
# computes efficiency as fitness / material.
efficiency_matrix = np.array([[192.34874374834308, 421.11920183307427, 143.09633930793447,
        6125.897972276399, 429.1510732354531, 484.5020075149803,
        1968.7313141253737, 1457.5942231921101, 1178.726162991685,
        2861.2147499740117, 1825.8444926201576, 3027.8542119198355,
        1340.7162023049239, 431.9440599081178, 186.26235838171698,
        2849.1892400353777, 117.08314652257623, 2874.894440924962,
        2828.9551770890052, 437.86237975027257, 854.456901749037,
        2670.497415930324, 1.3609986910967329, 2159.1975476486477,
        1139.9596914519072],
       [68.03557972336941, 506.930257551699, 197.9417339369379,
        6376.017495231546, 500.6966620166025, 497.26627494467846,
        1539.4032718283481, 1528.5185085935339, 1230.2786451965464,
        2829.6871488511697, 1583.1001370213714, 2391.9055749227155,
        1312.7014515037952, 521.5277058070997, 154.52048264124028,
        3224.202601303974, 163.43070311931316, 3010.6270128863416,
        2906.296608666948, 441.1456858892013, 844.3916626923404,
        2620.911519357795, 317.6397187271135, 994.2259195445497,
        1314.0968677667945],
       [4.287373374144518, 354.32029871665094, 259.2014864552682,
        6191.139369289883, 480.74843011097073, 799.4726813224984,
        1774.5740792334916, 1536.1584736786726, 1200.0125049105814,
        2788.2090425762417, 1605.7414263553628, 2537.309951465674,
        1123.3520790414054, 187.71324611765436, 351.0754520491041,
        2993.2220272464915, 655.8617207044626, 2916.017592947447,
        2897.5778544846753, 424.48886814717434, 778.5830623871234,
        2648.4049113117244, 783.1004309677606, 2573.222928434884,
        1375.8285362735264],
       [205.0886960822596, 319.5630656753675, 254.2365136992061,
        6202.277018935422, 540.5886722980246, 1178.4475054487689,
        1682.8641787274153, 1446.2962594748594, 1221.9103309779398,
        2724.9517224015653, 1066.1728548816423, 2708.17288195873,
        911.7154384796477, 838.0000876528364, 119.17936704379885,
        2760.7659847909326, 349.53827243297707, 2942.358731797984,
        2934.561849788767, 440.884329574256, 841.5264350782311,
        2611.21299692985, 820.499790859117, 1920.287180228516,
        1237.8121989627903],
       [133.52340677719155, 445.6451277201654, 242.10750798475954,
        6112.43319450917, 455.93728910547804, 657.7132660556842,
        1665.6543480954379, 1495.565210227495, 1222.229808330937,
        2752.1940766663033, 1423.0868093121808, 2790.7888933715813,
        1346.04810033435, 1021.7442957789701, 177.21511235315373,
        2719.0037714758296, 531.2176826748964, 3031.170175817457,
        2861.818584998322, 423.7042549761112, 854.7626147038926,
        2661.24153881188, 1138.4544591800388, 2456.3843542761842,
        895.7828605368715]], dtype=object)


# Mean and standard deviation over every entry of the efficiency matrix.
mean = efficiency_matrix.mean()
std = efficiency_matrix.std()

print("平均值：", mean)
print("标准差：", std)



平均值： 1536.9433777496843
标准差： 1364.463208046991


In [19]:
# Coordinates of cells that were never filled in. The element-wise `== None` is
# intentional: `is None` would test the array object itself.
is_missing = efficiency_matrix == None  # noqa: E711
none_coords = np.argwhere(is_missing)
print(none_coords)

[]


In [20]:
# Keep at most the first 10 rows (reward functions) and 50 columns (morphologies);
# slicing past the actual shape simply returns everything that exists.
efficiency_matrix_select = efficiency_matrix[0:10, 0:50]

efficiency_matrix_select

array([[192.34874374834308, 421.11920183307427, 143.09633930793447,
        6125.897972276399, 429.1510732354531, 484.5020075149803,
        1968.7313141253737, 1457.5942231921101, 1178.726162991685,
        2861.2147499740117, 1825.8444926201576, 3027.8542119198355,
        1340.7162023049239, 431.9440599081178, 186.26235838171698,
        2849.1892400353777, 117.08314652257623, 2874.894440924962,
        2828.9551770890052, 437.86237975027257, 854.456901749037,
        2670.497415930324, 1.3609986910967329, 2159.1975476486477,
        1139.9596914519072],
       [68.03557972336941, 506.930257551699, 197.9417339369379,
        6376.017495231546, 500.6966620166025, 497.26627494467846,
        1539.4032718283481, 1528.5185085935339, 1230.2786451965464,
        2829.6871488511697, 1583.1001370213714, 2391.9055749227155,
        1312.7014515037952, 521.5277058070997, 154.52048264124028,
        3224.202601303974, 163.43070311931316, 3010.6270128863416,
        2906.296608666948, 441.14568

# print coarse optimization info

In [None]:
# Close out the coarse stage in the log: the best design found so far,
# followed by the raw search data, then the banner opening the fine stage.
logging.info('_________________________________end coarse optimization stage_________________________________')
logging.info(
    f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, "
    f"best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, "
    f"Material cost: {best_material}, Reward: {best_reward}"
)
logging.info(f"folder_name:{folder_name}")
logging.info(f"parameter_list:{parameter_list}")
logging.info(f"fitness_matrix:{fitness_matrix}")
logging.info(f"efficiency_matrix:{efficiency_matrix}")
logging.info('_________________________________enter fine optimization stage_________________________________')

# Fine optimization setup: select the top 10% of coarse candidates by efficiency

In [21]:
# Collect every non-None efficiency together with its
# (rewardfunc_index, morphology_index) coordinate. Iterating over the array's
# shape (rather than len(matrix[0])) keeps this safe for an empty selection.
n_rows, n_cols = efficiency_matrix_select.shape
all_values_with_coords = [
    ((i, j), efficiency_matrix_select[i, j])
    for i in range(n_rows)
    for j in range(n_cols)
    if efficiency_matrix_select[i, j] is not None
]

# Rank candidates from highest to lowest efficiency.
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# Keep the top 10% of candidates, but always at least one.
top_k = max(1, int(len(sorted_values) * 0.1))

efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

# The ranking is by efficiency, so label the log entries accordingly.
logging.info(f"efficiency_coarse_best {efficiency_coarse_best}")
logging.info(f"efficiency_coarse_best values {sorted_values[:top_k]}")


In [22]:
# Seeds for the fine stage: the efficiency-ranked
# (rewardfunc_index, morphology_index) pairs selected above.
coarse_best = efficiency_coarse_best
coarse_best

[(1, 3),
 (3, 3),
 (2, 3),
 (0, 3),
 (4, 3),
 (1, 15),
 (4, 17),
 (0, 11),
 (1, 17),
 (2, 15),
 (3, 17),
 (3, 18)]

# enter fine optimization stage

In [23]:
# Fine optimization: for every coarse seed, alternately refine the morphology
# and the reward function, keeping a candidate only when its efficiency
# (fitness / material) beats the best seen so far for that seed. The search
# for a seed stops as soon as one refinement step fails to improve.
final_optimized_results = []  # best result found for each coarse_best seed

for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    # Start from the coarse-stage scores of this (reward function, morphology) pair.
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_cheetah_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}")
    logging.info(f"Initial rewardfunc:{rewardfunc}")
    logging.info(f"Initial fitness:{best_fitness}")
    logging.info(f"Initial efficiency:{best_efficiency}")
    iteration = 0  # number of accepted improvements; also passed to the designer

    while True:
        improved = False  # set when the current step beats best_efficiency

        designer = DGA()

        # -------- Step 1: refine the morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        # Install the candidate morphology with the current best reward function.
        shutil.copy(improved_morphology, "GPTCheetah.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")

        # GPTrewardfunc.py was just overwritten, so reload the module to pick
        # up the new _get_rew before patching it onto the environment class.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTCheetahEnv._get_rew = _get_rew

        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_cheetah_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration += 1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- Morphology step did not help: stop refining this seed --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break

        # Reset the flag so the check after the reward step reflects that step
        # alone. Without this, `improved` is always True here and the
        # "Not improved Reward!" branch below can never run.
        improved = False

        # -------- Step 2: refine the reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        # Install the candidate reward function with the current best morphology.
        shutil.copy(best_morphology, "GPTCheetah.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")

        # Same reload-and-patch sequence as in the morphology step.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTCheetahEnv._get_rew = _get_rew

        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        # The morphology is unchanged in this step, so the material cost is
        # computed from the current best parameters.
        improved_material = compute_cheetah_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration += 1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- Reward step did not help: stop refining this seed --------
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break

    # Record the best design found for this coarse seed.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

[-0.5, 0.5, 0.8, 0.2, 0.3, -0.4, 0.4, -0.5, 0.5, -0.6, -0.3, -0.5, 0.4, -0.6, 0.6, -0.7, 0.05, 0.04, 0.04, 0.03, 0.03, 0.04, 0.03, 0.03]
Successfully saved GPTCheetah_refine_1_3_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[-0.5, 0.5, 0.8, 0.2, 0.35, -0.4, 0.35, -0.5, 0.5, -0.6, 0.35, -0.4, 0.4, -0.5, 0.5, -0.6, 0.05, 0.04, 0.04, 0.03, 0.03, 0.04, 0.03, 0.03]
Successfully saved GPTCheetah_refine_3_3_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[-0.5, 0.5, 0.8, 0.2, 0.3, -0.4, 0.4, -0.5, 0.5, -0.6, -0.3, -0.5, 0.4, -0.6, 0.6, -0.7, 0.05, 0.04, 0.04, 0.03, 0.03, 0.04, 0.03, 0.03]
Successfully saved GPTCheetah_refine_2_3_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [None]:

# Write the full list of per-seed fine-optimization results to the log.
logging.info(str(final_optimized_results))

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [24]:
# Display the per-seed fine-optimization results collected by the loop above.
final_optimized_results

[{'best_morphology': 'results/noDiv_m25_r5/assets/GPTCheetah_3.xml',
  'best_parameter': [-0.5,
   0.5,
   0.8,
   0.2,
   0.3,
   -0.4,
   0.4,
   -0.5,
   0.5,
   -0.6,
   -0.3,
   -0.5,
   0.4,
   -0.6,
   0.6,
   -0.7,
   0.05,
   0.04,
   0.04,
   0.03,
   0.03,
   0.04,
   0.03,
   0.03],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_1.py',
  'best_fitness': 177.81334526143814,
  'best_material': 0.02788783835590477,
  'best_efficiency': 6376.017495231546,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTCheetah_3.xml',
  'best_parameter': [-0.5,
   0.5,
   0.8,
   0.2,
   0.3,
   -0.4,
   0.4,
   -0.5,
   0.5,
   -0.6,
   -0.3,
   -0.5,
   0.4,
   -0.6,
   0.6,
   -0.7,
   0.05,
   0.04,
   0.04,
   0.03,
   0.03,
   0.04,
   0.03,
   0.03],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 172.96809894261395,
  'best_material': 0.02788783835590477,
  'best_efficiency': 6202.277018935422,
  'best_iter