In [4]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

In [None]:
import prompts
class DGA:
    def __init__(self):
        api_key = "<api_key>"
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4-turbo"
        
    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTAnt_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        # env_path = os.path.join(os.path.dirname(__file__), "env", "ant_v5.py")
        # with open(env_path, "r") as f:
        #     env_content = f.read().rstrip()

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )

            diverse_code = self.extract_code(response.choices[0].message.content)
            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = ant_design(parameter)  
            filename = f"GPTAnt_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_ant_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = ant_design(initial_parameter['parameters'])  

        filename = f"GPTAnt_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_ant_volume(diverse_parameter['parameters']))
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = ant_design(diverse_parameter['parameters'])  
            filename = f"GPTAnt_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        reward_improve_prompts = prompts.reward_improve_prompts

        for rewardfunc_file, fitness in zip(rewardfunc_list, fitness_list):
            with open(rewardfunc_file, "r") as fp:
                reward_content = fp.read()
            reward_improve_prompts += f"\nreward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, "r") as fp:
            best_reward_content = fp.read()
        reward_improve_prompts += f"\nbest reward function:\n{best_reward_content}\nbest fitness: {max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content":reward_improve_prompts+ zeroshot_rewardfunc_format}
        ]
        print(messages)
        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTrewardfunc_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path
    
    

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = ant_design(parameter)  
        filename = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [6]:

folder_name = "results/Div_m25_r5"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 26
rewardfunc_nums = 6

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()



# return file list of morphology and reward function: [GPTAnt_{i}.xml] and [GPTAnt_{j}.py]



In [7]:
logging.info(f"start!")

# print configuration info

# enter coarse optimization stage

In [13]:
morphology_list = [f'results/Div_m25_r5/assets/GPTAnt_{i}.xml' for i in range(0,26) ]
rewardfunc_list = [f'results/Div_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,6)]

parameter_list = [[0.3, 0.1, 0.05, 0.15, 0.1, 0.1, 0.1, 0.03, 0.03, 0.03],
 [0.25, 0.2, 0.1, 0.1, 0.2, 0.15, 0.15, 0.02, 0.02, 0.02],
 [0.15, 0.25, 0.15, 0.25, 0.1, 0.2, 0.1, 0.04, 0.04, 0.04],
 [0.2, 0.05, 0.1, 0.05, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01],
 [0.4, 0.15, 0.2, 0.3, 0.15, 0.25, 0.2, 0.05, 0.05, 0.05],
 [0.1, 0.3, 0.2, 0.2, 0.3, 0.15, 0.1, 0.02, 0.02, 0.02],
 [0.5, 0.4, 0.3, 0.35, 0.25, 0.3, 0.25, 0.08, 0.06, 0.06],
 [0.2, 0.1, 0.2, 0.15, 0.1, 0.2, 0.15, 0.025, 0.025, 0.025],
 [0.35, 0.07, 0.12, 0.4, 0.2, 0.5, 0.3, 0.015, 0.015, 0.015],
 [0.45, 0.2, 0.05, 0.05, 0.2, 0.1, 0.05, 0.025, 0.025, 0.025],
 [0.6, 0.25, 0.2, 0.3, 0.45, 0.35, 0.4, 0.1, 0.08, 0.08],
 [0.15, 0.07, 0.08, 0.1, 0.05, 0.12, 0.06, 0.02, 0.02, 0.02],
 [0.5, 0.35, 0.25, 0.2, 0.3, 0.15, 0.2, 0.05, 0.05, 0.04],
 [0.1, 0.15, 0.05, 0.4, 0.15, 0.3, 0.2, 0.04, 0.04, 0.02],
 [0.25, 0.5, 0.15, 0.1, 0.25, 0.18, 0.12, 0.03, 0.02, 0.02],
 [0.2, 0.05, 0.1, 0.5, 0.05, 0.25, 0.05, 0.015, 0.015, 0.015],
 [0.35, 0.2, 0.2, 0.35, 0.2, 0.1, 0.1, 0.06, 0.06, 0.06],
 [0.05, 0.4, 0.2, 0.25, 0.2, 0.15, 0.15, 0.01, 0.01, 0.01],
 [0.1, 0.2, 0.3, 0.15, 0.25, 0.2, 0.2, 0.01, 0.015, 0.02],
 [0.55, 0.1, 0.05, 0.05, 0.1, 0.08, 0.03, 0.02, 0.02, 0.02],
 [0.3, 0.08, 0.04, 0.2, 0.1, 0.15, 0.07, 0.025, 0.03, 0.035],
 [0.4, 0.12, 0.18, 0.22, 0.08, 0.3, 0.15, 0.02, 0.015, 0.01],
 [0.25, 0.4, 0.3, 0.4, 0.3, 0.12, 0.12, 0.05, 0.04, 0.04],
 [0.2, 0.3, 0.15, 0.1, 0.05, 0.15, 0.2, 0.03, 0.02, 0.02],
[0.08, 0.32, 0.27, 0.16, 0.04, 0.14, 0.045, 0.02, 0.015, 0.015],
[0.25, 0.2, 0.2, 0.2, 0.2,0.4,0.4, 0.08, 0.08, 0.08 ]
                 ]


material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

params: [0.3, 0.1, 0.05, 0.15, 0.1, 0.1, 0.1, 0.03, 0.03, 0.03]
params: [0.25, 0.2, 0.1, 0.1, 0.2, 0.15, 0.15, 0.02, 0.02, 0.02]
params: [0.15, 0.25, 0.15, 0.25, 0.1, 0.2, 0.1, 0.04, 0.04, 0.04]
params: [0.2, 0.05, 0.1, 0.05, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01]
params: [0.4, 0.15, 0.2, 0.3, 0.15, 0.25, 0.2, 0.05, 0.05, 0.05]
params: [0.1, 0.3, 0.2, 0.2, 0.3, 0.15, 0.1, 0.02, 0.02, 0.02]
params: [0.5, 0.4, 0.3, 0.35, 0.25, 0.3, 0.25, 0.08, 0.06, 0.06]
params: [0.2, 0.1, 0.2, 0.15, 0.1, 0.2, 0.15, 0.025, 0.025, 0.025]
params: [0.35, 0.07, 0.12, 0.4, 0.2, 0.5, 0.3, 0.015, 0.015, 0.015]
params: [0.45, 0.2, 0.05, 0.05, 0.2, 0.1, 0.05, 0.025, 0.025, 0.025]
params: [0.6, 0.25, 0.2, 0.3, 0.45, 0.35, 0.4, 0.1, 0.08, 0.08]
params: [0.15, 0.07, 0.08, 0.1, 0.05, 0.12, 0.06, 0.02, 0.02, 0.02]
params: [0.5, 0.35, 0.25, 0.2, 0.3, 0.15, 0.2, 0.05, 0.05, 0.04]
params: [0.1, 0.15, 0.05, 0.4, 0.15, 0.3, 0.2, 0.04, 0.04, 0.02]
params: [0.25, 0.5, 0.15, 0.1, 0.25, 0.18, 0.12, 0.03, 0.02, 0.02]
params: [0.2

In [6]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

# print coarse optimization info

In [8]:
efficiency_matrix = np.array([[70.12901472277564, 3.8950108978919857, 36.38857403994179,
        304.4116553571402, 8.020361623253192, 2017.8516590285078,
        1.2415581186726266, 96.21844827615774, 6.819354811214885,
        17.44389647393199, 2.961451393360138, 48.904232779343815,
        2.677638986129011, 6.792677716341557, 0.1082644939638393,
        178.92171372457375, 0.5305996358522651, 2777.3886334384847,
        429.7795806064214, -3.9330932345003826, 9.731541015144876,
        110.05889487574825, 1.3136332680205476, 79.87286091934128,
        991.6757041414498, 15.258911788833924],
       [63.24032185908237, 7.058282876583571, 29.60644096432709,
        196.65112945445128, 10.592262912674121, 2328.240901485146,
        14.249609494065261, 91.78182496369733, 10.319916115600542,
        12.874172232006638, 4.474624645915348, 80.88527697953651,
        14.273630365535217, 15.933260766873094, 0.4474799031617528,
        195.83407065661854, 4.3861795690295775, 3859.6762180864503,
        553.0126300970579, -3.9431994809443767, 23.35683214655262,
        108.76369206964385, 1.5505922045510778, 164.08396086658882,
        3142.5781115169507, 16.029479624540965],
       [64.43158787667042, 8.141649213230023, 34.03087821710788,
        239.54529125822526, 14.986610488684708, 2909.1893716011145,
        2.1217889315831404, 96.52944382960696, 13.438467711297411,
        16.942951352754896, 3.522105055618922, 40.78230525373979,
        7.147495893693545, -0.7721329475976957, 0.15241059365665097,
        137.75465803123544, 5.304310064373623, 2629.51604113934,
        815.8128430519389, -3.9466165400134794, 24.732374620766524,
        44.57757014440089, 1.0228895460372942, 217.98712766789797,
        1864.959214669196, 18.03633557101774],
       [72.84319575651284, 12.491211315362388, 25.668535935748118,
        151.76169703159263, 11.29488863559477, 4846.415432733164,
        8.879156521089088, 78.3430023117369, 20.23957107441512,
        5.283127328548385, 6.675908657060871, 86.84222482929549,
        4.323941019522104, -8.155150851430042, 0.2709330317808642,
        242.33690702783886, 1.1519432201609145, 3927.5924351284716,
        776.8823685360268, -3.9006578615629777, 13.468037088493993,
        51.47757253378492, 1.2405526954218664, 90.24069454088942,
        2662.2264439903734, 17.47779279594377],
       [49.57189495703011, 3.798243661884369, 26.097717711919106,
        328.0945829646423, 4.146449930076461, 2983.9363184163567,
        14.596416580989677, 100.24855867883234, 15.567627652266964,
        5.166609489553714, 5.364329426378386, 36.79022401302955,
        4.897485971948941, 25.99373528155112, 0.43409586104502706,
        141.97469571998894, 0.22459944692637557, 2364.93178435668,
        197.7430958007017, -3.987572097722751, 12.964421558578492,
        105.83639972826796, 1.2150169196799276, 100.11125346604726,
        6537.446107236452, 15.376241750159544],
       [65.76430696628158, 9.332153046637826, 27.49097298587031,
        228.2097970494801, 13.050052751227259, 1552.2917121961602,
        0.9104570123156992, 48.89612689745986, 7.034496743763769,
        7.4182394798083555, 4.6163442674044255, 54.76806618536609,
        5.081310178591359, 50.756260826484144, 0.3998242407655389,
        78.19442201715779, 3.4217758073483178, 3026.527223369712,
        379.47465765581677, -4.014168091877938, 21.420420550229224,
        60.15076306164326, 1.1030938076895507, 70.36574217207183,
        2912.473029236097, 8.679565785234878]], dtype=object)


[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [65.76430696628158 9.332153046637826 27.49097298587031 228.2097970494801
  13.050052751227259 1552.2917121961602 0.9104570123156992
  48.89612689745986 7.034496743763769 7.4182394798083555
  4.6163442674044255 54.76806618536609 5.081310178591359
  50.756260826484144 0.3998242407655389 78.19442201715779
  3.4217758073483178 3026.527223369712 379.47465765581677
  -4.014168091877938 21.420420550229224 60.15076306164326
  1.1030938076895507 70.36574217207183 2912.473029236097
  8.679565785234878]]


In [7]:
fitness_matrix = np.array([[8.370409798250126, 0.2694031161199691, 1.2053973361422903,
        10.31310120379217, 2.428685533666429, 18.40640581562191,
        0.7657339535055587, 3.7940199974245616, 1.2484189790466274,
        6.7439026027340105, 3.1544368738885797, 0.7976274302844721,
        1.4960668379240454, 0.1361814876394193, 0.008067331072490797,
        6.465820806363407, 0.12091377508465335, 5.012471511630434,
        3.0494048294076372, -2.7487032997006002, 1.170796293860529,
        29.766605852576514, 0.12986853889942698, 3.1823596808905643,
        5.333683749926142, 2.779807766447567],
       [7.548194022492659, 0.4881946292979448, 0.9807343654599262,
        6.6623040353839755, 3.207495735630992, 21.237709262482284,
        8.788480901294038, 3.619078103535323, 1.8892665798378692,
        4.977223050665417, 4.7662173390834734, 1.319237864667737,
        7.975050093490848, 0.31943443289816575, 0.03334397451021867,
        7.076994637976672, 0.9995286352556212, 6.965721993081067,
        3.923777352478973, -2.7557662070083957, 2.8100475013102706,
        29.41630439378042, 0.15329479614758057, 6.537567020041629,
        16.902216860080227, 2.920186745231664],
       [7.690380317072141, 0.5631269657260631, 1.1272969890069966,
        8.115506709950298, 4.5381699482271705, 26.53699538744324,
        1.3086184228109425, 3.8062829612328115, 2.460179680414685,
        6.550234570389192, 3.751626005432334, 0.6651588930428338,
        3.9934926389057277, -0.015479935579204317, 0.01135687863089945,
        4.978137731471474, 1.2087534758156344, 4.745599548763368,
        5.788417448742299, -2.758154271816045, 2.975538252297298,
        12.05648086737562, 0.10112500500199245, 8.68522096315044,
        10.030600342445501, 3.2857877673333813],
       [8.694367116185184, 0.8639696629072092, 0.8502884670792923,
        5.141503989096024, 3.420261320151747, 44.20795196061497,
        5.476241123498292, 3.089167646685705, 3.705257367677796,
        2.042484957120455, 7.110949881453576, 1.416395610239175,
        2.4158987832031924, -0.163496727100688, 0.02018858063087549,
        8.757500601757037, 0.2625064060006432, 7.088293281449661,
        5.512194979465266, -2.7260353355043763, 1.6203320608981857,
        13.922660350083554, 0.12264364027941445, 3.595443365581854,
        14.318666741188752, 3.1840346695023825],
       [5.916767502101989, 0.2627100937890489, 0.8645054179592436,
        11.115450341612473, 1.255607095327473, 27.21882084790471,
        9.002375005629885, 3.9529325524856374, 2.849964895192011,
        1.9974385445350915, 5.7139004827812006, 0.6000480975032809,
        2.736353328356418, 0.5211296174748321, 0.03234666232696633,
        5.130640225012216, 0.05118203099814166, 4.268093076107038,
        1.4030418812960872, -2.786776699484189, 1.559742356242155,
        28.624586855280924, 0.12011911995399198, 3.988714226168625,
        35.16136366209836, 2.8011824714228344],
       [7.849450068302702, 0.6454695960500014, 0.910657987552753,
        7.731473783114316, 3.9517512824607057, 14.159668809681037,
        0.5615265504306113, 1.9280386097386324, 1.2878050029767925,
        2.8679306031393277, 4.917172239366828, 0.8932664804330582,
        2.839060713851651, 1.0175756005229313, 0.02979291181225311,
        2.8257672604141844, 0.779758979094023, 5.462102531734381,
        2.692477506866849, -2.8053637231153403, 2.577078897793551,
        16.268417539618518, 0.1090541664566055, 2.803569300346892,
        15.664606890398295, 1.5812087200637983]], dtype=object)

# configuration of fine optimization

In [10]:
efficiency_matrix_select = efficiency_matrix

# enter fine optimization stage

In [22]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果
coarse_best = [(5,25)]
for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)
    
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()
        
         # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list[morphology_index],  # 这本身已经是list结构，可以保留
            [efficiency_matrix_select[rewardfunc_index, morphology_index]],  # 👈 用 [] 包装成列表
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )


        shutil.copy(improved_morphology, "GPTAnt.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_ant_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material
        iteration +=1
        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True

            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            improved = False
            
            
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            [rewardfunc_list[rewardfunc_index]],
            [efficiency_matrix_select[rewardfunc_index, morphology_index]],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTAnt.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_ant_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material

        iteration +=1
        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True

            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
            

            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

params: [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]
ChatCompletion(id='chatcmpl-BVtOb10ctLwLHqRLXto2rzQRRt2Mj', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "parameters": [0.25, 0.18, 0.18, 0.22, 0.22, 0.42, 0.4, 0.07, 0.07, 0.07],\n  "desciption": "Optimized for reduced material cost while maintaining effective control over leg movement with slightly longer legs and ankles."\n}', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1746940809, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_7a53abb7a2', usage=CompletionUsage(completion_tokens=84, prompt_tokens=2512, total_tokens=2596, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))
[0.2

AttributeError: 'GPTAntEnv' object has no attribute 'last_x_velocity'

In [14]:
logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [23]:
final_optimized_results

[]

In [15]:
final_optimized_results

[{'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_24.xml',
  'best_parameter': [0.08,
   0.32,
   0.27,
   0.16,
   0.04,
   0.14,
   0.045,
   0.02,
   0.015,
   0.015],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_refine_4_24_0.py',
  'best_fitness': 56.989413719003714,
  'best_material': 0.0053784556056496475,
  'best_efficiency': 10595.869501858635,
  'best_iteration': 1},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_5.xml',
  'best_parameter': [0.1, 0.3, 0.2, 0.2, 0.3, 0.15, 0.1, 0.02, 0.02, 0.02],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 44.20795196061497,
  'best_material': 0.009121783424101478,
  'best_efficiency': 4846.415432733164,
  'best_iteration': 0},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_17.xml',
  'best_parameter': [0.05, 0.4, 0.2, 0.25, 0.2, 0.15, 0.15, 0.01, 0.01, 0.01],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_refine_3_17_0.py',
  'best_fitness': 7.2556478331806

In [28]:
best_efficiency

26274.418612610392

 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_4_2_2.xml',
  'best_parameter': [0.1,
   0.25,
   0.17,
   0.25,
   0.15,
   0.12,
   0.18,
   0.03,
   0.03,
   0.03],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_4.py',
  'best_fitness': 67.6709174730936,
  'best_material': 0.014709160909209498,
  'best_efficiency': 4600.5967227351775,
  'best_iteration': 3},