In [5]:
# Standard-library modules used throughout the notebook are imported explicitly
# instead of relying on the star imports below to leak them into the namespace.
# The original relative order of the existing imports is kept on purpose:
# star imports can shadow names, so reordering them could change behaviour.
import logging
import os
import re
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

In [None]:
import prompts
class DGA:
    def __init__(self):
        api_key = "<api_key>"
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4-turbo"

    def extract_code(self, text):
        """Return the body of the first ```python fenced block found in ``text``.

        Args:
            text: Raw message content of a chat completion. ``None`` or an
                empty string is accepted and treated as "no code".

        Returns:
            The code between the fences with surrounding whitespace stripped,
            or ``None`` when ``text`` is empty or contains no ```python block.
        """
        if not text:
            return None
        # Tolerate trailing blanks after the language tag and Windows (CRLF)
        # line endings around the fenced body; DOTALL lets the body span lines.
        match = re.search(r'```python[ \t]*\r?\n(.*?)\r?\n[ \t]*```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Split ``code`` on newlines and join it back together.

        Despite the name, no indentation is added: blank and non-blank lines
        are both passed through untouched, so the returned text equals the
        input text.

        Args:
            code: Source text to process.

        Returns:
            The re-joined source text.
        """
        rebuilt = []
        for row in code.split("\n"):
            # Whitespace-only rows and regular rows are kept exactly as they are.
            rebuilt.append(row)
        return "\n".join(rebuilt)

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Sample several reward functions in one request and save each to disk.

        A single chat-completion request asks for ``rewardfunc_nums`` choices.
        Every choice that contains a ```python block is written to
        ``<folder_name>/env/GPTAnt_<i>.py``, where ``i`` is the index of the
        choice in the response.

        Args:
            rewardfunc_nums: Number of completions to request (``n``).
            folder_name: Run folder; files are written to its ``env`` sub-folder.

        Returns:
            List of the file paths that were written. Choices without an
            extractable code block are skipped, so the list may be shorter
            than ``rewardfunc_nums``.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )

        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                # No fenced python block in this completion: nothing to save.
                continue

            full_code = self.indent_code(reward_code) + "\n"
            file_path = os.path.join(folder_name, "env", f"GPTAnt_{i}.py")
            # One write is enough; the content does not change between writes.
            with open(file_path, "w") as fp:
                fp.write(full_code)
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate reward functions one at a time, asking for diversity.

        The first request uses the plain zero-shot prompt. Each following
        request appends ``rewardfunc_div_prompts`` and is sent together with
        all earlier assistant replies, so the model can see what it has
        already proposed (the same scheme ``generate_morphology_div`` uses).
        Without that history every follow-up request would be identical.

        Files are written to ``<folder_name>/env/GPTrewardfunc_<i>.py`` with an
        ``import numpy as np`` line prepended.

        Args:
            rewardfunc_nums: Total number of reward functions to request.
            folder_name: Run folder; files are written to its ``env`` sub-folder.

        Returns:
            List of the file paths that were written. A reply without an
            extractable code block is skipped, leaving a gap in the numbering.
        """
        # env_path = os.path.join(os.path.dirname(__file__), "env", "ant_v5.py")
        # with open(env_path, "r") as f:
        #     env_content = f.read().rstrip()

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        rewardfunc_files = []

        # Generate the initial reward function.
        # NOTE(review): only this first request carries an explicit 10 s
        # timeout; the follow-up requests below use the client default.
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )
        initial_reply = response.choices[0].message.content

        initial_code = self.extract_code(initial_reply)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        if initial_reply:
            # Remember the reply so later requests can be asked to differ from it.
            messages.append({"role": "assistant", "content": initial_reply})

        # Generate the additional, diversified reward functions.
        for i in range(1, rewardfunc_nums):
            # The diversity instruction is added only for this request; the
            # stored history keeps just the base prompt and assistant replies.
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_reply = response.choices[0].message.content

            diverse_code = self.extract_code(diverse_reply)
            if diverse_code:
                reward_code = "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

            if diverse_reply:
                messages.append({"role": "assistant", "content": diverse_reply})

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        """Sample several morphologies in one request and save each as XML.

        A single JSON-mode chat-completion request asks for ``morphology_nums``
        choices. Each choice is expected to be a JSON object with a
        ``parameters`` list. Valid choices are turned into an XML string with
        ``ant_design`` and written to ``<folder_name>/assets/GPTAnt_<i>.xml``,
        where ``i`` is the index of the choice in the response.

        Choices that are not valid JSON, or whose ``parameters`` entry is
        missing, empty or not a list, are reported and skipped *before* any
        volume computation or XML generation is attempted.

        Args:
            morphology_nums: Number of completions to request (``n``).
            folder_name: Run folder; files are written to its ``assets`` sub-folder.

        Returns:
            Tuple ``(xml_files, material_list, parameter_list)``. The three
            lists contain only the valid choices and are index-aligned:
            entry ``k`` of each list describes the same morphology.
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        xml_files = []
        material_list = []
        parameter_list = []

        # Parse and validate every choice, keeping the three result lists in step.
        for i, choice in enumerate(responses.choices):
            content = choice.message.content
            print(f"Response {i}:")
            try:
                payload = json.loads(content)
            except (TypeError, ValueError):
                # TypeError: content is None; ValueError: malformed JSON text.
                print(content)
                print(f"Skipping invalid parameter {i}: response is not valid JSON")
                continue
            print(json.dumps(payload, indent=4))

            parameter = payload.get('parameters') if isinstance(payload, dict) else None
            if not isinstance(parameter, list) or not parameter:
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            material = compute_ant_volume(parameter)
            xml_file = ant_design(parameter)
            filename = f"GPTAnt_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")

            xml_files.append(file_path)
            material_list.append(material)
            parameter_list.append(parameter)

        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate morphologies one at a time, asking for diversity.

        The first design is requested with the base prompt. Every further
        design is requested with ``morphology_div_prompts`` appended to the
        conversation, which also contains all earlier assistant replies.
        Each design is saved to ``<folder_name>/assets/GPTAnt_<i>.xml``.

        Args:
            morphology_nums: Total number of designs wanted. The initial design
                is always generated, even when this value is below 1.
            folder_name: Run folder; files are written to its ``assets`` sub-folder.

        Returns:
            Tuple ``(xml_files, material_list, parameter_list)`` with one entry
            per generated design, in generation order.
        """
        xml_files = []
        material_list = []
        parameter_list = []

        # Conversation history: base prompt plus every assistant reply so far.
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        def _generate_one(index, request_messages, label):
            # Request one design, record it in the result lists and the
            # conversation history, and write its XML file.
            response = self.client.chat.completions.create(
                model=self.model,
                messages=request_messages,
                response_format={'type': 'json_object'},
                n=1
            )
            design = json.loads(response.choices[0].message.content)
            parameters = design['parameters']

            parameter_list.append(parameters)
            material_list.append(compute_ant_volume(parameters))
            messages.append({"role": "assistant", "content": json.dumps(design)})
            logging.info(f"generate {label}{parameters}")

            xml_text = ant_design(parameters)
            file_path = os.path.join(folder_name, "assets", f"GPTAnt_{index}.xml")
            with open(file_path, "w") as fp:
                fp.write(xml_text)
            xml_files.append(file_path)

        # Generate the initial morphology.
        _generate_one(0, messages, "initial_parameter")

        # Generate the additional, diversified designs.
        for i in range(1, morphology_nums):
            # The diversity instruction is sent with this request only; it is
            # not stored in the running history.
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            _generate_one(i, diverse_messages, "diverse_parameter")

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for a refined reward function and save it to disk.

        The user prompt is ``prompts.reward_improve_prompts`` followed by every
        ``(reward function, fitness)`` pair, the best reward function, the best
        fitness (``max(fitness_list)``) and ``rewardfunc_format``. This history
        prompt is what is sent to the model, mirroring ``improve_morphology``.

        Args:
            best_rewardfunc: Best reward function found so far.
            rewardfunc_list: Candidate reward functions, paired with ``fitness_list``.
            fitness_list: Fitness value of each candidate.
            folder_name: Run folder; the file is written to its ``env`` sub-folder.
            step: Refinement step, used in the file name.
            rewardfunc_index: Reward-function index, used in the file name.
            morphology_index: Morphology index, used in the file name.

        Returns:
            Path of the written
            ``GPTAnt_refine_<step>_<rewardfunc_index>_<morphology_index>.py``
            file, or ``None`` when the reply contains no ```python block.
        """
        reward_improve_prompts = prompts.reward_improve_prompts
        for reward_content, fitness in zip(rewardfunc_list, fitness_list):
            reward_improve_prompts = reward_improve_prompts + f"reward function:{reward_content} \n" + f"fintess:{fitness}"
        reward_improve_prompts = reward_improve_prompts + f"best reward function:{best_rewardfunc} \n" + f"best fintess:{max(fitness_list)}"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            # Send the history prompt built above, not the plain generation prompt.
            {"role": "user", "content": reward_improve_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if not reward_code:
            # Nothing to save; report it instead of failing on an unbound path.
            print("No reward function code found in the response")
            return None

        full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
        file_name = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
        file_path = os.path.join(folder_name, "env", file_name)
        with open(file_path, "w") as fp:
            fp.write(full_code)

        return file_path
    
    

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for a refined morphology and save it as XML.

        The user prompt is ``prompts.morphology_improve_prompts`` followed by
        every ``(parameter, fitness)`` pair, the best parameter set, the best
        fitness (``max(fitness_list)``) and ``morphology_format``.

        Args:
            best_parameter: Best parameter set found so far.
            parameter_list: Candidate parameter sets, paired with ``fitness_list``.
            fitness_list: Fitness value of each candidate.
            folder_name: Run folder; the file is written to its ``assets`` sub-folder.
            step: Refinement step, used in the file name.
            rewardfunc_index: Reward-function index, used in the file name.
            morphology_index: Morphology index, used in the file name.

        Returns:
            Tuple ``(file_path, parameter)``: the written XML path and the
            ``parameters`` value of the reply (``[]`` when the key is absent).
        """
        # Assemble the prompt from its pieces: header, history, best-so-far.
        prompt_pieces = [prompts.morphology_improve_prompts]
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            prompt_pieces.append(f"parameter:{parameter_content} \n" + f"fintess:{fitness}")
        prompt_pieces.append(f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}")
        user_prompt = "".join(prompt_pieces)

        conversation = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": user_prompt + morphology_format}
        ]

        reply = self.client.chat.completions.create(
            model=self.model,
            messages=conversation,
            response_format={'type': 'json_object'},
        )
        print(reply)

        reply_payload = json.loads(reply.choices[0].message.content)
        parameter = reply_payload.get('parameters', [])
        print(parameter)

        filename = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.xml"
        xml_text = ant_design(parameter)
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_text)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [7]:

folder_name = "results/Random_m25_r5"

# Create the run folder and the sub-folders DGA writes into ("assets" for XML
# files, "env" for reward functions). logging.basicConfig below opens a file
# inside the run folder and would fail if the folder did not exist yet.
for subfolder in ("assets", "env"):
    os.makedirs(os.path.join(folder_name, subfolder), exist_ok=True)

log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

# Best-so-far bookkeeping, updated by the optimization stages.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 25
rewardfunc_nums = 5

# One row per reward function, one column per morphology; None marks a
# (reward function, morphology) pair that has not been evaluated yet.
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()



# Generate the morphology files [GPTAnt_{i}.xml] and the reward-function files [GPTrewardfunc_{j}.py]; each generator returns the list of saved file paths



In [8]:
# Mark the beginning of the run in the log file.
logging.info("start!")

# print configuration info

In [7]:
# Fresh designer, then request `morphology_nums` diversified ant designs.
# Returns the XML paths, the material volume and the parameter list per design.
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(
    morphology_nums=morphology_nums,
    folder_name=folder_name,
)

params: [0.2, 0.3, 0.06, 0.2, 0.15, 0.1, 0.05, 0.02, 0.02, 0.015]
params: [0.25, 0.35, 0.08, 0.25, 0.2, 0.15, 0.1, 0.03, 0.025, 0.02]
params: [0.15, 0.2, 0.05, 0.15, 0.1, 0.08, 0.04, 0.015, 0.015, 0.01]
params: [0.3, 0.45, 0.1, 0.4, 0.3, 0.25, 0.15, 0.05, 0.04, 0.03]
params: [0.18, 0.25, 0.04, 0.12, 0.09, 0.06, 0.03, 0.02, 0.018, 0.015]
params: [0.35, 0.5, 0.1, 0.25, 0.05, 0.15, 0.02, 0.04, 0.03, 0.025]
params: [0.45, 0.6, 0.15, 0.35, 0.2, 0.25, 0.1, 0.07, 0.06, 0.05]
params: [0.2, 0.1, 0.3, 0.2, 0.4, 0.15, 0.35, 0.015, 0.025, 0.02]
params: [0.22, 0.18, 0.07, 0.1, 0.05, 0.12, 0.06, 0.025, 0.02, 0.015]
params: [0.1, 0.15, 0.02, 0.3, 0.1, 0.05, 0.02, 0.01, 0.015, 0.01]
params: [0.4, 0.25, 0.06, 0.2, 0.05, 0.3, 0.15, 0.03, 0.025, 0.02]
params: [0.25, 0.1, 0.2, 0.15, 0.3, 0.1, 0.2, 0.02, 0.03, 0.02]
params: [0.3, 0.15, 0.1, 0.05, 0.2, 0.08, 0.04, 0.02, 0.015, 0.01]
params: [0.5, 0.2, 0.12, 0.25, 0.08, 0.3, 0.1, 0.04, 0.03, 0.025]
params: [0.15, 0.25, 0.08, 0.12, 0.07, 0.15, 0.09, 0.015, 0.

In [8]:
# Fresh designer, then request `rewardfunc_nums` diversified reward functions.
# Returns the paths of the saved reward-function files.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(
    rewardfunc_nums=rewardfunc_nums,
    folder_name=folder_name,
)

initial Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_0.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_1.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_2.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_3.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_4.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_5.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_6.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_7.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_8.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_9.py


In [9]:
# Sanity check: (number of reward functions, number of morphologies).
# NOTE(review): the saved output (10, 50) does not match the 5 x 25 matrix
# built in the configuration cell - presumably left over from an earlier run.
np.shape(efficiency_matrix)

(10, 50)

# enter coarse optimization stage

In [9]:
# Rebuild the file lists of an existing run from the configured run folder and
# counts, so they cannot drift from `folder_name`, `morphology_nums` and
# `rewardfunc_nums` set in the configuration cell.
morphology_list = [f'{folder_name}/assets/GPTAnt_{i}.xml' for i in range(morphology_nums)]
rewardfunc_list = [f'{folder_name}/env/GPTrewardfunc_{i}.py' for i in range(rewardfunc_nums)]

parameter_list = [[0.1418985963760664, 0.24067589283686852, -0.002487863169200605, 0.11703047852431661, 0.1521769854171877, 0.3473240691588858, 0.4198248765815852, 0.08460476105730537, 0.24297941711040827, 0.14085526853463573], [0.38443790953667273, 0.3043376886535561, 0.1663941343958204, 0.09341513572140314, 0.1378217394071231, 0.2370761872054709, 0.46344531398083644, 0.18837501990610162, 0.1371871690957255, 0.09640786418983754], [0.31731447451395395, 0.17321644557806182, 0.013564286048548424, 0.07182404675869253, 0.21844181850779076, 0.33998381471015704, 0.4997400572040301, 0.06580079909123056, 0.04533983489425701, 0.05454148220248588], [0.20722537062234048, 0.08492045614623132, 0.2750487982373061, 0.22113955462420537, 0.1419815178126996, 0.19113764652003523, 0.4916347255172901, 0.054542516990266195, 0.14906580411023362, 0.026655300988224703], [0.18618770675834923, 0.31912034945155693, 0.18698237454091582, 0.2623311553853732, 0.14651671953895085, 0.430549730182229, 0.3232438421326786, 0.18037310243326388, 0.038858827037886914, 0.14488125604946656], [0.23544110140293162, 0.17442004167723196, 0.19957154811075806, 0.2185207744243966, 0.3340628784940034, 0.36657122240638274, 0.40234308453326745, 0.03911940479789043, 0.02133212735206609, 0.030926224294213808], [0.14144029040112155, 0.2868150449583742, 0.15488401824683928, 0.18831519172978836, 0.3529218294977929, 0.4792179460954108, 0.4084269747918979, 0.006422475051009749, 0.03958199178263045, 0.11255060989081953], [0.24347862512174157, 0.22143661251987906, 0.12011712560319501, 0.19716656976321753, 0.12955779047127758, 0.23253422504904508, 0.2849956694374818, 0.026041012846232184, 0.03480552911627784, 0.11052368473684478], [0.39265102824734155, 0.26474538516437673, 0.1688581768353017, 0.3937117745426201, 0.2774426752763817, 0.42309062452490803, 0.4711890069102859, 0.08399604280398205, 0.12636056485601682, 0.0925740363560684], [0.16852360646358253, 0.1456526383792233, 0.33748197320762996, 0.11365154226873764, 
0.267767805853173, 0.32365854863099297, 0.2506765585613514, 0.03421787811314476, 0.1500991383303097, 0.13585936491592526], [0.2378826021092336, 0.33173177233826245, 0.2523463529869027, 0.26878999598064646, 0.14307218993901427, 0.39022474128980095, 0.28631476350705, 0.047318396061365606, 0.030534209113116266, 0.05291543961626083], [0.2040454952436318, 0.17644785009675718, 0.2385423544466968, 0.07102498964695728, 0.018418936403631403, 0.47679722542568603, 0.4068482227132462, 0.21486974663854114, 0.2591377979425282, 0.2625483498588877], [0.35014955466233455, 0.18622486833843524, 0.20898314126318956, 0.18134603021577295, 0.16634711245663297, 0.41298237011397343, 0.33667690337158696, 0.026201102191661566, 0.07079478797372184, 0.006744156817945601], [0.32909159759707424, 0.10829026561940151, 0.05080535887025045, 0.25345382047387677, 0.42924619274672443, 0.42151734081393966, 0.5010799837616928, 0.0710309138332388, 0.23608445008141382, 0.060965078855256025], [0.21092274142269568, 0.3313535750949689, 0.18524608839163098, 0.4135898530033844, 0.23134133638794183, 0.5807537518949656, 0.36823476665049254, 0.1316766371409342, 0.00693388135313322, 0.02082908987118387], [0.16070108187694968, 0.3566778813295166, 0.2564514442247764, 0.07806930813259792, 0.10529202813378158, 0.4986314442125751, 0.38067324663014995, 0.09348926309068255, 0.10735592835199965, 0.14479770583765966], [0.17760684208465496, 0.3421497522002473, 0.27122950413211677, 0.18339434492518145, 0.20726449815381856, 0.33003767694314495, 0.4924527970425448, 0.12075677308029811, 0.09954337914774704, 0.03282864434008144], [0.4569488596293795, -0.03380807015570961, 0.36308182676208306, 0.04637584145758622, 0.09915643500200577, 0.4802616679274313, 0.4858413847017101, 0.04554267139519268, 0.19383226795082892, 0.0893880873332293], [0.20228354519414626, 0.17873301605955386, 0.18023732096305764, 0.2648117204293425, 0.30581995622077535, 0.5100005405020374, 0.4409851035221484, 0.018981691786824698, 0.0030906138676422418, 
0.18283055654880065], [0.24369388701135133, 0.3259489609167783, 0.3413607059607338, 0.0734295802671241, 0.19289994169134966, 0.40106971322225565, 0.539680245687538, 0.24980960867978735, 0.03223817422246501, 0.19600610293637777], [0.17622851340299428, 0.28054399475121794, 0.12573330986775044, 0.28665919010066876, 0.32704936596542267, 0.26046023668905505, 0.30901497836515807, 0.016160627558811744, 0.08117613306721512, 0.1396083586000371], [0.14386461574470294, 0.1359917387307294, 0.07200941641921937, 0.09738594661011518, 0.13213504786397606, 0.5041570592400195, 0.4014641510621919, 0.09376192737433635, 0.18785199073574427, 0.06537629366196611], [0.2065574993038762, 0.11141287379847431, 0.1650858323211418, 0.22841579202123255, 0.30262110338612064, 0.3573558905417927, 0.2798977157903329, 0.24470176437549174, 0.1045417027350808, 0.07088473237192934], [0.16403321210479138, 0.07413850913173861, 0.14835685137665294, 0.13151181056242128, 0.08355760138812032, 0.42106725041863285, 0.49994757935619927, 0.05997872786778154, 0.2466993532116712, 0.09825113257832332], [0.4478171572214204, 0.16822696933271705, 0.15700929122947943, 0.3216764799605139, 0.1146609519187504, 0.3496766259437236, 0.4987401860976383, 0.12607427017864725, 0.14509920552352634, 0.05957292594946533]]

# Material volume of every morphology, in the same order as `parameter_list`.
material_list = list(map(compute_ant_volume, parameter_list))
len(material_list)

params: [0.1418985963760664, 0.24067589283686852, -0.002487863169200605, 0.11703047852431661, 0.1521769854171877, 0.3473240691588858, 0.4198248765815852, 0.08460476105730537, 0.24297941711040827, 0.14085526853463573]
params: [0.38443790953667273, 0.3043376886535561, 0.1663941343958204, 0.09341513572140314, 0.1378217394071231, 0.2370761872054709, 0.46344531398083644, 0.18837501990610162, 0.1371871690957255, 0.09640786418983754]
params: [0.31731447451395395, 0.17321644557806182, 0.013564286048548424, 0.07182404675869253, 0.21844181850779076, 0.33998381471015704, 0.4997400572040301, 0.06580079909123056, 0.04533983489425701, 0.05454148220248588]
params: [0.20722537062234048, 0.08492045614623132, 0.2750487982373061, 0.22113955462420537, 0.1419815178126996, 0.19113764652003523, 0.4916347255172901, 0.054542516990266195, 0.14906580411023362, 0.026655300988224703]
params: [0.18618770675834923, 0.31912034945155693, 0.18698237454091582, 0.2623311553853732, 0.14651671953895085, 0.430549730182229, 

25

In [10]:
# Record the inputs of the coarse stage in the log, one "name:value" line each.
for _label, _value in (
    ("folder_name", folder_name),
    ("morphology_nums", morphology_nums),
    ("rewardfunc_nums", rewardfunc_nums),
    ("parameter_list", parameter_list),
    ("morphology_list", morphology_list),
    ("material_list", material_list),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [14]:
# Coarse stage: evaluate (reward function, morphology) pairs and keep track of
# the pair with the highest fitness.
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        # Resume filter: only reward function 0 with morphologies 17 and up is
        # evaluated; every other pair is skipped.
        if i not in [0] or j <17:
            continue
        print(i, rewardfunc)
        print(j, morphology)
        # Stage the candidate files under fixed names; GPTrewardfunc.py is the
        # module imported just below.
        shutil.copy(morphology, "GPTAnt.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # The import has to happen after the copy, and the reload makes sure
        # the freshly copied file replaces any previously loaded version.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        env_name = "GPTAntEnv"
        # Training is skipped here; the model saved by an earlier run is evaluated.
        # model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        model_path = f"{folder_name}/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Keep the reward of the best pair as well; the stage summary logs it.
            best_reward = reward

0 results/Random_m25_r5/env/GPTrewardfunc_0.py
17 results/Random_m25_r5/assets/GPTAnt_17.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
18 results/Random_m25_r5/assets/GPTAnt_18.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
19 results/Random_m25_r5/assets/GPTAnt_19.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
5

In [15]:
# Display the efficiency matrix (rows: reward functions, columns: morphologies)
# as filled in by the coarse-stage loop.
efficiency_matrix

array([[14.552962917760262, 4.633590463340446, 78.03991675212843,
        4.0776970552132035, 11.424204140586358, 24.265814133087538,
        -4.050142901290013, -1.0272759944902874, 13.584684930726016,
        1.4563486122009868, 11.431442024562298, 3.9111544446101956,
        4.264662741773204, 9.899803279431548, 5.507655031522047,
        16.259627660970338, 21.35977152305919, 5.7481199284901825,
        22.246178256094453, 11.594593702507002, 13.179450420758721,
        4.2529197225677695, 2.5043586804833553, 11.93669251701015,
        13.682809885903723],
       [9.746922900931724, 5.420153605286378, 54.351072686306175,
        1.3378909716904235, 14.781524221371198, 28.512990090885378,
        0.6610118194132797, 12.524570071445915, 11.63572857756077,
        10.930432121440086, 7.369699326705191, 3.1871414770251585,
        10.27530101351043, 11.011577742002492, 11.035527349203997,
        20.07473783372044, 18.543253030095727, 7.426398744476952,
        17.259430136801154, 8.19

In [16]:
# Coordinates of the entries that are still None (not evaluated yet).
# The elementwise `== None` comparison is intentional: `is None` would test the
# array object itself instead of its elements.
none_coords = np.transpose(np.nonzero(efficiency_matrix == None))
print(none_coords)

[[ 0 17]
 [ 0 18]
 [ 0 19]
 [ 0 20]
 [ 0 21]
 [ 0 22]
 [ 0 23]
 [ 0 24]]


# print coarse optimization info

In [15]:
# Close the coarse stage in the log: best pair found, then the full matrices.
logging.info('_________________________________end coarse optimization stage_________________________________')
_stage1_fields = [
    f"Stage1: Final best morphology: {best_morphology}",
    f"Fitness: {best_fitness}",
    f"best_efficiency: {best_efficiency}",
    f"best reward function: {best_rewardfunc}",
    f"Material cost: {best_material}",
    f"Reward: {best_reward}",
]
logging.info(", ".join(_stage1_fields))
for _label, _value in (
    ("folder_name", folder_name),
    ("parameter_list", parameter_list),
    ("fitness_matrix", fitness_matrix),
    ("efficiency_matrix", efficiency_matrix),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter fine optimization stage_________________________________')

In [2]:
# Display the fitness matrix.
# NOTE(review): the saved output of this cell is a NameError, i.e. it was run
# in a kernel where `fitness_matrix` had not been defined yet.
fitness_matrix

NameError: name 'fitness_matrix' is not defined

In [23]:
efficiency_matrix = np.array([[14.552962917760262, 4.633590463340446, 78.03991675212843,
        4.0776970552132035, 11.424204140586358, 24.265814133087538,
        -4.050142901290013, -1.0272759944902874, 13.584684930726016,
        1.4563486122009868, 11.431442024562298, 3.9111544446101956,
        4.264662741773204, 9.899803279431548, 5.507655031522047,
        16.259627660970338, 21.35977152305919, 0, 0, 0, 0,
        0, 0, 0, 0],
       [9.746922900931724, 5.420153605286378, 54.351072686306175,
        1.3378909716904235, 14.781524221371198, 28.512990090885378,
        0.6610118194132797, 12.524570071445915, 11.63572857756077,
        10.930432121440086, 7.369699326705191, 3.1871414770251585,
        10.27530101351043, 11.011577742002492, 11.035527349203997,
        20.07473783372044, 18.543253030095727, 7.426398744476952,
        17.259430136801154, 8.193586980139164, 17.871192505490917,
        7.89887931805134, 1.8212478460965105, 17.383699995278207,
        9.841680586634007],
       [8.927670690091745, 2.4550878237905724, 68.03489247825601,
        4.02883894625473, 12.425977920482692, 56.99380368947245,
        -10.877440096245131, 18.15689727968561, 11.220526660993384,
        5.503154500467815, 1.4651785421756447, 4.031356146889846,
        11.277285544435834, 7.492532333190024, 17.47616971514052,
        19.11242440527499, 30.197733210592755, 6.031831484204279,
        8.259324222723798, 11.093143184267573, 19.052805696508617,
        7.928189486737261, 2.7274833079280856, 21.206660545569644,
        14.926552894110564],
       [11.749151422554144, 3.9554721548482723, 73.38585477587628,
        1.753240964163793, 6.732427262972059, 33.94682807800021,
        6.171584645996037, 14.00276367043056, 16.064040336587503,
        4.050295705888182, 8.59551527718633, 5.341822123867529,
        11.471380802699692, 14.071351598244046, 12.721536282239297,
        20.275579808170516, 13.810275298875691, 4.491313161438622,
        7.026407761195367, 13.049926625200978, -5.576403894643957,
        3.96289418332857, 2.009367603853531, 11.527228170574519,
        17.759333473777502],
       [11.730776069361811, 4.19214999232722, 49.94925007457955,
        4.911573102975302, 7.6240790517770245, 8.691511192468662,
        25.252735802590887, 4.471228631158522, 12.080176088221133,
        6.733954420236201, 3.1566988390182726, 3.3129626739930265,
        6.510684405085069, 10.048928818069246, 9.506740582408272,
        17.440156048552645, 10.36400008225866, 7.503883775689197,
        3.4356335458561587, 9.665396917333446, 11.877520408196839,
        3.3115850821832455, 1.792706750991329, 20.40597019687417,
        14.67330572519078]], dtype=object)

# Mean and standard deviation over every entry of the efficiency matrix.
# The hard-coded matrix above holds 0 for row 0, columns 17-24; those zeros
# are part of both statistics.
mean = efficiency_matrix.mean()
std = efficiency_matrix.std()

# Printed labels: "平均值" = mean, "标准差" = standard deviation.
print("平均值：", mean)
print("标准差：", std)

平均值： 11.83062755160381
标准差： 13.850507984505606


In [44]:
fitness_matrix = np.array([[2.4132899493222233, 5.440050924635711, 3.7819453344579803,
        4.22824470899625, 2.833036622697607, 2.2942923750968047,
        1.876100466692908, 3.8190123150263555, 1.9523307183135512,
        12.757222314364817, 8.583491415654834, 6.764749405761416,
        2.0904823742968124, 5.721396718287495, 5.9593050200173465,
        3.89643885841512, 5.755116415440224, 3.2573764453691556,
        1.584933094009707, 23.47093595529263, 5.333074400554898,
        2.1106456525677317, 1.524998593937737, 1.5985951995161458,
        7.276399654616171],
       [2.457614937281347, 6.500053273163939, 4.534068182416721,
        2.5356829217567953, 3.392255389429215, 2.6420048475723927,
        1.925904380407692, 4.239615291633646, 1.5706638864672726,
        5.019223500610926, 6.973152087439853, 2.991034149981143,
        1.6801292143234041, 5.150139071380483, 6.414649428563153,
        3.33109405915313, 6.651328026170122, 3.3467608058064093,
        1.0753269665576277, 22.293478756040336, 3.0877055552417327,
        0.9102890827193313, 2.8086268750346477, 1.1530549435331876,
        4.343282657781613],
       [3.1590904658307464, 5.2114514020718365, 4.304751192813313,
        3.3751835527234264, 4.12975486733327, 1.6491448530745823,
        1.7241058587282618, 3.6142225373309516, 10.648913713171394,
        7.274632159504679, 8.253882593079046, 3.9715659626142217,
        2.59652987918389, 6.095753132566353, 4.754331398626368,
        2.8663196789658723, 7.645400946702129, 3.574054197538739,
        1.2672612769732512, 14.114431655770657, 3.878756445687669,
        1.2780047721464514, 1.643720663205062, 1.2651183771925134,
        7.826755621848812],
       [3.2513279520985474, 6.276415980808591, 3.9043544395753376,
        3.8108178297243342, 3.4668906856444623, 3.1467451133420443,
        1.7560260784514452, 4.531841316126064, 1.5679365016292355,
        8.803121218361442, 5.787253120888878, 6.573461078435954,
        2.0458403476250946, 5.689971849326319, 8.03982392855181,
        2.164236774706171, 5.574217082539855, 2.721823341832421,
        2.054963300153421, 27.39167754436603, 6.409123521972624,
        1.3698995003014238, 3.0041615957352854, 1.7238207383084188,
        9.09216542762348],
       [3.025648213015101, 6.587443981866637, 4.168057097532286,
        2.6445077025169814, 3.4594316956540263, 2.004384315718356,
        3.5003981074384694, 4.332574595050536, 2.8343777594318365,
        11.389473406729676, 6.764241949652532, 7.242075534787578,
        2.1087160835038956, 6.465628653274422, 10.477328786500182,
        2.6712791539846537, 4.955981948689203, 3.542358864369053,
        1.2751494817772229, 26.65869244882609, 6.092513412339092,
        1.9509484013476233, 2.468800282419986, 1.4398889767156033,
        8.551347442460118]], dtype=object)

# configuration of fine optimization

In [16]:
# Collect every non-None entry of the matrix together with its
# (reward function index, morphology index) coordinate, in row-major order.
all_values_with_coords = []
for (i, j), value in np.ndenumerate(efficiency_matrix):
    if value is not None:
        all_values_with_coords.append(((i, j), value))

# Order the entries by value, highest first (ties keep their original order).
sorted_values = list(all_values_with_coords)
sorted_values.sort(key=lambda entry: entry[1], reverse=True)

# Size of the top 10% of the entries (at least one entry is always selected).
top_k = max(1, int(len(sorted_values) * 0.1))
# Coordinates of those top 10% entries.
efficiency_coarse_best = [entry[0] for entry in sorted_values[:top_k]]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [17]:
# `coarse_best` is the name the fine-optimization loop iterates over; it is the
# same list object as `efficiency_coarse_best`, i.e. the
# (reward function index, morphology index) pairs selected above.
coarse_best = efficiency_coarse_best
coarse_best

[(0, 2),
 (3, 2),
 (2, 2),
 (2, 5),
 (1, 2),
 (4, 2),
 (3, 5),
 (2, 16),
 (1, 5),
 (4, 6),
 (0, 5),
 (0, 18)]

In [18]:
# Plain alias, not a copy: both names refer to the same array, which the
# fine-optimization loop reads through `efficiency_matrix_select`.
efficiency_matrix_select = efficiency_matrix

# enter fine optimization stage

In [None]:
final_optimized_results = []  # best result found for each coarse_best candidate


def _train_and_score(morphology_path, rewardfunc_path, parameter,
                     morphology_index, rewardfunc_index):
    """Train and evaluate one (morphology, reward function) pair.

    Copies the two files to the fixed names "GPTAnt.xml" and
    "GPTrewardfunc.py", reloads the reward module, installs its ``_get_rew``
    on ``GPTAntEnv``, then runs ``Train`` followed by ``Eva``.

    Args:
        morphology_path: path of the morphology XML file to evaluate.
        rewardfunc_path: path of the reward-function Python file to evaluate.
        parameter: morphology parameters passed to ``compute_ant_volume``.
        morphology_index: forwarded to ``Train``.
        rewardfunc_index: forwarded to ``Train``.

    Returns:
        Tuple ``(fitness, material, efficiency)`` where ``material`` is
        ``compute_ant_volume(parameter)`` and ``efficiency`` is
        ``fitness / material``.
    """
    shutil.copy(morphology_path, "GPTAnt.xml")
    shutil.copy(rewardfunc_path, "GPTrewardfunc.py")

    # The file was just overwritten, so the module must be reloaded to pick up
    # the new reward function before it is attached to the environment class.
    import GPTrewardfunc
    importlib.reload(GPTrewardfunc)
    GPTAntEnv._get_rew = GPTrewardfunc._get_rew

    model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
    fitness, _ = Eva(model_path)
    material = compute_ant_volume(parameter)
    return fitness, material, fitness / material


for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    # Start from the coarse-stage result for this (reward, morphology) pair.
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0  # counts accepted improvements (morphology or reward)

    # Alternate morphology and reward-function refinement. Each stage is judged
    # on its own result: the loop stops as soon as either stage fails to beat
    # the best efficiency so far. (A single shared "improved" flag would still
    # be True from the morphology stage when the reward stage is checked, which
    # would make the reward-stage stop unreachable.)
    while True:
        designer = DGA()

        # -------- morphology stage --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            improved_morphology,
            best_rewardfunc,
            improved_parameter,
            morphology_index,
            rewardfunc_index,
        )

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # -------- no further improvement from morphology: stop --------
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break

        # -------- reward-function stage --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        # The morphology is unchanged in this stage, so the material cost is
        # computed from the current best parameters.
        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            best_morphology,
            improved_rewardfunc,
            best_parameter,
            morphology_index,
            rewardfunc_index,
        )

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # -------- no further improvement from the reward function: stop --------
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break

    # Save the final best result for the current coarse_best candidate.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

In [20]:
# Write the full list of per-candidate results to the log.
logging.info(str(final_optimized_results))

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [21]:
# Show the per-candidate best results collected by the fine-optimization loop.
final_optimized_results

[{'best_morphology': 'results/Random_m25_r5/assets/GPTAnt_2.xml',
  'best_parameter': [0.31731447451395395,
   0.17321644557806182,
   0.013564286048548424,
   0.07182404675869253,
   0.21844181850779076,
   0.33998381471015704,
   0.4997400572040301,
   0.06580079909123056,
   0.04533983489425701,
   0.05454148220248588],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitness': None,
  'best_material': 0.18087342405801293,
  'best_efficiency': 78.03991675212843,
  'best_iteration': 0},
 {'best_morphology': 'results/Random_m25_r5/assets/GPTAnt_2.xml',
  'best_parameter': [0.31731447451395395,
   0.17321644557806182,
   0.013564286048548424,
   0.07182404675869253,
   0.21844181850779076,
   0.33998381471015704,
   0.4997400572040301,
   0.06580079909123056,
   0.04533983489425701,
   0.05454148220248588],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': None,
  'best_material': 0.18087342405801293,
  'best_efficiency':

In [22]:
# NOTE: best_efficiency is reassigned for every coarse_best candidate inside the
# fine-optimization loop, so this displays the value left over from the last
# candidate processed, not the maximum over all candidates.
best_efficiency

74.53551454779625