In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

In [None]:
import prompts
class DGA:
    """LLM-backed generator and refiner of Ant reward functions and morphologies.

    The class wraps an OpenAI chat client.  Generation helpers ask the model
    for reward-function code or morphology parameters and write the results
    under ``<folder_name>/env`` (Python files) or ``<folder_name>/assets``
    (MuJoCo XML files).  Improvement helpers send previously evaluated
    candidates together with their fitness values and save the refined
    candidate.

    The prompt templates, ``api_key``, ``ant_design`` and
    ``compute_ant_volume`` are not defined here; they are expected to already
    be present in the notebook namespace.
    """

    def __init__(self):
        """Create the OpenAI client and select the chat model."""
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4o-mini"

    @staticmethod
    def _write_text(folder_name, subdir, file_name, text):
        """Write ``text`` to ``<folder_name>/<subdir>/<file_name>`` and return the path."""
        target_dir = os.path.join(folder_name, subdir)
        # Create the output directory on demand so a fresh results folder works.
        os.makedirs(target_dir, exist_ok=True)
        file_path = os.path.join(target_dir, file_name)
        with open(file_path, "w") as fp:
            fp.write(text)
        return file_path

    @staticmethod
    def _best_fitness(fitness_list):
        """Return the largest fitness, ignoring ``None`` placeholders.

        The fitness/efficiency matrices are initialised with ``None``, so a row
        or column handed to the improve helpers may still contain unevaluated
        entries.  Returns ``None`` when no evaluated entry exists.
        """
        evaluated = [fitness for fitness in fitness_list if fitness is not None]
        return max(evaluated) if evaluated else None

    def extract_code(self, text):
        """Return the body of the first ```python fenced block in ``text``.

        Returns ``None`` when ``text`` contains no such block.
        """
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` with its lines re-joined by newlines.

        This is currently a pass-through (no indentation is added); it is kept
        as a hook because every writer routes generated code through it.
        """
        return "\n".join(code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Request ``rewardfunc_nums`` reward functions in a single API call.

        Each choice that contains a python code block is saved as
        ``<folder_name>/env/GPTAnt_<i>.py``.

        Returns:
            list[str]: paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                # Same numpy header as the other reward-function writers, so
                # the generated module can be imported on its own.
                full_code = "import numpy as np\n" + self.indent_code(reward_code) + "\n"
                # The file is written exactly once (it used to be written twice).
                file_path = self._write_text(folder_name, "env", f"GPTAnt_{i}.py", full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files

    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate reward functions one at a time, asking for diversity.

        The first request uses the base prompt; each following request adds
        ``rewardfunc_div_prompts``.  Every reply is kept in the conversation
        (as ``generate_morphology_div`` does) so the model can see what it has
        already produced.  Files are saved as
        ``<folder_name>/env/GPTrewardfunc_<i>.py``.

        Returns:
            list[str]: paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        rewardfunc_files = []

        # Initial reward function (only this request carries the 10 s timeout).
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )
        initial_reply = response.choices[0].message.content
        messages.append({"role": "assistant", "content": initial_reply})

        initial_code = self.extract_code(initial_reply)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"
            file_path = self._write_text(folder_name, "env", "GPTrewardfunc_0.py", reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # Additional, diversified reward functions.
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_reply = response.choices[0].message.content
            messages.append({"role": "assistant", "content": diverse_reply})

            diverse_code = self.extract_code(diverse_reply)
            if diverse_code:
                reward_code = "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = self._write_text(folder_name, "env", f"GPTrewardfunc_{i}.py", reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        """Request ``morphology_nums`` morphologies in a single API call.

        Each reply is parsed as JSON and its ``parameters`` entry is turned
        into an XML file ``<folder_name>/assets/GPTAnt_<i>.xml``.

        Returns:
            tuple: ``(xml_files, material_list, parameter_list)``.  Note that
            ``material_list`` and ``parameter_list`` cover every reply, while
            ``xml_files`` omits replies whose parameters are not a list.
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # Echo every raw reply for inspection.
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = ant_design(parameter)
            filename = f"GPTAnt_{i}.xml"
            xml_files.append(self._write_text(folder_name, "assets", filename, xml_file))
            print(f"Successfully saved {filename}")

        return xml_files, material_list, parameter_list

    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate morphologies one at a time, asking for diversity.

        The first request uses the base prompt; each following request adds
        ``morphology_div_prompts`` on top of a conversation that contains all
        previously generated parameter sets.  Files are saved as
        ``<folder_name>/assets/GPTAnt_<i>.xml``.

        Returns:
            tuple: ``(xml_files, material_list, parameter_list)``, index-aligned.
        """
        material_list = []
        xml_files = []
        parameter_list = []

        # Initial morphology.
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_ant_volume(initial_parameter['parameters']))
        # Remember the reply so later requests can differ from it.
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}")

        xml_file = ant_design(initial_parameter['parameters'])
        xml_files.append(self._write_text(folder_name, "assets", "GPTAnt_0.xml", xml_file))

        # Additional, diversified designs.
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_ant_volume(diverse_parameter['parameters']))
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{diverse_parameter['parameters']}")
            xml_file = ant_design(diverse_parameter['parameters'])
            xml_files.append(self._write_text(folder_name, "assets", f"GPTAnt_{i}.xml", xml_file))

        return xml_files, material_list, parameter_list

    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for an improved reward function and save it.

        The request consists of ``prompts.reward_improve_prompts``, followed by
        every ``(reward function, fitness)`` pair, the current best candidate
        and ``rewardfunc_format``.  Entries of ``rewardfunc_list`` and
        ``best_rewardfunc`` are interpolated into the prompt as given.

        Args:
            best_rewardfunc: current best reward function.
            rewardfunc_list: previously evaluated reward functions.
            fitness_list: fitness per entry of ``rewardfunc_list``; ``None``
                entries are ignored when determining the best fitness.
            folder_name: results folder; the file goes into its ``env`` subfolder.
            step, rewardfunc_index, morphology_index: used only to build the
                file name ``GPTAnt_refine_<step>_<rewardfunc_index>_<morphology_index>.py``.

        Returns:
            str: path of the saved reward-function file.

        Raises:
            ValueError: if the reply contains no python code block (previously
                this surfaced as an ``UnboundLocalError`` on ``file_path``).
        """
        reward_improve_prompts = prompts.reward_improve_prompts
        for reward_content, fitness in zip(rewardfunc_list, fitness_list):
            reward_improve_prompts = reward_improve_prompts + f"reward function:{reward_content} \n" + f"fintess:{fitness}"
        reward_improve_prompts = reward_improve_prompts + f"best reward function:{best_rewardfunc} \n" + f"best fintess:{self._best_fitness(fitness_list)}"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            # Send the accumulated history; it used to be built and then dropped
            # in favour of the plain generation prompt.
            {"role": "user", "content": reward_improve_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            raise ValueError("improve_rewardfunc: the model reply contains no ```python code block")

        full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
        file_name = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
        return self._write_text(folder_name, "env", file_name, full_code)

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for an improved morphology and save it as XML.

        The request consists of ``prompts.morphology_improve_prompts``,
        followed by every ``(parameter, fitness)`` pair, the current best
        parameters and ``morphology_format``.

        Args:
            best_parameter: current best parameter set.
            parameter_list: previously evaluated parameter sets.
            fitness_list: fitness per entry of ``parameter_list``; ``None``
                entries are ignored when determining the best fitness.
            folder_name: results folder; the file goes into its ``assets`` subfolder.
            step, rewardfunc_index, morphology_index: used only to build the
                file name ``GPTAnt_refine_<step>_<rewardfunc_index>_<morphology_index>.xml``.

        Returns:
            tuple: ``(file_path, parameter)`` where ``parameter`` is the
            ``parameters`` entry of the JSON reply (``[]`` if it is missing).
        """
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{self._best_fitness(fitness_list)}"

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', [])
        print(parameter)
        xml_file = ant_design(parameter)
        filename = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.xml"
        file_path = self._write_text(folder_name, "assets", filename, xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

# Root folder for every artifact of this run (logs, XML assets, reward files).
folder_name = "results/Random_m25_r5"

# Create the output tree up front: logging.basicConfig opens the log file
# immediately and the generators write into the `env` and `assets` subfolders,
# all of which fail on a fresh checkout if the directories are missing.
os.makedirs(os.path.join(folder_name, "env"), exist_ok=True)
os.makedirs(os.path.join(folder_name, "assets"), exist_ok=True)

log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

# Running "best so far" record, updated by the optimization stages.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Size of the search grid.
morphology_nums = 25
rewardfunc_nums = 5

# Result grids: one row per reward function, one column per morphology.
# Object dtype so unevaluated cells can hold None.
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()



# return file list of morphology and reward function: [GPTAnt_{i}.xml] and [GPTAnt_{j}.py]



In [4]:
# Mark the beginning of the run in the log file.
logging.info("start!")

# print configuration info

In [7]:
# Generate the diversified morphology population; the three returned lists
# (XML paths, material costs, raw parameters) are index-aligned.
designer = DGA()
(
    morphology_list,
    material_list,
    parameter_list,
) = designer.generate_morphology_div(morphology_nums, folder_name)

params: [0.2, 0.3, 0.06, 0.2, 0.15, 0.1, 0.05, 0.02, 0.02, 0.015]
params: [0.25, 0.35, 0.08, 0.25, 0.2, 0.15, 0.1, 0.03, 0.025, 0.02]
params: [0.15, 0.2, 0.05, 0.15, 0.1, 0.08, 0.04, 0.015, 0.015, 0.01]
params: [0.3, 0.45, 0.1, 0.4, 0.3, 0.25, 0.15, 0.05, 0.04, 0.03]
params: [0.18, 0.25, 0.04, 0.12, 0.09, 0.06, 0.03, 0.02, 0.018, 0.015]
params: [0.35, 0.5, 0.1, 0.25, 0.05, 0.15, 0.02, 0.04, 0.03, 0.025]
params: [0.45, 0.6, 0.15, 0.35, 0.2, 0.25, 0.1, 0.07, 0.06, 0.05]
params: [0.2, 0.1, 0.3, 0.2, 0.4, 0.15, 0.35, 0.015, 0.025, 0.02]
params: [0.22, 0.18, 0.07, 0.1, 0.05, 0.12, 0.06, 0.025, 0.02, 0.015]
params: [0.1, 0.15, 0.02, 0.3, 0.1, 0.05, 0.02, 0.01, 0.015, 0.01]
params: [0.4, 0.25, 0.06, 0.2, 0.05, 0.3, 0.15, 0.03, 0.025, 0.02]
params: [0.25, 0.1, 0.2, 0.15, 0.3, 0.1, 0.2, 0.02, 0.03, 0.02]
params: [0.3, 0.15, 0.1, 0.05, 0.2, 0.08, 0.04, 0.02, 0.015, 0.01]
params: [0.5, 0.2, 0.12, 0.25, 0.08, 0.3, 0.1, 0.04, 0.03, 0.025]
params: [0.15, 0.25, 0.08, 0.12, 0.07, 0.15, 0.09, 0.015, 0.

In [8]:
# Generate the diversified reward-function population (returns the file paths).
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(
    rewardfunc_nums,
    folder_name,
)

initial Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_0.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_1.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_2.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_3.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_4.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_5.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_6.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_7.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_8.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_9.py


In [9]:
# Sanity check of the grid size: as allocated in the configuration cell, rows
# index reward functions and columns index morphologies.
efficiency_matrix.shape

(10, 50)

# enter coarse optimization stage

In [7]:
# Rebuild the artifact paths of an earlier generation run from the
# configuration (folder_name / morphology_nums / rewardfunc_nums) instead of
# repeating the folder and the counts as literals, so they cannot drift apart.
morphology_list = [
    os.path.join(folder_name, "assets", f"GPTAnt_{i}.xml")
    for i in range(morphology_nums)
]
rewardfunc_list = [
    os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
    for i in range(rewardfunc_nums)
]

parameter_list =[[0.13619491465988937, 0.18097388692499644, 0.23277764494533498, 0.18254366234423364, 0.21809597994707916, 0.31858929709688905, 0.44178507573227027, 0.1442158192431316, 0.1993351975638973, 0.07526577729837026], [0.21843631248473971, 0.1965048753195906, 0.21090269422943356, 0.13082763295518235, 0.18557398959850083, 0.3047891602589721, 0.45524345746482947, 0.2088962105348458, 0.029339274946532448, 0.07236424875698348], [0.3385326970388605, 0.09212532906927472, 0.05319775873885757, 0.15386932222044858, 0.21856656575097042, 0.4663739102322223, 0.39181117130586085, 0.025346250317768, 0.07346716343054138, 0.03900791964021172], [0.21726648550791763, 0.3217424978715445, 0.06721499734083239, 0.20743559123639124, 0.11223654085373608, 0.4383838691039458, 0.4173805305519996, 0.17626510915998964, 0.11605452189301738, 0.16561181995529195], [0.20468530290311404, 0.053069605245353396, 0.2732364279867242, 0.19532200101694203, 0.2608200423352626, 0.34519477655537173, 0.46286261100631354, 0.0490441030344964, 0.1150165755284441, 0.0912709554305652], [0.07167844356659148, 0.14941397820127153, 0.2802304538884226, 0.2215252408705442, 0.36054989577473456, 0.15928173302286633, 0.4340165470285263, 0.0679976603164105, 0.07083858319246004, 0.06083871526941281], [0.22345463527593867, 0.21992922747717603, 0.3353947552745714, 0.40841862353653613, 0.177141569339203, 0.42368485326727134, 0.4988029677176171, 0.17822241075458706, 0.04503920091085181, 0.07230262626272123], [0.200633082005564, 0.09564848996948719, 0.23996256683177536, 0.23371957644990843, 0.11068508531837687, 0.4628780413515355, 0.4491295120368287, 0.1086690323485594, 0.12477798764632356, 0.0636850209712405], [0.3812449751640946, 0.10198820512900014, 0.29717488788274227, 0.24517392467148846, 0.2178818027121512, 0.18306475102209907, 0.3721007444130523, 0.029028831987031094, 0.1475561595104963, 0.07542356987737897], [0.23412631309475102, 0.2668220281463649, 0.20068375112141398, 0.2941388900051325, 0.20738744161378153, 
0.34595914267120337, 0.36255388452864407, 0.14670894707413926, 0.15105302150787608, 0.06438324480239248], [0.2126312277079122, -0.02155412406282617, 0.24079933176478338, 0.1527851839381813, 0.20439045954413992, 0.39557225052357436, 0.4677610094655221, 0.1432399581377785, 0.008809173861126118, 0.016270412198067935], [0.26620068425350624, 0.17141101238154094, 0.09488008050461771, 0.15018915477866407, 0.17074096519507745, 0.4046620683841376, 0.4001831446433975, 0.04451234497492167, 0.031779123389827434, 0.04315058034087204], [0.3317484552329267, 0.12996275998220355, 0.13256296931006173, 0.10538558985282298, 0.26658077694191634, 0.43038829055441047, 0.4415197849801059, 0.030084011013545818, 0.10113878955274594, 0.08511656992334651], [0.3144409741405167, 0.2879970477668456, 0.0712877415454782, 0.23013091052762957, 0.2034170211687466, 0.40780043646845415, 0.5019579291164175, 0.03165551748822795, 0.06039305097446594, 0.20202931396189808], [0.5348072346441732, 0.1479978830242556, 0.10272421879723206, 0.3878925041356094, 0.2907027694710891, 0.3950261631674491, 0.36047907205160223, 0.015189112780361086, 0.005968768877703479, 0.04345300518440867], [0.34556740564324, 0.3747866922208457, 0.3198074172112194, 0.3881825934714416, 0.4117892028968261, 0.5288212766352727, 0.44921621942515144, 0.11011548624741019, 0.003789659156926556, 0.02414267222342091], [0.2776074515405991, 0.07570099934340502, 0.2682556969736177, 0.27186077391856556, 0.4444154568067938, 0.5269243592054318, 0.3637236536395938, 0.12138053807617757, 0.08096475747352157, 0.002168375609160819], [0.37583245177514024, 0.3369485961254882, 0.19243948685063766, 0.05982538607788271, 0.30478765725339374, 0.40562959631455325, 0.31760801644030434, 0.15845019277902128, 0.0969455096995083, 0.03636618619671307], [0.27839388343318705, 0.3024680001702259, 0.19226017016919206, 0.2528653218869201, 0.19998319348209395, 0.528474304113682, 0.4083873645038347, 0.12374959893550148, 0.14980742943630201, 0.05654809196054293], 
[0.42310750841982714, 0.1643474449792918, 0.2858195435728248, 0.27123207435490015, 0.2200849904704001, 0.403096963500809, 0.48272287914635126, 0.14266467123022294, 0.027661333455906427, 0.055553048916871184], [0.32839451181826185, 0.24416927295385638, 0.10718703920948632, 0.2136849776264164, 0.24018506523988034, 0.41680455336069255, 0.23252065643437803, 0.06545190699016748, 0.22235651937218442, 0.057718877120059395], [0.3641292221693235, 0.33750327578895095, 0.2370404000843654, 0.18315677727915308, 0.06063890537404368, 0.41746033386624504, 0.3706238748652991, 0.03439878747404673, 0.12381798220274126, 0.08657342672216571], [0.400004321481627, 0.15002482931949815, -0.006519919703982857, 0.23776343840867897, 0.15560635751604204, 0.17217478994440963, 0.2672702927047701, 0.08065804880815293, 0.014464397077744023, 0.057485722152429405], [0.2556547763689825, 0.19737348587971268, 0.053933402049874124, -0.014601762887632708, 0.13834342062793584, 0.40842054559697155, 0.4872583414691396, 0.12782534881472607, 0.04957941093181832, 0.06992513526464286], [0.32193899214683475, 0.2152555988796685, 0.17499362271813923, 0.07752477887820426, 0.16475460751197646, 0.31377508144586125, 0.35986518336549145, 0.16786321314606156, 0.10101440538084626, 0.10531204963812024]]  

# Material cost of every morphology, in the same order as parameter_list.
material_list = list(map(compute_ant_volume, parameter_list))
len(material_list)

params: [0.13619491465988937, 0.18097388692499644, 0.23277764494533498, 0.18254366234423364, 0.21809597994707916, 0.31858929709688905, 0.44178507573227027, 0.1442158192431316, 0.1993351975638973, 0.07526577729837026]
params: [0.21843631248473971, 0.1965048753195906, 0.21090269422943356, 0.13082763295518235, 0.18557398959850083, 0.3047891602589721, 0.45524345746482947, 0.2088962105348458, 0.029339274946532448, 0.07236424875698348]
params: [0.3385326970388605, 0.09212532906927472, 0.05319775873885757, 0.15386932222044858, 0.21856656575097042, 0.4663739102322223, 0.39181117130586085, 0.025346250317768, 0.07346716343054138, 0.03900791964021172]
params: [0.21726648550791763, 0.3217424978715445, 0.06721499734083239, 0.20743559123639124, 0.11223654085373608, 0.4383838691039458, 0.4173805305519996, 0.17626510915998964, 0.11605452189301738, 0.16561181995529195]
params: [0.20468530290311404, 0.053069605245353396, 0.2732364279867242, 0.19532200101694203, 0.2608200423352626, 0.34519477655537173, 0

25

In [13]:
# Record the run configuration before the coarse stage starts.
for _label, _value in (
    ("folder_name", folder_name),
    ("morphology_nums", morphology_nums),
    ("rewardfunc_nums", rewardfunc_nums),
    ("parameter_list", parameter_list),
    ("morphology_list", morphology_list),
    ("material_list", material_list),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [18]:
# Coarse stage: train and evaluate every (reward function, morphology) pair and
# record fitness and efficiency (fitness per unit of material).
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        # Resume filter: this run only fills the pairs that were still missing
        # (reward function 1 with morphologies 23 and 24). Remove the filter to
        # evaluate the full grid.
        if i not in [1] or j not in [23, 24]:
            continue
        print(i, rewardfunc)
        print(j, morphology)

        # The environment reads these fixed file names, so stage the candidates there.
        shutil.copy(morphology, "GPTAnt.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # The module file was just overwritten: reload it and patch the new
        # reward function onto the environment class.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        env_name = "GPTAntEnv"
        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # To re-evaluate an already trained model instead of training again:
        # model_path = f"results/Random_m25_r5/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Keep the reward of the best pair too; it was never stored, so the
            # end-of-stage summary always logged "Reward: None".
            best_reward = reward

1 results/Random_m25_r5/env/GPTrewardfunc_1.py
23 results/Random_m25_r5/assets/GPTAnt_23.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
1 results/Random_m25_r5/env/GPTrewardfunc_1.py
24 results/Random_m25_r5/assets/GPTAnt_24.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99


In [19]:
# Display the efficiency grid (fitness / material cost for each
# reward-function row and morphology column).
efficiency_matrix

array([[1.8749228737492734, 1.9850464819352198, 5.061426583626903,
        1.5054240763131213, 4.544839509801864, 9.463125820170546,
        3.3896504256910394, 4.899185675460227, 2.385403836194799,
        2.4369221995335355, 5.4100629929445985, 8.75905572564843,
        3.5015129092521615, 1.7881903518172881, 1.4159928287121146,
        2.563727502684578, 2.2374828829565807, 1.0476489004824716,
        1.1968727562583001, 2.337268204551157, 2.1013850734129615,
        2.9311301236406555, 2.38334835632192, 2.2577442245746275,
        1.5786410828672541],
       [1.8549399145277896, 1.9695311417115802, 4.13737769433147,
        2.0200745964918108, 4.286210045993126, 23.75922783769693,
        4.067347290876644, 6.508129220833612, 3.797296943980215,
        2.8461343150684018, 5.4100629929445985, 9.531902599668399,
        3.2294012920869983, 1.8415527061162775, 1.506862986258095,
        6.87938357798543, 4.308422646244716, 2.2458713159801813,
        2.7569683381842496, 1.767576721849

In [5]:
efficiency_matrix=np.array([[1.8749228737492734, 1.9850464819352198, 5.061426583626903,
        1.5054240763131213, 4.544839509801864, 9.463125820170546,
        3.3896504256910394, 4.899185675460227, 2.385403836194799,
        2.4369221995335355, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None],
       [1.8549399145277896, 1.9695311417115802, 4.13737769433147,
        2.0200745964918108, 4.286210045993126, 23.75922783769693,
        4.067347290876644, 6.508129220833612, 3.797296943980215,
        2.8461343150684018, 5.4100629929445985, 9.531902599668399,
        3.2294012920869983, 1.8415527061162775, 1.506862986258095,
        6.87938357798543, 4.308422646244716, 2.2458713159801813,
        2.7569683381842496, 1.7675767218492406, 1.440114233256041,
        2.6466238722993345, 2.36958512082521, None, None],
       [1.6831961807165858, 2.198219547242392, 4.222084897356419,
        1.4929577413575175, 4.953156967181566, 9.193118131673362,
        4.60203355119635, 5.47224101695512, 2.8169815420034703,
        2.579111875601141, 5.4100629929445985, 9.609250384409899,
        3.7932633555992004, 1.9124208238440972, 1.2817347354436512,
        6.392855359640642, 7.033062857839337, 2.6844044221477192,
        2.9721153208163478, 1.7675767218492406, 1.4322288618702539,
        2.5319536160012994, 2.1873043220717903, 4.3906407966590715,
        1.7403048226843327],
       [1.7118889697760054, 2.3180043671994977, 4.9718350630413894,
        1.6131871636008808, 4.986812226364095, 9.340290934359286,
        3.3851885141071327, 5.609351553411248, 1.8956609196206604,
        2.5499550687372468, 5.4100629929445985, 9.375559523664696,
        3.46464237104344, 2.089584518093825, 1.396699556345449,
        6.2115722077764355, 5.828840184150941, 2.62654907620894,
        4.005781771760414, 1.7707906099924156, 1.6014884497272608,
        2.378019986092518, 2.6306360860135864, 4.389002825831869,
        1.6645657310124504],
       [1.8529704644180118, 2.0440991339915504, 4.351022897128468,
        1.5813385550154229, 5.615684133683129, 9.841119880607891,
        4.207632561207415, 4.954993460838662, 2.1168190944886405,
        2.5624236791416126, 5.4100629929445985, 7.644053852032125,
        3.3432445512163294, 1.91426380229937, 1.3057891581237135,
        5.452466353514591, 4.732457007092199, 2.1722671961314446,
        3.0208159419739253, 1.7745571656769124, 1.4661470559784295,
        2.617709182129496, 2.7665924521335117, 4.392639323400537,
        1.6843471155992655]], dtype=object)

In [15]:
# List the (reward function, morphology) cells that have no efficiency yet.
# `== None` is deliberate: on an object array it compares element-wise.
missing_mask = efficiency_matrix == None  # noqa: E711
none_coords = np.argwhere(missing_mask)
print(none_coords)

[[ 1 23]
 [ 1 24]]


# print coarse optimization info

In [15]:
# Close the coarse stage in the log: best pair found, then the full result grids.
logging.info('_________________________________end coarse optimization stage_________________________________')
stage1_summary = (
    f"Stage1: Final best morphology: {best_morphology}, "
    f"Fitness: {best_fitness}, "
    f"best_efficiency: {best_efficiency}, "
    f"best reward function: {best_rewardfunc}, "
    f"Material cost: {best_material}, "
    f"Reward: {best_reward}"
)
logging.info(stage1_summary)
for _label, _value in (
    ("folder_name", folder_name),
    ("parameter_list", parameter_list),
    ("fitness_matrix", fitness_matrix),
    ("efficiency_matrix", efficiency_matrix),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter fine optimization stage_________________________________')

In [20]:
efficiency_matrix = np.array([[1.8749228737492734, 1.9850464819352198, 5.061426583626903,
        1.5054240763131213, 4.544839509801864, 9.463125820170546,
        3.3896504256910394, 4.899185675460227, 2.385403836194799,
        2.4369221995335355, 5.4100629929445985, 8.75905572564843,
        3.5015129092521615, 1.7881903518172881, 1.4159928287121146,
        2.563727502684578, 2.2374828829565807, 1.0476489004824716,
        1.1968727562583001, 2.337268204551157, 2.1013850734129615,
        2.9311301236406555, 2.38334835632192, 2.2577442245746275,
        1.5786410828672541],
       [1.8549399145277896, 1.9695311417115802, 4.13737769433147,
        2.0200745964918108, 4.286210045993126, 23.75922783769693,
        4.067347290876644, 6.508129220833612, 3.797296943980215,
        2.8461343150684018, 5.4100629929445985, 9.531902599668399,
        3.2294012920869983, 1.8415527061162775, 1.506862986258095,
        6.87938357798543, 4.308422646244716, 2.2458713159801813,
        2.7569683381842496, 1.7675767218492406, 1.440114233256041,
        2.6466238722993345, 2.36958512082521, 4.103559298462468,
        2.0409314297061263],
       [1.6831961807165858, 2.198219547242392, 4.222084897356419,
        1.4929577413575175, 4.953156967181566, 9.193118131673362,
        4.60203355119635, 5.47224101695512, 2.8169815420034703,
        2.579111875601141, 5.4100629929445985, 9.609250384409899,
        3.7932633555992004, 1.9124208238440972, 1.2817347354436512,
        6.392855359640642, 7.033062857839337, 2.6844044221477192,
        2.9721153208163478, 1.7675767218492406, 1.4322288618702539,
        2.5319536160012994, 2.1873043220717903, 4.3906407966590715,
        1.7403048226843327],
       [1.7118889697760054, 2.3180043671994977, 4.9718350630413894,
        1.6131871636008808, 4.986812226364095, 9.340290934359286,
        3.3851885141071327, 5.609351553411248, 1.8956609196206604,
        2.5499550687372468, 5.4100629929445985, 9.375559523664696,
        3.46464237104344, 2.089584518093825, 1.396699556345449,
        6.2115722077764355, 5.828840184150941, 2.62654907620894,
        4.005781771760414, 1.7707906099924156, 1.6014884497272608,
        2.378019986092518, 2.6306360860135864, 4.389002825831869,
        1.6645657310124504],
       [1.8529704644180118, 2.0440991339915504, 4.351022897128468,
        1.5813385550154229, 5.615684133683129, 9.841119880607891,
        4.207632561207415, 4.954993460838662, 2.1168190944886405,
        2.5624236791416126, 5.4100629929445985, 7.644053852032125,
        3.3432445512163294, 1.91426380229937, 1.3057891581237135,
        5.452466353514591, 4.732457007092199, 2.1722671961314446,
        3.0208159419739253, 1.7745571656769124, 1.4661470559784295,
        2.617709182129496, 2.7665924521335117, 4.392639323400537,
        1.6843471155992655]], dtype=object)

# Summary statistics over every cell of the efficiency grid.
mean = efficiency_matrix.mean()
std = efficiency_matrix.std()

# The labels are "mean:" and "standard deviation:".
print("平均值：", mean)
print("标准差：", std)

平均值： 3.702278944469835
标准差： 2.798228513572983


In [16]:
# Display the fitness grid (one row per reward function, one column per morphology).
fitness_matrix

# configuration of fine optimization

In [46]:
# Collect every evaluated (non-None) efficiency together with its
# (reward function index, morphology index) coordinate, in row-major order.
all_values_with_coords = [
    ((i, j), efficiency_matrix[i][j])
    for i in range(len(efficiency_matrix))
    for j in range(len(efficiency_matrix[0]))
    if efficiency_matrix[i][j] is not None
]

# Highest efficiency first; ties keep their row-major order.
sorted_values = sorted(all_values_with_coords, key=lambda entry: entry[1], reverse=True)

# Keep the top 10% of the evaluated pairs, but never fewer than one.
top_k = max(1, int(len(sorted_values) * 0.1))
top_entries = sorted_values[:top_k]
efficiency_coarse_best = [entry[0] for entry in top_entries]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [47]:
# Short name for the selected (reward function index, morphology index) pairs;
# the fine-optimization loop iterates over it. Displayed here for inspection.
coarse_best = efficiency_coarse_best
coarse_best

[(4, 14),
 (4, 1),
 (1, 1),
 (3, 14),
 (3, 1),
 (0, 21),
 (3, 19),
 (4, 19),
 (4, 21),
 (0, 1),
 (2, 1),
 (0, 19)]

In [48]:
# Alias (not a copy) of the efficiency grid; the fine stage reads its starting
# efficiencies and the per-row / per-column histories through this name.
efficiency_matrix_select = efficiency_matrix

# enter fine optimization stage

In [None]:
# Fine optimization: for every shortlisted (reward function, morphology) pair,
# alternately ask the LLM designer for a better morphology and a better reward
# function, retrain, and keep a candidate only when it raises the material
# efficiency (fitness / material volume). Refinement of a pair stops at the
# first stage that fails to beat the incumbent.

FINE_TOTAL_TIMESTEPS = 5e5  # training budget for each fine-stage candidate

final_optimized_results = []  # one summary dict per coarse_best entry

# DGA.__init__ only builds the API client and stores the model name, so a
# single instance is reused instead of being rebuilt on every round.
designer = DGA()

for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    # Incumbent design: start from the coarse-stage scores of this pair.
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0  # number of accepted improvements for this pair

    while True:
        # -------- morphology stage --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration,
        )

        # Stage the candidate morphology with the incumbent reward function.
        shutil.copy(improved_morphology, "GPTAnt.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")

        # GPTrewardfunc.py was just overwritten, so the module has to be
        # reloaded before its reward function is patched onto the environment.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=FINE_TOTAL_TIMESTEPS)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_ant_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # No gain from the new morphology: stop refining this pair.
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break

        # -------- reward function stage --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration,
        )

        # Stage the incumbent morphology with the candidate reward function.
        shutil.copy(best_morphology, "GPTAnt.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")

        # Same reload-and-patch step as above, now for the candidate reward.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=FINE_TOTAL_TIMESTEPS)
        improved_fitness, _ = Eva(model_path)
        # The morphology is unchanged in this stage, so the material is that
        # of the incumbent parameters.
        improved_material = compute_ant_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # Previously a single shared flag was already True at this point
            # (set by the morphology stage), so this exit could never fire.
            # Each stage now judges its own outcome.
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break

    # Record the best design found for this coarse_best entry.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

In [53]:
# Write the whole list of per-candidate result dicts to the log in one record.
logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [54]:
# Display the collected result dicts as the cell output.
final_optimized_results

[{'best_morphology': 'results/Random_m25_r5/assets/GPTAnt_14.xml',
  'best_parameter': [0.31153541270951474,
   0.26498060426228787,
   0.34069517135255734,
   0.2692558039769255,
   0.09473152571101379,
   0.29639454101594015,
   0.38229914722891223,
   0.08502938923001663,
   0.031049502237502677,
   0.0863959245124321],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_4.py',
  'best_fitness': 10.477328786500182,
  'best_material': 0.23630460301817519,
  'best_efficiency': 44.3382340110164,
  'best_iteration': 0},
 {'best_morphology': 'results/Random_m25_r5/assets/GPTAnt_1.xml',
  'best_parameter': [0.33801908496793387,
   0.09354222187600843,
   0.16686477425102697,
   0.2217327113238335,
   0.09337094962825687,
   0.5277450341296895,
   0.31357597565930206,
   0.03233484980523353,
   0.031055784882686482,
   0.04758609742489654],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_4.py',
  'best_fitness': 6.587443981866637,
  'best_material': 0.18754668229518

In [17]:
# Display whatever `best_efficiency` currently holds in the kernel.
# NOTE(review): this cell's execution count (17) is lower than that of the
# cells above it, so the shown value may stem from an earlier kernel state —
# confirm before relying on it.
best_efficiency

23.75922783769693