In [1]:
import importlib
import json
import logging
import os
import re
import shutil
import time

import numpy as np
from openai import OpenAI
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

# Project-local star imports are kept last and in their original relative
# order so that name shadowing between them is unchanged.
from design import *
from utils import *
from prompts import *

In [None]:
import prompts
class DGA:
    """LLM-backed generator and refiner of Ant morphologies and reward functions.

    Every public method sends chat-completion requests through ``self.client``
    and writes the generated artefacts below ``folder_name``: reward-function
    modules go to ``<folder_name>/env/*.py`` and the XML returned by
    ``ant_design`` goes to ``<folder_name>/assets/*.xml``.
    """

    def __init__(self, api_key=None, model="gpt-4-turbo"):
        """Create the OpenAI client.

        Args:
            api_key: API key to use. When omitted, the ``OPENAI_API_KEY``
                environment variable is used so that no credential has to be
                stored in the notebook.
            model: Chat model name used for every request.
        """
        if api_key is None:
            api_key = os.environ.get("OPENAI_API_KEY")
        self.client = OpenAI(api_key=api_key)
        self.model = model

    # ------------------------------------------------------------------
    # small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _write_text(file_path, text):
        """Write ``text`` to ``file_path``, creating the parent directory if needed."""
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(file_path, "w") as fp:
            fp.write(text)

    @staticmethod
    def _read_if_file(candidate):
        """Return the file content when ``candidate`` is a path to an existing file.

        Anything else (for example a string that already holds source code)
        is returned unchanged.
        """
        if isinstance(candidate, (str, os.PathLike)) and os.path.isfile(candidate):
            with open(candidate, "r") as fp:
                return fp.read()
        return candidate

    @staticmethod
    def _scored_pairs(candidates, fitness_list):
        """Pair candidates with their fitness, dropping pairs that were never evaluated (``None``)."""
        return [(c, f) for c, f in zip(candidates, fitness_list) if f is not None]

    @staticmethod
    def _history_prompt(base_prompt, label, candidates, fitness_list, best_candidate):
        """Append the evaluated candidates and the current best to ``base_prompt``.

        The text layout (including the historical ``fintess`` spelling) is kept
        exactly as in earlier runs so prompts stay comparable.

        Raises:
            ValueError: if no candidate has a non-``None`` fitness.
        """
        scored = DGA._scored_pairs(candidates, fitness_list)
        parts = [base_prompt]
        for candidate, fitness in scored:
            parts.append(f"{label}:{candidate} \n" + f"fintess:{fitness}")
        best_fitness = max(fitness for _, fitness in scored)
        parts.append(f"best {label}:{best_candidate} \n" + f"best fintess:{best_fitness}")
        return "".join(parts)

    def _save_morphology(self, parameter, folder_name, filename):
        """Build the XML for ``parameter`` with ``ant_design`` and save it under ``assets``."""
        xml_text = ant_design(parameter)
        file_path = os.path.join(folder_name, "assets", filename)
        self._write_text(file_path, xml_text)
        return file_path

    # ------------------------------------------------------------------
    # text helpers
    # ------------------------------------------------------------------
    def extract_code(self, text):
        """Return the body of the first ```` ```python ```` fenced block in ``text``.

        Returns ``None`` when ``text`` is empty/``None`` or has no such block.
        """
        if not text:
            return None
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` unchanged.

        Despite its name this helper has never altered the text (both branches
        of the original per-line expression returned the line as-is). It is
        kept so existing call sites keep working; the generated functions are
        written at module level and need no extra indentation.
        """
        return code

    # ------------------------------------------------------------------
    # reward-function generation
    # ------------------------------------------------------------------
    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Generate ``rewardfunc_nums`` reward functions with a single request.

        Each response that contains a Python code block is saved as
        ``<folder_name>/env/GPTAnt_<i>.py`` (``i`` is the response index).

        Returns:
            list[str]: paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )

        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                continue
            # Prepend the numpy import like the other reward-function writers
            # do, so the saved module can be imported on its own.
            full_code = "import numpy as np\n" + self.indent_code(reward_code) + "\n"
            file_path = os.path.join(folder_name, "env", f"GPTAnt_{i}.py")
            self._write_text(file_path, full_code)  # written once (was written twice)
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files

    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate one initial reward function, then ``rewardfunc_nums - 1`` diverse ones.

        Files are saved as ``<folder_name>/env/GPTrewardfunc_<i>.py``. A
        response without a Python code block is skipped, so the returned list
        can be shorter than ``rewardfunc_nums``.

        Returns:
            list[str]: paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # Initial reward function. Only this request carries a timeout.
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            self._write_text(file_path, "import numpy as np\n" + self.indent_code(initial_code) + "\n")
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # Diverse reward functions. ``messages`` is never extended here, so the
        # follow-up conversation is the same for every request.
        # NOTE(review): unlike generate_morphology_div, earlier answers are not
        # fed back to the model -- confirm this is intended.
        diverse_messages = messages + [
            {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
        ]
        for i in range(1, rewardfunc_nums):
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )

            diverse_code = self.extract_code(response.choices[0].message.content)
            if diverse_code:
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                self._write_text(file_path, "import numpy as np\n" + self.indent_code(diverse_code) + "\n")
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    # ------------------------------------------------------------------
    # morphology generation
    # ------------------------------------------------------------------
    def generate_morphology(self, morphology_nums, folder_name):
        """Generate ``morphology_nums`` morphologies with a single request.

        Each valid parameter list is saved as
        ``<folder_name>/assets/GPTAnt_<i>.xml`` (``i`` is the response index).
        Responses whose ``parameters`` entry is missing, empty or not a list
        are skipped in all three returned lists, so the lists always stay
        aligned with each other.

        Returns:
            tuple: ``(xml_files, material_list, parameter_list)``.
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # Show the raw responses before parsing them.
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        # Parse everything first so a malformed response fails before any file is written.
        candidates = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]

        xml_files, material_list, parameter_list = [], [], []
        for i, parameter in enumerate(candidates):
            if not isinstance(parameter, list) or not parameter:
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            material = compute_ant_volume(parameter)
            filename = f"GPTAnt_{i}.xml"
            file_path = self._save_morphology(parameter, folder_name, filename)

            xml_files.append(file_path)
            material_list.append(material)
            parameter_list.append(parameter)
            print(f"Successfully saved {filename}")

        return xml_files, material_list, parameter_list

    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate one initial morphology, then ``morphology_nums - 1`` diverse ones.

        Every accepted answer is appended to the conversation as an assistant
        message, so later requests see the earlier designs. Files are saved as
        ``<folder_name>/assets/GPTAnt_<i>.xml``.

        Returns:
            tuple: ``(xml_files, material_list, parameter_list)``.
        """
        material_list = []
        xml_files = []
        parameter_list = []

        # Initial morphology.
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )

        initial_parameter = json.loads(response.choices[0].message.content)
        parameters = initial_parameter['parameters']
        parameter_list.append(parameters)
        material_list.append(compute_ant_volume(parameters))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{parameters}")

        xml_files.append(self._save_morphology(parameters, folder_name, "GPTAnt_0.xml"))

        # Diverse follow-up designs.
        diverse_request = {"role": "user", "content": morphology_div_prompts + morphology_format}
        for i in range(1, morphology_nums):
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages + [diverse_request],
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            parameters = diverse_parameter['parameters']
            material_list.append(compute_ant_volume(parameters))
            parameter_list.append(parameters)
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{parameters}")
            xml_files.append(self._save_morphology(parameters, folder_name, f"GPTAnt_{i}.xml"))

        return xml_files, material_list, parameter_list

    # ------------------------------------------------------------------
    # refinement
    # ------------------------------------------------------------------
    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the LLM for a refined reward function based on evaluated ones.

        Args:
            best_rewardfunc: best reward function so far, either as source
                text or as a path to a file containing it.
            rewardfunc_list: evaluated reward functions (source text or paths).
            fitness_list: score of each entry of ``rewardfunc_list``; ``None``
                entries (not evaluated) are ignored.
            folder_name: result folder; the file goes to ``<folder_name>/env``.
            step, rewardfunc_index, morphology_index: only used to build the
                file name ``GPTAnt_refine_<step>_<rewardfunc_index>_<morphology_index>.py``.

        Returns:
            str: path of the written reward-function module.

        Raises:
            ValueError: if no entry has a score, or if the response contains
                no Python code block (previously an ``UnboundLocalError``).
        """
        # Paths are replaced by the code they point to, so the model sees the
        # reward functions themselves rather than file names.
        reward_improve_prompts = self._history_prompt(
            prompts.reward_improve_prompts,
            "reward function",
            [self._read_if_file(candidate) for candidate in rewardfunc_list],
            fitness_list,
            self._read_if_file(best_rewardfunc),
        )

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            # Send the history-augmented prompt; it used to be built and then discarded.
            {"role": "user", "content": reward_improve_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            raise ValueError("LLM response contains no ```python code block; no refined reward function was written")

        full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
        file_name = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
        file_path = os.path.join(folder_name, "env", file_name)
        self._write_text(file_path, full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the LLM for a refined morphology based on evaluated ones.

        Args:
            best_parameter: best parameter list so far.
            parameter_list: evaluated parameter lists.
            fitness_list: score of each entry of ``parameter_list``; ``None``
                entries (not evaluated) are ignored.
            folder_name: result folder; the file goes to ``<folder_name>/assets``.
            step, rewardfunc_index, morphology_index: only used to build the
                file name ``GPTAnt_refine_<step>_<rewardfunc_index>_<morphology_index>.xml``.

        Returns:
            tuple: ``(file_path, parameter)`` of the refined design.

        Raises:
            ValueError: if no entry of ``fitness_list`` has a score.
        """
        morphology_improve_prompts = self._history_prompt(
            prompts.morphology_improve_prompts,
            "parameter",
            parameter_list,
            fitness_list,
            best_parameter,
        )

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', [])
        print(parameter)

        filename = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.xml"
        file_path = self._save_morphology(parameter, folder_name, filename)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

# Result folder for this run. Reward functions are written to
# <folder_name>/env and morphology XML files to <folder_name>/assets.
folder_name = "results/noDiv_m25_r5"

# Create the output tree up front: logging.basicConfig and every later
# open(..., "w") fail with FileNotFoundError on a fresh checkout otherwise.
os.makedirs(os.path.join(folder_name, "env"), exist_ok=True)
os.makedirs(os.path.join(folder_name, "assets"), exist_ok=True)

log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# Best (morphology, reward function) pair seen so far.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Size of the coarse search grid.
morphology_nums = 25
rewardfunc_nums = 5

# Object-dtype grids indexed [rewardfunc_index][morphology_index]; None marks
# a pair that has not been evaluated yet.
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()



# return file list of morphology and reward function: [GPTAnt_{i}.xml] and [GPTAnt_{j}.py]



In [4]:
# Mark the beginning of the run in the log file.
logging.info("start!")

# print configuration info

In [7]:
# Ask the LLM for `morphology_nums` diverse morphologies. Returns the XML file
# paths, the material cost of each design and the raw parameter lists.
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

params: [0.2, 0.3, 0.06, 0.2, 0.15, 0.1, 0.05, 0.02, 0.02, 0.015]
params: [0.25, 0.35, 0.08, 0.25, 0.2, 0.15, 0.1, 0.03, 0.025, 0.02]
params: [0.15, 0.2, 0.05, 0.15, 0.1, 0.08, 0.04, 0.015, 0.015, 0.01]
params: [0.3, 0.45, 0.1, 0.4, 0.3, 0.25, 0.15, 0.05, 0.04, 0.03]
params: [0.18, 0.25, 0.04, 0.12, 0.09, 0.06, 0.03, 0.02, 0.018, 0.015]
params: [0.35, 0.5, 0.1, 0.25, 0.05, 0.15, 0.02, 0.04, 0.03, 0.025]
params: [0.45, 0.6, 0.15, 0.35, 0.2, 0.25, 0.1, 0.07, 0.06, 0.05]
params: [0.2, 0.1, 0.3, 0.2, 0.4, 0.15, 0.35, 0.015, 0.025, 0.02]
params: [0.22, 0.18, 0.07, 0.1, 0.05, 0.12, 0.06, 0.025, 0.02, 0.015]
params: [0.1, 0.15, 0.02, 0.3, 0.1, 0.05, 0.02, 0.01, 0.015, 0.01]
params: [0.4, 0.25, 0.06, 0.2, 0.05, 0.3, 0.15, 0.03, 0.025, 0.02]
params: [0.25, 0.1, 0.2, 0.15, 0.3, 0.1, 0.2, 0.02, 0.03, 0.02]
params: [0.3, 0.15, 0.1, 0.05, 0.2, 0.08, 0.04, 0.02, 0.015, 0.01]
params: [0.5, 0.2, 0.12, 0.25, 0.08, 0.3, 0.1, 0.04, 0.03, 0.025]
params: [0.15, 0.25, 0.08, 0.12, 0.07, 0.15, 0.09, 0.015, 0.

In [8]:
# Ask the LLM for `rewardfunc_nums` diverse reward functions; returns the
# paths of the generated Python files.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)

initial Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_0.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_1.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_2.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_3.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_4.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_5.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_6.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_7.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_8.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_9.py


In [9]:
# Sanity check: the matrix is created as (rewardfunc_nums, morphology_nums).
efficiency_matrix.shape

(10, 50)

# enter coarse optimization stage

In [5]:
# Resume from previously generated artefacts instead of calling the LLM again.
# Paths are derived from the configuration cell so they cannot drift from
# `folder_name`, `morphology_nums` and `rewardfunc_nums`.
morphology_list = [f'{folder_name}/assets/GPTAnt_{i}.xml' for i in range(morphology_nums)]
rewardfunc_list = [f'{folder_name}/env/GPTrewardfunc_{i}.py' for i in range(rewardfunc_nums)]

# Parameter lists of the saved morphologies, one row per GPTAnt_<i>.xml.
parameter_list = [
    [0.2, 0.1, 0.05, 0.2, 0.1, 0.15, 0.1, 0.02, 0.02, 0.02],
    [0.35, 0.2, 0.1, 0.2, 0.15, 0.15, 0.1, 0.02, 0.02, 0.02],
    [0.1, 0.2, 0.15, 0.2, 0.15, 0.1, 0.15, 0.05, 0.05, 0.05],
    [0.15, 0.08, 0.1, 0.12, 0.18, 0.1, 0.15, 0.035, 0.035, 0.035],
    [0.15, 0.2, 0.05, 0.25, 0.08, 0.25, -0.08, 0.025, 0.02, 0.02],
    [0.5, 0.2, 0.1, 0.4, 0.2, 0.3, 0.15, 0.05, 0.04, 0.03],
    [0.15, 0.08, 0.08, 0.12, 0.12, 0.16, 0.16, 0.025, 0.025, 0.02],
    [0.2, 0.1, 0.15, 0.2, 0.2, 0.15, 0.15, 0.05, 0.04, 0.04],
    [0.1, 0.4, 0.2, 0.3, 0.1, 0.25, 0.1, 0.05, 0.05, 0.04],
    [0.25, 0.4, 0.2, 0.3, 0.2, 0.2, 0.15, 0.08, 0.08, 0.06],
    [0.2, 0.3, 0.1, 0.15, 0.05, 0.1, 0.02, 0.02, 0.02, 0.01],
    [0.15, 0.2, 0.1, 0.3, 0.1, 0.25, 0.15, 0.04, 0.05, 0.03],
    [0.25, 0.2, 0.2, 0.3, 0.3, 0.3, 0.3, 0.08, 0.08, 0.08],
    [0.5, 0.15, 0.1, 0.3, 0.2, 0.35, 0.25, 0.03, 0.04, 0.03],
    [0.25, 0.2, 0.15, 0.3, 0.2, 0.35, 0.25, 0.02, 0.02, 0.02],
    [0.25, 0.3, 0.2, 0.3, 0.2, 0.2, 0.1, 0.04, 0.03, 0.02],
    [0.2, 0.15, 0.1, 0.25, 0.2, 0.15, 0.1, 0.05, 0.04, 0.03],
    [0.5, 0.3, 0.1, 0.4, 0.15, 0.2, 0.1, 0.08, 0.06, 0.05],
    [0.45, 0.15, 0.075, 0.2, 0.11, 0.18, 0.09, 0.03, 0.03, 0.03],
    [0.2, 0.2, 0.05, 0.4, 0.1, 0.3, 0.1, 0.05, 0.05, 0.04],
    [0.6, 0.2, 0.2, 0.3, 0.15, -0.2, -0.1, 0.08, 0.07, 0.07],
    [0.2, 0.2, 0.2, 0.25, 0.25, 0.15, 0.15, 0.05, 0.05, 0.05],
    [0.15, 0.2, 0.05, 0.3, 0.08, 0.2, 0.05, 0.04, 0.03, 0.03],
    [0.5, 0.2, 0.2, 0.3, 0.1, 0.35, 0.1, 0.08, 0.08, 0.05],
    [0.15, 0.12, 0.06, 0.18, 0.06, 0.14, -0.06, 0.015, 0.015, 0.01],
]
# NOTE(review): a few rows contain negative values -- confirm that
# ant_design / compute_ant_volume accept them.

# Fail early if the pasted data does not match the configured grid size.
assert len(parameter_list) == morphology_nums, "parameter_list does not match morphology_nums"

material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

params: [0.2, 0.1, 0.05, 0.2, 0.1, 0.15, 0.1, 0.02, 0.02, 0.02]
params: [0.35, 0.2, 0.1, 0.2, 0.15, 0.15, 0.1, 0.02, 0.02, 0.02]
params: [0.1, 0.2, 0.15, 0.2, 0.15, 0.1, 0.15, 0.05, 0.05, 0.05]
params: [0.15, 0.08, 0.1, 0.12, 0.18, 0.1, 0.15, 0.035, 0.035, 0.035]
params: [0.15, 0.2, 0.05, 0.25, 0.08, 0.25, -0.08, 0.025, 0.02, 0.02]
params: [0.5, 0.2, 0.1, 0.4, 0.2, 0.3, 0.15, 0.05, 0.04, 0.03]
params: [0.15, 0.08, 0.08, 0.12, 0.12, 0.16, 0.16, 0.025, 0.025, 0.02]
params: [0.2, 0.1, 0.15, 0.2, 0.2, 0.15, 0.15, 0.05, 0.04, 0.04]
params: [0.1, 0.4, 0.2, 0.3, 0.1, 0.25, 0.1, 0.05, 0.05, 0.04]
params: [0.25, 0.4, 0.2, 0.3, 0.2, 0.2, 0.15, 0.08, 0.08, 0.06]
params: [0.2, 0.3, 0.1, 0.15, 0.05, 0.1, 0.02, 0.02, 0.02, 0.01]
params: [0.15, 0.2, 0.1, 0.3, 0.1, 0.25, 0.15, 0.04, 0.05, 0.03]
params: [0.25, 0.2, 0.2, 0.3, 0.3, 0.3, 0.3, 0.08, 0.08, 0.08]
params: [0.5, 0.15, 0.1, 0.3, 0.2, 0.35, 0.25, 0.03, 0.04, 0.03]
params: [0.25, 0.2, 0.15, 0.3, 0.2, 0.35, 0.25, 0.02, 0.02, 0.02]
params: [0.25, 0

In [6]:
# Record the run configuration before the coarse stage starts. Values are
# passed as logging arguments, so formatting is left to the logging module.
logging.info('folder_name:%s', folder_name)
logging.info('morphology_nums:%s', morphology_nums)
logging.info('rewardfunc_nums:%s', rewardfunc_nums)
logging.info('parameter_list:%s', parameter_list)
logging.info('morphology_list:%s', morphology_list)
logging.info('material_list:%s', material_list)
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [15]:
# Coarse stage: train and evaluate (reward function, morphology) pairs.
#
# The two subsets restrict this run to part of the grid so an interrupted
# sweep can be resumed; set both to None to evaluate every pair.
rewardfunc_subset = [0]
morphology_subset = [20, 21, 22, 23, 24]

for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        if rewardfunc_subset is not None and i not in rewardfunc_subset:
            continue
        if morphology_subset is not None and j not in morphology_subset:
            continue
        print(i, rewardfunc)
        print(j, morphology)

        # Install the candidate pair under the fixed file names used below.
        shutil.copy(morphology, "GPTAnt.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # The module can only be imported after the copy above; reload so the
        # freshly copied reward function replaces the previous one.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        GPTAntEnv._get_rew = GPTrewardfunc._get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Was never updated, so the stage summary always logged "Reward: None".
            best_reward = reward

0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
20 results/noDiv_m25_r5/assets/GPTAnt_20.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
21 results/noDiv_m25_r5/assets/GPTAnt_21.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
22 results/noDiv_m25_r5/assets/GPTAnt_22.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
5

In [17]:
# Display the coarse-stage efficiencies (rows: reward functions, columns: morphologies).
efficiency_matrix

array([[32.94409095277604, 5.042274425850351, 530.888295465982,
        32.38450839720384, 12.817670964036482, 3.959710983897008,
        36.28077373397893, 34.15451508018792, 26.004649784223183,
        29.19895714421399, 68.20026854923199, 10.202471271728559,
        14.879867731079777, 6.375736592123467, 7.81910214122997,
        14.053159992114955, 20.234803192292205, 3.829015879422846,
        13.544732931936283, 8.74864566014534, -0.9741934366056402,
        105.03398146276811, 20.112085253040444, 11.439038377082184,
        13.075943174694013],
       [32.49916827016327, 8.46928910791899, 334.74961491066654,
        12.208070932013744, 19.894617280603093, 4.001802972624017,
        43.940799924804274, 37.28079524220187, 35.4545560697721,
        54.01203315878738, 28.742505785364465, 19.086891605876964,
        18.3594558281302, 8.87728436181615, 7.4922603565913395,
        21.545665542487694, 30.271083280420697, 2.593433502869734,
        18.379932692055007, 11.245271339443685,

# print coarse optimization info

In [15]:
# Summarise the coarse stage in the log before the fine stage begins. Values
# are passed as logging arguments instead of being formatted with f-strings.
logging.info('_________________________________end coarse optimization stage_________________________________')
logging.info(
    "Stage1: Final best morphology: %s, Fitness: %s, best_efficiency: %s, best reward function: %s, Material cost: %s, Reward: %s",
    best_morphology,
    best_fitness,
    best_efficiency,
    best_rewardfunc,
    best_material,
    best_reward,
)
logging.info('folder_name:%s', folder_name)
logging.info('parameter_list:%s', parameter_list)
logging.info('fitness_matrix:%s', fitness_matrix)
logging.info('efficiency_matrix:%s', efficiency_matrix)
logging.info('_________________________________enter fine optimization stage_________________________________')

In [18]:
# Hard-coded snapshot of the coarse-stage efficiency matrix, indexed
# [rewardfunc_index][morphology_index] (5 x 25). It appears to be pasted from
# the matrix displayed earlier, so the fine stage can run without repeating
# the coarse training sweep.
efficiency_matrix = np.array([[32.94409095277604, 5.042274425850351, 530.888295465982,
        32.38450839720384, 12.817670964036482, 3.959710983897008,
        36.28077373397893, 34.15451508018792, 26.004649784223183,
        29.19895714421399, 68.20026854923199, 10.202471271728559,
        14.879867731079777, 6.375736592123467, 7.81910214122997,
        14.053159992114955, 20.234803192292205, 3.829015879422846,
        13.544732931936283, 8.74864566014534, -0.9741934366056402,
        105.03398146276811, 20.112085253040444, 11.439038377082184,
        13.075943174694013],
       [32.49916827016327, 8.46928910791899, 334.74961491066654,
        12.208070932013744, 19.894617280603093, 4.001802972624017,
        43.940799924804274, 37.28079524220187, 35.4545560697721,
        54.01203315878738, 28.742505785364465, 19.086891605876964,
        18.3594558281302, 8.87728436181615, 7.4922603565913395,
        21.545665542487694, 30.271083280420697, 2.593433502869734,
        18.379932692055007, 11.245271339443685, 3.626701800142327,
        148.26655767419757, 1.609575219174359, 8.78851454573012,
        13.573477266514125],
       [34.66181125073692, 6.956364996813744, 586.2780834403495,
        26.713626821469735, 23.26443211363671, 2.5652601809797857,
        36.10365810313643, 35.88974405820809, 27.64039658584968,
        23.90365050545669, 62.013228684663645, 10.84480560322066,
        9.686524941664139, 16.276559958145878, 14.300806716110563,
        8.51087806024666, 51.23819615599343, 5.670802886432697,
        18.171600088125, 0.5920027172192128, 1.9654912881253654,
        78.48532524743736, 4.149464360892902, 12.60089449779445,
        0.916148318940982],
       [50.09199240760774, 5.098867098061485, 389.57408308991825,
        59.14472366433369, 51.74831663790402, 4.210616910617497,
        30.622128896042778, 43.977911290418376, 40.39946869012857,
        23.694756913534953, 109.07254001987351, 64.1592809893828,
        26.76440015004287, 10.142737119712134, 35.27667962800713,
        39.74616503819631, 127.02236667378223, 2.2889861598543577,
        17.763649730878335, 32.96790904102237, 3.7971988805349692,
        167.81779012091093, 42.44672884976876, 11.868643515556816,
        22.97679333618805],
       [18.29927799225419, 2.382295498344802, 557.0807258720989,
        20.5425622405761, 13.976575743351997, 5.067941594716108,
        37.68658375947998, 33.890634882767856, 29.584173572929085,
        32.64950646965401, 34.089264629508136, 13.942325612248737,
        20.144980356482858, 12.998496406300408, 16.415423943209415,
        0.1084846003014571, 22.64931878616364, 4.362572203577842,
        32.75124749663153, 9.683496710339135, 2.046109120551328,
        220.59619484787603, 5.067634632641362, 13.026408200725072,
        38.92420170996183]], dtype=object)

# Summary statistics over all coarse-stage efficiencies.
mean = efficiency_matrix.mean()
std = efficiency_matrix.std()

# The printed labels are Chinese for "mean" and "standard deviation".
print("平均值：", mean)
print("标准差：", std)



平均值： 45.56208314953302
标准差： 96.3753609873544


In [21]:
# Display the coarse-stage fitness values (rows: reward functions, columns: morphologies).
fitness_matrix

array([[1.202610035873667, 0.9241646215500069, 16.90535496343106,
        0.7891785629311393, 0.24257451068735514, 2.166072256929404,
        0.6586276156628594, 1.822656442047366, 1.010225685181438,
        4.744847916414742, 2.4761889497498335, 0.36202527605635004,
        2.7107550299121987, 3.4411678126318335, 0.5558048916173982,
        1.1180659185469939, 1.0374188708667815, 2.257872525022805,
        5.279938849813229, 0.5648060560578538, -0.9566779449239333,
        6.979592575855754, 0.5249720987860913, 6.891633203193622,
        0.20103258992379813],
       [1.1863683224791481, 1.552279130840358, 10.659607891655414,
        0.29749866065960956, 0.3765057681457861, 2.189097747777433,
        0.7976848701461984, 1.9894904510667166, 1.3773345726855044,
        8.776987538597078, 1.0435717736575045, 0.6772807311717643,
        3.344652528637988, 4.791324856048898, 0.5325720114981809,
        1.7141677991984798, 1.551969284714892, 1.529281265996174,
        7.164771809484505, 0.72

In [20]:
# Hard-coded snapshot of the coarse-stage fitness matrix, indexed
# [rewardfunc_index][morphology_index] (5 x 25). It appears to be pasted from
# the matrix displayed in the previous cell, so the fine stage can run without
# repeating the coarse training sweep.
fitness_matrix = np.array([[1.202610035873667, 0.9241646215500069, 16.90535496343106,
        0.7891785629311393, 0.24257451068735514, 2.166072256929404,
        0.6586276156628594, 1.822656442047366, 1.010225685181438,
        4.744847916414742, 2.4761889497498335, 0.36202527605635004,
        2.7107550299121987, 3.4411678126318335, 0.5558048916173982,
        1.1180659185469939, 1.0374188708667815, 2.257872525022805,
        5.279938849813229, 0.5648060560578538, -0.9566779449239333,
        6.979592575855754, 0.5249720987860913, 6.891633203193622,
        0.20103258992379813],
       [1.1863683224791481, 1.552279130840358, 10.659607891655414,
        0.29749866065960956, 0.3765057681457861, 2.189097747777433,
        0.7976848701461984, 1.9894904510667166, 1.3773345726855044,
        8.776987538597078, 1.0435717736575045, 0.6772807311717643,
        3.344652528637988, 4.791324856048898, 0.5325720114981809,
        1.7141677991984798, 1.551969284714892, 1.529281265996174,
        7.164771809484505, 0.7259863527752124, 3.5614955866476463,
        9.852432048930213, 0.04201364852688788, 5.2947823631266635,
        0.20868179470525913],
       [1.2653146851569885, 1.2749854295286405, 18.66916108809607,
        0.650984766144126, 0.44027953691576394, 1.403268807350547,
        0.6554123246524264, 1.9152569742988204, 1.0737709913927531,
        3.884357435621543, 2.251552301387569, 0.38481791692594797,
        1.7646525280020482, 8.784926011143323, 1.0165436111874186,
        0.6771233446008077, 2.626932967732013, 3.343927117384106,
        7.083560654218445, 0.03821925505696635, 1.9301527762713144,
        5.215413009977325, 0.10831064939160008, 7.591612166012417,
        0.014085077217793073],
       [1.8285868890008143, 0.9345371124299904, 12.4054122410206,
        1.4412986435949207, 0.9793372464119494, 2.303324791060958,
        0.5559026907530482, 2.346882195020982, 1.5694339772732437,
        3.850412102591776, 3.960163560402853, 2.27663286601178,
        4.875831804456954, 5.474325986343008, 2.5075706574900867,
        3.1621950185641823, 6.512314399961006, 1.3497564709163938,
        6.924535522394459, 2.12838368420382, 3.728927218145685,
        11.15162717543584, 1.107958128185567, 7.150455749195916,
        0.35325056179940456],
       [0.6680073641817276, 0.4366349452066596, 17.739414288437867,
        0.500602002324701, 0.26450679156462686, 2.7723052850815457,
        0.684148165802162, 1.8085744695577202, 1.1492826193095242,
        5.305564235714683, 1.237699824008046, 0.4947305553928502,
        3.669932274650377, 7.015661139644728, 1.1668568539985786,
        0.008631007855337021, 1.161208759865479, 2.572497014133911,
        12.76692459764821, 0.6251593444607502, 2.009321141969012,
        14.658794633697092, 0.13227702426214794, 7.847969760668774,
        0.5984297251775927]], dtype=object)

# configuration of fine optimization

In [22]:
# Fraction of the evaluated pairs that is carried over to the fine stage.
# The previous comments said 20%, but the code has always used 10%.
TOP_FRACTION = 0.1

# Collect every evaluated (non-None) efficiency together with its
# (rewardfunc_index, morphology_index) coordinate.
all_values_with_coords = [
    ((i, j), value)
    for i, row in enumerate(efficiency_matrix)
    for j, value in enumerate(row)
    if value is not None
]

# Sort by efficiency, best first.
sorted_values = sorted(all_values_with_coords, key=lambda item: item[1], reverse=True)

# Keep the top TOP_FRACTION of the pairs, but always at least one.
top_k = max(1, int(len(sorted_values) * TOP_FRACTION))
efficiency_coarse_best = [coord for coord, _ in sorted_values[:top_k]]

# The log labels say "fitness" for historical reasons; the values are efficiencies.
logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [23]:
# Pairs refined in the fine stage: (rewardfunc_index, morphology_index), best efficiency first.
coarse_best = efficiency_coarse_best
coarse_best

[(2, 2),
 (4, 2),
 (0, 2),
 (3, 2),
 (1, 2),
 (4, 21),
 (3, 21),
 (1, 21),
 (3, 16),
 (3, 10),
 (0, 21),
 (2, 21)]

In [24]:
# Score matrix used by the fine stage (an alias of efficiency_matrix, not a copy).
efficiency_matrix_select = efficiency_matrix

# enter fine optimization stage

In [None]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)
    
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()
        
         # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )

        shutil.copy(improved_morphology, "GPTAnt.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_ant_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break
            
            
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTAnt.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_ant_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material


        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
            

            
    # Store the final best result obtained for the current coarse_best candidate
    final_optimized_results.append(dict(
        best_morphology=best_morphology,
        best_parameter=best_parameter,
        best_rewardfunc=best_rewardfunc,
        best_fitness=best_fitness,
        best_material=best_material,
        best_efficiency=best_efficiency,
        best_iteration=iteration,
    ))

    # Write a human-readable summary of that result to the log
    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

params: [0.1, 0.2, 0.15, 0.2, 0.15, 0.1, 0.15, 0.05, 0.05, 0.05]
ChatCompletion(id='chatcmpl-BS2bVHXtjyWiMfhQlXFh7KoUgMtH5', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "parameters": [0.15, 0.25, 0.18, 0.25, 0.18, 0.12, 0.18, 0.04, 0.04, 0.04],\n  "desciption": "Optimized parameter design focusing on longer limbs for enhanced movement and moderately small geometries for reduced material cost, maintaining balance and efficiency."\n}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))], created=1746022893, model='gpt-4-turbo-2024-04-09', object='chat.completion', service_tier=None, system_fingerprint='fp_5603ee5e2e', usage=CompletionUsage(completion_tokens=89, prompt_tokens=4071, total_tokens=4160, completion_tokens_details=None, prompt_tokens_details=None))
[0.15, 0.25, 0.18, 0.25, 0.18, 0.12, 0.18, 0.04, 0.04, 0.04]
Successfully saved GPTAnt_refine_2_2_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [0.15, 0.25, 0.18, 0.25, 0.18, 0.12, 0.18, 0.04, 0.04, 0.04]
ChatCompletion(id='chatcmpl-BS2k4aAAvaCUgH8gGySRm0XHSaK09', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```python\ndef _get_rew(self, x_velocity: float, action):\n    # The primary reward driving the agent is the forward velocity along the x-axis\n    forward_reward = x_velocity * self._forward_reward_weight\n    \n    # Additional reward for keeping the ant healthy (not flipping over or moving out of bounds)\n    healthy_reward = self.healthy_reward\n\n    # We sum these to get an overall reward that promotes healthiness and speed\n    rewards = forward_reward + healthy_reward

In [None]:
# Write the complete list of fine-optimization results to the log.
logging.info(str(final_optimized_results))

# Disabled legacy summary line, kept for reference only:
# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [29]:
# Display the list of per-candidate result dicts collected by the fine-optimization cell.
final_optimized_results

[{'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_2_2_0.xml',
  'best_parameter': [0.15,
   0.25,
   0.18,
   0.25,
   0.18,
   0.12,
   0.18,
   0.04,
   0.04,
   0.04],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_2.py',
  'best_fitness': 45.609774996671874,
  'best_material': 0.034091557132800765,
  'best_efficiency': 1337.8613015240949,
  'best_iteration': 1},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_4_2_2.xml',
  'best_parameter': [0.1,
   0.25,
   0.17,
   0.25,
   0.15,
   0.12,
   0.18,
   0.03,
   0.03,
   0.03],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_4.py',
  'best_fitness': 67.6709174730936,
  'best_material': 0.014709160909209498,
  'best_efficiency': 4600.5967227351775,
  'best_iteration': 3},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_0_2_0.xml',
  'best_parameter': [0.12,
   0.22,
   0.17,
   0.22,
   0.17,
   0.12,
   0.17,
   0.05,
   0.05,
   0.05],
  'best_rewardfunc': 're

In [30]:
# NOTE(review): `best_efficiency` is reassigned inside the fine-optimization loop,
# so this presumably shows the value left by the last candidate processed, not the
# maximum over `final_optimized_results` — confirm before quoting it as the best.
best_efficiency

777.7203832403778

 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_4_2_2.xml',
  'best_parameter': [0.1,
   0.25,
   0.17,
   0.25,
   0.15,
   0.12,
   0.18,
   0.03,
   0.03,
   0.03],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_4.py',
  'best_fitness': 67.6709174730936,
  'best_material': 0.014709160909209498,
  'best_efficiency': 4600.5967227351775,
  'best_iteration': 3},