In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

In [None]:
import prompts
class DGA:
    def __init__(self):
        self.client = OpenAI(api_key="api_key")
        self.model = "gpt-4o-mini"
        
        
    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTAnt_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        # env_path = os.path.join(os.path.dirname(__file__), "env", "ant_v5.py")
        # with open(env_path, "r") as f:
        #     env_content = f.read().rstrip()

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )

            diverse_code = self.extract_code(response.choices[0].message.content)
            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = ant_design(parameter)  
            filename = f"GPTAnt_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_ant_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = ant_design(initial_parameter['parameters'])  

        filename = f"GPTAnt_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_ant_volume(diverse_parameter['parameters']))
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = ant_design(diverse_parameter['parameters'])  
            filename = f"GPTAnt_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        reward_improve_prompts = prompts.reward_improve_prompts
        for reward_content, fitness in zip(rewardfunc_list, fitness_list):
            reward_improve_prompts = reward_improve_prompts + f"reward function:{reward_content} \n" + f"fintess:{fitness}"
        reward_improve_prompts = reward_improve_prompts + f"best reward function:{best_rewardfunc} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path
    
    

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = ant_design(parameter)  
        filename = f"GPTAnt_refine_{step}_{rewardfunc_index}_{morphology_index}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

folder_name = "results/noDiv_m25_r5"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 25
rewardfunc_nums = 5

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()



# return file list of morphology and reward function: [GPTAnt_{i}.xml] and [GPTAnt_{j}.py]



In [4]:
logging.info(f"start!")

# print configuration info

In [7]:
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

params: [0.2, 0.3, 0.06, 0.2, 0.15, 0.1, 0.05, 0.02, 0.02, 0.015]
params: [0.25, 0.35, 0.08, 0.25, 0.2, 0.15, 0.1, 0.03, 0.025, 0.02]
params: [0.15, 0.2, 0.05, 0.15, 0.1, 0.08, 0.04, 0.015, 0.015, 0.01]
params: [0.3, 0.45, 0.1, 0.4, 0.3, 0.25, 0.15, 0.05, 0.04, 0.03]
params: [0.18, 0.25, 0.04, 0.12, 0.09, 0.06, 0.03, 0.02, 0.018, 0.015]
params: [0.35, 0.5, 0.1, 0.25, 0.05, 0.15, 0.02, 0.04, 0.03, 0.025]
params: [0.45, 0.6, 0.15, 0.35, 0.2, 0.25, 0.1, 0.07, 0.06, 0.05]
params: [0.2, 0.1, 0.3, 0.2, 0.4, 0.15, 0.35, 0.015, 0.025, 0.02]
params: [0.22, 0.18, 0.07, 0.1, 0.05, 0.12, 0.06, 0.025, 0.02, 0.015]
params: [0.1, 0.15, 0.02, 0.3, 0.1, 0.05, 0.02, 0.01, 0.015, 0.01]
params: [0.4, 0.25, 0.06, 0.2, 0.05, 0.3, 0.15, 0.03, 0.025, 0.02]
params: [0.25, 0.1, 0.2, 0.15, 0.3, 0.1, 0.2, 0.02, 0.03, 0.02]
params: [0.3, 0.15, 0.1, 0.05, 0.2, 0.08, 0.04, 0.02, 0.015, 0.01]
params: [0.5, 0.2, 0.12, 0.25, 0.08, 0.3, 0.1, 0.04, 0.03, 0.025]
params: [0.15, 0.25, 0.08, 0.12, 0.07, 0.15, 0.09, 0.015, 0.

In [8]:
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)

initial Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_0.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_1.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_2.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_3.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_4.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_5.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_6.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_7.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_8.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_9.py


In [9]:
efficiency_matrix.shape

(10, 50)

# enter coarse optimization stage

In [6]:
morphology_list = [f'results/noDiv_m25_r5/assets/GPTAnt_{i}.xml' for i in range(0,25) ]
rewardfunc_list = [f'results/noDiv_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,5)]

parameter_list =[[0.25, 0.2, 0.05, 0.3, 0.1, 0.15, 0.05, 0.03, 0.02, 0.02],
 [0.1, 0.18, 0.08, 0.1, 0.08, 0.08, 0.05, 0.02, 0.02, 0.02],
 [0.6, 0.35, 0.15, 0.25, 0.15, 0.2, 0.1, 0.08, 0.07, 0.05],
 [0.2, 0.8, 0.3, 0.6, 0.2, 0.4, 0.15, 0.05, 0.05, 0.03],
 [0.3, 0.25, 0.2, 0.4, 0.15, 0.35, 0.15, 0.05, 0.045, 0.04],
 [0.25, 0.2, 0.2, 0.1, 0.1, 0.15, 0.15, 0.05, 0.05, 0.05],
 [0.32, 0.15, 0.075, 0.2, 0.1, 0.15, -0.075, 0.03, 0.03, 0.02],
 [0.15, 0.2, 0.07, 0.25, 0.08, 0.18, 0.06, 0.04, 0.03, 0.03],
 [0.3, 0.2, 0.1, 0.4, 0.15, 0.3, 0.1, 0.04, 0.04, 0.03],
 [0.25, 0.3, 0.2, 0.4, 0.2, 0.2, 0.15, 0.08, 0.07, 0.06],
 [0.35, 0.25, 0.075, 0.15, 0.075, 0.15, 0.075, 0.025, 0.025, 0.025],
 [0.15, 0.13, 0.03, 0.18, 0.04, 0.16, 0.02, 0.02, 0.02, 0.02],
 [0.25, 0.2, 0.1, 0.3, 0.15, 0.2, 0.1, 0.05, 0.05, 0.05],
 [0.35, 0.2, 0.15, 0.2, 0.1, 0.15, 0.1, 0.05, 0.05, 0.05],
 [0.25, 0.15, 0.08, 0.1, 0.05, 0.1, 0.05, 0.025, 0.025, 0.025],
 [0.15, 0.35, 0.1, 0.2, 0.1, 0.15, 0.1, 0.04, 0.04, 0.04],
 [0.2, 0.15, 0.04, 0.18, 0.04, 0.1, 0.04, 0.02, 0.02, 0.015],
 [0.2, 0.25, 0.05, 0.3, 0.05, 0.2, 0.05, 0.04, 0.04, 0.03],
 [0.25, 0.15, 0.05, 0.1, 0.05, 0.08, 0.02, 0.02, 0.02, 0.02],
 [0.2, 0.3, 0.05, 0.35, 0.05, 0.25, 0.04, 0.07, 0.06, 0.05],
 [0.35, 0.25, 0.1, 0.2, 0.12, 0.15, 0.06, 0.025, 0.015, 0.015],
 [0.5, 0.2, 0.2, 0.3, 0.15, 0.35, 0.2, 0.05, 0.045, 0.04],
 [0.3, 0.1, 0.1, 0.15, 0.15, 0.1, 0.1, 0.03, 0.03, 0.02],
 [0.3, 0.25, 0.1, 0.35, 0.1, 0.2, 0.1, 0.05, 0.04, 0.03],
 [0.7, 0.2, 0.1, 0.15, 0.2, 0.1, 0.2, 0.045, 0.045, 0.03]]

material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

params: [0.25, 0.2, 0.05, 0.3, 0.1, 0.15, 0.05, 0.03, 0.02, 0.02]
params: [0.1, 0.18, 0.08, 0.1, 0.08, 0.08, 0.05, 0.02, 0.02, 0.02]
params: [0.6, 0.35, 0.15, 0.25, 0.15, 0.2, 0.1, 0.08, 0.07, 0.05]
params: [0.2, 0.8, 0.3, 0.6, 0.2, 0.4, 0.15, 0.05, 0.05, 0.03]
params: [0.3, 0.25, 0.2, 0.4, 0.15, 0.35, 0.15, 0.05, 0.045, 0.04]
params: [0.25, 0.2, 0.2, 0.1, 0.1, 0.15, 0.15, 0.05, 0.05, 0.05]
params: [0.32, 0.15, 0.075, 0.2, 0.1, 0.15, -0.075, 0.03, 0.03, 0.02]
params: [0.15, 0.2, 0.07, 0.25, 0.08, 0.18, 0.06, 0.04, 0.03, 0.03]
params: [0.3, 0.2, 0.1, 0.4, 0.15, 0.3, 0.1, 0.04, 0.04, 0.03]
params: [0.25, 0.3, 0.2, 0.4, 0.2, 0.2, 0.15, 0.08, 0.07, 0.06]
params: [0.35, 0.25, 0.075, 0.15, 0.075, 0.15, 0.075, 0.025, 0.025, 0.025]
params: [0.15, 0.13, 0.03, 0.18, 0.04, 0.16, 0.02, 0.02, 0.02, 0.02]
params: [0.25, 0.2, 0.1, 0.3, 0.15, 0.2, 0.1, 0.05, 0.05, 0.05]
params: [0.35, 0.2, 0.15, 0.2, 0.1, 0.15, 0.1, 0.05, 0.05, 0.05]
params: [0.25, 0.15, 0.08, 0.1, 0.05, 0.1, 0.05, 0.025, 0.025, 0.025

In [9]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

In [15]:
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        # if i not in [0] or j not in [12]:
        #     continue
        if i in [0] and j in [0, 1,2,3,4,5]:
            continue
            
        print(i, rewardfunc)
        print(j, morphology)
        shutil.copy(morphology, "GPTAnt.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        
        env_name = "GPTAntEnv"
        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = f"results/div2025-03-17_15-13-46/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
        
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material

0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
6 results/noDiv_m25_r5/assets/GPTAnt_6.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
7 results/noDiv_m25_r5/assets/GPTAnt_7.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
8 results/noDiv_m25_r5/assets/GPTAnt_8.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
5

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
3 results/noDiv_m25_r5/env/GPTrewardfunc_3.py
15 results/noDiv_m25_r5/assets/GPTAnt_15.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
3 results/noDiv_m25_r5/env/GPTrewardfunc_3.py
16 results/noDiv_m25_r5/assets/GPTAnt_16.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
3 results/noDiv_m25_r5/env/GPTrewardfunc_3.py
17 results/noDiv_m25_r5/assets/GPTAnt_1

In [25]:
efficiency_matrix

array([[2.4973032993524127, 1786.568243176656, 0.40622330838665005,
        13.858097655117414, 3.7335658623017176, 143.17308585702267,
        1.5885444810196896, 153.87259750897664, 4.150058655502022,
        6.8364347710507305, 1.2128913452525025, 18.92222761339106,
        18.567603036660863, 1.2848122614631128, 2.996696086547479,
        35.23547562898728, 7.410428197929562, 12.871405100419967,
        4.103373681719028, 6.754267006644329, 1.4155634183259813,
        1.5614767591056642, 0.8292706484020508, 4.748724127616231,
        0.1800660437675233],
       [2.6455420414992266, 1236.6341176264557, 0.2663519935576021,
        19.08719536485681, 64.93650911771947, 109.84642277706467,
        1.7219933659004643, 8.994203475574567, 16.283742244850448,
        12.374272384954324, 1.1938346436150373, 16.966458695947676,
        12.638658491993882, 1.4084390513198177, 3.645086652638952,
        5.6960399595824835, 5.828863859717012, 8.944281812209768,
        3.8290736218427814, 20.81

In [26]:
fitness_matrix

array([[0.17702429456538782, 11.968122602966202, 0.39679913214416,
        1.2429878979828755, 0.5465044733661754, 13.132695257720727,
        0.22806051382114784, 3.9220960763215276, 0.5492855288920173,
        1.0339368940927784, 0.22446256407313064, 0.320680142618512,
        1.7884293672255323, 0.2652108807174479, 0.2077508278499806,
        1.155494550750498, 0.2656390749140572, 0.6394525155536691,
        0.2774833528590148, 0.5919877170345557, 0.259331610340337,
        0.8647743984486604, 0.09855517723741357, 0.6411876682817567,
        0.26196761011503467],
       [0.18753237292439412, 8.284144079740539, 0.2601727614012921,
        1.7120064698195285, 9.505147097027226, 10.075773577462389,
        0.24721919752085308, 0.2292554407494844, 2.155252422624369,
        1.8714749990061406, 0.22093585401038257, 0.2875351943498023,
        1.217354117534017, 0.29072991630077144, 0.252701557915776,
        0.18679308329642702, 0.20894528118197223, 0.44435269187913706,
        0.2589342

# print coarse optimization info

In [None]:
logging.info(f'_________________________________end coarse optimization stage_________________________________')
logging.info(f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}")
logging.info(f'folder_name:{folder_name}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'fitness_matrix:{fitness_matrix}')
logging.info(f'efficiency_matrix:{efficiency_matrix}')
logging.info(f'_________________________________enter fine optimization stage_________________________________')

In [27]:
efficiency_matrix = np.array([[2.4973032993524127, 1786.568243176656, 0.40622330838665005,
        13.858097655117414, 3.7335658623017176, 143.17308585702267,
        1.5885444810196896, 153.87259750897664, 4.150058655502022,
        6.8364347710507305, 1.2128913452525025, 18.92222761339106,
        18.567603036660863, 1.2848122614631128, 2.996696086547479,
        35.23547562898728, 7.410428197929562, 12.871405100419967,
        4.103373681719028, 6.754267006644329, 1.4155634183259813,
        1.5614767591056642, 0.8292706484020508, 4.748724127616231,
        0.1800660437675233],
       [2.6455420414992266, 1236.6341176264557, 0.2663519935576021,
        19.08719536485681, 64.93650911771947, 109.84642277706467,
        1.7219933659004643, 8.994203475574567, 16.283742244850448,
        12.374272384954324, 1.1938346436150373, 16.966458695947676,
        12.638658491993882, 1.4084390513198177, 3.645086652638952,
        5.6960399595824835, 5.828863859717012, 8.944281812209768,
        3.8290736218427814, 20.815487880896434, 1.0392628660819145,
        2.7543291892883683, 1.7173075245873206, 5.633651747757704,
        0.17969344714513982],
       [4.369262506294933, 784.8408632734864, 0.316897274631478,
        33.11911199183276, 16.027336716148866, 44.90522692518686,
        1.8059237952926688, 2.496025100248316, 8.600366178617392,
        17.96043028141921, 1.7491323822476672, 13.1449674331455,
        5.350787495023535, 0.6281878929860515, 3.0534594089385023,
        7.654589654249169, 7.145976314089774, 6.0531634495426685,
        3.3827741407573355, 4.380360089711522, 1.7294041066207322,
        0.6845753502941542, 1.716990278599547, 3.5951601293534416,
        0.18267533877868342],
       [2.8772586509066973, 960.9267258255151, 0.32535906606094567,
        29.334229385430426, 14.426448879063644, 47.039774447428705,
        1.4588362898401461, 7.090380394151854, 13.04307365479495,
        8.46511293538344, 2.0064242976979267, 13.877347024688083,
        3.37631558527616, 1.131307482082434, 3.3968399443662975,
        20.467068686641692, 4.05276572292296, 6.367450630716795,
        3.441237299807174, 1.600175689499694, 1.4489800608832335,
        2.055861556212554, 1.7667623722096844, 3.7570255202325296,
        0.18981287633891855],
       [2.874139270792855, 421.2349408939174, 0.28358337894052343,
        14.47707509386499, 31.52828886541409, 65.83502231774104,
        1.8870583452831902, 4.650517850253799, 2.3879809119145348,
        30.665532355545462, 1.6241627960810225, 14.06406218771072,
        2.977118882570077, 0.7541664808059855, 3.5256031159479386,
        2.7427990291404707, 5.196875590574029, 6.30607861272087,
        3.4146232260074574, 3.0360691485416, 1.1892747790976448,
        1.140018539078841, 2.1382635623188593, 2.311299750422899,
        0.18643964550606884]], dtype=object)

mean = np.mean(efficiency_matrix)

std = np.std(efficiency_matrix)

print("平均值：", mean)
print("标准差：", std)



平均值： 52.44085177427613
标准差： 222.25176759324185


In [28]:
fitness_matrix = np.array([[0.17702429456538782, 11.968122602966202, 0.39679913214416,
        1.2429878979828755, 0.5465044733661754, 13.132695257720727,
        0.22806051382114784, 3.9220960763215276, 0.5492855288920173,
        1.0339368940927784, 0.22446256407313064, 0.320680142618512,
        1.7884293672255323, 0.2652108807174479, 0.2077508278499806,
        1.155494550750498, 0.2656390749140572, 0.6394525155536691,
        0.2774833528590148, 0.5919877170345557, 0.259331610340337,
        0.8647743984486604, 0.09855517723741357, 0.6411876682817567,
        0.26196761011503467],
       [0.18753237292439412, 8.284144079740539, 0.2601727614012921,
        1.7120064698195285, 9.505147097027226, 10.075773577462389,
        0.24721919752085308, 0.2292554407494844, 2.155252422624369,
        1.8714749990061406, 0.22093585401038257, 0.2875351943498023,
        1.217354117534017, 0.29072991630077144, 0.252701557915776,
        0.18679308329642702, 0.20894528118197223, 0.44435269187913706,
        0.25893429878602503, 1.8244042081028777, 0.19039324493611087,
        1.5253978990764188, 0.20409446274624565, 0.7606733789923221,
        0.26142554096829007],
       [0.3097203343896714, 5.257605866078373, 0.3095454174011459,
        2.9705848827372012, 2.3460175967329615, 4.118977090961041,
        0.25926872907701637, 0.06362179108280054, 1.138310823347969,
        2.716321024574969, 0.3237014930969923, 0.2227713415831469,
        0.515387229842683, 0.12967051245692973, 0.21168603745337983,
        0.2510207816363246, 0.2561593590452619, 0.3007216822615032,
        0.22875408952491105, 0.38392313581128695, 0.3168273113680214,
        0.3791303541927054, 0.20405675945285992, 0.485429828832113,
        0.26576372160755574],
       [0.20395787852080205, 6.437195394617503, 0.3178108995295171,
        2.6311037077501913, 2.1116860229529264, 4.314770608680698,
        0.20943886546275245, 0.18072843101194636, 1.7263302053221041,
        1.2802568692116332, 0.3713169726560697, 0.2351831778988963,
        0.32520632489863127, 0.23352443207823076, 0.23549145129662435,
        0.6711868057712479, 0.1452781011738896, 0.3163354965296525,
        0.23270755675704546, 0.1402497639417059, 0.2654535485130456,
        1.138573744501779, 0.2099719543493348, 0.507285403037763,
        0.2761477097134705],
       [0.2037367575763507, 2.821829749031944, 0.27700438731842636,
        1.2985064463893778, 4.614985120928705, 6.038783617806107,
        0.2709168682261376, 0.11853818099275003, 0.3160638118803519,
        4.637830439581766, 0.30057411746526713, 0.23834749059666582,
        0.28675574487440697, 0.1556750060544749, 0.24441816749342585,
        0.08994597846064778, 0.18629061471902972, 0.3132865293822492,
        0.23090782731390863, 0.2661007689266841, 0.2178754689530395,
        0.6313631250694222, 0.254123240429062, 0.3120789622322805,
        0.2712401924428136]], dtype=object)

# configuration of fine optimization

In [29]:
# 获取矩阵中所有非 None 的值和它们的坐标
all_values_with_coords = []
for i in range(len(efficiency_matrix)):
    for j in range(len(efficiency_matrix[0])):
        value = efficiency_matrix[i][j]
        if value is not None:
            all_values_with_coords.append(((i, j), value))

# 按值降序排序
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# 计算前 20% 的数量（至少选1个）
top_k = max(1, int(len(sorted_values) * 0.1))
# 取前 20% 个坐标
efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [30]:
coarse_best = efficiency_coarse_best
coarse_best

[(0, 1),
 (1, 1),
 (3, 1),
 (2, 1),
 (4, 1),
 (0, 7),
 (0, 5),
 (1, 5),
 (4, 5),
 (1, 4),
 (3, 5),
 (2, 5)]

In [31]:
efficiency_matrix_select = efficiency_matrix

# enter fine optimization stage

In [32]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)
    
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()
        
         # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )

        shutil.copy(improved_morphology, "GPTAnt.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_ant_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break
            
            
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTAnt.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_ant_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material


        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
            

            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

params: [0.1, 0.18, 0.08, 0.1, 0.08, 0.08, 0.05, 0.02, 0.02, 0.02]
ChatCompletion(id='chatcmpl-BSG514hyT87P3XdrbKKmOfW9i7eZx', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "parameters": [0.1, 0.18, 0.1, 0.15, 0.1, 0.1, 0.05, 0.02, 0.02, 0.02],\n  "desciption": "Optimized light-weight design prioritizing minimal material for maximum walking distance, with slightly longer limbs to optimize movement efficiency while keeping the material costs low."\n}', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))], created=1746074695, model='gpt-4-turbo-2024-04-09', object='chat.completion', service_tier=None, system_fingerprint='fp_5603ee5e2e', usage=CompletionUsage(completion_tokens=92, prompt_tokens=4312, total_tokens=4404, completion_tokens_details=None, prompt_tokens_details=None))
[0.1, 0.18, 0.1, 0.15, 0.1, 0.1, 0.05, 0.02, 0.02, 0.02]
Successfully saved GPTAnt_refine_0_1_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [0.1, 0.18, 0.1, 0.15, 0.1, 0.1, 0.05, 0.02, 0.02, 0.02]
ChatCompletion(id='chatcmpl-BSGDcfYpQorLxF9mmKMgjK4rfTxXH', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```python\ndef _get_rew(self, x_velocity: float, action):\n    forward_reward = x_velocity * self._forward_reward_weight\n    healthy_reward = self.healthy_reward\n    rewards = forward_reward + healthy_reward\n\n    ctrl_cost = self.control_cost(action)\n    contact_cost = self.contact_cost\n    costs = ctrl_cost + contact_cost\n\n    # Exponential function to promote faster movement forward\n    adjusted_forward_reward = np.exp(forward_reward)\n    \n    reward = adjusted_forwa

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [36]:
logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [37]:
final_optimized_results

[{'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_0_1_0.xml',
  'best_parameter': [0.1, 0.18, 0.1, 0.15, 0.1, 0.1, 0.05, 0.02, 0.02, 0.02],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitness': 28.671216806088314,
  'best_material': 0.007094102747340339,
  'best_efficiency': 4041.5564627728977,
  'best_iteration': 1},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_1.xml',
  'best_parameter': [0.1, 0.18, 0.08, 0.1, 0.08, 0.08, 0.05, 0.02, 0.02, 0.02],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_1.py',
  'best_fitness': 8.284144079740539,
  'best_material': 0.006698945113725943,
  'best_efficiency': 1236.6341176264557,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_3_1_0.xml',
  'best_parameter': [0.08,
   0.2,
   0.06,
   0.1,
   0.09,
   0.08,
   0.05,
   0.015,
   0.015,
   0.015],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 13.781

In [38]:
best_efficiency

1347.3786102645602