In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTHopper import GPTHopperEnv
import os

In [None]:
import prompts
class DGA:
    def __init__(self, model="gpt-4-turbo"):
        """Create the OpenAI client and remember which chat model to query.

        Args:
            model: Name of the chat-completion model used by every generation
                and refinement request. Defaults to "gpt-4-turbo", the value
                that used to be hard-coded (pass e.g. "gpt-3.5-turbo" to
                switch models without editing the class).
        """
        # `api_key` is not defined in this notebook; presumably it is supplied
        # by one of the star imports (design / utils / prompts) -- TODO confirm.
        self.client = OpenAI(api_key=api_key)
        self.model = model


    def extract_code(self, text):
        """Return the body of the first ```python fenced block found in ``text``.

        The match tolerates trailing spaces/tabs after the fence tag, Windows
        (CRLF) line endings, an indented closing fence and any capitalisation
        of the ``python`` tag.

        Args:
            text: Raw model reply. ``None`` or an empty string is accepted.

        Returns:
            The code inside the fence with surrounding whitespace stripped, or
            ``None`` when ``text`` is empty or contains no such block.
        """
        # Local import: the notebook never imports `re` explicitly and would
        # otherwise depend on a star import leaking it into the namespace.
        import re

        if not text:
            return None
        match = re.search(
            r'```python[ \t]*\r?\n(.*?)\r?\n[ \t]*```',
            text,
            re.DOTALL | re.IGNORECASE,
        )
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` unchanged, line for line.

        Despite its name this helper adds no indentation: the text is split on
        newlines and re-joined with the same separator, so the result equals
        the input. The generated reward code is written with its definitions
        at column 0.
        """
        pieces = code.split("\n")
        return "\n".join(pieces)

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Sample several reward functions from one request and save them.

        A single chat-completion request asks for ``rewardfunc_nums``
        completions (``n=rewardfunc_nums``). Every completion that contains a
        ```python fenced block is written to
        ``<folder_name>/env/GPTHopper_<i>.py``, where ``i`` is the index of
        the completion; completions without a code block are skipped, so the
        indices may have gaps.

        Args:
            rewardfunc_nums: Number of completions to request.
            folder_name: Experiment directory; files go into its ``env``
                sub-directory, which is created when missing.

        Returns:
            list[str]: Paths of the files that were written, in completion order.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )

        env_dir = os.path.join(folder_name, "env")
        os.makedirs(env_dir, exist_ok=True)

        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                print(f"Skipping response {i}: no python code block found")
                continue

            # Prepend the numpy import exactly like the other generators of
            # this class do, so saved modules can use `np` even when the model
            # left the import out (a repeated import is harmless).
            full_code = "import numpy as np\n" + self.indent_code(reward_code) + "\n"
            file_path = os.path.join(env_dir, f"GPTHopper_{i}.py")
            # Written once; the previous version wrote the same file twice.
            with open(file_path, "w") as fp:
                fp.write(full_code)
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate reward functions one by one, asking each to differ from the rest.

        The first request uses the base prompt. Every later request replays
        the conversation so far (including all previously extracted code as
        assistant turns) followed by the diversity prompt. Each extracted
        function is saved as ``<folder_name>/env/GPTrewardfunc_<i>.py`` with
        an ``import numpy as np`` line prepended.

        Replies that contain no ```python block are skipped: nothing is
        written and nothing is added to the conversation. Previously such a
        reply was appended as an assistant message whose content was ``None``,
        which then went out with every following request.

        Args:
            rewardfunc_nums: Total number of reward functions to request.
            folder_name: Experiment directory; files go into its ``env``
                sub-directory, which is created when missing.

        Returns:
            list[str]: Paths of the files that were written.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        env_dir = os.path.join(folder_name, "env")
        os.makedirs(env_dir, exist_ok=True)
        rewardfunc_files = []

        # Generate the initial reward function.
        # NOTE(review): only this first request sets timeout=10; the later
        # requests rely on the client default -- confirm this is intended.
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(env_dir, "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
            # Only real code is replayed to the model in later requests.
            messages.append({"role": "assistant", "content": initial_code})

        # Generate the remaining, diversified reward functions.
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_code = self.extract_code(response.choices[0].message.content)
            if not diverse_code:
                print(f"Skipping reward function {i}: no python code block found")
                continue

            messages.append({"role": "assistant", "content": diverse_code})

            reward_code = "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
            file_path = os.path.join(env_dir, f"GPTrewardfunc_{i}.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"Saved: {file_path}")

        return rewardfunc_files


    def generate_morphology(self, morphology_nums, folder_name):
        """Sample several morphology parameter sets from one request and save them.

        A single JSON-mode chat completion asks for ``morphology_nums``
        completions. For each one the ``parameters`` entry of the returned
        JSON object is validated, converted to XML with ``hopper_design`` and
        written to ``<folder_name>/assets/GPTHopper_<i>.xml`` (``i`` is the
        completion index).

        Completions whose ``parameters`` entry is missing, empty or not a list
        are skipped entirely. The three returned lists therefore always have
        the same length and line up index by index; before, skipped entries
        stayed in the parameter/material lists (and the material computation
        ran on them before validation).

        Args:
            morphology_nums: Number of completions to request.
            folder_name: Experiment directory; files go into its ``assets``
                sub-directory, which is created when missing.

        Returns:
            tuple: ``(xml_files, material_list, parameter_list)`` where
            ``material_list[k]`` is ``compute_hopper_volume(parameter_list[k])``
            and ``xml_files[k]`` is the file generated from ``parameter_list[k]``.
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        assets_dir = os.path.join(folder_name, "assets")
        os.makedirs(assets_dir, exist_ok=True)

        xml_files, material_list, parameter_list = [], [], []
        for i, choice in enumerate(responses.choices):
            payload = json.loads(choice.message.content)
            print(f"Response {i}:")
            # Pretty-print the parsed object; dumping the raw string would only
            # print one escaped JSON string literal.
            print(json.dumps(payload, indent=4))

            parameter = payload.get('parameters', [])
            if not isinstance(parameter, list) or not parameter:
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = hopper_design(parameter)
            filename = f"GPTHopper_{i}.xml"
            file_path = os.path.join(assets_dir, filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")

            xml_files.append(file_path)
            material_list.append(compute_hopper_volume(parameter))
            parameter_list.append(parameter)

        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate morphologies sequentially, asking each new one to be different.

        The first design comes from the base prompt. Every later request
        replays the conversation so far (all earlier designs as assistant
        turns) followed by the diversity prompt. Each design is logged,
        converted with ``hopper_design`` and written to
        ``<folder_name>/assets/GPTHopper_<i>.xml``.

        Args:
            morphology_nums: Total number of designs; one design is always
                produced, further ones for indices 1 .. morphology_nums - 1.
            folder_name: Experiment directory containing an ``assets`` folder.

        Returns:
            tuple: ``(xml_files, material_list, parameter_list)``, index-aligned.
        """
        xml_files, material_list, parameter_list = [], [], []

        # Conversation seed: system role plus the base design request.
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        def request_design(conversation):
            # One JSON-mode completion, parsed into a Python object.
            reply = self.client.chat.completions.create(
                model=self.model,
                messages=conversation,
                response_format={'type': 'json_object'},
                n=1
            )
            return json.loads(reply.choices[0].message.content)

        def save_design(design_parameters, index):
            # Render the XML, write it to the assets folder, remember the path.
            xml_text = hopper_design(design_parameters)
            target = os.path.join(folder_name, "assets", f"GPTHopper_{index}.xml")
            with open(target, "w") as fp:
                fp.write(xml_text)
            xml_files.append(target)

        # Initial morphology.
        first_design = request_design(messages)
        first_parameters = first_design['parameters']
        parameter_list.append(first_parameters)
        material_list.append(compute_hopper_volume(first_parameters))
        messages.append({"role": "assistant", "content": json.dumps(first_design)})
        logging.info(f"generate initial_parameter{first_parameters}")
        save_design(first_parameters, 0)

        # Diversified follow-up morphologies.
        for index in range(1, morphology_nums):
            follow_up = {"role": "user", "content": morphology_div_prompts + morphology_format}
            next_design = request_design(messages + [follow_up])
            next_parameters = next_design['parameters']
            material_list.append(compute_hopper_volume(next_parameters))
            parameter_list.append(next_parameters)
            messages.append({"role": "assistant", "content": json.dumps(next_design)})
            logging.info(f"generate diverse_parameter{next_parameters}")
            save_design(next_parameters, index)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for a refined reward function and save it.

        The prompt starts from ``prompts.reward_improve_prompts``, lists every
        reward function in ``rewardfunc_list`` with its score, and ends with
        the current best function and the best score. The reply's code block
        is written to
        ``<folder_name>/env/GPTHopper_refine_<rewardfunc_index>_<morphology_index>_<iteration>.py``.

        Args:
            best_rewardfunc: Path of the best reward-function file so far.
            rewardfunc_list: Paths of the candidate reward-function files.
            fitness_list: Scores paired with ``rewardfunc_list`` by position;
                ``None`` entries are ignored when taking the maximum.
            folder_name: Experiment directory; the file goes into its ``env``
                sub-directory, which is created when missing.
            rewardfunc_index, morphology_index, iteration: Used only to build
                the output file name.

        Returns:
            str: Path of the newly written reward-function file.

        Raises:
            ValueError: If the reply contains no ```python code block (this
                used to surface as an UnboundLocalError on ``file_path``).
        """
        reward_improve_prompts = prompts.reward_improve_prompts

        for reward_filename, fitness in zip(rewardfunc_list, fitness_list):
            with open(reward_filename, 'r') as f:
                reward_content = f.read()
            reward_improve_prompts += f"reward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, 'r') as f:
            best_reward_content = f.read()

        # Scores may contain None for pairs that were never evaluated.
        scored = [value for value in fitness_list if value is not None]
        best_score = max(scored) if scored else None
        reward_improve_prompts += f"This is best reward function, please carefully review it :\n{best_reward_content}\nbest fitness: {best_score}"

        # Send the prompt assembled above. The previous version built it and
        # then sent the plain generation prompt, so the model never saw the
        # candidates or their scores. This mirrors improve_morphology.
        # NOTE(review): assumes prompts.reward_improve_prompts already carries
        # the task description -- confirm against prompts.py.
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": reward_improve_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            raise ValueError("improve_rewardfunc: model reply contained no python code block")

        full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
        file_name = f"GPTHopper_refine_{rewardfunc_index}_{morphology_index}_{iteration}.py"
        env_dir = os.path.join(folder_name, "env")
        os.makedirs(env_dir, exist_ok=True)
        file_path = os.path.join(env_dir, file_name)
        with open(file_path, "w") as fp:
            fp.write(full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for a refined morphology and save it as XML.

        The prompt starts from ``prompts.morphology_improve_prompts``, lists
        every parameter set with its score (one record per line pair), then
        the current best parameter set and the best score, followed by
        ``morphology_format``. The returned ``parameters`` list is converted
        with ``hopper_design`` and written to
        ``<folder_name>/assets/GPTHopper_refine_<rewardfunc_index>_<morphology_index>_<iteration>.xml``.

        Args:
            best_parameter: Best parameter set found so far.
            parameter_list: Candidate parameter sets.
            fitness_list: Scores paired with ``parameter_list`` by position
                (``zip`` stops at the shorter one); ``None`` entries are
                ignored when taking the maximum.
            folder_name: Experiment directory; the file goes into its
                ``assets`` sub-directory, which is created when missing.
            rewardfunc_index, morphology_index, iteration: Used only to build
                the output file name.

        Returns:
            tuple: ``(file_path, parameter)`` -- the XML path and the new
            parameter list.

        Raises:
            ValueError: If the reply has no non-empty ``parameters`` list.
        """
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            # Each record ends with a newline so consecutive records do not
            # run together; "fintess" typo in the prompt text is fixed.
            morphology_improve_prompts += f"parameter:{parameter_content} \nfitness:{fitness}\n"

        # Scores may contain None for pairs that were never evaluated.
        scored = [value for value in fitness_list if value is not None]
        best_score = max(scored) if scored else None
        morphology_improve_prompts += f"best parameter:{best_parameter} \nbest fitness:{best_score}\n"

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )

        parameter = json.loads(responses.choices[0].message.content).get('parameters', [])
        print(parameter)
        # Same validity rule generate_morphology applies before calling
        # hopper_design; fail with a clear message instead of deep inside it.
        if not isinstance(parameter, list) or not parameter:
            raise ValueError(f"improve_morphology: model reply has no usable 'parameters' list: {parameter!r}")

        xml_file = hopper_design(parameter)
        filename = f"GPTHopper_refine_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        assets_dir = os.path.join(folder_name, "assets")
        os.makedirs(assets_dir, exist_ok=True)
        file_path = os.path.join(assets_dir, filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

folder_name = "results/noDiv_m25_r5"

# Create the experiment layout up front: logging.basicConfig(filename=...)
# cannot create missing parent directories, and the DGA methods write into
# <folder_name>/env and <folder_name>/assets.
os.makedirs(os.path.join(folder_name, "env"), exist_ok=True)
os.makedirs(os.path.join(folder_name, "assets"), exist_ok=True)

log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

# Running "best so far" record, updated by the coarse and fine stages.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Size of the search grid: number of morphologies x number of reward functions.
morphology_nums = 25
rewardfunc_nums = 5

# Result grids indexed [reward function, morphology]; None marks a pair that
# has not been evaluated yet (hence dtype=object).
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()


# print configuration info

In [4]:
# Mark the beginning of the run in the log file.
logging.info("start!")

In [5]:
# Fresh designer, then the sequential (diversity-prompted) morphology generation.
designer = DGA()
(morphology_list,
 material_list,
 parameter_list) = designer.generate_morphology_div(
    morphology_nums=morphology_nums,
    folder_name=folder_name,
)

In [8]:
# Fresh designer, then the sequential (diversity-prompted) reward-function generation.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(
    rewardfunc_nums=rewardfunc_nums,
    folder_name=folder_name,
)

initial Saved: results/Div_m50_r10\env\GPTrewardfunc_0.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_1.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_2.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_3.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_4.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_5.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_6.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_7.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_8.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_9.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_10.py


In [5]:
# Hard-coded hopper design parameters, one row of 10 numbers per design. This
# assignment replaces whatever generate_morphology_div returned earlier.
# NOTE(review): there are 51 rows here while morphology_nums is 25 and only 25
# XML paths are listed below -- presumably pasted from a larger earlier run;
# confirm that rows 0..24 really correspond to GPTHopper_0..24.xml.
parameter_list = [[1.9, 1.3, 0.7, 0.2, 0.2, -0.2, 0.07, 0.06, 0.05, 0.03],
 [1.5, 1.2, 0.8, 0.3, 0.3, -0.2, 0.12, 0.1, 0.08, 0.07],
 [0.8, 0.6, 0.4, 0.2, 0.15, -0.15, 0.05, 0.045, 0.04, 0.035],
 [0.9, 0.7, 0.4, 0.1, -0.05, -0.1, 0.07, 0.05, 0.04, 0.03],
 [1.2, 0.9, 0.6, 0.3, 0.1, -0.3, 0.05, 0.04, 0.035, 0.03],
 [1.5, 1.2, 0.8, 0.3, 0.1, -0.2, 0.1, 0.1, 0.08, 0.05],
 [1.2, 0.9, 0.55, 0.2, 0.1, -0.1, 0.045, 0.045, 0.045, 0.03],
 [1.2, 0.9, 0.6, 0.3, 0.1, -0.1, 0.08, 0.08, 0.05, 0.02],
 [1.0, 0.8, 0.5, 0.2, 0.1, -0.1, 0.07, 0.06, 0.05, 0.04],
 [0.6, 0.3, 0.15, 0.05, 0.1, -0.1, 0.07, 0.065, 0.05, 0.04],
 [1.2, 0.8, 0.4, 0.05, 0.1, -0.1, 0.05, 0.04, 0.03, 0.02],
 [0.6, 0.3, 0.15, 0.05, 0.1, -0.1, 0.07, 0.05, 0.04, 0.03],
 [1.0, 0.7, 0.35, 0.1, 0.15, -0.15, 0.07, 0.06, 0.05, 0.04],
 [1.0, 0.8, 0.5, 0.1, -0.2, -0.3, 0.06, 0.04, 0.03, 0.02],
 [1.2, 1.0, 0.5, 0.1, 0.15, -0.15, 0.07, 0.06, 0.05, 0.04],
 [1.5, 1.0, 0.5, 0.2, 0.1, -0.1, 0.07, 0.06, 0.05, 0.04],
 [1.5, 1.1, 0.7, 0.3, 0.4, -0.1, 0.06, 0.04, 0.03, 0.02],
 [1.0, 0.8, 0.5, 0.2, 0.1, -0.1, 0.05, 0.03, 0.02, 0.01],
 [0.7, 0.5, 0.2, 0.05, 0.1, -0.1, 0.1, 0.05, 0.05, 0.04],
 [0.6, 0.45, 0.3, 0.1, 0.05, -0.05, 0.07, 0.06, 0.05, 0.04],
 [1.2, 1.0, 0.6, 0.2, 0.2, -0.4, 0.05, 0.05, 0.04, 0.03],
 [1.0, 0.6, 0.3, 0.1, 0.12, -0.1, 0.08, 0.07, 0.06, 0.05],
 [1.0, 0.8, 0.6, 0.2, 0.15, -0.05, 0.3, 0.2, 0.15, 0.1],
 [1.0, 0.8, 0.5, 0.2, 0.25, -0.25, 0.05, 0.04, 0.03, 0.02],
 [0.7, 0.5, 0.2, 0.05, 0.07, -0.05, 0.06, 0.045, 0.04, 0.035],
 [1.0, 0.8, 0.6, 0.3, 0.25, -0.1, 0.08, 0.07, 0.06, 0.05],
 [1.2, 0.8, 0.4, 0.1, 0.15, -0.05, 0.06, 0.04, 0.04, 0.02],
 [1.0, 0.7, 0.5, 0.2, 0.2, -0.2, 0.07, 0.05, 0.04, 0.03],
 [1.6, 1.3, 0.7, 0.2, 0.15, -0.2, 0.06, 0.05, 0.04, 0.03],
 [1.2, 0.7, 0.3, 0.1, 0.01, -0.05, 0.05, 0.05, 0.04, 0.03],
 [1.65, 1.4, 1.0, 0.4, 0, -0.2, 0.06, 0.06, 0.04, 0.03],
 [1.0, 0.8, 0.5, 0.2, 0.15, -0.05, 0.1, 0.08, 0.07, 0.06],
 [1.5, 1.2, 0.8, 0.3, 0.1, -0.3, 0.1, 0.09, 0.08, 0.05],
 [1.5, 1.2, 0.8, 0.4, 0.4, -0.8, 0.09, 0.08, 0.07, 0.06],
 [0.6, 0.4, 0.3, 0.1, 0.05, -0.05, 0.01, 0.02, 0.02, 0.02],
 [1.0, 0.8, 0.6, 0.4, 0.4, -0.2, 0.1, 0.1, 0.09, 0.08],
 [1.2, 0.9, 0.55, 0.25, 0.25, -0.1, 0.1, 0.07, 0.05, 0.03],
 [1.0, 0.7, 0.4, 0.15, 0.1, -0.05, 0.07, 0.06, 0.05, 0.04],
 [1.2, 0.8, 0.4, 0.1, 0.12, -0.12, 0.05, 0.04, 0.03, 0.02],
 [1.5, 1.2, 0.9, 0.6, 0.65, -0.65, 0.08, 0.07, 0.07, 0.06],
 [1.6, 1.1, 0.7, 0.3, 0.1, -0.1, 0.1, 0.07, 0.07, 0.05],
 [1.2, 0.9, 0.6, 0.3, 0.35, -0.05, 0.1, 0.08, 0.08, 0.06],
 [1.0, 0.8, 0.6, 0.3, 0.05, -0.15, 0.07, 0.05, 0.04, 0.02],
 [1.5, 1.1, 0.7, 0.2, 0.1, -0.15, 0.07, 0.07, 0.06, 0.05],
 [1.0, 0.9, 0.6, 0.3, 0.35, -0.05, 0.35, 0.08, 0.06, 0.05],
 [1.0, 0.75, 0.45, 0.15, 0.01, -0.15, 0.06, 0.05, 0.04, 0.03],
 [1.2, 0.9, 0.6, 0.3, 0.15, -0.15, 0.07, 0.06, 0.05, 0.04],
 [1.5, 1.2, 0.9, 0.6, 0.3, -0.3, 0.1, 0.08, 0.06, 0.04],
 [1.3, 1.0, 0.5, 0.1, 0.05, -0.1, 0.04, 0.04, 0.03, 0.02],
 [1.0, 0.85, 0.7, 0.55, 0.44, -0.1, 0.06, 0.05, 0.045, 0.04],
 [1.4, 1.0, 0.6, 0.15, 0.07, -0.1, 0.25, 0.15, 0.12, 0.05]]

# Rebuild the artifact paths from the configuration values instead of repeating
# the folder name and the counts by hand, so they cannot drift apart.
# Forward slashes are kept on purpose: with the current configuration the
# resulting strings are identical to the previously hard-coded ones.
morphology_list = [f'{folder_name}/assets/GPTHopper_{i}.xml' for i in range(morphology_nums)]
rewardfunc_list = [f'{folder_name}/env/GPTrewardfunc_{i}.py' for i in range(rewardfunc_nums)]

# The coarse stage reads material_list[j] for every morphology index j, so each
# listed morphology needs a parameter row.
assert len(parameter_list) >= len(morphology_list), "parameter_list is shorter than morphology_list"

# Material cost of every parameter row (may cover more rows than morphologies).
material_list = [compute_hopper_volume(parameter) for parameter in parameter_list]

# enter coarse optimization stage

In [6]:
# Record the run configuration in the log, one "name:value" line per setting,
# then mark the start of the coarse stage.
for _label, _value in (
    ("folder_name", folder_name),
    ("morphology_nums", morphology_nums),
    ("rewardfunc_nums", rewardfunc_nums),
    ("parameter_list", parameter_list),
    ("morphology_list", morphology_list),
    ("material_list", material_list),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [None]:
# Coarse stage: train and evaluate every (morphology, reward function) pair,
# fill the result grids and track the best pair by fitness.
for j, morphology in enumerate(morphology_list):
    for i, rewardfunc in enumerate(rewardfunc_list):
        print(i, rewardfunc)
        print(j, morphology)

        # Stage the current design and reward code under fixed file names in
        # the working directory.
        shutil.copy(morphology, "GPTHopper.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # The module file was just (re)written while the interpreter is
        # running: refresh the import system's caches, then import and reload
        # so the new source is executed.
        importlib.invalidate_caches()
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        # Swap the environment class's reward method for the generated one.
        GPTHopperEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Previously never updated, so the stage summary logged "Reward: None".
            best_reward = reward

0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
0 results/noDiv_m25_r5/assets/GPTHopper_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
1 results/noDiv_m25_r5/env/GPTrewardfunc_1.py
0 results/noDiv_m25_r5/assets/GPTHopper_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
2 results/noDiv_m25_r5/env/GPTrewardfunc_2.py
0 results/noDiv_m25_r5/assets/GPTHopper_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81


In [26]:
# Hard-coded efficiency grid with 5 rows; it replaces the matrix filled by the
# coarse loop (which stores fitness / material cost at [reward function index,
# morphology index]).
# NOTE(review): presumably pasted from a finished run's output -- confirm it
# belongs to the experiment configured in folder_name.
efficiency_matrix = np.array([[94.13712815210884, 39.63415526485556, 794.9985096845128,
        4.028420267769271, 25.084192986245878, 52.02047797953102,
        -13.96496333300926, 74.56014566747464, 51.143313136479605,
        94.36409997180706, 243.39425071008756, 154.800298645754,
        84.88268003885675, -28.772437463018438, -7.740832366512328,
        17.392658448155792, 265.02008664462227, -62.56215182950684,
        55.93502216136016, 16.224002067506415, 6.3449314016687595,
        103.35269991564525, 1.7994849318537698, 5.711431314183117,
        53.38265653753691],
       [111.76977226887519, 40.21073928871543, 172.33921006841533,
        -22.7091163129148, -3.4105729284859425, 48.43400689621782,
        14.99393357767097, 82.46493582448669, 137.43724333324982,
        93.83580529780373, 37.35236435346463, -25.628165525724018,
        197.12111946352107, 166.03220190895195, 22.894885416829524,
        109.67486011793167, 465.83104462814657, -30.407249400468025,
        89.54843417786726, 94.89631551124693, 36.70067162475883,
        248.4313080616958, 0.3415864250120802, -37.22164585256122,
        172.57071065239012],
       [119.20096935864598, 39.635080719089494, 357.7549277116462,
        -15.49835851002158, -19.244582834337688, 45.632786204976206,
        -2.5482279103553105, 78.88108992550539, -9.096869308701367,
        285.98509542986346, 419.7098788870396, 36.38349408372024,
        32.431159585719946, 23.50112633317982, 85.54398285764597,
        20.028400813242957, 1222.0645575053077, -42.82101656324627,
        215.25713368501332, 116.42008380055327, -9.743460487148315,
        272.2773501354677, 3.9453165755053403, -40.28971080570104,
        172.79501613780783],
       [140.50113071734702, 42.98140380165936, 957.5199877085341,
        -6.768645645245227, -1.474571424217352, 46.547155503455016,
        -0.025183381801435114, 74.44827367475733, 3.16998003121368,
        75.45548741576553, 249.26294486787847, 74.91722663191304,
        85.3450238166704, 160.77373572079108, 29.2798342583135,
        96.03841990605125, 348.0146407588653, 80.32994141751179,
        31.92051054767285, 157.4346649075247, -6.53679160244414,
        289.129923101146, 3.6993441980801256, 30.81774192337683,
        201.72831410647368],
       [115.52496002161297, 39.241662490891514, 40.0421402151087,
        126.80136821458173, -1.6404241933971473, 44.51513288562585,
        1.9521754941727498, 1.6718884255840305, 85.89602265939853,
        150.25033820652033, 61.78614100787644, 229.69986157777782,
        369.29870105571916, 230.10271297380945, 138.07444134275954,
        162.1318962769087, 378.3446451751616, -6.503574803075852,
        342.63221622687627, 7.778222512031701, 1.1357591586983966,
        340.3735911456131, 1.4619019561580562, -11.812590004652368,
        218.56866545830295]], dtype=object)

# Summary statistics over every entry of the efficiency grid.
mean = np.mean(efficiency_matrix)

# np.std returns the standard deviation, not the variance.
std = np.std(efficiency_matrix)

print("平均值：", mean)
# Label corrected: it used to say "方差" (variance) while printing the
# standard deviation.
print("标准差：", std)


平均值： 111.90974585267875
方差： 178.3462845471911


In [15]:
# Restrict the grid to the configured search space (reward functions x
# morphologies) using the configuration values rather than literal 5 and 25.
efficiency_matrix_select = efficiency_matrix[:rewardfunc_nums, :morphology_nums]

In [17]:
fitness_matrix =np.array([[2.42574107581863, 1.7385140513808621, 5.75233626731039,
        5.248073442475785, 6.314211169846406, 4.378582880501472,
        0.09148535165083706, -0.11978743182740402, 2.0579761944420962,
        6.250244810281483, 1.699255559986155, 2.302140141972929,
        1.4903121588467891, 4.1494420774055705, 0.5152826513380817,
        1.45486309738636, 0.09378590810707321, 2.2404290367133135,
        3.1832310869784353, 1.2835533084523838, 1.1922648113359686,
        2.453084645611067, 1.1594336987129432, -0.0004655845481020472,
        0.9144528551392227, 1.1190742634158262, 0.14270906673788175,
        10.864171596606289, 2.358083794522678, 1.6147434066386037,
        3.402128268825633, 0.7052139235590158, 4.711767896283045,
        0.2037278230878554, 0.22851936328476796, 0.49691558608629427,
        2.8879867229106475, 0.847298921349251, 0.6073935958850748,
        6.72236597470676, 0.9684335708715157, -0.008585808047347217,
        -0.20331925315146956, 1.6004697631360338, 0.9291327038871491,
        2.1062319522131703, 0.669225369377844, 3.7106658210191297,
        0.2133091924500219, 1.6285911042935708, 0.3972716791049427],
       [-0.18694208657855088, 1.9139860510698605, 0.31862028768065576,
        1.4939485969505777, 0.8194928426624359, 5.123519796585105,
        0.1319414185251182, 1.611511503345777, 1.9532368953601955,
        -0.24587648774302961, 2.2127796859788247, 1.966163587849377,
        1.6102037922044388, 6.464971595208877, 0.07584031351675172,
        1.2082708744864339, 0.20381020760396645, 2.262888507417508,
        3.0915574192916506, 0.7330069135748714, -0.33814507669780597,
        5.496261448466442, 0.7045451189314258, 0.15650892789617882,
        0.5671912196976975, 1.9519070660462023, 0.9539530960396011,
        1.7930758676505718, 2.256741118254442, 0.020159089873407893,
        3.2517113563341673, 0.5754216905234013, 0.8843592925067392,
        -0.05434478318427098, 0.6671038524364227, 0.9011457250121968,
        9.760359218577396, 4.098352952109594, 2.5354592088605896,
        2.292814013672692, 0.18566196688959188, 1.4545118529127692,
        1.090331486424509, 0.5466565428187979, 0.8479463133775902,
        2.142517074957079, 1.139467789542916, 3.3939925090594896,
        1.2283015776969974, -0.035363340447767476, 0.4091160491081394],
       [-0.018063551448320087, 1.8226348075089334, 0.9698800697924375,
        -0.036129301144362226, 0.6402557642010599, 5.549521742994282,
        4.02679088667429, -0.1214651782522687, 1.855131984992567,
        3.1629657373977587, 0.9196288685474865, 0.8583231743580738,
        1.4156795666848998, 2.667558201650156, 5.127764078848299,
        0.8385027431365727, 1.0491079684899782, 0.7911584267777938,
        2.5004934529045912, 0.845373636335961, 3.220488863156616,
        3.1547533645083767, 0.5831856429837822, 4.939475702201308,
        1.0758045872900126, 1.7409351763695236, 2.1098800589161324,
        5.884582788021682, 1.5272534922141197, 1.8688910330218216,
        5.538634803344925, 0.640736461791638, 2.1967911820892367,
        0.13920594405513229, 0.9179170661998214, 0.7565601571328133,
        2.5401269065292187, 4.85933121456916, 2.154283593522655,
        2.3660007555569025, 0.4426405835298397, 2.4721788825264115,
        1.527502242761719, 1.3962005937616881, 0.8764661444845427,
        1.6971181651410554, 0.5953676919299291, 0.17187713747370753,
        1.0443006895850726, 0.15005546299625633, 0.6375669022134196],
       [0.8377564935101576, 2.2645827639171863, 2.765763141114626,
        3.2044240194810385, 0.36376856916084804, 5.007061562499993,
        5.636229037894995, -0.13482217973092683, 2.133013159991834,
        0.28036702583207035, 1.609701914869149, -0.09254856152625092,
        1.1678251708331921, 3.721894071762559, 0.8648880477869635,
        0.8375923660023975, 0.0869974022902861, 1.9123278746365633,
        3.5899742039392883, 1.216098950896109, 2.8388047696926195,
        5.400405392253706, 0.8541778481657275, 0.8790519379710606,
        0.7534742068127642, 1.0967622212658596, 0.8040623320955178,
        5.7074804835506825, 1.7531067474241502, 0.907340774044492,
        6.9471686145749105, 1.378602697087362, 0.5488133289699477,
        0.2505873700762013, 0.17346079874466394, 0.38073956796399017,
        2.644322367643524, 5.311861978476981, 1.295142937311938,
        1.3628338423301143, 0.47107500615039066, 1.205590085302281,
        1.0600493731105307, 0.6220394157590117, 0.0945823192841042,
        2.4586268436825036, 0.427239988360692, 0.1795296239376685,
        1.0590774086436083, -0.20915725062354487, 1.6017991556134878],
       [0.8542279062817609, 1.5211809558208405, 5.08434200848323,
        0.5947346846458889, -0.12024898666798604, 3.015310765872944,
        -0.03924792519634399, -0.5363717328291675, 5.0885673888142575,
        2.9442864368080865, 0.7045012041962461, -0.09533027883462401,
        1.2493887364978193, 7.096346554636067, 0.3500555646886502,
        1.053064071382753, 0.26095044860647365, 1.2896749148363367,
        2.2790471672511274, 0.7234397162404871, 3.6916891642865037,
        3.3873261100678196, 0.2029744732003841, 0.5064103128482201,
        0.8961991383441166, 2.0287465779322256, 0.33376708078480277,
        0.6300990030527843, -0.10745690385008057, 1.630269806761772,
        3.5522214262203264, 1.25175463888373, 4.435281659515157,
        0.3031462346754154, 0.32471117002902344, 0.5344209701330658,
        2.9285386398918694, 2.173807569584307, None, None, None, None,
        None, None, None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None]], dtype=object)

# print coarse optimization info

In [18]:
# Close the coarse stage in the log: banner, best-pair summary, the data the
# stage produced, then the banner that opens the fine stage.
logging.info('_________________________________end coarse optimization stage_________________________________')
_stage1_summary = (
    f"Stage1: Final best morphology: {best_morphology}, "
    f"Fitness: {best_fitness}, "
    f"best_efficiency: {best_efficiency}, "
    f"best reward function: {best_rewardfunc}, "
    f"Material cost: {best_material}, "
    f"Reward: {best_reward}"
)
logging.info(_stage1_summary)
for _label, _value in (
    ("folder_name", folder_name),
    ("parameter_list", parameter_list),
    ("fitness_matrix", fitness_matrix),
    ("efficiency_matrix", efficiency_matrix),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [19]:
# 获取矩阵中所有非 None 的值和它们的坐标
# Share of evaluated (reward function, morphology) pairs passed on to the fine
# stage. The old comments said 20%, but the code has always used 0.1 (10%).
TOP_FRACTION = 0.1

# Collect every non-None efficiency together with its (row, column) coordinate.
# Iterating the rows directly also copes with an empty matrix.
all_values_with_coords = [
    ((i, j), value)
    for i, row in enumerate(efficiency_matrix_select)
    for j, value in enumerate(row)
    if value is not None
]

# Sort by value, highest efficiency first.
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# Number of pairs in the top TOP_FRACTION (at least one).
top_k = max(1, int(len(sorted_values) * TOP_FRACTION))
# Coordinates of those top pairs.
efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

# Log labels now name the metric that was actually ranked (efficiency).
logging.info(f"efficiency_coarse_best {efficiency_coarse_best}")
logging.info(f"efficiency_coarse_best values {sorted_values[:top_k]}")


In [20]:
# The fine stage iterates over `coarse_best`; here it is the efficiency-ranked
# list of (reward function index, morphology index) pairs.
coarse_best = efficiency_coarse_best
# Bare expression: shows the selected coordinates as the cell output.
coarse_best

[(2, 16),
 (3, 2),
 (0, 2),
 (1, 16),
 (2, 10),
 (4, 16),
 (4, 12),
 (2, 2),
 (3, 16),
 (4, 18),
 (4, 21),
 (3, 21)]

# enter fine optimization stage

In [22]:
final_optimized_results = []  # best result found for each coarse_best candidate

for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    # Running best for this candidate, seeded from the coarse-stage results.
    # NOTE(review): the efficiency seed is read from efficiency_matrix_select while
    # the fitness seed is read from fitness_matrix. In the saved results of this
    # notebook, candidates that were never improved have
    # best_efficiency != best_fitness / best_material, so confirm that the seed is
    # on the same scale as the fitness / material ratio computed inside the loop.
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_hopper_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}")
    logging.info(f"Initial rewardfunc:{rewardfunc}")
    logging.info(f"Initial fitness:{best_fitness}")
    logging.info(f"Initial efficiency:{best_efficiency}")

    # Counts accepted improvements; it is also passed to the designer calls below.
    iteration = 0

    # Alternate between a morphology step and a reward-function step. The loop
    # ends as soon as either step fails to raise the efficiency.
    #
    # Fix: the original shared one `improved` flag between both steps. Because
    # the reward step is only reached when the morphology step has already set
    # the flag to True, the second `if not improved` could never fire and the
    # "Not improved Reward!" exit was dead code. Each step now decides on its
    # own result.
    while True:
        designer = DGA()

        # -------- Step 1: refine the morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        # Stage the candidate morphology and the current best reward function
        # under the fixed file names before training (presumably the names the
        # training environment loads - confirm against Train).
        shutil.copy(improved_morphology, "GPTHopper.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_hopper_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # No further gain from the morphology step: stop refining this candidate.
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break

        # -------- Step 2: refine the reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration
        )

        # Same staging as above, now with the best morphology and the candidate
        # reward function.
        shutil.copy(best_morphology, "GPTHopper.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        # The morphology is unchanged in this step, so the material cost is that
        # of the current best parameters.
        improved_material = compute_hopper_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # No further gain from the reward step: stop refining this candidate.
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break

    # Store the final best result for the current coarse_best candidate.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

[1.6, 1.2, 0.8, 0.35, 0.45, -0.15, 0.055, 0.045, 0.035, 0.025]
Successfully saved GPTHopper_refine_2_16_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[0.85, 0.65, 0.42, 0.22, 0.2, -0.2, 0.055, 0.05, 0.045, 0.038]
Successfully saved GPTHopper_refine_3_2_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[0.85, 0.65, 0.45, 0.25, 0.2, -0.2, 0.05, 0.045, 0.04, 0.035]
Successfully saved GPTHopper_refine_0_2_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
3

In [24]:

# Write the list of per-candidate fine-optimization results to the log.
logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [25]:
# Display the per-candidate results collected by the fine optimization loop.
final_optimized_results

[{'best_morphology': 'results/noDiv_m25_r5/assets/GPTHopper_16.xml',
  'best_parameter': [1.5, 1.1, 0.7, 0.3, 0.4, -0.1, 0.06, 0.04, 0.03, 0.02],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_2.py',
  'best_fitness': 1.0491079684899782,
  'best_material': 0.009613273519984767,
  'best_efficiency': 1222.0645575053077,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTHopper_2.xml',
  'best_parameter': [0.8,
   0.6,
   0.4,
   0.2,
   0.15,
   -0.15,
   0.05,
   0.045,
   0.04,
   0.035],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 2.765763141114626,
  'best_material': 0.006355965536987749,
  'best_efficiency': 957.5199877085341,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTHopper_2.xml',
  'best_parameter': [0.8,
   0.6,
   0.4,
   0.2,
   0.15,
   -0.15,
   0.05,
   0.045,
   0.04,
   0.035],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitn