In [11]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTWalker import GPTWalkerEnv
import os

In [None]:
import prompts
class DGA:
    def __init__(self):
        api_key = "<api_key>"
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4-turbo"

    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTWalker_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )

            diverse_code = self.extract_code(response.choices[0].message.content)
            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums                                                                                                                                                                                                                                                                                   
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_walker_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = walker_design(parameter)  
            filename = f"GPTWalker_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_walker_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = walker_design(initial_parameter['parameters'])  

        filename = f"GPTWalker_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_walker_volume(diverse_parameter['parameters'])) 
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = walker_design(diverse_parameter['parameters'])  
            filename = f"GPTWalker_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        reward_improve_prompts = prompts.reward_improve_prompts

        for reward_filename, fitness in zip(rewardfunc_list, fitness_list):
            with open(reward_filename, 'r') as f:
                reward_content = f.read()
            reward_improve_prompts += f"reward function:\n{reward_content}\nfitness: {fitness}\n"
            
        with open(best_rewardfunc, 'r') as f:
            best_reward_content = f.read()

        reward_improve_prompts += f"This is best reward function, please carefully review it :\n{best_reward_content}\nbest fitness: {max(fitness_list)}"
        # print(reward_improve_prompts)
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        # print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTWalker_{rewardfunc_index}_{morphology_index}_{iteration}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        # print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = walker_design(parameter)  
        filename = f"GPTWalker_refine_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [8]:

folder_name = "results/noDiv_m25_r5"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 25
rewardfunc_nums = 5

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()


# print configuration info

In [9]:
logging.info(f"start!")

In [83]:
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

KeyboardInterrupt: 

In [10]:
morphology_list

NameError: name 'morphology_list' is not defined

In [6]:
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)


KeyboardInterrupt



In [1]:
# Extracting all the parameters from the provided log and storing them in a list

rewardfunc_list

NameError: name 'rewardfunc_list' is not defined

[0.026529702762014606,
 0.008886518419454328,
 0.004190884599888784,
 0.016680809793010606,
 0.011613420842770271,
 0.010209128926615631,
 0.012498040974143596,
 0.002794184865980322,
 0.0184058059592192,
 0.006889250889934617,
 0.005333377128244272,
 0.0020522454009575327,
 0.006236061417375739,
 0.0057983328409755616,
 0.009552012463239766,
 0.030787608005179972,
 0.004744590305083985,
 0.015168656529082721,
 0.04292698381803873,
 0.0049959177173711684,
 0.006911503837897545,
 0.005650677986256842,
 0.019626576504526635,
 0.004328852877258936,
 0.009433155541178953,
 0.01616689759476088,
 0.005129173605760936,
 0.02040464428506571,
 0.020224002707484295,
 0.008499055325511587,
 0.016015577548612967,
 0.04491325577327088,
 0.011969468010177111,
 0.006421938982713135,
 0.011972609602830702,
 0.002044129619935759,
 0.022065499601263514,
 0.024396561350227136,
 0.03223116982950448,
 0.01590012401859354,
 0.010207034531513236,
 0.027933994678169243,
 0.0021933845925127073,
 0.004600338842

# enter coarse optimization stage

In [10]:
morphology_list = [f'results/noDiv_m25_r5/assets/GPTWalker_{i}.xml' for i in range(0,25) ]
rewardfunc_list = [f'results/noDiv_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,5)]

parameter_list = np.array([[0.9, 0.6, 0.3, 0.05, 0.2, -0.1, 0.05, 0.04, 0.03, 0.02],
 [0.8, 0.6, 0.45, 0.3, 0.05, -0.05, 0.05, 0.04, 0.04, 0.03],
 [0.9, 0.7, 0.45, 0.2, 0.1, -0.1, 0.06, 0.05, 0.045, 0.025],
 [0.6, 0.5, 0.25, 0.1, 0.05, -0.05, 0.08, 0.07, 0.06, 0.04],
 [1.5, 1.3, 0.7, 0.35, 0.1, -0.1, 0.08, 0.06, 0.06, 0.05],
 [1.2, 1.0, 0.5, 0.2, 0.3, -0.3, 0.08, 0.08, 0.06, 0.05],
 [0.6, 0.3, 0.15, 0.05, 0.1, -0.1, 0.08, 0.07, 0.06, 0.05],
 [1.2, 0.6, 0.3, 0.1, -0.05, -0.2, 0.08, 0.07, 0.06, 0.04],
 [0.7, 0.5, 0.25, 0.1, 0.15, -0.1, 0.06, 0.05, 0.04, 0.03],
 [1.2, 0.9, 0.6, 0.3, 0.2, -0.2, 0.08, 0.07, 0.065, 0.05],
 [0.9, 0.7, 0.5, 0.3, 0.15, -0.15, 0.08, 0.07, 0.06, 0.04],
 [1.4, 0.7, 0.35, 0.15, 0.15, -0.15, 0.08, 0.06, 0.05, 0.04],
 [1.0, 0.7, 0.4, 0.1, 0.05, -0.1, 0.05, 0.04, 0.03, 0.02],
 [1.2, 0.7, 0.3, 0.1, 0.15, -0.15, 0.05, 0.04, 0.03, 0.02],
 [1.2, 0.9, 0.6, 0.3, 0.35, -0.2, 0.05, 0.04, 0.04, 0.035],
 [1.5, 1.0, 0.5, 0.1, 0.2, -0.3, 0.09, 0.08, 0.07, 0.05],
 [0.6, 0.3, 0.15, 0.05, 0.1, -0.1, 0.08, 0.06, 0.05, 0.04],
 [0.6, 0.3, 0.15, 0.05, 0.1, -0.15, 0.08, 0.07, 0.06, 0.04],
 [1.5, 1.2, 0.6, 0.2, 0.1, -0.1, 0.1, 0.08, 0.07, 0.06],
 [0.6, 0.4, 0.2, 0.05, 0.1, -0.1, 0.05, 0.04, 0.03, 0.02],
 [0.6, 0.45, 0.3, 0.15, 0.1, -0.1, 0.04, 0.04, 0.035, 0.03],
 [0.6, 0.45, 0.25, 0.1, 0.1, -0.1, 0.05, 0.045, 0.04, 0.035],
 [0.6, 0.3, 0.15, 0.05, 0.1, -0.1, 0.07, 0.05, 0.05, 0.03],
 [1.0, 0.9, 0.6, 0.2, 0.25, -0.1, 0.06, 0.05, 0.04, 0.03],
 [0.6, 0.4, 0.2, 0.1, 0.15, -0.15, 0.05, 0.04, 0.04, 0.02],
 [0.6, 0.45, 0.3, 0.15, 0.1, -0.05, 0.08, 0.07, 0.06, 0.05],
 [0.6, 0.4, 0.2, 0.05, 0.1, -0.1, 0.05, 0.04, 0.03, 0.02],
 [0.6, 0.3, 0.15, 0.05, 0.02, -0.02, 0.08, 0.06, 0.04, 0.03],
 [1.4, 0.8, 0.4, 0.1, 0.2, -0.2, 0.09, 0.08, 0.07, 0.06],
 [0.7, 0.6, 0.5, 0.35, 0.1, -0.1, 0.07, 0.06, 0.05, 0.04],
 [1.2, 0.8, 0.5, 0.1, 0.2, -0.2, 0.05, 0.04, 0.03, 0.02],
 [0.65, 0.45, 0.25, 0.1, 0.2, -0.15, 0.08, 0.07, 0.06, 0.05],
 [0.6, 0.3, 0.15, 0.05, 0.025, -0.025, 0.04, 0.03, 0.02, 0.01],
 [0.6, 0.3, 0.15, 0.05, 0.15, -0.1, 0.04, 0.03, 0.025, 0.02],
 [1.25, 1.0, 0.6, 0.2, 0.1, -0.1, 0.08, 0.07, 0.07, 0.05],
 [1.0, 0.8, 0.6, 0.4, 0.2, -0.2, 0.05, 0.04, 0.03, 0.02],
 [0.6, 0.35, 0.15, 0.02, 0.02, -0.18, 0.05, 0.04, 0.035, 0.03],
 [0.6, 0.4, 0.2, 0.05, 0.08, -0.1, 0.06, 0.05, 0.04, 0.03],
 [1.2, 0.8, 0.4, 0.0, 0.1, -0.1, 0.05, 0.05, 0.04, 0.03],
 [1.2, 0.6, 0.3, 0.1, 0.15, -0.15, 0.08, 0.07, 0.06, 0.05],
 [0.6, 0.3, 0.15, 0.05, 0.02, -0.05, 0.08, 0.07, 0.06, 0.05],
 [0.6, 0.5, 0.3, 0.1, 0.1, -0.1, 0.05, 0.04, 0.03, 0.02],
 [0.6, 0.4, 0.2, 0.1, 0.05, -0.05, 0.05, 0.04, 0.03, 0.02],
 [1.4, 1.0, 0.7, 0.3, 0.15, -0.15, 0.09, 0.08, 0.07, 0.05],
 [1.0, 0.9, 0.8, 0.6, 0.2, -0.2, 0.08, 0.07, 0.06, 0.05],
 [1.2, 0.9, 0.5, 0.2, 0.05, -0.05, 0.05, 0.04, 0.035, 0.03],
 [1.2, 1.0, 0.6, 0.3, 0.1, -0.1, 0.08, 0.06, 0.05, 0.04],
 [1.0, 0.8, 0.6, 0.4, 0.2, -0.2, 0.05, 0.04, 0.03, 0.025],
 [1.5, 1.0, 0.5, 0.1, 0.15, -0.15, 0.06, 0.05, 0.05, 0.02],
 [0.6, 0.4, 0.2, 0.1, 0.05, -0.05, 0.08, 0.05, 0.04, 0.025]])
material_list = [compute_walker_volume(parameter) for parameter in parameter_list]


In [6]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

In [None]:
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
#         if i in [0] or j in [11, 48, 50]:
#             continue
            
        # if i not in [10]:
        #     continue
        print(i, rewardfunc)
        print(j, morphology)
        shutil.copy(morphology, "GPTWalker.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTWalkerEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = f"results/noDiv_m50_r10/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
        
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material

0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
0 results/noDiv_m25_r5/assets/GPTWalker_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
1 results/noDiv_m25_r5/assets/GPTWalker_1.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
2 results/noDiv_m25_r5/assets/GPTWalker_2.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81


In [9]:
fitness_matrix

array([[0.5336602918901562, 0.7896406824664972, 0.6155428925457181,
        1.1892390659601058, 0.9188638662736607, 0.659539244624002,
        0.626753026318415, 1.6883034369495489, 0.22443399333371677,
        0.5210756742298377, 1.1115057045745436, 2.1226879263480014,
        1.6044312050718248, 1.7736058859267132, 1.7353355181602585,
        3.1333640141090964, 0.8816549099641585, 0.6526683587760691,
        1.3404735096456504, 0.7540371420612904, 1.304847609511935,
        0.531347858889157, 1.7684675112188029, 2.038304581194203,
        0.9204205681488749],
       [0.40964927723367967, 0.4978821609279219, 1.2510488706971996,
        0.7455483837353669, 1.0560179602415374, 1.9018118025594999,
        1.1112304648592244, 0.9911377453882888, 0.16613241067875353,
        0.815720395991827, 1.7743338287646138, 2.3248005861193355,
        0.8966466219502749, 2.2097794383490683, 1.5423943003797596,
        1.9108156484432703, 1.6607147531721216, 0.5501334410057084,
        0.571799261331

In [28]:
efficiency_matrix = np.array([[50.62304840223553, 90.1196293057718, 71.80512067956171,
        34.10558652637897, 42.41056918933855, 12.51206244402908,
        42.644340805039484, 101.0581796396811, 6.602836647072625,
        3.042151875075633, 64.85259273848959, 81.77243982596598,
        135.6803525805508, 234.4452666789291, 110.12725418374285,
        37.43166188674219, 2.5732974267290745, 89.17593905304416,
        13.627085604877172, 145.64667956497632, 289.1482801977803,
        21.62009293947034, 97.06767634546185, 99.84082667418902,
        5.379707336257232],
       [56.31627465614159, 157.43120491424943, 88.21995742762708,
        29.088103831558215, 51.02440488531429, 32.89143645068948,
        27.399439475874225, 18.706211589531495, 141.44488992883527,
        28.631418777804157, 47.81339758094234, 16.565174890265606,
        185.9628533470005, 188.57113054856646, 113.49423934435214,
        42.611314739816834, 109.44268593980362, 58.72829386089613,
        12.439466008380792, 70.8119644071393, 427.68481006358724,
        -6.525823265407806, 174.21609855814427, 348.8423058013457,
        104.44307538497566],
       [28.220550984642998, 149.13255274365594, 43.486468370577334,
        33.69291421673807, 44.309051906336144, 15.41422781932271,
        21.449153864456676, 64.23264166594576, 21.246757939507013,
        36.22244332243947, 63.8513488337815, 86.45610324033335,
        260.3688627354291, 188.68171696650325, 73.08216357800374,
        39.123226792047504, 44.865989292905454, 142.64023573445772,
        16.94506349152021, 59.61908191557577, 133.01471020331252,
        60.77125050407854, 30.272808734810123, 406.8132137271518,
        150.54649941338198],
       [-25.621661025425322, 111.59960914208175, 110.450176370656,
        37.47928143260862, 32.489118259339136, 34.10342179348445,
        50.18785843115071, 94.25400055355884, 50.453551386439706,
        11.473635900165078, 30.006964593129986, 82.52892758551329,
        170.89176616985097, 185.8287625298209, 110.82221648534633,
        41.77203776297098, 91.73178714269638, 57.51057679305126,
        29.756982595653945, 113.51434637874006, 54.50503185848842,
        61.1730351197105, 35.060021044778104, 179.18976951065062,
        89.38593146268869],
       [34.17595502276464, 192.56200487767734, 84.39390238934168,
        60.06941642051543, 65.53685479121104, 16.41730074514431,
        44.24741622469472, 45.276373635085065, 71.75425568461607,
        43.25622987701197, 71.16982952592546, 57.01159516951028,
        354.6869056259, 104.7264040878187, 105.8666767088484,
        39.11025886860853, 54.4550921828861, 25.089437018658096,
        12.453080594778434, 126.57135377050174, 269.30839923277074,
        69.0366646631811, 115.06301845782107, 212.53480225451946,
        202.74836985066074]], dtype=object)
mean = np.mean(efficiency_matrix)

std = np.std(efficiency_matrix)

print("平均值：", mean)
print("标准差：", std)


平均值： 87.53675834122669
标准差： 81.6844399556135


In [18]:



fitness_matrix = np.array([[0.4501807260693196, 0.6285245471241708, 0.9335729263551182,
        0.7322348077706137, 1.5039762375864028, 0.6209584737863981,
        1.0204135756693868, 3.506289262081589, 0.07807824456080922,
        0.11524074149705112, 1.6266635844453017, 2.754606918704435,
        1.1937908851958232, 2.6225432588023394, 1.6052662108495428,
        2.4054456187133373, 0.047718646524860284, 2.0322405616936305,
        0.8660329729870841, 0.9154301187745133, 1.7166983556340398,
        0.1935537061650376, 1.3039537949303286, 1.4534967680553057,
        0.03767762613999153],
       [0.5008094576363257, 1.0979780713057579, 1.1469901176849673,
        0.6245112395600855, 1.8094426448723901, 1.6323620722344736,
        0.655626502347288, 0.6490260270317237, 1.6725794227601571,
        1.0845960575142926, 1.1992783851703164, 0.5580186363448228,
        1.6362041746527691, 2.109387637593647, 1.6543449566225015,
        2.7383021533652863, 2.02948045991367, 1.3383657315027953,
        0.7905569864295937, 0.44507300256690663, 2.539201719144284,
        -0.0584223796971639, 2.3403232817107518, 5.0784952502230984,
        0.7314834992499496],
       [0.25095976108348717, 1.0401004852854474, 0.5653885008388371,
        0.7233748801136461, 1.5713007972046835, 0.7649894191385017,
        0.5132453070379409, 2.228599629950259, 0.25124195117875936,
        1.3721541194287192, 1.6015499084893694, 2.9123819792695524,
        2.290869453171758, 2.1106246754276095, 1.0652796955403157,
        2.5141495122927213, 0.8319847763488738, 3.2506444660670017,
        1.0768981819387653, 0.374722605432655, 0.7897198424454536,
        0.5440541257745617, 0.40666826815208074, 5.922443864414351,
        1.0543760779239983],
       [-0.22784834828355766, 0.7783331371378491, 1.4360158912753136,
        0.8046668370286089, 1.1521387893672514, 1.6925114338669036,
        1.2009183659574232, 3.2702131705468584, 0.596610962026192,
        0.43463652148180026, 0.7526489616260927, 2.7800901551200194,
        1.5036004029210333, 2.0787110585242745, 1.6153963054282832,
        2.6843682635756942, 1.7010535510930314, 1.3106150395087477,
        1.8911254285520716, 0.7134694172121008, 0.32360108972893115,
        0.5476510992775612, 0.4709770462514318, 2.608669815027064,
        0.6260284244698654],
       [0.3039199876706985, 1.3429920633429975, 1.0972457350464664,
        1.28966899752769, 2.3240874663127635, 0.8147707110638691,
        1.0587727082097775, 1.5708977073277677, 0.8484908264550419,
        1.6386032683755551, 1.7851155229495492, 1.9205068949213564,
        3.1207318302267324, 1.1714867565880482, 1.5431620468090634,
        2.5133161633830614, 1.0098029354721014, 0.5717660180628527,
        0.7914222250489005, 0.7955363607440863, 1.5989072658675356,
        0.6180501787962687, 1.54569332679023, 3.0941114819122344,
        1.4199800848349833]], dtype=object)

In [19]:
efficiency_matrix_select = efficiency_matrix[:10, :50]
# parameter_list[48]
# material_list[48]
efficiency_matrix_select.shape

(5, 25)

# print coarse optimization info

In [20]:
logging.info(f'_________________________________end coarse optimization stage_________________________________')
logging.info(f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}")
logging.info(f'folder_name:{folder_name}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'fitness_matrix:{fitness_matrix}')
logging.info(f'efficiency_matrix:{efficiency_matrix}')
logging.info(f'_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [21]:
# 获取矩阵中所有非 None 的值和它们的坐标
all_values_with_coords = []
for i in range(len(efficiency_matrix_select)):
    for j in range(len(efficiency_matrix_select[0])):
        value = efficiency_matrix_select[i][j]
        if value is not None:
            all_values_with_coords.append(((i, j), value))

# 按值降序排序
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# 计算前 20% 的数量（至少选1个）
top_k = max(1, int(len(sorted_values) * 0.1))
# 取前 20% 个坐标
efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [22]:
coarse_best = efficiency_coarse_best
coarse_best

[(1, 20),
 (2, 23),
 (4, 12),
 (1, 23),
 (0, 20),
 (4, 20),
 (2, 12),
 (0, 13),
 (4, 23),
 (4, 24),
 (4, 1),
 (2, 13)]

# enter fine optimization stage

In [None]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)
    
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()
        
         # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )

        shutil.copy(improved_morphology, "GPTWalker.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_walker_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break
            
            
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTWalker.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_walker_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material


        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
            

            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

params: [ 0.6    0.45   0.3    0.15   0.1   -0.1    0.04   0.04   0.035  0.03 ]
[0.7, 0.5, 0.3, 0.15, 0.15, -0.15, 0.06, 0.05, 0.04, 0.035]
Successfully saved GPTWalker_refine_1_20_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [ 1.    0.9   0.6   0.2   0.25 -0.1   0.06  0.05  0.04  0.03]
[1.1, 0.9, 0.65, 0.25, 0.3, -0.15, 0.06, 0.05, 0.04, 0.03]
Successfully saved GPTWalker_refine_2_23_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [ 1.    0.7   0.4   0.1   0.05 -0.1   0.05  0.04 

In [17]:

logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [27]:
final_optimized_results

[{'best_morphology': 'results/noDiv_m25_r5/assets/GPTWalker_20.xml',
  'best_parameter': array([ 0.6  ,  0.45 ,  0.3  ,  0.15 ,  0.1  , -0.1  ,  0.04 ,  0.04 ,
          0.035,  0.03 ]),
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_1.py',
  'best_fitness': 2.539201719144284,
  'best_material': 0.9218891209166312,
  'best_efficiency': 427.68481006358724,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTWalker_23.xml',
  'best_parameter': array([ 1.  ,  0.9 ,  0.6 ,  0.2 ,  0.25, -0.1 ,  0.06,  0.05,  0.04,
          0.03]),
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_2.py',
  'best_fitness': 5.922443864414351,
  'best_material': 4.234146892547145,
  'best_efficiency': 406.8132137271518,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTWalker_12.xml',
  'best_parameter': array([ 1.  ,  0.7 ,  0.4 ,  0.1 ,  0.05, -0.1 ,  0.05,  0.04,  0.03,
          0.02]),
  'best_rewardfunc': 'results/noDiv_m25_r5/