In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTWalker import GPTWalkerEnv
import os

In [None]:
import prompts
class DGA:
    def __init__(self):
        api_key = "<api_key>"
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4-turbo"

    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTWalker_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )

            diverse_code = self.extract_code(response.choices[0].message.content)
            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums                                                                                                                                                                                                                                                                                   
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_walker_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = walker_design(parameter)  
            filename = f"GPTWalker_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_walker_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = walker_design(initial_parameter['parameters'])  

        filename = f"GPTWalker_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_walker_volume(diverse_parameter['parameters'])) 
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = walker_design(diverse_parameter['parameters'])  
            filename = f"GPTWalker_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        reward_improve_prompts = prompts.reward_improve_prompts

        for rewardfunc_file, fitness in zip(rewardfunc_list, fitness_list):
            with open(rewardfunc_file, "r") as fp:
                reward_content = fp.read()
            reward_improve_prompts += f"\nreward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, "r") as fp:
            best_reward_content = fp.read()
        reward_improve_prompts += f"\nbest reward function:\n{best_reward_content}\nbest fitness: {max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content":reward_improve_prompts+ zeroshot_rewardfunc_format}
        ]
        print(messages)
        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTrewardfunc_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        # print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = walker_design(parameter)  
        filename = f"GPTWalker_refine2_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [5]:

folder_name = "results/Div_m25_r5"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 26
rewardfunc_nums = 6

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()


# print configuration info

In [None]:
logging.info(f"start!")

In [None]:
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

In [10]:
morphology_list

NameError: name 'morphology_list' is not defined

In [6]:
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)


KeyboardInterrupt



In [1]:
# Extracting all the parameters from the provided log and storing them in a list

rewardfunc_list

NameError: name 'rewardfunc_list' is not defined

# enter coarse optimization stage

In [7]:
morphology_list = [f'results/Div_m25_r5/assets/GPTWalker_{i}.xml' for i in range(0,26) ]
rewardfunc_list = [f'results/Div_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,6)]

parameter_list =np.array([[ 1.  ,  0.5  ,  0.25 ,  0.05 ,  0.25 , -0.25 ,  0.015,  0.02 , 0.025,  0.01 ],[1.5, 1.0, 0.6, 0.2, 0.3, -0.4, 0.05, 0.04, 0.03, 0.02], [1.0, 0.7, 0.5, 0.2, 0.25, -0.15, 0.045, 0.035, 0.025, 0.015], [1.8, 1.2, 0.9, 0.3, 0.35, -0.25, 0.06, 0.055, 0.045, 0.025], [1.0, 0.55, 0.35, 0.1, 0.1, -0.2, 0.03, 0.06, 0.07, 0.04], [1.3, 0.9, 0.6, 0.2, 0.25, -0.2, 0.04, 0.05, 0.05, 0.03], [1.2, 0.6, 0.3, 0.15, 0.2, -0.05, 0.065, 0.045, 0.03, 0.025], [0.8, 0.5, 0.2, 0.05, 0.1, -0.15, 0.04, 0.02, 0.01, 0.025], [1.1, 0.85, 0.55, 0.25, 0.3, -0.25, 0.035, 0.07, 0.08, 0.04], [2.0, 1.0, 0.5, 0.2, 0.25, -0.3, 0.02, 0.03, 0.04, 0.035], [1.2, 0.9, 0.4, 0.1, 0.15, -0.05, 0.025, 0.035, 0.045, 0.02], [1.0, 0.45, 0.25, 0.05, -0.05, -0.3, 0.02, 0.025, 0.03, 0.015], [0.9, 0.6, 0.4, 0.2, 0.25, -0.2, 0.02, 0.04, 0.06, 0.03], [0.6, 0.3, 0.15, 0.05, 0.1, -0.1, 0.04, 0.05, 0.06, 0.02], [0.5, 0.35, 0.2, 0.05, 0.15, -0.15, 0.035, 0.06, 0.075, 0.04], [0.8, 0.5, 0.3, 0.1, 0.2, -0.3, 0.04, 0.08, 0.12, 0.06], [0.6, 0.45, 0.15, 0.05, 0.1, -0.2, 0.025, 0.04, 0.055, 0.03], [1.0, 0.2, 0.1, 0.05, 0.05, -0.4, 0.03, 0.07, 0.1, 0.05], [0.8, 0.55, 0.25, 0.1, -0.05, -0.2, 0.045, 0.1, 0.15, 0.06], [0.6, 0.4, 0.3, 0.1, 0.2, -0.25, 0.055, 0.04, 0.03, 0.025], [1.2, 0.8, 0.4, 0.2, -0.1, -0.25, 0.03, 0.04, 0.06, 0.02], [1.0, 0.6, 0.4, 0.1, 0.2, -0.1, 0.02, 0.04, 0.05, 0.03], [1.5, 1.0, 0.7, 0.3, 0.4, -0.2, 0.05, 0.06, 0.07, 0.04], [1.0, 0.6, 0.25, 0.1, 0.15, -0.25, 0.03, 0.035, 0.04, 0.02], [1.2, 0.6, 0.3, 0.1, 0.12, -0.08, 0.025, 0.045, 0.065, 0.05], [1.7, 0.9, 0.45, 0.15, 0.25, -0.3, 0.025, 0.05, 0.08, 0.035], [1.5, 1.1, 0.6, 0.2, 0.35, -0.25, 0.02, 0.03, 0.04, 0.02], [1.2, 0.3, 0.15, 0.05, 0.1, -0.15, 0.02, 0.07, 0.12, 0.06], [1.0, 0.8, 0.5, 0.2, 0.25, -0.1, 0.025, 0.05, 0.1, 0.05], [1.0, 0.7, 0.4, 0.1, 0.2, -0.2, 0.03, 0.03, 0.06, 0.04], [0.9, 0.5, 0.25, 0.05, 0.1, -0.5, 0.03, 0.035, 0.07, 0.065], [1.8, 1.3, 0.6, 0.15, 0.25, -0.2, 0.025, 0.07, 0.12, 0.05], [1.0, 0.7, 0.5, 0.1, 0.2, -0.1, 0.01, 0.04, 0.08, 0.02], [1.0, 0.9, 0.6, 0.1, 0.2, -0.2, 0.02, 0.03, 0.05, 0.025], [1.2, 0.6, 0.3, 0.1, 0.25, -0.2, 0.02, 0.03, 0.08, 0.05], [0.8, 0.5, 0.3, 0.1, 0.15, -0.15, 0.015, 0.025, 0.035, 0.02], [1.0, 0.6, 0.35, 0.05, 0.1, -0.3, 0.02, 0.05, 0.1, 0.06], [1.0, 0.7, 0.3, 0.1, 0.15, -0.2, 0.025, 0.06, 0.12, 0.04], [1.2, 0.4, 0.2, 0.1, 0.2, -0.6, 0.02, 0.07, 0.15, 0.045], [0.8, 0.4, 0.2, 0.05, 0.1, -0.3, 0.015, 0.04, 0.1, 0.06], [0.7, 0.2, 0.05, 0.01, 0.2, -0.3, 0.01, 0.03, 0.08, 0.06], [1.0, 0.3, 0.15, 0.05, 0.1, -0.2, 0.02, 0.08, 0.15, 0.025], [1.5, 0.9, 0.6, 0.2, 0.25, -0.15, 0.018, 0.02, 0.025, 0.015], [0.85, 0.5, 0.25, 0.05, 0.1, -0.25, 0.015, 0.035, 0.05, 0.03], [1.0, 0.8, 0.5, 0.15, 0.2, -0.25, 0.02, 0.03, 0.035, 0.015], [1.0, 0.5, 0.3, 0.15, 0.3, -0.25, 0.04, 0.03, 0.02, 0.015], [1.2, 0.8, 0.4, 0.15, 0.3, -0.4, 0.02, 0.03, 0.04, 0.035], [0.9, 0.6, 0.25, 0.05, 0.1, -0.2, 0.02, 0.01, 0.03, 0.015], [1.0, 0.5, 0.25, 0.05, 0.25, -0.25, 0.015, 0.02, 0.025, 0.01], [1.4, 0.9, 0.6, 0.2, 0.3, -0.15, 0.025, 0.015, 0.015, 0.01], [1.45, 1.05, 0.6, 0.05, 0.1, 0, 0.2, 0.05,0.04,0.06]])

material_list = [compute_walker_volume(parameter) for parameter in parameter_list]


In [None]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

In [13]:
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        # if i not in [10]:
        #     continue
        if j not in [50]:
            continue

        print(i, rewardfunc)
        print(j, morphology)
        shutil.copy(morphology, "GPTWalker.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTWalkerEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = "results/Div_m50_r10/coarse/SAC_morphology50_rewardfunc0_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
        
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material

0 results/Div_m25_r5/env/GPTrewardfunc_0.py
50 results/Div_m25_r5/assets/GPTWalker_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
1 results/Div_m25_r5/env/GPTrewardfunc_1.py
50 results/Div_m25_r5/assets/GPTWalker_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
2 results/Div_m25_r5/env/GPTrewardfunc_2.py
50 results/Div_m25_r5/assets/GPTWalker_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
5

Process ForkServerProcess-524:
Traceback (most recent call last):
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/root/miniconda3/envs/robodesign/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 32, in _worker
    cmd, data = remote.recv()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
Process ForkServerProcess-517:
Traceback (most recent call la

KeyboardInterrupt: 

In [14]:
efficiency_matrix

array([[None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, 22.03956807368918],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, 22.37018317079236],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, No

In [8]:
fitness_matrix = np.array([[2.1684423634862577, 2.260962152299912, 2.7830358346106645,
        2.267754423199486, 1.2019426754496374, 2.2180311735828018,
        1.1420885490570625, 1.361656197547038, 1.6480213650841316,
        3.583995803620787, 0.9911623374110732, 2.832058260689889,
        2.0765715894062553, 0.5379509086343696, 1.3115382453304014,
        1.8274472777097637, 2.74449354132574, 3.656782967997985,
        0.7861817564663918, 1.9263857740896508, 1.3509708122407755,
        1.2926124369864613, 1.5461287579259553, 0.8030222253603531,
        1.118556567616124],
       [3.296117704275014, 2.1808387002338883, 1.4803630351537576,
        2.3965327714544897, 1.1166557585148793, 1.5370766857209004,
        2.2791414846352063, 0.6935612371162847, 3.115774124010825,
        1.8580872496255996, 1.3124392280655628, 3.551990245232875,
        1.9878199356831245, 0.18095867926972256, 0.9974099576625679,
        2.4471040225094973, 0.13097317573228934, 2.2469147585163824,
        2.3464868387191493, 1.6976871702661034, 1.3822707157233114,
        0.9038684847770183, 1.7621550661800363, 2.813059184445691,
        0.2771921904725525],
       [3.150755544052107, 0.4936148243852119, 1.6894076122096988,
        1.8781770282697574, 1.4165204655434278, 1.5587308446578891,
        2.0146879015880352, 1.3121857207121383, 1.192973647433868,
        1.5749288784877309, 1.1600410291788124, 1.3149447091526922,
        2.811299860369774, 0.5491428089460723, 1.9408819004445945,
        2.058863057231706, 0.5674005974561951, 5.5429768680638,
        0.9168869472706261, 2.8074231351843446, 1.8073516899339324,
        -0.1910151615774878, 1.088694608983779, 2.6067062564529886,
        0.17939723865521628],
       [3.9743075022649754, 1.3000726519486097, 1.0079781782462414,
        2.516884441478525, 1.8344365120705952, 0.8175545429630111,
        3.568250871354235, 1.8108210732761214, 1.4402524772215386,
        3.5355060497471067, 0.9842685911195281, 5.010931913179973,
        1.7366540219688487, 1.3110460430385134, 1.8258827896409806,
        1.4331772987480949, 0.2306285658101946, 3.6890675113195948,
        0.4510896438230694, 1.985574528281275, 1.3732822375978082,
        0.7316284386825173, 1.9013945832827728, 0.2864017371052294,
        1.8282309234707705],
       [1.5493358592468198, 1.0287715538818076, 0.7361911461574775,
        2.3924977865989403, 1.4234553627228812, 2.5699812500455765,
        2.503516634235497, 1.2124805677454231, -0.23619038873382994,
        2.0331253927381554, 0.3087242005045853, 2.9831742056642696,
        1.3648269575619312, 0.475993552248324, 1.6747878244121595,
        3.053773634638773, -0.2254332359696839, 1.98517413335603,
        0.7770595857596254, 1.623537335568082, 1.2941700643303151,
        1.5346291542977477, 1.4308150135371362, 0.9605436791852171,
        0.616100189409229]], dtype=object)


In [9]:
# efficiency_matrix_select = efficiency_matrix[:10, :50]
efficiency_matrix = np.array([[50.57913839160387, 169.71072284965652, 456.8683959568497,
        88.33915138387243, 55.03047939135277, 122.2759352987948,
        71.91179138774183, 357.17233560993066, 46.20201188838541,
        286.98703526067237, 98.9949076909252, 837.9291798228314,
        172.16355259847157, 54.777700735723904, 71.48452612208305,
        30.559743711894424, 300.62783175872613, 130.7756164716796,
        9.372491393851774, 260.52441714907957, 107.39944192856754,
        120.0733417403442, 44.42571483669403, 108.31693473727309,
        63.47225487619574],
       [76.88228026107649, 163.696553637004, 243.01917959282287,
        93.3556425370078, 51.125650966008386, 84.73618027148908,
        143.50651455314286, 181.9261627094962, 87.3502226178499,
        148.78559581096238, 131.08326994310994, 1050.9375157422257,
        164.80536659519854, 18.426403263696244, 54.363171205117176,
        40.92204063909154, 14.346611222095596, 80.35523717864157,
        27.973719207794705, 229.59521736712296, 109.88772082844008,
        83.96214236804126, 50.63291014207957, 379.4439785984215,
        15.729211979740409],
       [73.49169310846413, 37.05136265571969, 277.33633045925677,
        73.16337392121522, 64.85484031703581, 85.92993379879053,
        126.85515165178973, 344.1958693134537, 33.44482287003379,
        126.11180211957438, 115.86210479022125, 389.0564530209492,
        233.07810520476025, 55.917333657796334, 105.78648652160183,
        34.42962658038504, 62.15223639020261, 198.230582277669,
        10.930697579467848, 379.67591217827623, 143.68079688235557,
        -17.74377850421455, 31.28202356693746, 351.60973450361996,
        10.179858208044966],
       [92.7011261874951, 97.5851229065605, 165.4715931889148,
        98.04387698108626, 83.9889644774293, 45.07026212670541,
        224.67549691467332, 474.99155313863355, 40.377244788612956,
        283.10423754906, 98.30637691042892, 1482.5987609224244,
        143.98180519366034, 133.49933357047288, 99.51853591510897,
        23.96650862526436, 25.262717742688395, 131.93019170689013,
        5.3776798683169575, 268.52910442146145, 109.17315503196295,
        67.96242170598926, 54.63375098350023, 38.63175549437341,
        103.74257548224753],
       [36.13841629340406, 77.22091406034578, 120.8545229205266,
        93.19846187655378, 65.17235189567437, 141.6782887420304,
        157.63433236899536, 318.042481680349, -6.621559270650414,
        162.80170534397476, 30.834629785000413, 882.6402867898502,
        113.1545181946799, 48.46879508649463, 91.28320459536242,
        51.06715841655201, -24.693628866469112, 70.99474411394986,
        9.263741138914932, 219.5671935277486, 102.8838975722906,
        142.5547562564717, 41.11234555958338, 129.5644674889495,
        34.96047440389759]], dtype=object)
efficiency_matrix_select = efficiency_matrix
# parameter_list[48]
# material_list[48]
efficiency_matrix_select
mean = np.mean(efficiency_matrix)

std = np.std(efficiency_matrix)

print("平均值：", mean)
print("标准差：", std)


平均值： 149.5382972569018
标准差： 199.40039305172402


# print coarse optimization info

In [16]:
logging.info(f'_________________________________end coarse optimization stage_________________________________')
logging.info(f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}")
logging.info(f'folder_name:{folder_name}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'fitness_matrix:{fitness_matrix}')
logging.info(f'efficiency_matrix:{efficiency_matrix}')
logging.info(f'_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [10]:
# 获取矩阵中所有非 None 的值和它们的坐标
all_values_with_coords = []
for i in range(len(efficiency_matrix_select)):
    for j in range(len(efficiency_matrix_select[0])):
        value = efficiency_matrix_select[i][j]
        if value is not None:
            all_values_with_coords.append(((i, j), value))

# 按值降序排序
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# 计算前 20% 的数量（至少选1个）
top_k = max(1, int(len(sorted_values) * 0.05))
# 取前 20% 个坐标
efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [11]:
coarse_best = efficiency_coarse_best
coarse_best

[(3, 11), (1, 11), (4, 11), (0, 11), (3, 7), (0, 2)]

# enter fine optimization stage

In [None]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_walker_volume(parameter)
    
    print(f"Initial parameter:{parameter}")
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0
    
    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()
        iteration +=1
         # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )
        print(f"improved_morphology:{improved_parameter}")
        
        shutil.copy(improved_morphology, "GPTWalker.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTWalkerEnv._get_rew = _get_rew
        
        # model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        try:
            model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
            improved_fitness, _ = Eva(model_path)
            improved_material = compute_walker_volume(best_parameter)
            improved_efficiency = improved_fitness / improved_material
            print("improved_fitness", improved_fitness)
        except Exception as e:
            print(f"Error evaluating design: {e}")
            continue
        print(f"improved_efficiency:{improved_efficiency}")
        
        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            improved = False
            
            
        # -------- 优化 reward function --------
        iteration +=1
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTWalker.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTWalkerEnv._get_rew = _get_rew
        
        try:
            model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
            improved_fitness, _ = Eva(model_path)
            improved_material = compute_walker_volume(best_parameter)
            improved_efficiency = improved_fitness / improved_material
            print("improved_fitness", improved_fitness)
        except Exception as e:
            print(f"Error evaluating design: {e}")
            continue


        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True

            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
            

            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

Initial parameter:[ 1.     0.45   0.25   0.05  -0.05  -0.3    0.02   0.025  0.03   0.015]
[2.2, 1.0, 0.7, 0.25, 0.3, -0.4, 0.08, 0.04, 0.03, 0.02]
Successfully saved GPTWalker_refine2_3_11_1.xml
improved_morphology:[2.2, 1.0, 0.7, 0.25, 0.3, -0.4, 0.08, 0.04, 0.03, 0.02]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
improved_fitness 1.6387538435548066
improved_efficiency:484.86278800171647
[{'role': 'system', 'content': 'You are a reinforcement learning reward function designer'}, {'role': 'user', 'content': '\nYou are a reward engineer trying to write reward functions to solve refiforcement learning tasks as effective as possible.\nDescription: The walker is a two-dimensional bipedal robot consisting of seven main body parts - a single torso at 

In [None]:

logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [21]:
final_optimized_results

[{'best_morphology': 'results/Div_m25_r5/assets/GPTWalker_11.xml',
  'best_parameter': array([ 1.   ,  0.45 ,  0.25 ,  0.05 , -0.05 , -0.3  ,  0.02 ,  0.025,
          0.03 ,  0.015]),
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 5.010931913179973,
  'best_material': 0.0033798300964870192,
  'best_efficiency': 1482.5987609224244,
  'best_iteration': 2},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTWalker_11.xml',
  'best_parameter': array([ 1.   ,  0.45 ,  0.25 ,  0.05 , -0.05 , -0.3  ,  0.02 ,  0.025,
          0.03 ,  0.015]),
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_1.py',
  'best_fitness': 3.551990245232875,
  'best_material': 0.0033798300964870192,
  'best_efficiency': 1050.9375157422257,
  'best_iteration': 2},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTWalker_11.xml',
  'best_parameter': array([ 1.   ,  0.45 ,  0.25 ,  0.05 , -0.05 , -0.3  ,  0.02 ,  0.025,
          0.03 ,  0.015]),
  'best_rewardfunc': 'res

2025-04-07 06:13:50,208 - Final optimized result: rewardfunc_index0 morphology_index0
2025-04-07 06:13:50,208 -   Morphology: results/Div_m25_r5/assets/GPTWalker_refine_0_0_0.xml
2025-04-07 06:13:50,208 -   Parameter: [1.05, 0.55, 0.3, 0.1, 0.3, -0.25, 0.015, 0.025, 0.03, 0.015]
2025-04-07 06:13:50,208 -   Rewardfunc: results/Div_m25_r5/env/GPTWalker_0_0_1.py
2025-04-07 06:13:50,208 -   Fitness: 6.466572426996648
2025-04-07 06:13:50,208 -   Material: 0.003643200280612963
2025-04-07 06:13:50,208 -   Efficiency: 1774.970336220071

In [None]:
{'best_morphology': 'results/Div_m25_r5/assets/GPTWalker_11.xml',
  'best_parameter': array([ 1.   ,  0.45 ,  0.25 ,  0.05 , -0.05 , -0.3  ,  0.02 ,  0.025,
          0.03 ,  0.015]),
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_refine_1_11_1.py',
  'best_fitness': 14.883819809931813,
  'best_material': 0.0033798300964870192,
  'best_efficiency': 4403.718348269042,
  'best_iteration': 2}]

In [22]:
# Robodesign best

morphology = "results/Div_m25_r5/assets/GPTWalker_11.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine_1_11_1.py"

morphology_index=9999
rewardfunc_index=9999

parameter = [ 1.   ,  0.45 ,  0.25 ,  0.05 , -0.05 , -0.3  ,  0.02 ,  0.025,
          0.03 ,  0.015]


shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva(model_path)
material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign best best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign best 1e6 steps train

fitness:3.4504453808491076
efficiency:1020.8931462074042


In [None]:
# Robodesign best

morphology = "results/Div_m25_r5/assets/GPTWalker_refine.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_3.py"



# morphology = "results/Div_m25_r5/assets/GPTWalker_0.xml"
# rewardfunc = "results/Div_m25_r5/env/GPTWalker_0_0_1.py"

morphology_index=9998
rewardfunc_index=9998


parameter = [0.95, 0.75, 0.45, 0.15, 0.2, -0.2, 0.01, 0.01, 0.01, 0.008]


shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva(model_path)
material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign best best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

In [34]:
parameter = [ 1.   ,  0.5  ,  0.25 ,  0.05 ,  0.25 , -0.25 ,  0.015,  0.02, 0.025,  0.01 ]
parameter = [1.1, 0.75, 0.45, 0.15, 0.2, -0.2, 0.015, 0.012, 0.012, 0.008]
material = compute_walker_volume(parameter)
material




0.0009984965818806487

In [None]:
# Robodesign best

morphology = "results/Div_m25_r5/assets/GPTWalker_refine.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_0.py"



# morphology = "results/Div_m25_r5/assets/GPTWalker_0.xml"
# rewardfunc = "results/Div_m25_r5/env/GPTWalker_0_0_1.py"

morphology_index=999
rewardfunc_index=999

# parameter = [1.05, 0.55, 0.3, 0.1, 0.3, -0.25, 0.015, 0.025, 0.03, 0.015]
# parameter = [ 1.   ,  0.5  ,  0.25 ,  0.05 ,  0.25 , -0.25 ,  0.015,  0.02, 0.025,  0.01 ]

parameter = [0.95, 0.75, 0.45, 0.15, 0.2, -0.2, 0.01, 0.01, 0.01, 0.008]


shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)



material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign best best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

In [28]:
# coarse best

morphology = "results/Div_m25_r5/assets/GPTWalker_11.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_3.py"

morphology_index=888
rewardfunc_index=888

parameter = [ 1.    , 0.45  , 0.25 ,  0.05  ,-0.05  ,-0.3  ,  0.02  , 0.025  ,0.03  , 0.015]

morphology_index=11
rewardfunc_index=3

# morphology = "results/Div_m25_r5/assets/GPTWalker_0.xml"
# rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_3.py"
# morphology_index=0
# rewardfunc_index=3
# parameter =[ 1.   ,  0.5  ,  0.25 ,  0.05 ,  0.25 , -0.25 ,  0.015,  0.02 ,
#         0.025,  0.01 ]

shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/coarse/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign coarse best best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign coarse best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign coarse best 1e6 steps train

fitness:3.819145605463566
efficiency:1129.9815364781707


In [8]:
# human

morphology = "results/Div_m25_r5/assets/GPTWalker_50.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_10.py"

morphology_index=666
rewardfunc_index=666

parameter = [1.45, 1.06, 0.6, 0.1, -0.13, 0.26, 0.05, 0.05, 0.04, 0.06]

shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign human 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign human 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
fitness 8.74718176148099
Saved qpos log to /root/MML/Walker2D/qpos.txt
Average Fitness: 4.4272, Average Reward: 995.8090
Robodesign human 1e6 steps train

fitness:4.427216305766132
efficiency:157.8140339392407


In [9]:
# Robodesign (w/o Morphology Design)

morphology = "results/Div_m25_r5/assets/GPTWalker_50.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_1.py"

morphology_index=555
rewardfunc_index=555

parameter = [1.45, 1.06, 0.6, 0.1, -0.13, 0.26, 0.05, 0.05, 0.04, 0.06]

shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign (w/o Morphology Design) 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign (w/o Morphology Design) 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Morphology Design) 1e6 steps train

fitness:1.912675745085834
efficiency:68.17987965860266


In [19]:
parameter_list[0]

array([ 1.   ,  0.5  ,  0.25 ,  0.05 ,  0.25 , -0.25 ,  0.015,  0.02 ,
        0.025,  0.01 ])

In [27]:
# Robodesign (w/o Reward Design)

morphology = "results/Div_m25_r5/assets/GPTWalker_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_10.py"

morphology_index=444
rewardfunc_index=444

morphology_index=0
rewardfunc_index=10
parameter =[ 1.   ,  0.5  ,  0.25 ,  0.05 ,  0.25 , -0.25 ,  0.015,  0.02 ,
        0.025,  0.01 ]
shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
model_path = f"results/Div_m25_r5/coarse/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info(" Robodesign (w/o Reward Design) 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(" Robodesign (w/o Reward Design) 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
 Robodesign (w/o Reward Design) 1e6 steps train

fitness:3.273572012442703
efficiency:1422.2161074091166


In [12]:
# Robodesign (w/o diversity reflection)

morphology = "results/noDiv_m25_r5/assets/GPTWalker_20.xml"
rewardfunc = "results/noDiv_m25_r5/env/GPTrewardfunc_1.py"

morphology_index=333
rewardfunc_index=333

parameter = [ 0.6 ,   0.45,   0.3    ,0.15 ,  0.1 ,  -0.1  ,  0.04 ,  0.04   ,0.035 , 0.03 ]

shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info(" Robodesign  (w/o diversity reflection) 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(" Robodesign (w/o diversity reflection) 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
 Robodesign (w/o diversity reflection) 1e6 steps train

fitness:5.337485313971282
efficiency:899.007501259998


In [7]:
# eureka reward
morphology = "results/eureka/assets/GPTWalker_0.xml"
rewardfunc = "results/eureka/env/GPTrewardfunc_2_1.py"

morphology_index=111
rewardfunc_index=111

parameter = [1.45, 1.06, 0.6, 0.1, -0.13, 0.26, 0.05, 0.05, 0.04, 0.06]

shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("# eureka reward 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("# eureka reward 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
fitness 1.1383605759929971
Saved qpos log to /root/MML/Walker2D/qpos.txt
Average Fitness: 0.1414, Average Reward: 444.0662
# eureka reward 1e6 steps train

fitness:0.1414111423563164
efficiency:5.040788901626663


In [4]:
# eureka morphology

morphology = "results/eureka_morphology/assets/GPTWalker_5.xml"
rewardfunc = "results/eureka_morphology/env/GPTrewardfunc_0.py"

morphology_index=111
rewardfunc_index=111

parameter = [ 1.25   , 1.0 ,  0.75 ,   0.45  , 0.4 , -0.4  ,  0.025  , 0.018 ,  0.01 , 0.006 ]

shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info("eureka morphology 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("eureka morphology 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/MML/Walker2D/qpos.txt
Average Fitness: 3.6028, Average Reward: 718.2425
eureka morphology 1e6 steps train

fitness:3.602824393752411
efficiency:2411.9194215410907


In [None]:
# Robodesign (w/o Reward Design w/o morphology design)

morphology = "results/Div_m25_r5/assets/GPTWalker_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_0.py"

morphology_index=123
rewardfunc_index=123

parameter = parameter_list[0]


shutil.copy(morphology, "GPTWalker.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTWalkerEnv._get_rew = _get_rew
        
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva(model_path)
material = compute_walker_volume(parameter)
efficiency = fitness / material

logging.info(" (w/o Reward Design w/o morphology design) 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print(" (w/o Reward Design w/o morphology design) 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")