In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

In [None]:
import prompts
class DGA:
    def __init__(self):
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4o-mini"

    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTAnt_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        # env_path = os.path.join(os.path.dirname(__file__), "env", "ant_v5.py")
        # with open(env_path, "r") as f:
        #     env_content = f.read().rstrip()

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )

            diverse_code = self.extract_code(response.choices[0].message.content)
            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = ant_design(parameter)  
            filename = f"GPTAnt_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_ant_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = ant_design(initial_parameter['parameters'])  

        filename = f"GPTAnt_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_ant_volume(diverse_parameter['parameters']))
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = ant_design(diverse_parameter['parameters'])  
            filename = f"GPTAnt_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        reward_improve_prompts = prompts.reward_improve_prompts

        for rewardfunc_file, fitness in zip(rewardfunc_list, fitness_list):
            with open(rewardfunc_file, "r") as fp:
                reward_content = fp.read()
            reward_improve_prompts += f"\nreward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, "r") as fp:
            best_reward_content = fp.read()
        reward_improve_prompts += f"\nbest reward function:\n{best_reward_content}\nbest fitness: {max(fitness_list)}\n"
        
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": reward_improve_prompts+ zeroshot_rewardfunc_format}
        ]
        print(messages)
        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTrewardfunc_refine7_{step}_{rewardfunc_index}_{morphology_index}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path
    
    

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" + "[0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012] is a very helpful parameter"

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = ant_design(parameter)  
        filename = f"GPTAnt_refine7_{step}_{rewardfunc_index}_{morphology_index}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

folder_name = "results/Div_m25_r5"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 26
rewardfunc_nums = 6

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()



# return file list of morphology and reward function: [GPTAnt_{i}.xml] and [GPTAnt_{j}.py]



In [4]:
logging.info(f"start!")

# print configuration info

In [7]:
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

params: [0.2, 0.3, 0.06, 0.2, 0.15, 0.1, 0.05, 0.02, 0.02, 0.015]
params: [0.25, 0.35, 0.08, 0.25, 0.2, 0.15, 0.1, 0.03, 0.025, 0.02]
params: [0.15, 0.2, 0.05, 0.15, 0.1, 0.08, 0.04, 0.015, 0.015, 0.01]
params: [0.3, 0.45, 0.1, 0.4, 0.3, 0.25, 0.15, 0.05, 0.04, 0.03]
params: [0.18, 0.25, 0.04, 0.12, 0.09, 0.06, 0.03, 0.02, 0.018, 0.015]
params: [0.35, 0.5, 0.1, 0.25, 0.05, 0.15, 0.02, 0.04, 0.03, 0.025]
params: [0.45, 0.6, 0.15, 0.35, 0.2, 0.25, 0.1, 0.07, 0.06, 0.05]
params: [0.2, 0.1, 0.3, 0.2, 0.4, 0.15, 0.35, 0.015, 0.025, 0.02]
params: [0.22, 0.18, 0.07, 0.1, 0.05, 0.12, 0.06, 0.025, 0.02, 0.015]
params: [0.1, 0.15, 0.02, 0.3, 0.1, 0.05, 0.02, 0.01, 0.015, 0.01]
params: [0.4, 0.25, 0.06, 0.2, 0.05, 0.3, 0.15, 0.03, 0.025, 0.02]
params: [0.25, 0.1, 0.2, 0.15, 0.3, 0.1, 0.2, 0.02, 0.03, 0.02]
params: [0.3, 0.15, 0.1, 0.05, 0.2, 0.08, 0.04, 0.02, 0.015, 0.01]
params: [0.5, 0.2, 0.12, 0.25, 0.08, 0.3, 0.1, 0.04, 0.03, 0.025]
params: [0.15, 0.25, 0.08, 0.12, 0.07, 0.15, 0.09, 0.015, 0.

In [8]:
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)

initial Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_0.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_1.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_2.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_3.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_4.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_5.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_6.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_7.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_8.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_9.py


In [9]:
efficiency_matrix.shape

(10, 50)

# enter coarse optimization stage

In [5]:
morphology_list = [f'results/Div_m25_r5/assets/GPTAnt_{i}.xml' for i in range(0,26) ]
rewardfunc_list = [f'results/Div_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,6)]

parameter_list = [[0.1, 0.05, 0.15, 0.1, 0.2, 0.1, 0.2, 0.02, 0.02, 0.02],
 [0.2, 0.1, 0.05, 0.2, 0.05, 0.2, 0.05, 0.015, 0.015, 0.015],
 [0.3, 0.2, 0.1, 0.15, 0.3, 0.15, 0.3, 0.025, 0.025, 0.025],
 [0.15, 0.3, 0.2, 0.3, 0.1, 0.3, 0.1, 0.01, 0.01, 0.01],
 [0.05, 0.4, 0.25, 0.1, 0.2, 0.05, 0.15, 0.03, 0.03, 0.03],
 [0.4, 0.15, 0.1, 0.25, 0.25, 0.1, 0.1, 0.04, 0.04, 0.04],
 [0.3, 0.05, 0.1, 0.05, 0.1, 0.25, 0.15, 0.015, 0.025, 0.02],
 [0.08, 0.25, 0.2, 0.12, 0.18, 0.12, 0.18, 0.02, 0.02, 0.02],
 [0.1, 0.3, 0.05, 0.15, 0.05, 0.15, 0.05, 0.01, 0.01, 0.01],
 [0.2, 0.08, 0.27, 0.18, 0.08, 0.18, 0.08, 0.035, 0.035, 0.035],
 [0.05, 0.1, 0.15, 0.25, 0.3, 0.2, 0.25, 0.015, 0.015, 0.015],
 [0.15, 0.2, 0.1, 0.3, 0.15, 0.05, 0.15, 0.02, 0.03, 0.02],
 [0.25, 0.05, 0.2, 0.05, 0.1, 0.3, 0.1, 0.04, 0.025, 0.025],
 [0.2, 0.4, 0.2, 0.05, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01],
 [0.35, 0.15, 0.3, 0.2, 0.3, 0.2, 0.3, 0.05, 0.05, 0.05],
 [0.12, 0.35, 0.22, 0.28, 0.18, 0.12, 0.18, 0.02, 0.025, 0.02],
 [0.3, 0.1, 0.05, 0.1, 0.2, 0.3, 0.15, 0.015, 0.02, 0.025],
 [0.18, 0.15, 0.08, 0.25, 0.04, 0.2, 0.03, 0.03, 0.045, 0.045],
 [0.05, 0.25, 0.1, 0.35, 0.15, 0.4, 0.2, 0.03, 0.02, 0.01],
 [0.1, 0.08, 0.02, 0.4, 0.3, 0.1, 0.05, 0.015, 0.01, 0.015],
 [0.3, 0.05, 0.2, 0.05, 0.1, 0.2, 0.05, 0.025, 0.015, 0.015],
 [0.2, 0.1, 0.3, 0.15, 0.25, 0.25, 0.15, 0.02, 0.03, 0.02],
 [0.05, 0.1, 0.05, 0.3, 0.2, 0.1, 0.3, 0.015, 0.02, 0.01],
 [0.2, 0.05, 0.25, 0.2, 0.05, 0.1, 0.2, 0.02, 0.02, 0.01],
 [0.15, 0.1, 0.08, 0.1, 0.25, 0.15, 0.25, 0.03, 0.045, 0.045],
 [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08 ] ]
                  
           
material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

params: [0.1, 0.05, 0.15, 0.1, 0.2, 0.1, 0.2, 0.02, 0.02, 0.02]
params: [0.2, 0.1, 0.05, 0.2, 0.05, 0.2, 0.05, 0.015, 0.015, 0.015]
params: [0.3, 0.2, 0.1, 0.15, 0.3, 0.15, 0.3, 0.025, 0.025, 0.025]
params: [0.15, 0.3, 0.2, 0.3, 0.1, 0.3, 0.1, 0.01, 0.01, 0.01]
params: [0.05, 0.4, 0.25, 0.1, 0.2, 0.05, 0.15, 0.03, 0.03, 0.03]
params: [0.4, 0.15, 0.1, 0.25, 0.25, 0.1, 0.1, 0.04, 0.04, 0.04]
params: [0.3, 0.05, 0.1, 0.05, 0.1, 0.25, 0.15, 0.015, 0.025, 0.02]
params: [0.08, 0.25, 0.2, 0.12, 0.18, 0.12, 0.18, 0.02, 0.02, 0.02]
params: [0.1, 0.3, 0.05, 0.15, 0.05, 0.15, 0.05, 0.01, 0.01, 0.01]
params: [0.2, 0.08, 0.27, 0.18, 0.08, 0.18, 0.08, 0.035, 0.035, 0.035]
params: [0.05, 0.1, 0.15, 0.25, 0.3, 0.2, 0.25, 0.015, 0.015, 0.015]
params: [0.15, 0.2, 0.1, 0.3, 0.15, 0.05, 0.15, 0.02, 0.03, 0.02]
params: [0.25, 0.05, 0.2, 0.05, 0.1, 0.3, 0.1, 0.04, 0.025, 0.025]
params: [0.2, 0.4, 0.2, 0.05, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01]
params: [0.35, 0.15, 0.3, 0.2, 0.3, 0.2, 0.3, 0.05, 0.05, 0.05]
p

In [6]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

In [23]:
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        # if i not in [0] or j not in [12]:
        #     continue
        # if i not in [5]:
        #     continue
        if i not in [3] and j < 10:
            continue
        print(i, rewardfunc)
        print(j, morphology)
        shutil.copy(morphology, "GPTAnt.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        env_name = "GPTAntEnv"
        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = f"results/div2025-03-17_15-13-46/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
        
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material

0 results/Div_m25_r5/env/GPTrewardfunc_0.py
10 results/Div_m25_r5/assets/GPTAnt_10.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Div_m25_r5/env/GPTrewardfunc_0.py
11 results/Div_m25_r5/assets/GPTAnt_11.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Div_m25_r5/env/GPTrewardfunc_0.py
12 results/Div_m25_r5/assets/GPTAnt_12.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
5

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [24]:
efficiency_matrix

array([[62.0143355665915, 12.253735396642417, 16.88750769838232,
        49.96843190265978, 33.13593792528397, 1.9941047829822889,
        4.4853793663017845, 89.7388404759196, 75.6846853060232,
        12.289518050037374, 202.49589394259866, 18.471501603209603,
        7.223279728484965, 8.525592128214635, 5.675764384434074,
        67.32319789192341, 4.73700033986508, 19.664539840955445,
        118.13268134044253, 117.54224438888221, 3.8917319018534755,
        26.484434720272567, 197.11404874160934, 12.236283502691517,
        15.987689512637512, 5.936977360577948],
       [58.84924191367601, 12.168719315827417, 8.458725395727258,
        149.62032227885177, 32.96068581877422, 2.012553550406138,
        4.443537336469106, 98.24998782752515, 75.69533357139954,
        9.771163205493691, 261.2014020026532, 19.17921569923469,
        7.442198793684386, 8.525592128214635, 6.56240587285585,
        65.12794767501795, 4.763064319548672, 11.499275848626128,
        102.22713962681725, 128

In [25]:
fitness_matrix

array([[0.47339398635205804, 0.43086418921581293, 2.041827154797272,
        0.7712770584178302, 0.38214738270321236, 0.5680732267921969,
        0.5212433410364212, 0.5681260441763253, 0.3798334669906955,
        0.5661147346462263, 0.6504848459044855, 0.3799546545984619,
        0.5385137266163863, 0.29243026210093126, 1.2433847898041273,
        0.9120828306500415, 0.5571854841728576, 0.8150293854217999,
        0.7853533007287862, 0.6460396829769333, 0.4514034583940651,
        1.0748212927575411, 0.6419594231505306, 0.4453263817322554,
        0.533386209026041, 1.0815748858470304],
       [0.4492328583186482, 0.42787486526315943, 1.0227237503864266,
        2.3094325287525774, 0.380126249818218, 0.5733288437137565,
        0.5163809029582541, 0.6220091169976973, 0.3798869066996535,
        0.45010711101451273, 0.839066661667199, 0.3945121751880684,
        0.5548346952150429, 0.29243026210093126, 1.437620573047066,
        0.8823419672550117, 0.5602512367794718, 0.476606511187303

# print coarse optimization info

In [15]:
logging.info(f'_________________________________end coarse optimization stage_________________________________')
logging.info(f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}")
logging.info(f'folder_name:{folder_name}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'fitness_matrix:{fitness_matrix}')
logging.info(f'efficiency_matrix:{efficiency_matrix}')
logging.info(f'_________________________________enter fine optimization stage_________________________________')

In [6]:
efficiency_matrix = np.array([[62.0143355665915, 12.253735396642417, 16.88750769838232,
        49.96843190265978, 33.13593792528397, 1.9941047829822889,
        4.4853793663017845, 89.7388404759196, 75.6846853060232,
        12.289518050037374, 202.49589394259866, 18.471501603209603,
        7.223279728484965, 8.525592128214635, 5.675764384434074,
        67.32319789192341, 4.73700033986508, 19.664539840955445,
        118.13268134044253, 117.54224438888221, 3.8917319018534755,
        26.484434720272567, 197.11404874160934, 12.236283502691517,
        15.987689512637512, 5.936977360577948],
       [58.84924191367601, 12.168719315827417, 8.458725395727258,
        149.62032227885177, 32.96068581877422, 2.012553550406138,
        4.443537336469106, 98.24998782752515, 75.69533357139954,
        9.771163205493691, 261.2014020026532, 19.17921569923469,
        7.442198793684386, 8.525592128214635, 6.56240587285585,
        65.12794767501795, 4.763064319548672, 11.499275848626128,
        102.22713962681725, 128.77639943846305, 3.8976769811537,
        18.39484853434099, 293.06933287996907, 12.236283502691517,
        16.085048694212826, 5.989298080996523],
       [59.10768309968578, 12.171863057678516, 8.488406541904254,
        65.23013233237096, 32.95674179943598, 1.8428207964034875,
        4.516329861816528, 93.14706571238511, 75.6846853060232,
        9.126070674165831, 201.2807438014753, 23.397856008668573,
        7.442353266521401, 8.525592128214635, 3.2501745503670434,
        69.22934944983156, 4.744546359373176, 13.09199353720335,
        122.40091718560232, 117.68846731665151, 3.989593101291398,
        15.131394660288684, 225.28516382597056, 12.318268906554742,
        16.313906083248835, 6.160340304167579],
       [71.67993552034967, 12.177743346556166, 7.97165251461774,
        56.247534517771214, 33.00725569994364, 2.2211214233430114,
        4.446435557618284, 90.49211041585825, 75.6846853060232,
        9.534639063583597, 183.77895564072375, 19.122957451358857,
        7.501848685835176, 8.525592128214635, 5.584459638205207,
        69.70209298602606, 4.768767383793329, 12.246299551821147,
        156.94728777157897, 85.26197309707038, 3.952862796622749,
        21.997267954930184, 183.22901546801498, 12.236283502691517,
        21.14104634188468, 6.339306226850774],
       [77.89413280453167, 12.168719315827417, 7.22740427135594,
        57.81935597792466, 33.03544508271147, 2.0229456191918285,
        4.534245578166328, 84.97942108722147, 75.6846853060232,
        9.518976632999669, 248.81799196843954, 18.928998157811275,
        7.305570858137823, 8.525592128214635, 4.783302172529854,
        71.89403694924691, 4.8270918286691655, 11.666106215662388,
        118.58612502174925, 143.10821259282582, 3.9402234869822323,
        17.841901222280434, 240.32013082588617, 12.236283502691517,
        19.890300313127444, 6.028056297344841],
       [64.43521482955605, 12.31177734236322, 10.037745220956637,
        47.87498763356024, 33.04075792726323, 1.7910812026410612,
        4.477035866429335, 82.09051340589662, 75.6846853060232,
        9.112076706514106, 396.453331473542, 20.13666098776548,
        7.871763002016971, 8.525592128214635, 9.677089791110134,
        67.34014017577603, 4.778136928895959, 11.734229032127109,
        132.1710913955092, 77.21307332051266, 3.9773296984980178,
        22.137547945717156, 279.41776982794835, 12.26050051958983,
        17.495651701817753, 6.08156431960861]], dtype=object)

mean = np.mean(efficiency_matrix)

std = np.std(efficiency_matrix)

print("平均值：", mean)
print("标准差：", std)



平均值： 49.40472391447502
标准差： 68.61938131035991


In [7]:
fitness_matrix = np.array([[0.47339398635205804, 0.43086418921581293, 2.041827154797272,
        0.7712770584178302, 0.38214738270321236, 0.5680732267921969,
        0.5212433410364212, 0.5681260441763253, 0.3798334669906955,
        0.5661147346462263, 0.6504848459044855, 0.3799546545984619,
        0.5385137266163863, 0.29243026210093126, 1.2433847898041273,
        0.9120828306500415, 0.5571854841728576, 0.8150293854217999,
        0.7853533007287862, 0.6460396829769333, 0.4514034583940651,
        1.0748212927575411, 0.6419594231505306, 0.4453263817322554,
        0.533386209026041, 1.0815748858470304],
       [0.4492328583186482, 0.42787486526315943, 1.0227237503864266,
        2.3094325287525774, 0.380126249818218, 0.5733288437137565,
        0.5163809029582541, 0.6220091169976973, 0.3798869066996535,
        0.45010711101451273, 0.839066661667199, 0.3945121751880684,
        0.5548346952150429, 0.29243026210093126, 1.437620573047066,
        0.8823419672550117, 0.5602512367794718, 0.47660651118730313,
        0.6796122852626573, 0.7077852282017902, 0.45209303039548443,
        0.7465205540756985, 0.9544658083978621, 0.4453263817322554,
        0.5366343359510283, 1.0911064662754668],
       [0.4512057006004355, 0.4279854050895824, 1.0263124250050917,
        1.0068457758194906, 0.38008076456076617, 0.5249760018363052,
        0.524840080213054, 0.5897031172807483, 0.3798334669906955,
        0.4203910240444608, 0.6465813753851104, 0.48128866234532164,
        0.5548462115547458, 0.29243026210093126, 0.7120129248525521,
        0.9379070363179873, 0.5580730780574964, 0.5426193306771312,
        0.8137287940406237, 0.646843358407764, 0.4627544160095758,
        0.6140793768781885, 0.7337068805464791, 0.44831015235584687,
        0.5442695464702858, 1.1222662571531683],
       [0.547177521252315, 0.428192167012997, 0.9638329624278207,
        0.8681968057479255, 0.3806633270617178, 0.6327448912532638,
        0.5167177035611662, 0.5728949075681932, 0.3798334669906955,
        0.43921166326060734, 0.590359751562819, 0.3933549556182062,
        0.5592817451593658, 0.29243026210093126, 1.2233827380964752,
        0.9443116825048185, 0.5609220546778126, 0.5075681443928006,
        1.0433951815300297, 0.4686197575688727, 0.4584940540490309,
        0.8927180145685498, 0.5967387602518289, 0.4453263817322554,
        0.7053140827027136, 1.154869556044263],
       [0.5946143533566944, 0.42787486526315943, 0.8738477319164129,
        0.892458320187818, 0.38098842722544446, 0.5762892979980617,
        0.526922055244506, 0.5379947198184766, 0.3798334669906955,
        0.43849017583548344, 0.7992869880597878, 0.38936525635232583,
        0.5446487379357012, 0.29243026210093126, 1.0478738657072844,
        0.9740077533571002, 0.5677824160300733, 0.48352107174224007,
        0.7883678220938145, 0.7865562272997532, 0.45702801573310126,
        0.7240802206854294, 0.7826726382079505, 0.4453263817322554,
        0.6635863094553119, 1.098167283738056],
       [0.4918740631649621, 0.4329050522729958, 1.2136391663672836,
        0.7389641465182112, 0.3810496987562139, 0.510236616909109,
        0.5202737477435716, 0.5197053850747086, 0.3798334669906955,
        0.4197463941044136, 1.2735413010645533, 0.414206610525011,
        0.586859790648903, 0.29243026210093126, 2.1199516824260978,
        0.9123123617284488, 0.5620241391510928, 0.4863445345684999,
        0.8786815105741032, 0.42438112075369633, 0.46133197927132696,
        0.8984110158592383, 0.9100055094088924, 0.4462077340243384,
        0.5836953068356823, 1.1079151620870824]], dtype=object)

# configuration of fine optimization

In [34]:
# 获取矩阵中所有非 None 的值和它们的坐标
all_values_with_coords = []
for i in range(len(efficiency_matrix)):
    for j in range(len(efficiency_matrix[0])):
        value = efficiency_matrix[i][j]
        if value is not None:
            all_values_with_coords.append(((i, j), value))

# 按值降序排序
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# 计算前 20% 的数量（至少选1个）
top_k = max(1, int(len(sorted_values) * 0.05))
# 取前 20% 个坐标
efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [35]:
coarse_best = efficiency_coarse_best
coarse_best

[(5, 10), (1, 22), (5, 22), (1, 10), (4, 10), (4, 22), (2, 22)]

In [36]:
efficiency_matrix_select = efficiency_matrix

# enter fine optimization stage

In [61]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)
    
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环
        designer = DGA()
        iteration +=1
         # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )

        shutil.copy(improved_morphology, "GPTAnt.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        try:
            model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5, iter=iteration)
            improved_fitness, _ = Eva(model_path)
            improved_material = compute_ant_volume(improved_parameter)
            improved_efficiency = improved_fitness / improved_material
            improved_model_path = model_path
        except Exception as e:
            print(f"Error evaluating design: {e}")
            continue
            
        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            best_model_path = improved_model_path
            improved = True
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}, model_path={improved_model_path}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            improved = False
            
        iteration +=1   
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTAnt.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew
        try:
            model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5, iter=iteration)
            improved_fitness, _ = Eva(model_path)
            improved_material = compute_ant_volume(best_parameter)
            improved_efficiency = improved_fitness / improved_material
            improved_model_path = model_path
        except Exception as e:
            print(f"Error evaluating design: {e}")
            continue
            
        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            best_model_path = improved_model_path
            improved = True
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}, model_path={improved_model_path}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration,
        "best_model_path":best_model_path
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info(f"  best_model_path: {best_model_path}")
    logging.info("____________________________________________")

params: [0.05, 0.1, 0.15, 0.25, 0.3, 0.2, 0.25, 0.015, 0.015, 0.015]
ChatCompletion(id='chatcmpl-BWSY8xgt4ZxbttqF2vvH6EMwc9itW', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "parameters": [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012],\n  "description": "Designed for efficient jumping capability with reduced material cost by optimizing leg lengths and joint configurations while minimizing excess geometrical sizes."\n}', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1747075940, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_7a53abb7a2', usage=CompletionUsage(completion_tokens=85, prompt_tokens=4185, total_tokens=4270, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDet

In [None]:
logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [63]:
final_optimized_results

[{'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_10.xml',
  'best_parameter': [0.05,
   0.1,
   0.15,
   0.25,
   0.3,
   0.2,
   0.25,
   0.015,
   0.015,
   0.015],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_5.py',
  'best_fitness': 1.2735413010645533,
  'best_material': 0.00321233597995265,
  'best_efficiency': 396.453331473542,
  'best_iteration': 2,
  'best_model_path': 'results/Div_m25_r5/fine/SAC_iter1_morphology10_rewardfunc4_500000.0steps'},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_refine7_1_22_1.xml',
  'best_parameter': [0.04,
   0.12,
   0.18,
   0.3,
   0.35,
   0.25,
   0.3,
   0.012,
   0.012,
   0.012],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_refine7_1_22_4.py',
  'best_fitness': 1.433226002371492,
  'best_material': 0.0022872279123280024,
  'best_efficiency': 626.621420037112,
  'best_iteration': 6,
  'best_model_path': 'results/Div_m25_r5/fine/SAC_iter4_morphology22_rewardfunc1_500000.0steps'},
 {'best_morphology'

In [None]:
 {'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_refine7_5_22_1.xml',
  'best_parameter': [0.04,
   0.12,
   0.18,
   0.3,
   0.35,
   0.25,
   0.3,
   0.012,
   0.012,
   0.012],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_refine7_5_22_2.py',
  'best_fitness': 2.2483639603777092,
  'best_material': 0.0022872279123280024,
  'best_efficiency': 983.0082731411159,
  'best_iteration': 4,
  'best_model_path': 'results/Div_m25_r5/fine/SAC_iter2_morphology22_rewardfunc5_500000.0steps'},

In [84]:
# best new
# 5E5
# fitness:2.5469001001027354
# efficiency:1113.53140033624
# 
morphology = 'results/Div_m25_r5/assets/GPTAnt_refine7_5_22_1.xml'
rewardfunc = 'results/Div_m25_r5/env/GPTrewardfunc_refine7_5_22_2.py'
morphology_index=99996
rewardfunc_index=99996

parameter = [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012]

shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = 'results/Div_m25_r5/fine/SAC_iter2_morphology22_rewardfunc5_500000.0steps'
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva(model_path, run_steps=10)
material = compute_ant_volume(parameter)
efficiency = fitness / material

logging.info("best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
params: [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012]
best 1e6 steps train

fitness:1.461023666317157
efficiency:638.7748498705962


In [None]:
2025-05-13 01:13:45,959 - Final optimized result: rewardfunc_index4 morphology_index10
2025-05-13 01:13:45,959 -   Morphology: results/Div_m25_r5/assets/GPTAnt_refine6_4_10_1.xml
2025-05-13 01:13:45,959 -   Parameter: [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012]
2025-05-13 01:13:45,959 -   Rewardfunc: results/Div_m25_r5/env/GPTrewardfunc_4.py
2025-05-13 01:13:45,959 -   Fitness: 2.264162613520391
2025-05-13 01:13:45,959 -   Material: 0.0022872279123280024
2025-05-13 01:13:45,959 -   Efficiency: 989.9156097722965
 'best_model_path': 'results/Div_m25_r5/fine/SAC_iter1_morphology10_rewardfunc4_500000.0steps'},

In [73]:
# best new

morphology = "results/Div_m25_r5/assets/GPTAnt_refine6_4_10_1.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_4.py"
morphology_index=99999
rewardfunc_index=99999

parameter = [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012]

shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = 'results/Div_m25_r5/fine/SAC_iter1_morphology10_rewardfunc4_500000.0steps'
# fitness, _ = Eva(model_path)
fitness, _ = Eva(model_path, run_steps=10)
material = compute_ant_volume(parameter)
efficiency = fitness / material

logging.info("best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
params: [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012]
best 1e6 steps train

fitness:1.0190170005191297
efficiency:445.5249059469315


In [19]:
best_efficiency

274.1531227653519

In [50]:
# best new

morphology = "results/Div_m25_r5/assets/GPTAnt_refine2_1_10_1.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine2_1_10_4.py"
morphology_index=99999
rewardfunc_index=99999

parameter = [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012]

shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=3e5)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_ant_volume(parameter)
efficiency = fitness / material

logging.info("best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [0.04, 0.12, 0.18, 0.3, 0.35, 0.25, 0.3, 0.012, 0.012, 0.012]
best 1e6 steps train

fitness:0.8464521333700047
efficiency:370.07773856190084


In [3]:
# best

morphology = "results/Div_m25_r5/assets/GPTAnt_refine_4_10_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTAnt_refine_4_10_1.py"
morphology_index=9999
rewardfunc_index=9999

parameter = [0.04,
   0.12,
   0.18,
   0.28,
   0.35,
   0.25,
   0.3,
   0.01,
   0.01,
   0.01]

shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_ant_volume(parameter)
efficiency = fitness / material

logging.info("best 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("best 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_jump/qpos.txt
Average Fitness: 1.4165, Average Reward: 1317.2162
params: [0.04, 0.12, 0.18, 0.28, 0.35, 0.25, 0.3, 0.01, 0.01, 0.01]
best 1e6 steps train

fitness:1.416464603046393
efficiency:861.501675655942


In [5]:
# human 


morphology = "results/Div_m25_r5/assets/GPTAnt_25.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_5.py"
morphology_index=888
rewardfunc_index=888

parameter = [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]

shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_ant_volume(parameter)
efficiency = fitness / material

logging.info("human 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("human 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_jump/qpos.txt
Average Fitness: 1.1715, Average Reward: 544.9538
params: [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]
human 1e6 steps train

fitness:1.1714694499665659
efficiency:6.430426310808277


In [6]:
# Eureka

morphology = "results/Eureka/assets/GPTAnt_25.xml"
rewardfunc = "results/Eureka/env/GPTAnt_3_1.py"
morphology_index=222
rewardfunc_index=222

parameter = [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]


shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_ant_volume(parameter)
efficiency = fitness / material

logging.info("Eureka 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Eureka 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_jump/qpos.txt
Average Fitness: 1.1917, Average Reward: -708.0962
params: [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]
Eureka 1e6 steps train

fitness:1.1917362246142331
efficiency:6.541674624567804


In [7]:
# Eureka morphology

morphology = "results/Eureka_morphology/assets/GPTAnt_9_iter2.xml"
rewardfunc = "results/Eureka_morphology/env/GPTrewardfunc_5.py"
morphology_index=111
rewardfunc_index=111

parameter = [0.13, 0.2, 0.28, 0.4, 0.48, 0.3, 0.6, 0.035, 0.025, 0.018]

shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_ant_volume(parameter)
efficiency = fitness / material

logging.info("Eureka morphology 1e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Eureka morphology 1e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_jump/qpos.txt
Average Fitness: 4.8540, Average Reward: 833.4519
params: [0.13, 0.2, 0.28, 0.4, 0.48, 0.3, 0.6, 0.035, 0.025, 0.018]
Eureka morphology 1e6 steps train

fitness:4.853981866223753
efficiency:209.07792080737983
