In [1]:
# Explicit stdlib imports for names the notebook uses directly (logging, os, re).
# They come first and the original order below is untouched, because the star
# imports are order-sensitive and must keep overriding exactly as before.
import logging
import os
import re
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

In [None]:
import prompts
class DGA:
    """LLM-driven designer of ant morphologies and reward functions.

    Every ``generate_*`` / ``improve_*`` method sends a prompt to the OpenAI
    chat-completions API and writes the reply to disk: reward functions as
    Python files under ``<folder_name>/env`` and morphologies as XML files
    under ``<folder_name>/assets``. Missing folders are created on demand.

    The prompt strings (``rewardfunc_prompts``, ``morphology_format``, ...),
    ``ant_design``, ``compute_ant_volume`` and ``api_key`` are not defined
    here; they are looked up in the notebook namespace (presumably provided
    by the star imports at the top of the notebook -- TODO confirm).
    """

    # Fenced ```python block. Tolerates trailing blanks after the language tag
    # and Windows line endings; still requires the closing fence on its own line.
    _CODE_BLOCK_PATTERN = r'```python[ \t]*\r?\n(.*?)\r?\n```'

    def __init__(self, model="gpt-4o-mini"):
        """Create the API client.

        Args:
            model: chat model name used for every request.
        """
        self.client = OpenAI(api_key=api_key)
        self.model = model

    # ------------------------------------------------------------------ helpers
    def _chat(self, messages, **options):
        """Send ``messages`` to the chat-completions endpoint with ``self.model``."""
        return self.client.chat.completions.create(
            model=self.model, messages=messages, **options
        )

    @staticmethod
    def _write_text(file_path, text):
        """Write ``text`` to ``file_path``, creating missing parent folders."""
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(file_path, "w") as fp:
            fp.write(text)

    @staticmethod
    def _parse_parameters(content):
        """Parse a JSON chat reply.

        Returns:
            ``(reply, parameters)``. ``reply`` is the decoded JSON value (or
            ``None`` if decoding failed); ``parameters`` is the non-empty list
            stored under the ``"parameters"`` key, or ``None`` when it is
            missing, empty or not a list.
        """
        try:
            reply = json.loads(content)
        except (TypeError, ValueError):  # None content or malformed JSON
            return None, None
        if not isinstance(reply, dict):
            return reply, None
        parameters = reply.get("parameters")
        if not isinstance(parameters, list) or not parameters:
            return reply, None
        return reply, parameters

    def _save_morphology(self, parameter, folder_name, filename):
        """Render ``parameter`` with ``ant_design`` and write the result to
        ``<folder_name>/assets/<filename>``. Returns the written path."""
        file_path = os.path.join(folder_name, "assets", filename)
        self._write_text(file_path, ant_design(parameter))
        return file_path

    def extract_code(self, text):
        """Return the body of the first fenced ``python`` block in ``text``.

        Returns ``None`` when ``text`` is empty/``None`` or holds no such block.
        """
        if not text:
            return None
        match = re.search(self._CODE_BLOCK_PATTERN, text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` unchanged.

        The original implementation split and re-joined the lines without
        altering any of them; the method is kept as a hook so callers and
        generated-file layout stay the same.
        """
        return code

    # --------------------------------------------------------- reward functions
    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Request ``rewardfunc_nums`` reward functions in a single API call.

        Each reply that contains a python code block is saved as
        ``<folder_name>/env/GPTAnt_<i>.py`` (``i`` = index of the reply).

        Returns:
            List of the file paths that were written; replies without a code
            block are skipped.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self._chat(messages, n=rewardfunc_nums)

        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                print(f"Skipping response {i}: no python code block found")
                continue
            # Same numpy header the other reward-function writers prepend.
            full_code = "import numpy as np\n" + self.indent_code(reward_code) + "\n"
            file_path = os.path.join(folder_name, "env", f"GPTAnt_{i}.py")
            self._write_text(file_path, full_code)
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files

    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate reward functions one at a time, asking for diversity.

        The first request uses the base prompt; each later request appends
        ``rewardfunc_div_prompts``. Accepted replies are added to the
        conversation as assistant turns so the model can see what it already
        produced (mirrors ``generate_morphology_div``).

        Files are written to ``<folder_name>/env/GPTrewardfunc_<i>.py``.

        Returns:
            List of written file paths; replies without a code block are
            skipped, so the list may be shorter than ``rewardfunc_nums``.
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        rewardfunc_files = []

        # Initial reward function.
        response = self._chat(messages, n=1, timeout=10)
        reply = response.choices[0].message.content
        initial_code = self.extract_code(reply)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"
            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            self._write_text(file_path, reward_code)
            rewardfunc_files.append(file_path)
            messages.append({"role": "assistant", "content": reply})
            print(f"initial Saved: {file_path}")

        # Further, diversified reward functions.
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self._chat(diverse_messages, n=1)
            reply = response.choices[0].message.content
            diverse_code = self.extract_code(reply)
            if diverse_code:
                reward_code = "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                self._write_text(file_path, reward_code)
                rewardfunc_files.append(file_path)
                messages.append({"role": "assistant", "content": reply})
                print(f"Saved: {file_path}")

        return rewardfunc_files

    # --------------------------------------------------------------- morphology
    def generate_morphology(self, morphology_nums, folder_name):
        """Request ``morphology_nums`` morphologies in a single API call.

        Each reply must be a JSON object with a non-empty ``"parameters"``
        list; invalid replies are skipped in *all three* returned lists so
        the lists stay index-aligned.

        Returns:
            ``(xml_files, material_list, parameter_list)`` where
            ``material_list[k]`` is ``compute_ant_volume(parameter_list[k])``
            and ``xml_files[k]`` is the XML written for that parameter set
            (``<folder_name>/assets/GPTAnt_<i>.xml``, ``i`` = reply index).
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self._chat(
            messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        xml_files = []
        material_list = []
        parameter_list = []
        for i, choice in enumerate(responses.choices):
            content = choice.message.content
            reply, parameter = self._parse_parameters(content)

            print(f"Response {i}:")
            # Pretty-print the decoded object; fall back to the raw text.
            print(json.dumps(reply, indent=4) if reply is not None else content)

            if parameter is None:
                print(f"Skipping invalid parameter {i}: {content}")
                continue

            filename = f"GPTAnt_{i}.xml"
            xml_files.append(self._save_morphology(parameter, folder_name, filename))
            material_list.append(compute_ant_volume(parameter))
            parameter_list.append(parameter)
            print(f"Successfully saved {filename}")

        return xml_files, material_list, parameter_list

    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate morphologies one at a time, asking for diversity.

        The first request uses the base prompt; each later request appends
        ``morphology_div_prompts`` and sees all previously accepted designs as
        assistant turns.

        Returns:
            ``(xml_files, material_list, parameter_list)``, index-aligned.
            Invalid diversified replies are logged and skipped.

        Raises:
            ValueError: if the initial reply has no usable ``"parameters"``.
        """
        material_list = []
        xml_files = []
        parameter_list = []

        # Initial morphology.
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        response = self._chat(messages, response_format={'type': 'json_object'}, n=1)
        content = response.choices[0].message.content
        reply, parameter = self._parse_parameters(content)
        if parameter is None:
            raise ValueError(f"initial morphology reply has no usable 'parameters' list: {content}")

        parameter_list.append(parameter)
        material_list.append(compute_ant_volume(parameter))
        messages.append({"role": "assistant", "content": json.dumps(reply)})
        logging.info(f"generate initial_parameter{parameter}")
        xml_files.append(self._save_morphology(parameter, folder_name, "GPTAnt_0.xml"))

        # Further, diversified designs.
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]

            response = self._chat(diverse_messages, response_format={'type': 'json_object'}, n=1)
            content = response.choices[0].message.content
            reply, parameter = self._parse_parameters(content)
            if parameter is None:
                logging.warning(f"skip invalid diverse morphology reply {i}: {content}")
                continue

            material_list.append(compute_ant_volume(parameter))
            parameter_list.append(parameter)
            messages.append({"role": "assistant", "content": json.dumps(reply)})
            logging.info(f"generate diverse_parameter{parameter}")
            xml_files.append(self._save_morphology(parameter, folder_name, f"GPTAnt_{i}.xml"))

        return xml_files, material_list, parameter_list

    # --------------------------------------------------------------- refinement
    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for an improved reward function.

        The prompt lists the source of every file in ``rewardfunc_list`` with
        its paired entry of ``fitness_list``, followed by the source of
        ``best_rewardfunc`` and ``max(fitness_list)``.

        Returns:
            Path of the written file
            ``<folder_name>/env/GPTrewardfunc_refine2_<step>_<rewardfunc_index>_<morphology_index>.py``.

        Raises:
            ValueError: if the reply contains no python code block (the
                original code failed here with ``UnboundLocalError``).
        """
        reward_improve_prompts = prompts.reward_improve_prompts

        for rewardfunc_file, fitness in zip(rewardfunc_list, fitness_list):
            with open(rewardfunc_file, "r") as fp:
                reward_content = fp.read()
            reward_improve_prompts += f"\nreward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, "r") as fp:
            best_reward_content = fp.read()
        reward_improve_prompts += f"\nbest reward function:\n{best_reward_content}\nbest fitness: {max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": reward_improve_prompts + zeroshot_rewardfunc_format}
        ]
        print(messages)
        response = self._chat(messages)

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            raise ValueError("improve_rewardfunc: the reply contains no ```python code block")

        full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
        file_name = f"GPTrewardfunc_refine2_{step}_{rewardfunc_index}_{morphology_index}.py"
        file_path = os.path.join(folder_name, "env", file_name)
        self._write_text(file_path, full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        """Ask the model for an improved morphology parameter set.

        The prompt lists every entry of ``parameter_list`` with its paired
        entry of ``fitness_list`` (one per line), then ``best_parameter`` with
        ``max(fitness_list)`` and a fixed hint parameter set.

        Returns:
            ``(file_path, parameter)`` where ``file_path`` is
            ``<folder_name>/assets/GPTAnt_refine2_<step>_<rewardfunc_index>_<morphology_index>.xml``.

        Raises:
            ValueError: if the reply has no usable ``"parameters"`` list.
        """
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            # Newline after each fitness so consecutive entries do not run together.
            morphology_improve_prompts += f"parameter:{parameter_content} \nfitness:{fitness}\n"
        morphology_improve_prompts += (
            f"best parameter:{best_parameter} \n"
            f"best fitness:{max(fitness_list)}\n"
            "this is helpful [0.045, 0.22, 0.11,0.31,0.055, 0.055,0.055,0.012, 0.012,0.012]\n"
        )

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]

        responses = self._chat(messages, response_format={'type': 'json_object'})
        print(responses)
        content = responses.choices[0].message.content
        _, parameter = self._parse_parameters(content)
        if parameter is None:
            raise ValueError(f"improve_morphology: reply has no usable 'parameters' list: {content}")
        print(parameter)

        filename = f"GPTAnt_refine2_{step}_{rewardfunc_index}_{morphology_index}.xml"
        file_path = self._save_morphology(parameter, folder_name, filename)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [11]:

# Run folder: the log file, generated XML files (assets/) and generated reward
# functions (env/) all live below it.
folder_name = "results/Div_m25_r5"

# Create the folder tree first: logging's file handler cannot open a log file in
# a directory that does not exist, and DGA writes into assets/ and env/.
os.makedirs(os.path.join(folder_name, "assets"), exist_ok=True)
os.makedirs(os.path.join(folder_name, "env"), exist_ok=True)

log_file = os.path.join(folder_name, "parameters_refine.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

# Running "best so far" record, updated by the coarse optimization loop.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Size of the search grid: number of candidate morphologies / reward functions.
morphology_nums = 26
rewardfunc_nums = 6

# Result grids indexed [rewardfunc_index][morphology_index]; None = not evaluated yet.
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()



# Generate the morphology and reward-function files; the cells below return their paths: [GPTAnt_{i}.xml] and [GPTrewardfunc_{j}.py]



In [4]:
# Mark the beginning of the run in the log file.
logging.info("start!")

# print configuration info

In [7]:
# Ask the designer for `morphology_nums` diversified ant morphologies; the call
# returns the XML paths, the compute_ant_volume value and the raw parameters of
# each accepted design, in that order.
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(
    morphology_nums=morphology_nums,
    folder_name=folder_name,
)

params: [0.2, 0.3, 0.06, 0.2, 0.15, 0.1, 0.05, 0.02, 0.02, 0.015]
params: [0.25, 0.35, 0.08, 0.25, 0.2, 0.15, 0.1, 0.03, 0.025, 0.02]
params: [0.15, 0.2, 0.05, 0.15, 0.1, 0.08, 0.04, 0.015, 0.015, 0.01]
params: [0.3, 0.45, 0.1, 0.4, 0.3, 0.25, 0.15, 0.05, 0.04, 0.03]
params: [0.18, 0.25, 0.04, 0.12, 0.09, 0.06, 0.03, 0.02, 0.018, 0.015]
params: [0.35, 0.5, 0.1, 0.25, 0.05, 0.15, 0.02, 0.04, 0.03, 0.025]
params: [0.45, 0.6, 0.15, 0.35, 0.2, 0.25, 0.1, 0.07, 0.06, 0.05]
params: [0.2, 0.1, 0.3, 0.2, 0.4, 0.15, 0.35, 0.015, 0.025, 0.02]
params: [0.22, 0.18, 0.07, 0.1, 0.05, 0.12, 0.06, 0.025, 0.02, 0.015]
params: [0.1, 0.15, 0.02, 0.3, 0.1, 0.05, 0.02, 0.01, 0.015, 0.01]
params: [0.4, 0.25, 0.06, 0.2, 0.05, 0.3, 0.15, 0.03, 0.025, 0.02]
params: [0.25, 0.1, 0.2, 0.15, 0.3, 0.1, 0.2, 0.02, 0.03, 0.02]
params: [0.3, 0.15, 0.1, 0.05, 0.2, 0.08, 0.04, 0.02, 0.015, 0.01]
params: [0.5, 0.2, 0.12, 0.25, 0.08, 0.3, 0.1, 0.04, 0.03, 0.025]
params: [0.15, 0.25, 0.08, 0.12, 0.07, 0.15, 0.09, 0.015, 0.

In [8]:
# Ask the designer for `rewardfunc_nums` diversified reward functions and keep
# the paths of the Python files it wrote.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(
    rewardfunc_nums=rewardfunc_nums,
    folder_name=folder_name,
)

initial Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_0.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_1.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_2.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_3.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_4.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_5.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_6.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_7.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_8.py
Saved: results/div2025-03-21_15-03-44/env/GPTrewardfunc_9.py


In [9]:
# Sanity check: the result grid is allocated as (rewardfunc_nums, morphology_nums).
efficiency_matrix.shape

(10, 50)

# enter coarse optimization stage

In [5]:
# Re-use the files of an earlier generation run instead of calling the LLM again.
# Paths and counts are derived from the configuration cell (folder_name,
# morphology_nums, rewardfunc_nums) so they cannot drift apart from it.
morphology_list = [f'{folder_name}/assets/GPTAnt_{i}.xml' for i in range(morphology_nums)]
rewardfunc_list = [f'{folder_name}/env/GPTrewardfunc_{i}.py' for i in range(rewardfunc_nums)]

# Design parameters; entry k is paired with GPTAnt_{k}.xml above.
parameter_list = [
    [0.1, 0.15, 0.05, 0.15, 0.05, 0.1, 0.05, 0.02, 0.02, 0.02],
    [0.12, 0.2, 0.08, 0.25, 0.08, 0.15, 0.06, 0.025, 0.025, 0.025],
    [0.2, 0.1, 0.15, 0.1, 0.15, 0.05, 0.1, 0.03, 0.03, 0.03],
    [0.25, 0.3, 0.05, 0.3, 0.05, 0.15, 0.05, 0.01, 0.01, 0.01],
    [0.15, 0.25, 0.1, 0.2, 0.07, 0.12, 0.04, 0.035, 0.035, 0.035],
    [0.08, 0.18, 0.06, 0.12, 0.04, 0.1, 0.03, 0.015, 0.015, 0.015],
    [0.1, 0.05, 0.1, 0.05, 0.1, 0.2, 0.1, 0.04, 0.04, 0.04],
    [0.05, 0.1, 0.02, 0.07, 0.02, 0.02, 0.01, 0.015, 0.015, 0.015],
    [0.2, 0.12, 0.2, 0.25, 0.15, 0.18, 0.12, 0.05, 0.05, 0.05],
    [0.05, 0.07, 0.02, 0.04, 0.02, 0.03, 0.01, 0.01, 0.01, 0.01],
    [0.3, 0.05, 0.25, 0.05, 0.25, 0.1, 0.05, 0.02, 0.02, 0.02],
    [0.15, 0.2, 0.08, 0.1, 0.04, 0.15, 0.05, 0.04, 0.03, 0.02],
    [0.2, 0.15, 0.04, 0.18, 0.06, 0.12, 0.03, 0.025, 0.025, 0.025],
    [0.12, 0.18, 0.12, 0.22, 0.08, 0.18, 0.08, 0.02, 0.02, 0.02],
    [0.25, 0.3, 0.15, 0.35, 0.1, 0.25, 0.08, 0.05, 0.04, 0.04],
    [0.05, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.015, 0.015, 0.015],
    [0.3, 0.1, 0.3, 0.1, 0.3, 0.1, 0.3, 0.05, 0.05, 0.05],
    [0.1, 0.05, 0.1, 0.05, 0.1, 0.15, 0.15, 0.03, 0.03, 0.03],
    [0.2, 0.08, 0.2, 0.08, 0.2, 0.1, 0.1, 0.01, 0.01, 0.01],
    [0.2, 0.3, 0.05, 0.15, 0.03, 0.2, 0.04, 0.04, 0.02, 0.02],
    [0.15, 0.1, 0.15, 0.08, 0.12, 0.12, 0.08, 0.015, 0.015, 0.015],
    [0.18, 0.07, 0.22, 0.07, 0.22, 0.09, 0.09, 0.03, 0.03, 0.03],
    [0.05, 0.3, 0.1, 0.05, 0.2, 0.05, 0.2, 0.02, 0.02, 0.02],
    [0.13, 0.08, 0.13, 0.12, 0.18, 0.2, 0.24, 0.02, 0.04, 0.04],
    [0.2, 0.12, 0.08, 0.06, 0.04, 0.15, 0.1, 0.01, 0.015, 0.02],
    [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08],
]

# The coarse loop indexes morphology_list, parameter_list and material_list with
# the same j, so a length mismatch must fail here rather than mid-training.
assert len(parameter_list) == len(morphology_list), (
    f"expected {len(morphology_list)} parameter sets, got {len(parameter_list)}"
)

material_list = [compute_ant_volume(parameter) for parameter in parameter_list]

params: [0.1, 0.15, 0.05, 0.15, 0.05, 0.1, 0.05, 0.02, 0.02, 0.02]
params: [0.12, 0.2, 0.08, 0.25, 0.08, 0.15, 0.06, 0.025, 0.025, 0.025]
params: [0.2, 0.1, 0.15, 0.1, 0.15, 0.05, 0.1, 0.03, 0.03, 0.03]
params: [0.25, 0.3, 0.05, 0.3, 0.05, 0.15, 0.05, 0.01, 0.01, 0.01]
params: [0.15, 0.25, 0.1, 0.2, 0.07, 0.12, 0.04, 0.035, 0.035, 0.035]
params: [0.08, 0.18, 0.06, 0.12, 0.04, 0.1, 0.03, 0.015, 0.015, 0.015]
params: [0.1, 0.05, 0.1, 0.05, 0.1, 0.2, 0.1, 0.04, 0.04, 0.04]
params: [0.05, 0.1, 0.02, 0.07, 0.02, 0.02, 0.01, 0.015, 0.015, 0.015]
params: [0.2, 0.12, 0.2, 0.25, 0.15, 0.18, 0.12, 0.05, 0.05, 0.05]
params: [0.05, 0.07, 0.02, 0.04, 0.02, 0.03, 0.01, 0.01, 0.01, 0.01]
params: [0.3, 0.05, 0.25, 0.05, 0.25, 0.1, 0.05, 0.02, 0.02, 0.02]
params: [0.15, 0.2, 0.08, 0.1, 0.04, 0.15, 0.05, 0.04, 0.03, 0.02]
params: [0.2, 0.15, 0.04, 0.18, 0.06, 0.12, 0.03, 0.025, 0.025, 0.025]
params: [0.12, 0.18, 0.12, 0.22, 0.08, 0.18, 0.08, 0.02, 0.02, 0.02]
params: [0.25, 0.3, 0.15, 0.35, 0.1, 0.25, 0

In [6]:
# Record the coarse-stage inputs in the log. Lazy %s formatting produces the
# same text as the former f-strings.
logging.info('folder_name:%s', folder_name)
logging.info('morphology_nums:%s', morphology_nums)
logging.info('rewardfunc_nums:%s', rewardfunc_nums)
logging.info('parameter_list:%s', parameter_list)
logging.info('morphology_list:%s', morphology_list)
logging.info('material_list:%s', material_list)
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [None]:
# Coarse stage: train and evaluate every (reward function i, morphology j) pair
# and fill fitness_matrix[i][j] / efficiency_matrix[i][j].
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        print(i, rewardfunc)
        print(j, morphology)

        # Install the candidate files under the fixed names used for training
        # (presumably what GPTAntEnv / Train read -- TODO confirm).
        shutil.copy(morphology, "GPTAnt.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # Import inside the loop: the module file only exists after the copy
        # above, and must be reloaded because its content changes every pass.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # reload the module
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        env_name = "GPTAntEnv"
        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Previously never updated, so the stage summary logged "Reward: None".
            best_reward = reward

0 results/Div_m25_r5/env/GPTrewardfunc_0.py
0 results/Div_m25_r5/assets/GPTAnt_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Div_m25_r5/env/GPTrewardfunc_0.py
1 results/Div_m25_r5/assets/GPTAnt_1.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Div_m25_r5/env/GPTrewardfunc_0.py
2 results/Div_m25_r5/assets/GPTAnt_2.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
5

In [25]:
# Inspect the coarse-stage fitness grid, indexed [rewardfunc_index][morphology_index].
fitness_matrix

array([[0.27558539343461225, 0.19421193503322864, 0.1535262595097764,
        0.2608509365115297, 0.20913208778923514, 0.1296366470891181,
        0.32644959715401745, 0.22675637220703113, 1.83810518322508,
        0.08843047560529368, 0.20603592032447943, 0.21686356959239994,
        0.30843809928057847, 0.19615213058915706, 0.3807929188129826,
        19.086681266359715, 0.4918550583515354, 0.35858029779579736,
        0.15471580967738463, 0.21687406570344087, 2.7353691403550546,
        0.2556833117361542, 10.279140797923707, 0.3764429102613109,
        0.13255691136768094, 2.6196942880541765],
       [0.05775412183757486, 0.06193613704025872, 0.11137490961088509,
        0.048545216585012235, 0.02975788368424515, 0.031030718513027453,
        0.19037365487145508, 0.037347130521108524, 0.10676618986800868,
        0.023246498866943948, 0.15465988174666717, 0.05028479026720202,
        0.11117915441780081, 0.23971089645956414, 0.04570191212076285,
        -0.07706809825221372, 0.2831

In [15]:
# Inspect the coarse-stage efficiency grid (fitness / material), indexed [rewardfunc_index][morphology_index].
efficiency_matrix

array([[40.8732839461963, 14.886870991649474, 3.8181362108020647,
        3.924743147882202, 8.154481690018647, 37.00085456266262,
        19.908444727602674, 181.31065238242172, 29.140915541321963,
        116.15938274567151, 1.7666608272502324, 9.795192313504627,
        8.121781051327352, 18.004590893771027, 4.102376182456072,
        8001.6669430415905, 3.2969629224815398, 34.235119679531074,
        4.513339026623524, 5.071899799882528, 174.98419915472394,
        7.879836005126908, 2240.559760888966, 16.453101791217737,
        3.7863705248532153, 14.380017401784157],
       [8.56576827788411, 4.747572705465803, 2.7698491236481146,
        0.7304075986940642, 1.1603198734449414, 8.85677876168883,
        11.60988838289773, 29.86214911398938, 1.6926477059128147,
        30.53584118935253, 1.3261355311184888, 2.2712398953740687,
        2.9275655366806044, 22.002802674487665, 0.4923579891174329,
        -32.30908744909503, 1.8982577633327395, 18.77305783503761,
        1.0731257571

# print coarse optimization info

In [15]:
# Close the coarse stage in the log: best pair found plus the full result grids.
# Lazy %s formatting produces the same text as the former f-strings.
logging.info('_________________________________end coarse optimization stage_________________________________')
logging.info(
    "Stage1: Final best morphology: %s, Fitness: %s, best_efficiency: %s, "
    "best reward function: %s, Material cost: %s, Reward: %s",
    best_morphology, best_fitness, best_efficiency,
    best_rewardfunc, best_material, best_reward,
)
logging.info('folder_name:%s', folder_name)
logging.info('parameter_list:%s', parameter_list)
logging.info('fitness_matrix:%s', fitness_matrix)
logging.info('efficiency_matrix:%s', efficiency_matrix)
logging.info('_________________________________enter fine optimization stage_________________________________')

In [6]:
# Coarse-stage efficiencies pasted back as a literal so the fine stage can run
# without repeating the coarse training. Rows follow the reward functions,
# columns the morphologies (same indexing as efficiency_matrix[i][j] above).
efficiency_matrix = np.array([[40.8732839461963, 14.886870991649474, 3.8181362108020647,
        3.924743147882202, 8.154481690018647, 37.00085456266262,
        19.908444727602674, 181.31065238242172, 29.140915541321963,
        116.15938274567151, 1.7666608272502324, 9.795192313504627,
        8.121781051327352, 18.004590893771027, 4.102376182456072,
        8001.6669430415905, 3.2969629224815398, 34.235119679531074,
        4.513339026623524, 5.071899799882528, 174.98419915472394,
        7.879836005126908, 2240.559760888966, 16.453101791217737,
        3.7863705248532153, 14.380017401784157],
       [8.56576827788411, 4.747572705465803, 2.7698491236481146,
        0.7304075986940642, 1.1603198734449414, 8.85677876168883,
        11.60988838289773, 29.86214911398938, 1.6926477059128147,
        30.53584118935253, 1.3261355311184888, 2.2712398953740687,
        2.9275655366806044, 22.002802674487665, 0.4923579891174329,
        -32.30908744909503, 1.8982577633327395, 18.77305783503761,
        1.073125757177042, 2.6599745296923447, 2.4203727285710643,
        4.409858418903599, 35.87877320319372, 12.424603400081637,
        3.4373091685773165, 2.677664364092048],
       [-16.734064701242268, -2.238124168809396, 1.3140288872239092,
        0.4518621828981377, 0.2447193610233926, 115.83546753648419,
        -3.5972907726171206, 11.075718388745365, 3.4523355016193404,
        175.31981620850593, 1.32395017434334, 3.4392111194700004,
        7.150679824177766, 3.1442599604380113, -0.02886405429047396,
        47.79575727182832, 1.3085121158100705, 13.763936939128685,
        7.182956325828912, 5.486546975128887, 0.4267820863561428,
        1.146320587964664, -25.450370202472598, -10.008785682186836,
        3.314933888735352, 2.27308998552627],
       [28.513515683472846, 9.610345495448204, 2.897907923314545,
        4.425293958919599, 6.650842201122289, 59.173792580639905,
        20.867896927205134, 72.16303206577332, 37.91226837845095,
        130.75157980916308, 1.9097983979410422, 6.653676026237246,
        6.304579363835615, 23.296978046043435, 69.61704886119135,
        4223.174851794523, 4.919946740069522, 32.242493589791856,
        4.485163722191625, 5.542777172614466, 36.32247223146432,
        6.851383777873784, 1135.066715781883, 19.690290675642036,
        3.6553947220371183, 15.792444784739052],
       [11.490411942631809, 2.870231871798866, 3.5626441095283212,
        2.896918161090745, 5.546079260651306, 21.04852383589865,
        1.1782792776054958, 50.0391090809593, 2.932356199787889,
        30.66576935955976, 1.9921225681655126, 4.503459413756689,
        5.734811761763073, 7.736290690262997, 1.1733056564101327,
        28.059476266875375, 0.9086411355498911, 13.0054005729928,
        2.306620576842187, 5.599501544858376, 5.356600209816569,
        3.674676261172876, 71.65965029896071, 5.448368123594595,
        3.075277044325966, 0.5305745904543809],
       [29.895801839670952, 8.809748094356104, 3.5935980704301826,
        4.613601726210213, 8.561589475967123, 53.88355432077319,
        32.67469270912093, 185.76482858295998, 39.707973639671415,
        126.18373950229646, 1.8988896169676213, 6.630709044660771,
        5.0328904591490975, 16.896753510191502, 6.675012917740373,
        3742.683310996947, 2.201716623461198, 53.52504062768046,
        4.915432087661846, 5.524835451655485, 137.40955469411256,
        8.95398207449075, 1308.1082638727516, 21.707206057016904,
        4.508649494814096, 20.687045858312775]], dtype=object)

# Summary statistics over every entry of the grid.
mean = efficiency_matrix.mean()
std = efficiency_matrix.std()

# Printed labels mean "mean:" and "standard deviation:".
print("平均值：", mean)
print("标准差：", std)



平均值： 151.51679839196308
标准差： 802.932118101724


In [7]:
# Coarse-stage fitness values pasted back as a literal so the fine stage can
# run without repeating the coarse training. Rows follow the reward functions,
# columns the morphologies (same indexing as fitness_matrix[i][j] above).
fitness_matrix = np.array([[0.27558539343461225, 0.19421193503322864, 0.1535262595097764,
        0.2608509365115297, 0.20913208778923514, 0.1296366470891181,
        0.32644959715401745, 0.22675637220703113, 1.83810518322508,
        0.08843047560529368, 0.20603592032447943, 0.21686356959239994,
        0.30843809928057847, 0.19615213058915706, 0.3807929188129826,
        19.086681266359715, 0.4918550583515354, 0.35858029779579736,
        0.15471580967738463, 0.21687406570344087, 2.7353691403550546,
        0.2556833117361542, 10.279140797923707, 0.3764429102613109,
        0.13255691136768094, 2.6196942880541765],
       [0.05775412183757486, 0.06193613704025872, 0.11137490961088509,
        0.048545216585012235, 0.02975788368424515, 0.031030718513027453,
        0.19037365487145508, 0.037347130521108524, 0.10676618986800868,
        0.023246498866943948, 0.15465988174666717, 0.05028479026720202,
        0.11117915441780081, 0.23971089645956414, 0.04570191212076285,
        -0.07706809825221372, 0.28319022837161056, 0.19662991489556017,
        0.03678640568056571, 0.11374031697852904, 0.0378354897291966,
        0.1430901866611466, 0.16460304601117798, 0.2842718608393865,
        0.12033663473019032, 0.48780622748535996],
       [-0.11282831618136405, -0.029198239570486634,
        0.052836758251980254, 0.03003220061591176, 0.006276140267248873,
        0.40584256236566213, -0.05898673350101184, 0.013851859713798563,
        0.2177610298743069, 0.13346846689342082, 0.15440501562439857,
        0.07614343609383242, 0.27155892033963624, 0.0342553348756994,
        -0.002679234422491596, 0.11400904229399393, 0.19520944524029812,
        0.14416414058649327, 0.2462294317516182, 0.23460434868935626,
        0.006671496936947425, 0.03719557666573803, -0.11676008077852035,
        -0.22899854739823453, 0.11605240289996412, 0.4141024788035025],
       [0.19225047951071506, 0.12537515748311492, 0.1165241204888199,
        0.29411913850868865, 0.17056933450227368, 0.2073220242172988,
        0.34218225675321234, 0.09025077756700449, 2.391370885909019,
        0.09953930638546424, 0.2227292666960034, 0.1473110366574226,
        0.23942685273782555, 0.2538103701984817, 6.462030310230562,
        10.073700005522591, 0.7339787397736167, 0.33770943584634816,
        0.15374996930679116, 0.23700874783467998, 0.5677962360218549,
        0.22231230360154172, 5.207408786958495, 0.4505089933732938,
        0.1279715841337919, 2.8770046823350044],
       [0.07747333686484077, 0.03744462393264413, 0.1432529888674462,
        0.19253841253931145, 0.1422362792529531, 0.07374586583237354,
        0.01932088622552884, 0.06258146829529754, 0.1849625871311181,
        0.02334541132338414, 0.23233028117230695, 0.09970567730817131,
        0.21778898351871048, 0.08428349806477119, 0.10890919449924948,
        0.06693133865969902, 0.13555497870449573, 0.13621919403441293,
        0.07907021122488855, 0.23943427785664476, 0.08373486853059152,
        0.11923514593496648, 0.32875696859821274, 0.12465731864101638,
        0.1076622649368379, 0.09665796536710537],
       [0.20157045181087382, 0.11493068123717397, 0.1444976395482757,
        0.3066346727995523, 0.2195728864159426, 0.18878708067565053,
        0.535784709351743, 0.2323269927035843, 2.5046375793838163,
        0.09606187493500919, 0.2214570356639862, 0.14680255234715842,
        0.1911323584441624, 0.18408272760091024, 0.6195915584072444,
        8.927565211901783, 0.328461521639122, 0.5606238619133901,
        0.16849943043736598, 0.23624156115441847, 2.1479988325566395,
        0.29053698434109526, 6.0012811342914665, 0.49665551975780986,
        0.1578431502012552, 3.7686836084783084]], dtype=object)

# configuration of fine optimization

In [8]:
# Pair every filled-in (non-None) efficiency with its
# (rewardfunc_index, morphology_index) coordinate.
all_values_with_coords = [
    ((row, col), efficiency_matrix[row][col])
    for row in range(len(efficiency_matrix))
    for col in range(len(efficiency_matrix[0]))
    if efficiency_matrix[row][col] is not None
]

# Rank the pairs from the highest to the lowest efficiency.
sorted_values = sorted(all_values_with_coords, key=lambda entry: entry[1], reverse=True)

# Keep the top 5% of the ranked pairs (factor 0.05), but never fewer than one.
top_k = max(1, int(len(sorted_values) * 0.05))
top_entries = sorted_values[:top_k]
efficiency_coarse_best = [entry[0] for entry in top_entries]

# The log labels say "fitness", but the ranking above is by efficiency.
logging.info("fitness_coarse_best %s", efficiency_coarse_best)
logging.info("fitness_coarse_best values %s", top_entries)


In [9]:
# Starting points of the fine stage: (rewardfunc_index, morphology_index) pairs
# selected by efficiency in the previous cell.
coarse_best = efficiency_coarse_best
coarse_best

[(0, 15), (3, 15), (5, 15), (0, 22), (5, 22), (3, 22), (5, 7)]

In [10]:
# Alias (not a copy): the fine stage reads its baseline efficiencies through this name.
efficiency_matrix_select = efficiency_matrix

# enter fine optimization stage

In [None]:
final_optimized_results = []  # best result found for each coarse_best candidate

# DGA.__init__ only wires up the API client and the model name, so a single
# instance is reused for every refinement round instead of being rebuilt on
# each pass of the inner loop.
designer = DGA()


def _train_and_score(morphology_path, rewardfunc_path, parameter,
                     morphology_index, rewardfunc_index):
    """Train and evaluate one (morphology, reward function) candidate.

    Copies the two files to the fixed names ``GPTAnt.xml`` and
    ``GPTrewardfunc.py``, reloads the reward module, patches
    ``GPTAntEnv._get_rew`` with its ``_get_rew``, then runs ``Train`` followed
    by ``Eva`` on the returned model path.

    Args:
        morphology_path: path of the morphology XML to install.
        rewardfunc_path: path of the reward-function source to install.
        parameter: morphology parameters passed to ``compute_ant_volume``.
        morphology_index: index forwarded to ``Train``.
        rewardfunc_index: index forwarded to ``Train``.

    Returns:
        ``(fitness, material, efficiency)`` with
        ``efficiency = fitness / material``.
    """
    shutil.copy(morphology_path, "GPTAnt.xml")
    shutil.copy(rewardfunc_path, "GPTrewardfunc.py")

    # The file on disk was just replaced, so the module has to be reloaded
    # before its reward function is attached to the environment class.
    import GPTrewardfunc
    importlib.reload(GPTrewardfunc)
    GPTAntEnv._get_rew = GPTrewardfunc._get_rew

    model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
    fitness, _ = Eva(model_path)
    material = compute_ant_volume(parameter)
    return fitness, material, fitness / material


for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    # Baseline: the coarse-stage scores of this (reward, morphology) pair.
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        # -------- morphology refinement (reward function held fixed) --------
        iteration += 1
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration,
        )
        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            improved_morphology, best_rewardfunc, improved_parameter,
            morphology_index, rewardfunc_index,
        )

        morphology_improved = improved_efficiency > best_efficiency
        if morphology_improved:
            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency

            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # A failed morphology step does not end the round: the reward
            # function still gets its turn below.
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")

        # -------- reward-function refinement (morphology held fixed) --------
        iteration += 1
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration,
        )
        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            best_morphology, improved_rewardfunc, best_parameter,
            morphology_index, rewardfunc_index,
        )

        rewardfunc_improved = improved_efficiency > best_efficiency
        if rewardfunc_improved:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency

            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # Logged on every failed reward step, not only on the round that
            # terminates the search.
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")

        # Stop once a full round improved neither the morphology nor the
        # reward function.
        if not (morphology_improved or rewardfunc_improved):
            break

    # Record the final best design for this coarse_best candidate.
    # NOTE(review): "best_iteration" stores the iteration counter at loop exit
    # (always including the last, non-improving round), not the iteration in
    # which the best design was found.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

params: [0.05, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.015, 0.015, 0.015]
ChatCompletion(id='chatcmpl-BW8qW378iyegFPEGu4kMYIKin1uo0', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n  "parameters": [0.045, 0.22, 0.11, 0.31, 0.055, 0.055, 0.055, 0.012, 0.012, 0.012],\n  "description": "Optimized for reduced material cost while enhancing control parameters, allowing for efficient movement over desert terrain."\n}', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1747000200, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_7a53abb7a2', usage=CompletionUsage(completion_tokens=81, prompt_tokens=4570, total_tokens=4651, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_toke

In [23]:
# Write the complete list of fine-stage results to the log; the logger renders
# the list with str(), producing the same text as before.
logging.info("%s", final_optimized_results)

# Disabled summary line kept from an earlier version of the notebook:
# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [46]:
# Bare expression: display the per-candidate results collected by the
# fine-optimization loop.
final_optimized_results

[{'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_refine2_0_15_1.xml',
  'best_parameter': [0.045,
   0.22,
   0.11,
   0.31,
   0.055,
   0.055,
   0.055,
   0.012,
   0.012,
   0.012],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitness': 29.687550259614532,
  'best_material': 0.0016241283212710322,
  'best_efficiency': 18279.066912878687,
  'best_iteration': 4},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_refine2_3_15_1.xml',
  'best_parameter': [0.05,
   0.22,
   0.11,
   0.31,
   0.055,
   0.055,
   0.055,
   0.012,
   0.012,
   0.012],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 15.332710513564704,
  'best_material': 0.0017660235894581713,
  'best_efficiency': 8682.053062648438,
  'best_iteration': 4},
 {'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_refine2_5_15_7.xml',
  'best_parameter': [0.04, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01],
  'best_rewardfunc': 'results/Div_m25_r5/

In [21]:
 # Scratch cell: a hand-pasted entry in the same shape as the items of
 # final_optimized_results. The trailing comma after the closing brace makes
 # the expression a one-element tuple wrapping the dict.
 {'best_morphology': 'results/Div_m25_r5/assets/GPTAnt_refine2_5_15_7.xml',
  'best_parameter': [0.04, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01],
  'best_rewardfunc': 'results/Div_m25_r5/env/GPTrewardfunc_refine2_5_15_6.py',
  'best_fitness': 16.902433015902933,
  'best_material': 0.001070389545142885,
  'best_efficiency': 15790.917514657383,
  'best_iteration': 10},

5272.058355242536

In [None]:
# Hard-coded material volume kept as a scratch value.
# NOTE(review): the saved output of this cell shows the same number next to
# params [0.04, 0.25, 0.12, 0.35, 0.06, 0.06, 0.06, 0.01, 0.01, 0.01];
# presumably it is compute_ant_volume of those params - confirm.
material=0.0012196922555950196

params: [0.04, 0.25, 0.12, 0.35, 0.06, 0.06, 0.06, 0.01, 0.01, 0.01]


0.0012196922555950196

In [2]:
# best new
# Evaluate the saved 1e6-step checkpoint of the refined design below and
# record rollouts (qpos log + video).
# Numbers noted from earlier runs:
#   5e5
#   fitness:20.08678694564774
#   efficiency:18765.866162273083
#
#   fitness:39.5998887416585
#   efficiency:36995.7731008783

morphology = "results/Div_m25_r5/assets/GPTAnt_refine2_5_15_7.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine2_5_15_6.py"
parameter = [0.04, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01]

# Placeholder indices: they appear in the checkpoint file name and are passed
# on to the evaluator.
morphology_index = rewardfunc_index = 9999

# Install the design under the fixed file names the environment setup uses.
for src_path, dst_name in ((morphology, "GPTAnt.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(src_path, dst_name)

# Reload so the freshly copied reward function replaces any cached version.
import GPTrewardfunc
_get_rew = importlib.reload(GPTrewardfunc)._get_rew
GPTAntEnv._get_rew = _get_rew

# Alternative (disabled): retrain instead of loading the saved checkpoint.
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# Alternative (disabled): plain evaluation without qpos logging / video.
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(
    model_path,
    run_steps=10,
    video=True,
    rewardfunc_index=rewardfunc_index,
    morphology_index=morphology_index,
)

material = compute_ant_volume(parameter)
efficiency = fitness / material

# Same three lines go to the log first, then to the cell output.
summary_lines = ["best 1e6 steps train\n", f"fitness:{fitness}", f"efficiency:{efficiency}"]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)


Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_desert/qpos.txt
Average Fitness: 41.9974, Average Reward: 1618.3945
params: [0.04, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01]
best 1e6 steps train

fitness:41.99740470834796
efficiency:39235.626785519264


In [21]:
# best new
# Train the design below for 1e6 steps, then evaluate it.

morphology = "results/Div_m25_r5/assets/GPTAnt_refine_0_15_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine_3_22_2.py"
parameter = [0.045, 0.22, 0.11, 0.31, 0.055, 0.055, 0.055, 0.012, 0.012, 0.012]

# Placeholder indices handed to Train for this one-off run.
morphology_index = rewardfunc_index = 9999

# Install the design under the fixed file names the environment setup uses.
for src_path, dst_name in ((morphology, "GPTAnt.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(src_path, dst_name)

# Reload so the freshly copied reward function replaces any cached version.
import GPTrewardfunc
_get_rew = importlib.reload(GPTrewardfunc)._get_rew
GPTAntEnv._get_rew = _get_rew

model_path = Train(
    morphology_index,
    rewardfunc_index,
    folder_name,
    stage="fine",
    total_timesteps=1e6,
)
# Alternative (disabled): load the saved checkpoint instead of retraining.
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva(model_path)
material = compute_ant_volume(parameter)
efficiency = fitness / material

# Same three lines go to the log first, then to the cell output.
summary_lines = ["best 1e6 steps train\n", f"fitness:{fitness}", f"efficiency:{efficiency}"]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [0.045, 0.22, 0.11, 0.31, 0.055, 0.055, 0.055, 0.012, 0.012, 0.012]
best 1e6 steps train

fitness:37.857367040803084
efficiency:23309.34480052423


In [22]:
# best new
# Train the design below for 1e6 steps, then evaluate it.

morphology = "results/Div_m25_r5/assets/GPTAnt_refine_0_15_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine_5_22_0.py"
parameter = [0.045, 0.22, 0.11, 0.31, 0.055, 0.055, 0.055, 0.012, 0.012, 0.012]

# Placeholder indices handed to Train for this one-off run.
morphology_index = rewardfunc_index = 9999

# Install the design under the fixed file names the environment setup uses.
for src_path, dst_name in ((morphology, "GPTAnt.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(src_path, dst_name)

# Reload so the freshly copied reward function replaces any cached version.
import GPTrewardfunc
_get_rew = importlib.reload(GPTrewardfunc)._get_rew
GPTAntEnv._get_rew = _get_rew

model_path = Train(
    morphology_index,
    rewardfunc_index,
    folder_name,
    stage="fine",
    total_timesteps=1e6,
)
# Alternative (disabled): load the saved checkpoint instead of retraining.
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva(model_path)
material = compute_ant_volume(parameter)
efficiency = fitness / material

# Same three lines go to the log first, then to the cell output.
summary_lines = ["best 1e6 steps train\n", f"fitness:{fitness}", f"efficiency:{efficiency}"]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)


Process ForkServerProcess-375:
Traceback (most recent call last):
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/root/miniconda3/envs/robodesign/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 32, in _worker
    cmd, data = remote.recv()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
Process ForkServerProcess-371:
Traceback (most recent call la

KeyboardInterrupt: 

In [20]:
# best
# Evaluate the saved 1e6-step checkpoint of the design below.

morphology = "results/Div_m25_r5/assets/GPTAnt_refine_0_15_0.xml"
rewardfunc = "results/Div_m25_r5/env/GPTAnt_refine_0_15_1.py"
parameter = [0.045, 0.22, 0.11, 0.31, 0.055, 0.055, 0.055, 0.012, 0.012, 0.012]

# Placeholder indices: they appear in the checkpoint file name below.
morphology_index = rewardfunc_index = 999

# Install the design under the fixed file names the environment setup uses.
for src_path, dst_name in ((morphology, "GPTAnt.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(src_path, dst_name)

# Reload so the freshly copied reward function replaces any cached version.
import GPTrewardfunc
_get_rew = importlib.reload(GPTrewardfunc)._get_rew
GPTAntEnv._get_rew = _get_rew

# Alternative (disabled): retrain instead of loading the saved checkpoint.
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
fitness, _ = Eva(model_path)
material = compute_ant_volume(parameter)
efficiency = fitness / material

# Same three lines go to the log first, then to the cell output.
summary_lines = ["best 1e6 steps train\n", f"fitness:{fitness}", f"efficiency:{efficiency}"]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [0.045, 0.22, 0.11, 0.31, 0.055, 0.055, 0.055, 0.012, 0.012, 0.012]
best 1e6 steps train

fitness:49.292841434555754
efficiency:30350.336724612684


{'best_morphology': 'results/noDiv_m25_r5/assets/GPTAnt_refine_0_1_0.xml',
  'best_parameter': [0.1, 0.18, 0.1, 0.15, 0.1, 0.1, 0.05, 0.02, 0.02, 0.02],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitness': 28.671216806088314,
  'best_material': 0.007094102747340339,
  'best_efficiency': 4041.5564627728977,
  'best_iteration': 1},

In [26]:
# nodiv
# Train the noDiv_m25_r5 design below for 5e5 steps, then evaluate it.

morphology = "results/noDiv_m25_r5/assets/GPTAnt_refine_0_1_0.xml"
rewardfunc = "results/noDiv_m25_r5/env/GPTrewardfunc_0.py"
parameter = [0.1, 0.18, 0.1, 0.15, 0.1, 0.1, 0.05, 0.02, 0.02, 0.02]

# Placeholder indices handed to Train for this one-off run.
morphology_index = rewardfunc_index = 333

# Install the design under the fixed file names the environment setup uses.
for src_path, dst_name in ((morphology, "GPTAnt.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(src_path, dst_name)

# Reload so the freshly copied reward function replaces any cached version.
import GPTrewardfunc
_get_rew = importlib.reload(GPTrewardfunc)._get_rew
GPTAntEnv._get_rew = _get_rew

model_path = Train(
    morphology_index,
    rewardfunc_index,
    folder_name,
    stage="fine",
    total_timesteps=5e5,
)
# Alternative (disabled): load the saved checkpoint instead of retraining.
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_ant_volume(parameter)
efficiency = fitness / material

# Same three lines go to the log first, then to the cell output.
summary_lines = ["no div 5e5 steps train\n", f"fitness:{fitness}", f"efficiency:{efficiency}"]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)





0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
params: [0.1, 0.18, 0.1, 0.15, 0.1, 0.1, 0.05, 0.02, 0.02, 0.02]
no div 5e5 steps train

fitness:0.11233624879263462
efficiency:15.835159539344252


In [3]:
# human
# Evaluate the saved 1e6-step checkpoint of the "human" baseline design and
# record rollouts (qpos log + video).

morphology = "results/Div_m25_r5/assets/GPTAnt_25.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_5.py"
parameter = [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]

# Placeholder indices: they appear in the checkpoint file name and are passed
# on to the evaluator.
morphology_index = rewardfunc_index = 888

# Install the design under the fixed file names the environment setup uses.
for src_path, dst_name in ((morphology, "GPTAnt.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(src_path, dst_name)

# Reload so the freshly copied reward function replaces any cached version.
import GPTrewardfunc
_get_rew = importlib.reload(GPTrewardfunc)._get_rew
GPTAntEnv._get_rew = _get_rew

# Alternative (disabled): retrain instead of loading the saved checkpoint.
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# Alternative (disabled): plain evaluation without qpos logging / video.
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(
    model_path,
    run_steps=10,
    video=True,
    rewardfunc_index=rewardfunc_index,
    morphology_index=morphology_index,
)

material = compute_ant_volume(parameter)
efficiency = fitness / material

# Same three lines go to the log first, then to the cell output.
summary_lines = ["human 1e6 steps train\n", f"fitness:{fitness}", f"efficiency:{efficiency}"]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)


Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_desert/qpos.txt
Average Fitness: 8.2921, Average Reward: 999.9924
params: [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]
human 1e6 steps train

fitness:8.292149674961552
efficiency:45.51724114065022


In [4]:
# eureka
# Evaluate the saved 1e6-step checkpoint of the Eureka baseline design and
# record rollouts (qpos log + video).

morphology = "results/Eureka/assets/GPTAnt_25.xml"
rewardfunc = "results/Eureka/env/GPTAnt_1_1.py"
parameter = [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]

# Placeholder indices: they appear in the checkpoint file name and are passed
# on to the evaluator.
morphology_index = rewardfunc_index = 222

# Install the design under the fixed file names the environment setup uses.
for src_path, dst_name in ((morphology, "GPTAnt.xml"), (rewardfunc, "GPTrewardfunc.py")):
    shutil.copy(src_path, dst_name)

# Reload so the freshly copied reward function replaces any cached version.
import GPTrewardfunc
_get_rew = importlib.reload(GPTrewardfunc)._get_rew
GPTAntEnv._get_rew = _get_rew

# Alternative (disabled): retrain instead of loading the saved checkpoint.
# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# NOTE(review): the checkpoint is read from the Div_m25_r5 run folder although
# the assets come from results/Eureka - confirm that is where it was saved.
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# Alternative (disabled): plain evaluation without qpos logging / video.
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(
    model_path,
    run_steps=10,
    video=True,
    rewardfunc_index=rewardfunc_index,
    morphology_index=morphology_index,
)

material = compute_ant_volume(parameter)
efficiency = fitness / material

# Same three lines go to the log first, then to the cell output.
summary_lines = ["eureka 1e6 steps train\n", f"fitness:{fitness}", f"efficiency:{efficiency}"]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)



Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_desert/qpos.txt
Average Fitness: 7.1453, Average Reward: 1007.4059
params: [0.25, 0.2, 0.2, 0.2, 0.2, 0.4, 0.4, 0.08, 0.08, 0.08]
eureka 1e6 steps train

fitness:7.145252525065034
efficiency:39.22169701980604


In [5]:
# eureka morphology
# Evaluate the saved 1e6-step checkpoint of the Eureka-morphology baseline
# design and record rollouts (qpos log + video).

morphology = "results/Eureka_morphology/assets/GPTAnt_0_iter2.xml"
rewardfunc = "results/Eureka_morphology/env/GPTrewardfunc_5.py"

# Placeholder indices: they appear in the checkpoint file name and are passed
# on to the evaluator.
morphology_index=111
rewardfunc_index=111

parameter = [0.18, 0.17, 0.15, 0.07, 0.07, 0.07, 0.07, 0.035, 0.025, 0.025]

# Install the design under the fixed file names the environment setup uses.
shutil.copy(morphology, "GPTAnt.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # reload so the freshly copied file is used
from GPTrewardfunc import _get_rew
GPTAntEnv._get_rew = _get_rew

# NOTE(review): the checkpoint is read from the Div_m25_r5 run folder although
# the assets come from results/Eureka_morphology - confirm that is where it
# was saved.
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# Alternative (disabled): plain evaluation without qpos logging / video.
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=10, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_ant_volume(parameter)
efficiency = fitness / material

# One shared list of messages feeds both the log and the cell output, so the
# two can no longer drift apart (the printed label used to be misspelled
# "morphlogy" while the logged one was correct).
summary_lines = [
    "eureka morphology 1e6 steps train\n",
    f"fitness:{fitness}",
    f"efficiency:{efficiency}",
]
for emit in (logging.info, print):
    for summary_line in summary_lines:
        emit(summary_line)



Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Saved qpos log to /root/autodl-tmp/Ant_desert/qpos.txt
Average Fitness: 0.0888, Average Reward: 969.0029
params: [0.18, 0.17, 0.15, 0.07, 0.07, 0.07, 0.07, 0.035, 0.025, 0.025]
eureka morphlogy 1e6 steps train

fitness:0.08875673148678011
efficiency:2.8895902706042786
