In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTSwimmer import GPTSwimmerEnv
import os

In [None]:
import prompts
class DGA:
    def __init__(self):
        api_key = "<api_key>"
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4o-mini"

    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTSwimmer_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
        messages.append({"role": "assistant", "content": initial_code})

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            # print(diverse_messages)
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_code = self.extract_code(response.choices[0].message.content)
            messages.append({"role": "assistant", "content": diverse_code})

            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files


    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums                                                                                                                                                                                                                                                                                   
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_swimmer_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = swimmer_design(parameter)  
            filename = f"GPTSwimmer_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_swimmer_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = swimmer_design(initial_parameter['parameters'])  

        filename = f"GPTSwimmer_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_swimmer_volume(diverse_parameter['parameters'])) 
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = swimmer_design(diverse_parameter['parameters'])  
            filename = f"GPTSwimmer_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, step, rewardfunc_index, morphology_index):
        reward_improve_prompts = prompts.reward_improve_prompts

        for rewardfunc_file, fitness in zip(rewardfunc_list, fitness_list):
            with open(rewardfunc_file, "r") as fp:
                reward_content = fp.read()
            reward_improve_prompts += f"\nreward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, "r") as fp:
            best_reward_content = fp.read()
        reward_improve_prompts += f"\nbest reward function:\n{best_reward_content}\nbest fitness: {max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content":reward_improve_prompts+ zeroshot_rewardfunc_format}
        ]
        print(messages)
        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTrewardfunc_refine_{step}_{rewardfunc_index}_{morphology_index}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f" This is best parameter, please carefully review it, you can reduce the geom size, best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format + " please refer to parameters [0.3, 1.2, 0.6, 0.025, 0.02, 0.02]"}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        # print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = swimmer_design(parameter)  
        filename = f"GPTSwimmer_refine2_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

folder_name = "results/Div_m25_r5"
log_file = os.path.join(folder_name, "new_parameter.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 51
rewardfunc_nums = 11

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()


# print configuration info

In [4]:
logging.info(f"start!")

In [17]:
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

In [22]:
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)

initial Saved: results/Div_m50_r10\env\GPTrewardfunc_0.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_1.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_2.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_3.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_4.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_5.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_6.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_7.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_8.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_9.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_10.py


In [5]:
parameter_list

NameError: name 'parameter_list' is not defined

# enter coarse optimization stage

In [4]:
morphology_list = [f'results/Div_m25_r5/assets/GPTSwimmer_{i}.xml' for i in range(0,25) ]
rewardfunc_list = [f'results/Div_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,5)]

parameter_list = [[0.6, 0.8, 0.5, 0.05, 0.05, 0.05],
 [1.0, 0.5, 0.3, 0.04, 0.04, 0.04],
 [0.35, 0.35, 0.35, 0.03, 0.03, 0.03],
 [0.8, 0.4, 0.2, 0.02, 0.03, 0.04],
 [0.2, 1.0, 0.7, 0.03, 0.02, 0.02],
 [0.5, 0.5, 0.5, 0.06, 0.06, 0.06],
 [1.2, 0.3, 0.3, 0.1, 0.02, 0.02],
 [0.9, 0.6, 0.9, 0.07, 0.05, 0.07],
 [0.4, 0.75, 1.2, 0.025, 0.025, 0.025],
 [0.45, 0.45, 1.5, 0.05, 0.05, 0.03],
 [0.2, 1.0, 0.6, 0.03, 0.06, 0.04],
 [0.1, 0.8, 1.5, 0.025, 0.025, 0.025],
 [0.15, 1.3, 0.4, 0.07, 0.03, 0.05],
 [0.7, 0.1, 1.3, 0.02, 0.07, 0.02],
 [0.2, 1.2, 0.3, 0.06, 0.02, 0.04],
 [0.5, 1.0, 0.2, 0.08, 0.03, 0.05],
 [0.3, 0.3, 0.3, 0.1, 0.1, 0.1],
 [0.4, 0.8, 1.2, 0.025, 0.05, 0.075],
 [0.7, 0.2, 1.1, 0.02, 0.07, 0.02],
 [0.8, 0.3, 0.5, 0.02, 0.05, 0.03],
 [1.1, 0.4, 0.6, 0.03, 0.03, 0.03],
 [0.9, 0.45, 0.9, 0.04, 0.02, 0.04],
 [0.3, 0.7, 1.0, 0.06, 0.04, 0.02],
 [0.2, 0.9, 0.5, 0.1, 0.03, 0.05],
 [1.5, 0.3, 0.3, 0.03, 0.05, 0.05],
 [0.1, 1.2, 0.6, 0.02, 0.04, 0.04],
 [0.25, 1.5, 0.25, 0.03, 0.01, 0.03],
 [0.3, 1.2, 0.6, 0.025, 0.015, 0.025],
 [0.5, 0.2, 0.8, 0.04, 0.06, 0.02],
 [0.2, 1.5, 0.2, 0.015, 0.04, 0.015],
 [0.2, 1.0, 0.2, 0.01, 0.08, 0.01],
 [0.8, 0.1, 0.8, 0.05, 0.02, 0.05],
[0.2, 1.5, 0.5, 0.03, 0.01, 0.03],
[0.25, 1.5, 0.25, 0.03, 0.01, 0.03],
 [0.9, 0.9, 0.1, 0.02, 0.02, 0.08],
 [1.5, 0.3, 0.3, 0.03, 0.05, 0.05],
 [0.1, 1.0, 0.1, 0.01, 0.06, 0.01],
 [0.2, 1.2, 0.2, 0.04, 0.01, 0.04],
 [0.4, 0.2, 0.8, 0.08, 0.01, 0.05],
 [0.4, 1.0, 0.4, 0.02, 0.04, 0.02],
 [0.1, 0.8, 0.8, 0.05, 0.02, 0.02],
 [0.8, 0.3, 1.4, 0.03, 0.06, 0.02],
 [0.4, 0.5, 0.5, 0.025, 0.025, 0.025],
 [0.6, 1.0, 0.3, 0.015, 0.03, 0.015],
 [0.2, 0.5, 1.0, 0.04, 0.03, 0.02],
 [0.15, 1.2, 0.8, 0.02, 0.04, 0.03],
 [1.2, 0.3, 0.5, 0.02, 0.02, 0.02],
 [0.5, 0.3, 0.2, 0.055, 0.08, 0.055],
 [0.1, 0.4, 0.9, 0.02, 0.03, 0.04],
 [0.05, 1.0, 0.05, 0.05, 0.01, 0.05],
 [1.0, 1.0, 1.0, 0.1, 0.1, 0.1]]

material_list = [compute_swimmer_volume(parameter) for parameter in parameter_list]
parameter_list[27]

[0.3, 1.2, 0.6, 0.025, 0.015, 0.025]

In [6]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

In [21]:
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        # if i not in [10]:
        #     continue
        if j not in [50]:
            continue
        
        print(i, rewardfunc)
        print(j, morphology)
        shutil.copy(morphology, "GPTSwimmer.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTSwimmerEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = f"results/Div_m50_r10/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
        
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material

0 results/Div_m25_r5/env/GPTrewardfunc_0.py
50 results/Div_m25_r5/assets/GPTSwimmer_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
1 results/Div_m25_r5/env/GPTrewardfunc_1.py
50 results/Div_m25_r5/assets/GPTSwimmer_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
2 results/Div_m25_r5/env/GPTrewardfunc_2.py
50 results/Div_m25_r5/assets/GPTSwimmer_50.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
5

AttributeError: 'mujoco._structs.MjData' object has no attribute 'qvel_old'

In [5]:
efficiency_matrix

array([[None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None],
       [None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        None, None, None, None, None, None, None, None, None, None, None,
        No

In [5]:
efficiency_matrix = np.array([[2490.827166969521, 4927.090190898689, 12864.936602454682,
        1324.404200715939, 21931.145365424352, 1580.9066859467548,
        610.2646656733313, 48.97118468739633, 13878.317918336126,
        3143.3351478849804, 1820.7228767113256, 5715.801254726394,
        2090.2517466843497, 3043.2168056297496, 4722.356797231127,
        1492.8134809229687, 235.04655180362352, 878.5020869527466,
        1617.2352618062444, 6978.065356984796, 10254.391114385744,
        3865.52761824445, 4989.111293599648, 1580.9962703747324,
        1867.3783833475068],
       [2530.1733817999525, 5087.527742382322, 14423.892038881017,
        833.5960685932044, 21286.47983375307, 1609.6318542863833,
        564.2726861099007, 48.43284886565554, 13925.193418189261,
        2889.0175399511113, 1839.985890462754, 5384.99995472673,
        2105.6067317404554, 2876.1343322828907, 4780.561422165743,
        1517.805311727424, 228.29442591302646, 879.4602549274284,
        1729.47861344265, 4704.617048406275, 10008.30735716913,
        3998.782795578854, 4975.27743571648, 1539.5511349045107,
        2022.9114911017662],
       [2532.791312115207, 4838.90440075404, 14710.149159061293,
        2202.1923489014575, 21811.94932729642, 1472.6491001775346,
        549.7182690532485, 117.63447405266973, 14120.235615461232,
        3035.798053013247, 1826.889262070252, 5656.993003754852,
        2107.8050253450087, 3047.9925034714965, 4745.47639123587,
        1213.096530750974, 141.46840247257805, 877.4606364102925,
        1793.1136519310396, 7198.190351774631, 9919.548250890866,
        4049.0372828024724, 4968.317373498144, 1586.2975847673026,
        1914.546678349104],
       [2178.34033115373, 5014.261354961932, 12943.701906883602,
        1687.7739148026135, 21638.954129221504, 1468.1453285628527,
        567.6952387726864, 26.44771881462972, 13855.306583846395,
        3074.5129314468572, 1818.2141401691426, 5616.310232301272,
        2106.2867486422037, 3055.0959304702355, 4705.737833705331,
        1148.5609252875502, 220.60248362928743, 872.9805820665233,
        2108.373610561695, 9726.945490477836, 11114.809056899348,
        4057.8938764713944, 5019.7529896611295, 1576.5923185672762,
        2344.7969873933425],
       [2512.7322860222266, 5000.094302536023, 14649.089881809261,
        1830.7561114809748, 21851.973979335544, 1550.2611398464244,
        680.7645468998198, 79.81711951675348, 13938.29035878637,
        3120.5975026621236, 1840.7874686378746, 5407.2457496724455,
        2104.3147696507294, 2680.576398349633, 4536.335164591208,
        1322.321989076811, 180.26955924692, 880.2577624591086,
        1815.6353833486853, 6900.594901778612, 10291.222331998035,
        4068.6836346429673, 4977.514035473323, 1592.0292834752315,
        2027.9101843936855]], dtype=object)
mean = np.mean(efficiency_matrix)

std = np.std(efficiency_matrix)

print("平均值：", mean)
print("标准差：", std)

平均值： 4607.744248970781
标准差： 5127.2952687360375


In [7]:
fitness_matrix = np.array([[41.08211272784495, 48.541862849137985, 42.55845900608606,
        4.709955990323206, 63.203097217867956, 31.110562098761328,
        26.063735587161254, 1.7540667902059868, 66.76253310015595,
        39.19747868947427, 29.454030197474957, 28.057373798793382,
        23.410355129928647, 16.90942178418347, 30.620929014618174,
        25.725181349309633, 9.599466765172394, 26.908983099706994,
        11.069139571181273, 37.99854736040631, 64.36580934439742,
        39.36247054765333, 46.77048607753694, 27.794471717754437,
        18.886354889981657],
       [41.731064069998666, 50.12249915134245, 47.7156349085935,
        2.9645034307940388, 61.34524357674144, 31.675842859081598,
        24.09946850453609, 1.7347844919948756, 66.98803068055051,
        36.0261308858354, 29.765650046923852, 26.433556714610727,
        23.582327550833632, 15.98103968227733, 30.998342193031625,
        26.155857644715525, 9.323705186950223, 26.938332291041107,
        11.837387305164581, 25.618649952525846, 62.821165686566125,
        40.71940122081457, 46.640800404187985, 27.065851627213338,
        20.4593909154512],
       [41.774242540890235, 47.67305339678581, 48.66260124747682,
        7.831618957373732, 62.85958762635973, 28.980167955530575,
        23.477865290181036, 4.213472180350194, 67.92629360462526,
        37.85648805810652, 29.553784477983115, 27.768699471882975,
        23.606947950765875, 16.93595761592918, 30.77084217816421,
        20.904907844543715, 5.77766922099816, 26.877082919317797,
        12.272936256687423, 39.197222009011924, 62.2640334640326,
        41.23114010062252, 46.57555321407236, 27.88767069343342,
        19.363407193306802],
       [35.92815440219731, 49.40067617026897, 42.81902227843056,
        6.002201485044074, 62.36103489963388, 28.891538520445803,
        24.245641980822082, 0.9473135180531514, 66.6518356086019,
        38.339263693211194, 29.413446096063304, 27.568998384498222,
        23.58994358904029, 16.975427312275702, 30.51316754629407,
        19.79278621967225, 9.009560845134617, 26.739856487630632,
        14.430727745471843, 52.96737419090613, 69.76656855345816,
        41.32132634215914, 47.057737039621934, 27.717048692632634,
        23.714887375682675],
       [41.443401773577904, 49.2611018801321, 48.46061123165536,
        6.510686623788556, 62.97493417720282, 30.507490346658628,
        29.074708311898792, 2.858917127040062, 67.05103434823346,
        38.9139396068946, 29.77861726344957, 26.542755505196236,
        23.567857862489696, 14.894435671020679, 29.414718757148815,
        22.78715553275213, 7.362335798947921, 26.962760368117518,
        12.427085868889266, 37.576687631299905, 64.59699529237916,
        41.43119789921135, 46.66176743651749, 27.988435977082354,
        20.509946869369934]], dtype=object)

In [8]:
none_coords = np.argwhere(efficiency_matrix == None)
print(none_coords)

[]


In [9]:
efficiency_matrix_select = efficiency_matrix
efficiency_matrix_select

array([[2490.827166969521, 4927.090190898689, 12864.936602454682,
        1324.404200715939, 21931.145365424352, 1580.9066859467548,
        610.2646656733313, 48.97118468739633, 13878.317918336126,
        3143.3351478849804, 1820.7228767113256, 5715.801254726394,
        2090.2517466843497, 3043.2168056297496, 4722.356797231127,
        1492.8134809229687, 235.04655180362352, 878.5020869527466,
        1617.2352618062444, 6978.065356984796, 10254.391114385744,
        3865.52761824445, 4989.111293599648, 1580.9962703747324,
        1867.3783833475068],
       [2530.1733817999525, 5087.527742382322, 14423.892038881017,
        833.5960685932044, 21286.47983375307, 1609.6318542863833,
        564.2726861099007, 48.43284886565554, 13925.193418189261,
        2889.0175399511113, 1839.985890462754, 5384.99995472673,
        2105.6067317404554, 2876.1343322828907, 4780.561422165743,
        1517.805311727424, 228.29442591302646, 879.4602549274284,
        1729.47861344265, 4704.61704840627

# print coarse optimization info

In [17]:
logging.info(f'_________________________________end coarse optimization stage_________________________________')
logging.info(f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}")
logging.info(f'folder_name:{folder_name}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'fitness_matrix:{fitness_matrix}')
logging.info(f'efficiency_matrix:{efficiency_matrix}')
logging.info(f'_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [10]:
# 获取矩阵中所有非 None 的值和它们的坐标
all_values_with_coords = []
for i in range(len(efficiency_matrix_select)):
    for j in range(len(efficiency_matrix_select[0])):
        value = efficiency_matrix_select[i][j]
        if value is not None:
            all_values_with_coords.append(((i, j), value))

# 按值降序排序
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)


top_k = max(1, int(len(sorted_values) * 0.1))

efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]




In [11]:
logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")

In [12]:
coarse_best = efficiency_coarse_best
coarse_best

[(0, 4),
 (4, 4),
 (2, 4),
 (3, 4),
 (1, 4),
 (2, 2),
 (4, 2),
 (1, 2),
 (2, 8),
 (4, 8),
 (1, 8),
 (0, 8)]

# enter fine optimization stage

In [13]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_swimmer_volume(parameter)
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环
        designer = DGA()

        # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration 
        )

        shutil.copy(improved_morphology, "GPTSwimmer.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTSwimmerEnv._get_rew = _get_rew
        
        try:
            model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
            improved_fitness, _ = Eva(model_path)
        except Exception as e:
            print(f"Error evaluating design: {e}")
            continue
        
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_swimmer_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material
        logging.info(f"improved_parameter:{improved_parameter}\n")

        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            improved = False
            
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )
        shutil.copy(best_morphology, "GPTSwimmer.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTSwimmerEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_swimmer_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
        
            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

[0.25, 1.1, 0.6, 0.02, 0.02, 0.02]
Successfully saved GPTSwimmer_refine2_0_4_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[{'role': 'system', 'content': 'You are a reinforcement learning reward function designer'}, {'role': 'user', 'content': '\nYou are a reward engineer trying to write reward functions to solve reinforcement learning tasks as effectively as possible.\nYour goal is to write a reward function for the enviroment that will help the agent lea

Process ForkServerProcess-667:
Traceback (most recent call last):
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/root/miniconda3/envs/robodesign/lib/python3.8/site-packages/stable_baselines3/common/vec_env/subproc_vec_env.py", line 32, in _worker
    cmd, data = remote.recv()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda3/envs/robodesign/lib/python3.8/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt
Process ForkServerProcess-664:
Traceback (most recent call la

KeyboardInterrupt: 

In [None]:

logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [None]:
final_optimized_results

2025-04-10 14:21:58,984 - Final optimized result: rewardfunc_index2 morphology_index4
2025-04-10 14:21:58,984 -   Morphology: results/Div_m25_r5/assets/GPTSwimmer_refine_2_4_2.xml
2025-04-10 14:21:58,984 -   Parameter: [0.25, 1.3, 0.65, 0.023, 0.018, 0.018]
2025-04-10 14:21:58,984 -   Rewardfunc: results/Div_m25_r5/env/GPTSwimmer_refine_2_4_3.py
2025-04-10 14:21:58,984 -   Fitness: 70.88658010548744
2025-04-10 14:21:58,984 -   Material: 0.0025001569263455457
2025-04-10 14:21:58,984 -   Efficiency: 28352.852318394926

In [None]:
2025-05-11 12:53:55,783 - Final optimized result: rewardfunc_index1 morphology_index8
2025-05-11 12:53:55,783 -   Morphology: results/Div_m25_r5/assets/GPTSwimmer_refine2_1_8_0.xml
2025-05-11 12:53:55,783 -   Parameter: [0.4, 1.1, 0.5, 0.02, 0.02, 0.02]
2025-05-11 12:53:55,783 -   Rewardfunc: results/Div_m25_r5/env/GPTrewardfunc_refine_1_8_1.py
2025-05-11 12:53:55,783 -   Fitness: 89.93837841576415
2025-05-11 12:53:55,783 -   Material: 0.002613805087786708
2025-05-11 12:53:55,783 -   Efficiency: 34408.98437148628

In [16]:
best_morphology = "results/Div_m25_r5/assets/GPTSwimmer_refine2_1_8_0.xml"
best_rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_refine_1_8_1.py"
morphology_index=9999
rewardfunc_index=9999
best_parameter = [0.4, 1.1, 0.5, 0.02, 0.02, 0.02]

shutil.copy(best_morphology, "GPTSwimmer.xml")
shutil.copy(best_rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew
# autodl-tmp/Swimmer/results/Div_m25_r5/fine/SAC_morphology999_rewardfunc999_3000000.0steps
model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
best_fitness, _ = Eva(model_path)
best_material = compute_swimmer_volume(best_parameter)
best_efficiency = best_fitness / best_material
logging.info("3e6 steps train\n")
logging.info(f"best_fitness:{best_fitness}")
logging.info(f"best_efficiency:{best_efficiency}")
print("3e6 steps train\n")
print(f"best_fitness:{best_fitness}")
print(f"best_efficiency:{best_efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
3e6 steps train

best_fitness:87.31937938235838
best_efficiency:33406.99725100689


In [5]:
best_morphology = "results/Div_m25_r5/assets/GPTSwimmer_refine_2_4_2.xml"
best_rewardfunc = "results/Div_m25_r5/env/GPTSwimmer_refine_2_4_3.py"
morphology_index=999
rewardfunc_index=999
best_parameter = [0.25, 1.3, 0.65, 0.023, 0.018, 0.018]

shutil.copy(best_morphology, "GPTSwimmer.xml")
shutil.copy(best_rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew
# autodl-tmp/Swimmer/results/Div_m25_r5/fine/SAC_morphology999_rewardfunc999_3000000.0steps
model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
best_fitness, _ = Eva(model_path)
best_material = compute_swimmer_volume(best_parameter)
best_efficiency = best_fitness / best_material
logging.info("3e6 steps train\n")
logging.info(f"best_fitness:{best_fitness}")
logging.info(f"best_efficiency:{best_efficiency}")
print("3e6 steps train\n")
print(f"best_fitness:{best_fitness}")
print(f"best_efficiency:{best_efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
3e6 steps train

best_fitness:73.16345150502791
best_efficiency:29263.543713622083


2025-04-10 02:00:20,461 - Initial morphology:results/Div_m25_r5/assets/GPTSwimmer_4.xml
2025-04-10 02:00:20,461 - Initial parameter:[0.2, 1.0, 0.7, 0.03, 0.02, 0.02]
2025-04-10 02:00:20,461 - Initial rewardfunc:results/Div_m25_r5/env/GPTrewardfunc_0.py
2025-04-10 02:00:20,461 - Initial fitness:63.203097217867956
2025-04-10 02:00:20,461 - Initial efficiency:21931.145365424352

In [9]:
morphology = "results/Div_m25_r5/assets/GPTSwimmer_4.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_0.py"

morphology_index=777
rewardfunc_index=777
parameter = [0.2, 1.0, 0.7, 0.03, 0.02, 0.02]
morphology_index=4
rewardfunc_index=0
shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew

# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("coarse only best 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
3e6 steps train

fitness:44.07411938131373
efficiency:15293.489742641834


In [2]:
#  human
morphology = "results/Div_m25_r5/assets/GPTSwimmer_50.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_10.py"
parameter =  [1.0, 1.0, 1.0, 0.1, 0.1, 0.1]


morphology_index=888
rewardfunc_index=888

shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew


# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=100, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("human 3e6 steps train\n")
logging.info(f"human_fitness:{fitness}")
logging.info(f"human_efficiency:{efficiency}")
print("3e6 steps train\n")
print(f"human_fitness:{fitness}")
print(f"human_efficiency:{efficiency}")
    
    
 

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved qpos log to /root/autodl-tmp/Swimmer/qpos.txt
Average Fitness: 1.9897, Average Reward: 49.6873
3e6 steps train

human_fitness:1.9897092116354986
human_efficiency:18.627768020427514


2025-04-07 11:05:41,435 - morphology: 50, rewardfunc: 0, material cost: 0.10681415022205296 reward: 41.660022777401224 fitness: 1.6689821209181794 efficiency: 15.625103204477863
[1.0, 1.0, 1.0, 0.1, 0.1, 0.1]

In [8]:
# Robodesign (w/o Morphology Design)

morphology = "results/Div_m25_r5/assets/GPTSwimmer_50.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_0.py"
parameter =  [1.0, 1.0, 1.0, 0.1, 0.1, 0.1]


morphology_index=555
rewardfunc_index=555

shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew


model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("human 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign (w/o Morphology Design) 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")
    

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Morphology Design) 3e6 steps train

fitness:2.3706576073842083
efficiency:22.194228034917792


2025-04-07 09:14:35,499 - morphology: 4, rewardfunc: 10, material cost: 0.002881887660893037 reward: 1588.2197695801924 fitness: 63.53375844670141 efficiency: 22045.883088660583
[0.2, 1.0, 0.7, 0.03, 0.02, 0.02]

In [7]:
# Robodesign (w/o Reward Shaping)

morphology = "results/Div_m25_r5/assets/GPTSwimmer_4.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_10.py"
parameter =  [0.2, 1.0, 0.7, 0.03, 0.02, 0.02]


# morphology = "results/Div_m25_r5/assets/GPTSwimmer_refine_2_4_2.xml"
# rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_10.py"
# parameter = [0.25, 1.3, 0.65, 0.023, 0.018, 0.018]

morphology_index=555
rewardfunc_index=555

morphology_index=4
rewardfunc_index=10


shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew


# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
model_path = f"results/Div_m25_r5/coarse/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_500000.0steps"
fitness, _ = Eva(model_path)
material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign (w/o Reward Shaping) 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign (w/o Reward Shaping) 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Reward Shaping) 3e6 steps train

fitness:63.43019338270048
efficiency:22009.946551159035


2025-04-08 05:01:03,460 - Final optimized result: rewardfunc_index2 morphology_index18
2025-04-08 05:01:03,460 -   Morphology: results/noDiv_m25_r5/assets/GPTSwimmer_refine_2_18_0.xml
2025-04-08 05:01:03,460 -   Parameter: [0.45, 0.45, 0.45, 0.035, 0.035, 0.035]
2025-04-08 05:01:03,460 -   Rewardfunc: results/noDiv_m25_r5/env/GPTSwimmer_refine_2_18_1.py
2025-04-08 05:01:03,460 -   Fitness: 41.06518362642221
2025-04-08 05:01:03,460 -   Material: 0.005734191990964771
2025-04-08 05:01:03,460 -   Efficiency: 7161.459485683012

In [10]:
# Robodesign (w/o Diversity Reflection)

morphology = "results/noDiv_m25_r5/assets/GPTSwimmer_refine_2_18_0.xml"
rewardfunc = "results/noDiv_m25_r5/env/GPTSwimmer_refine_2_18_1.py"
parameter =  [0.45, 0.45, 0.45, 0.035, 0.035, 0.035]

morphology_index=333
rewardfunc_index=333

shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew


model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
# model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_3000000.0steps"
fitness, _ = Eva(model_path)
material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("Robodesign (w/o Diversity Reflection) 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("Robodesign (w/o Diversity Reflection) 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Robodesign (w/o Diversity Reflection) 3e6 steps train

fitness:61.67922159794303
efficiency:10756.39282659693


2025-04-09 04:36:01,592 - iteration:2, morphology: 0, rewardfunc: 9, material cost: 0.10681415022205296 reward: 49.70920622052905 fitness: 1.992258670845264 efficiency: 18.651636198983123

In [3]:
# eureka reward

morphology = "results/Div_m25_r5/assets/GPTSwimmer_50.xml"
rewardfunc = "results/eureka1/env/GPTrewardfunc_9_2.py"
parameter =  [1.0, 1.0, 1.0, 0.1, 0.1, 0.1]

morphology_index=222
rewardfunc_index=222

shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew


# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=100, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("eureka reward 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("eureka reward 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved qpos log to /root/autodl-tmp/Swimmer/qpos.txt
Average Fitness: 2.5465, Average Reward: 63.5811
eureka reward 3e6 steps train

fitness:2.5465372941186715
efficiency:23.840823419226254


In [4]:
# eureka morphology

morphology = "results/Eureka_morphology/assets/GPTSwimmer_14.xml"
rewardfunc = "results/Eureka_morphology/env/GPTrewardfunc_0.py"
parameter =  [0.85, 0.65, 0.45, 0.035, 0.025, 0.015]

morphology_index=222
rewardfunc_index=222

shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew


# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=100, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("eureka reward 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("eureka reward 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved qpos log to /root/autodl-tmp/Swimmer/qpos.txt
Average Fitness: 0.3651, Average Reward: 8.9552
eureka reward 3e6 steps train

fitness:0.365113702305294
efficiency:71.24554852103635


2025-04-06 09:59:14,743 - morphology: 2, rewardfunc: 0, material cost: 0.014476458947741768 reward: 1409.4540161000132 fitness: 56.384153229858015 efficiency: 3894.885719870989
[0.9, 0.7, 0.5, 0.035, 0.025, 0.015]

In [5]:
# eureka morphology

morphology = "results/Eureka_morphology/assets/GPTSwimmer_2.xml"
rewardfunc = "results/Div_m25_r5/env/GPTrewardfunc_10.py"

parameter =  [0.9, 0.7, 0.5, 0.035, 0.025, 0.015]

morphology_index=111
rewardfunc_index=111

shutil.copy(morphology, "GPTSwimmer.xml")
shutil.copy(rewardfunc, "GPTrewardfunc.py")

import GPTrewardfunc
importlib.reload(GPTrewardfunc)  # 重新加载模块
from GPTrewardfunc import _get_rew
GPTSwimmerEnv._get_rew = _get_rew


# model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=1e6)
model_path = f"results/Div_m25_r5/fine/SAC_morphology{morphology_index}_rewardfunc{rewardfunc_index}_1000000.0steps"
# fitness, _ = Eva(model_path)
fitness, _ = Eva_with_qpos_logging(model_path, run_steps=100, video = True, rewardfunc_index=rewardfunc_index, morphology_index=morphology_index)

material = compute_swimmer_volume(parameter)
efficiency = fitness / material

logging.info("eureka morphology 3e6 steps train\n")
logging.info(f"fitness:{fitness}")
logging.info(f"efficiency:{efficiency}")
print("eureka morphology 3e6 steps train\n")
print(f"fitness:{fitness}")
print(f"efficiency:{efficiency}")

Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved qpos log to /root/autodl-tmp/Swimmer/qpos.txt
Average Fitness: 89.1276, Average Reward: 2228.0682
eureka morphology 3e6 steps train

fitness:89.12763177813105
efficiency:16351.703934943916
