In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTSwimmer import GPTSwimmerEnv
import os

In [None]:
import prompts
class DGA:
    def __init__(self):
        api_key = "<api_key>"
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4o-mini"

    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTSwimmer_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
        messages.append({"role": "assistant", "content": initial_code})

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            # print(diverse_messages)
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_code = self.extract_code(response.choices[0].message.content)
            messages.append({"role": "assistant", "content": diverse_code})

            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files


    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums                                                                                                                                                                                                                                                                                   
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_swimmer_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = swimmer_design(parameter)  
            filename = f"GPTSwimmer_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_swimmer_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = swimmer_design(initial_parameter['parameters'])  

        filename = f"GPTSwimmer_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_swimmer_volume(diverse_parameter['parameters'])) 
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = swimmer_design(diverse_parameter['parameters'])  
            filename = f"GPTSwimmer_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        reward_improve_prompts = prompts.reward_improve_prompts

        for reward_filename, fitness in zip(rewardfunc_list, fitness_list):
            with open(reward_filename, 'r') as f:
                reward_content = f.read()
            reward_improve_prompts += f"reward function:\n{reward_content}\nfitness: {fitness}\n"
            
        with open(best_rewardfunc, 'r') as f:
            best_reward_content = f.read()

        reward_improve_prompts += f"This is best reward function, please carefully review it :\n{best_reward_content}\nbest fitness: {max(fitness_list)}"
        # print(reward_improve_prompts)
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        # print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTSwimmer_refine_{rewardfunc_index}_{morphology_index}_{iteration}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        # print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = swimmer_design(parameter)  
        filename = f"GPTSwimmer_refine_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

folder_name = "results/noDiv_m25_r5"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 25
rewardfunc_nums = 5

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()


In [4]:
# print configuration info

In [5]:
logging.info(f"start!")

In [6]:
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

KeyboardInterrupt: 

In [22]:
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)

initial Saved: results/Div_m50_r10\env\GPTrewardfunc_0.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_1.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_2.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_3.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_4.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_5.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_6.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_7.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_8.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_9.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_10.py


In [5]:
parameter_list

NameError: name 'parameter_list' is not defined

# enter coarse optimization stage

In [6]:
morphology_list = [f'results/noDiv_m25_r5/assets/GPTSwimmer_{i}.xml' for i in range(0,25) ]
rewardfunc_list = [f'results/noDiv_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,5)]

parameter_list = [[0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.75, 0.65, 0.55, 0.07, 0.07, 0.07],
 [0.5, 0.4, 0.3, 0.05, 0.05, 0.05],
 [0.8, 0.7, 0.6, 0.08, 0.07, 0.06],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.8, 0.6, 0.07, 0.07, 0.07],
 [0.6, 0.8, 0.5, 0.05, 0.05, 0.05],
 [0.4, 0.5, 0.6, 0.04, 0.04, 0.04],
 [1.5, 1.0, 0.5, 0.1, 0.1, 0.1],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.6, 0.7, 0.08, 0.09, 0.1],
 [1.0, 1.0, 1.0, 0.1, 0.1, 0.1],
 [0.5, 0.6, 0.4, 0.08, 0.07, 0.06],
 [0.5, 0.4, 0.3, 0.05, 0.05, 0.04],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.7, 0.5, 0.06, 0.06, 0.06],
 [1.2, 0.8, 0.6, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.04, 0.04, 0.04],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.3, 0.5, 0.4, 0.05, 0.05, 0.05],
 [0.9, 0.8, 0.7, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.75, 0.5, 0.35, 0.05, 0.04, 0.03],
 [0.6, 0.6, 0.6, 0.05, 0.05, 0.05],
 [0.75, 0.75, 0.75, 0.05, 0.05, 0.05],
 [0.5, 0.4, 0.3, 0.05, 0.04, 0.03],
 [0.8, 0.6, 0.4, 0.05, 0.05, 0.05],
 [0.6, 0.6, 0.6, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.05, 0.04, 0.03],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.7, 0.6, 0.1, 0.1, 0.1],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.8, 0.6, 0.4, 0.05, 0.05, 0.05],
 [0.5, 0.6, 0.7, 0.05, 0.05, 0.05],
 [0.5, 0.6, 0.7, 0.05, 0.05, 0.05],
 [0.5, 0.4, 0.3, 0.05, 0.05, 0.05],
 [0.6, 0.5, 0.4, 0.05, 0.04, 0.03],
 [0.6, 0.8, 1.0, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.06, 0.06, 0.06],
 [0.5, 0.6, 0.7, 0.03, 0.03, 0.03],
 [0.6, 0.7, 0.5, 0.08, 0.08, 0.08],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.05, 0.05, 0.05],
 [0.5, 0.5, 0.5, 0.1, 0.1, 0.1],
 [0.8, 0.6, 0.4, 0.05, 0.045, 0.04],
 [0.8, 0.6, 0.4, 0.05, 0.05, 0.05],
 [0.8, 0.7, 0.6, 0.05, 0.05, 0.05]]

material_list = [compute_swimmer_volume(parameter) for parameter in parameter_list]

In [12]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

In [14]:
for j, morphology in enumerate(morphology_list):
    for i, rewardfunc in enumerate(rewardfunc_list):

        
        # if j not in [32]:
        #     continue
            
        if i not in [0]:
            continue
        if j in [0, 1, 2, 3]:
            continue
        # if j not in [13]:
        #     continue
        # if j not in [18]:
        #     continue
        print(i, rewardfunc)
        print(j, morphology)
        shutil.copy(morphology, "GPTSwimmer.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTSwimmerEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = f"results/Div_m50_r10/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
        
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material

0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
4 results/noDiv_m25_r5/assets/GPTSwimmer_4.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
5 results/noDiv_m25_r5/assets/GPTSwimmer_5.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/noDiv_m25_r5/env/GPTrewardfunc_0.py
6 results/noDiv_m25_r5/assets/GPTSwimmer_6.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
5

In [33]:
efficiency_matrix = np.array([[1368.4742015903275, 589.8751084268529, 1615.3932338057273,
        609.8713586210831, 2954.1675545853427, 764.0634151387345,
        2519.871735642786, 5934.473583622517, 55.10494905141997,
        3135.883041027847, 3120.091368893377, 244.70879972376696,
        17.29372894423567, 1046.4158326759643, 3159.0062933947047,
        2969.7272651710778, 1467.457984978194, 1778.2422810463538,
        6351.318902607162, 3032.8566713554446, 2790.6714237490105,
        1573.7541571507777, 3109.24103221815, 5758.114933190308,
        2614.1099909044788],
       [1200.7374877572554, 605.7576811381646, 1409.6807601190537,
        606.8116601544316, 3075.9470690942594, 708.4508194917718,
        2520.110021447044, 5919.605806116344, 50.53786447555113,
        3069.6255177353323, 3077.1713157400536, 258.2424696340783,
        22.26025702890422, 868.5868350261792, 3117.1386864749356,
        2959.7392216963694, 1276.8285618643731, 1715.1973639601633,
        6124.119850154302, 3135.6273465315226, 2839.9902191232404,
        1657.0265395061374, 3112.926554031736, 5677.742991781933,
        2553.40967403229],
       [1346.4647421830282, 379.23559451326696, 1805.284272520437,
        627.4722060428347, 3121.5923783598146, 772.3245570186907,
        2503.429515555621, 5902.214325231143, 54.758363744886715,
        3017.10304140356, 2970.844687723256, 257.35518646571205,
        20.576529656212273, 937.7525410472919, 3563.1666375186896,
        2996.026760471088, 1475.9097351916957, 1799.0663536189795,
        6333.752905767369, 2948.2972542220505, 2871.660108756409,
        1566.5649749978147, 3112.7381553114965, 5834.973528961034,
        2620.5252773617544],
       [1282.9016642245906, 536.1278427340677, 1468.1394655411032,
        561.5584801975904, 3012.314004736944, 759.640547615026,
        2517.831890264858, 5872.61672854994, 44.68359545180143,
        3106.458311511013, 3100.2629208476606, 251.21185096973468,
        18.030648936240535, 1018.3327425355768, 2899.311872310917,
        2977.789261611033, 1478.5583109289212, 1751.6119617156442,
        6413.623890354997, 3022.6138486031646, 2860.9010325479485,
        1585.3697985307374, 3091.1419535784225, 5766.073531388765,
        2622.31996120809],
       [1376.2781538260972, 274.3257905425478, 2468.482454840466,
        721.5552342042213, 3016.686951248101, 697.4991224745894,
        2488.749417092855, 5944.9355884631605, 46.84196677900785,
        3079.1767295798263, 2982.1375911300356, 247.89463774381144,
        9.700669176509976, 874.1747007253512, 3140.3177736820776,
        3065.275731203989, 1516.8704203036173, 1778.8443299376281,
        6221.996627556754, 3135.462461588441, 2830.708963465813,
        1947.748889111996, 3056.078122864581, 5814.0452926163725,
        2622.341630072435]], dtype=object)
mean = np.mean(efficiency_matrix)

std = np.std(efficiency_matrix)

print("平均值：", mean)
print("标准差：", std)


平均值： 2338.880851379202
标准差： 1713.524728662616


In [22]:
fitness_matrix = np.array([[18.271551117959152, 20.249340625438766, 17.762176305939562,
        23.256012056969134, 39.4433621195742, 25.6408165588656,
        41.56115529659068, 49.51766351674205, 5.885988305956948,
        41.869585277892085, 41.6587385229385, 13.874857832296406,
        1.8472149613490543, 29.613009931153982, 31.248347121895463,
        39.65111177756375, 32.19732409181327, 39.10559020506587,
        52.99585007417734, 40.49400101201449, 30.685034952015013,
        32.13661424129832, 41.51386733649006, 59.29787785510196,
        41.06234371550596],
       [16.031969299319243, 20.794560486804606, 15.50024951963913,
        23.13933764124969, 41.069334039164715, 23.774541672392697,
        41.56508543048426, 49.39360573917872, 5.398159047993273,
        40.98493014710361, 41.085679897306306, 14.64220966503297,
        2.3777104382668854, 24.58054414746687, 30.83419995260125,
        39.51775373054726, 28.014746205249594, 37.7191600665168,
        51.100085256598376, 41.86617130409942, 31.22732343032554,
        33.83706562155269, 41.56307557157044, 58.4701962926621,
        40.10886536772475],
       [17.97768590502947, 13.01846886041672, 19.850137328669895,
        23.927179040082773, 41.6787796542676, 25.918048028549638,
        41.28996781795932, 49.24849000373963, 5.848968090960149,
        40.28366218748659, 39.666031345107484, 14.59190118478229,
        2.1978645297471853, 26.537896736504713, 35.24623175811737,
        40.00225655778118, 32.382762955176176, 39.563535478949746,
        52.849278165381676, 39.364983226467544, 31.575552054466005,
        31.989808609357382, 41.560560115419776, 60.0893784897727,
        41.16311479953022],
       [17.129006385325894, 18.40429465697657, 16.14303655786211,
        21.41371389483867, 40.21972007724573, 25.492391791244728,
        41.52751138950845, 49.00152558259725, 4.772840277050161,
        41.47671309303529, 41.393993669410335, 14.24357735368588,
        1.925928444076696, 28.818273459040306, 28.679494558159227,
        39.75875368991713, 32.44087504578872, 38.51995889606112,
        53.515727258773175, 40.35724121099451, 31.457249832750314,
        32.37380972986742, 41.272212623401984, 59.379836619271416,
        41.191305627466036],
       [18.375747683763805, 9.417105919002603, 27.14238220974737,
        27.514814377188, 40.278106647940206, 23.4070192277081,
        41.04784364816516, 49.60495921839268, 5.003384876229359,
        41.112455719198664, 39.816811580224346, 14.055493141099111,
        1.0361687346741753, 24.738677766317394, 31.063483498041457,
        40.92685280310451, 33.28147655861256, 39.11882995168338,
        51.916775947225595, 41.86396979846446, 31.125270694262422,
        39.773718406967525, 40.80404846324837, 59.87385656690603,
        41.19164600119122]], dtype=object)

In [23]:
none_coords = np.argwhere(efficiency_matrix == None)
print(none_coords)

[]


In [24]:
efficiency_matrix_select = efficiency_matrix[:10, :50]
efficiency_matrix_select.shape

(5, 25)

# print coarse optimization info

In [25]:
logging.info(f'_________________________________end coarse optimization stage_________________________________')
logging.info(f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}")
logging.info(f'folder_name:{folder_name}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'fitness_matrix:{fitness_matrix}')
logging.info(f'efficiency_matrix:{efficiency_matrix}')
logging.info(f'_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [26]:
# 获取矩阵中所有非 None 的值和它们的坐标
all_values_with_coords = []
for i in range(len(efficiency_matrix_select)):
    for j in range(len(efficiency_matrix_select[0])):
        value = efficiency_matrix_select[i][j]
        if value is not None:
            all_values_with_coords.append(((i, j), value))

# 按值降序排序
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)


top_k = max(1, int(len(sorted_values) * 0.1))

efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [27]:
coarse_best = efficiency_coarse_best
coarse_best

[(3, 18),
 (0, 18),
 (2, 18),
 (4, 18),
 (1, 18),
 (4, 7),
 (0, 7),
 (1, 7),
 (2, 7),
 (3, 7),
 (2, 23),
 (4, 23)]

# enter fine optimization stage

In [28]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_swimmer_volume(parameter)
    
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()

        # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )

        shutil.copy(improved_morphology, "GPTWalker.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_swimmer_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break
            
        
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTWalker.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_swimmer_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material


        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
        
            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

[0.5, 0.5, 0.5, 0.04, 0.04, 0.04]
Successfully saved GPTSwimmer_refine_3_18_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[0.5, 0.5, 0.5, 0.04, 0.04, 0.04]
Successfully saved GPTSwimmer_refine_0_18_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[0.45, 0.45, 0.45, 0.035, 0.035, 0.035]
Successfully saved GPTSwimmer_refine_2_18_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

In [29]:

logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [32]:
final_optimized_results

[{'best_morphology': 'results/noDiv_m25_r5/assets/GPTSwimmer_18.xml',
  'best_parameter': [0.5, 0.5, 0.5, 0.04, 0.04, 0.04],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 53.515727258773175,
  'best_material': 0.008344070087934492,
  'best_efficiency': 6413.623890354997,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTSwimmer_18.xml',
  'best_parameter': [0.5, 0.5, 0.5, 0.04, 0.04, 0.04],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitness': 52.99585007417734,
  'best_material': 0.008344070087934492,
  'best_efficiency': 6351.318902607162,
  'best_iteration': 0},
 {'best_morphology': 'results/noDiv_m25_r5/assets/GPTSwimmer_refine_2_18_0.xml',
  'best_parameter': [0.45, 0.45, 0.45, 0.035, 0.035, 0.035],
  'best_rewardfunc': 'results/noDiv_m25_r5/env/GPTSwimmer_refine_2_18_1.py',
  'best_fitness': 41.06518362642221,
  'best_material': 0.005734191990964771,
  'best_efficiency': 7161.459485