In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTSwimmer import GPTSwimmerEnv
import os
# Explicit imports for modules the notebook uses by name; previously they were
# only available as a side effect of the wildcard imports above.
import logging
import re

In [None]:
import prompts
class DGA:
    """LLM-backed generator of reward functions and swimmer morphologies.

    Every ``generate_*`` / ``improve_*`` method sends a chat-completion request
    through ``self.client`` and writes what it receives below ``folder_name``:
    reward-function modules go to ``<folder_name>/env`` and MuJoCo XML files
    go to ``<folder_name>/assets``.
    """

    # Prepended to every generated reward module so its code can refer to ``np``.
    _NUMPY_HEADER = "import numpy as np\n"

    def __init__(self, api_key=None, model="gpt-4o-mini"):
        """Create the OpenAI client.

        Args:
            api_key: API key to use. With the default ``None`` the OpenAI
                client resolves the key itself from the ``OPENAI_API_KEY``
                environment variable, so no credential is stored in the
                notebook.
            model: Name of the chat model used for every request.
        """
        self.client = OpenAI(api_key=api_key)
        self.model = model

    @staticmethod
    def _write_text(file_path, text):
        """Write ``text`` to ``file_path``, creating the parent folder if needed."""
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(file_path, "w") as fp:
            fp.write(text)

    @staticmethod
    def _append_assistant_reply(messages, code, raw_reply):
        """Record the model's previous answer in the running conversation.

        The extracted code is preferred; when no code block was found the raw
        reply is used instead. Nothing is appended when both are empty, so a
        ``None`` content is never sent back with the next request.
        """
        content = code or raw_reply
        if content:
            messages.append({"role": "assistant", "content": content})

    def _request_morphology(self, request_messages):
        """Request one JSON-formatted design and return it parsed."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=request_messages,
            response_format={'type': 'json_object'},
            n=1
        )
        return json.loads(response.choices[0].message.content)

    def _save_morphology(self, parameter, filename, folder_name):
        """Build the swimmer XML for ``parameter`` and save it under ``assets``.

        Returns:
            Path of the written XML file.
        """
        xml_file = swimmer_design(parameter)
        file_path = os.path.join(folder_name, "assets", filename)
        self._write_text(file_path, xml_file)
        return file_path

    def extract_code(self, text):
        """Return the body of the first ```` ```python ```` fenced block in ``text``.

        Returns:
            The stripped code, or ``None`` when ``text`` is empty/``None`` or
            contains no such block.
        """
        if not text:
            return None
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        """Return ``code`` unchanged.

        The reward code is written as a top-level module, so no indentation is
        added; the method is kept as a formatting hook for the callers.
        """
        return code

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):
        """Generate ``rewardfunc_nums`` reward functions with a single request.

        Each reply that contains a python code block is saved as
        ``<folder_name>/env/GPTSwimmer_<i>.py``; replies without one are
        skipped.

        Returns:
            List of the file paths that were written.
        """
        # NOTE(review): generate_rewardfunc_div names its files
        # GPTrewardfunc_<i>.py; the GPTSwimmer_<i>.py name is kept here for
        # backward compatibility.
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )

        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if not reward_code:
                continue
            full_code = self._NUMPY_HEADER + self.indent_code(reward_code) + "\n"
            file_path = os.path.join(folder_name, "env", f"GPTSwimmer_{i}.py")
            self._write_text(file_path, full_code)
            files.append(file_path)
            print(f"Saved: {file_path}")
        return files

    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):
        """Generate reward functions one at a time, asking for diversity.

        The first request uses the plain prompt. Every later request also
        carries all earlier answers plus ``rewardfunc_div_prompts``. Extracted
        code is saved as ``<folder_name>/env/GPTrewardfunc_<i>.py``.

        Returns:
            List of the file paths that were written (replies without a python
            code block produce no file).
        """
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # Generate the initial reward function.
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        raw_reply = response.choices[0].message.content
        initial_code = self.extract_code(raw_reply)
        if initial_code:
            reward_code = self._NUMPY_HEADER + self.indent_code(initial_code) + "\n"
            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            self._write_text(file_path, reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
        self._append_assistant_reply(messages, initial_code, raw_reply)

        # Generate the remaining, diversified reward functions.
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            raw_reply = response.choices[0].message.content
            diverse_code = self.extract_code(raw_reply)
            self._append_assistant_reply(messages, diverse_code, raw_reply)

            if diverse_code:
                reward_code = self._NUMPY_HEADER + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                self._write_text(file_path, reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        """Generate ``morphology_nums`` swimmer designs with a single request.

        Each reply is expected to be a JSON object with a ``parameters`` list.
        Valid designs are saved as ``<folder_name>/assets/GPTSwimmer_<i>.xml``
        (``i`` is the index of the reply); invalid replies are skipped.

        Returns:
            ``(xml_files, material_list, parameter_list)``. The three lists
            contain one entry per saved design, in the same order, so they can
            be indexed together.
        """
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums
        )

        # Show every raw reply before parsing.
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        xml_files, material_list, parameter_list = [], [], []
        for i, choice in enumerate(responses.choices):
            try:
                parameter = json.loads(choice.message.content).get('parameters', [])
            except (TypeError, ValueError, AttributeError):
                # Missing content, malformed JSON, or a non-object top level.
                parameter = None

            if not isinstance(parameter, list) or not parameter:
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            filename = f"GPTSwimmer_{i}.xml"
            file_path = self._save_morphology(parameter, filename, folder_name)
            # The volume is computed only for validated designs, and all three
            # result lists grow together.
            xml_files.append(file_path)
            material_list.append(compute_swimmer_volume(parameter))
            parameter_list.append(parameter)
            print(f"Successfully saved {filename}")

        return xml_files, material_list, parameter_list

    def generate_morphology_div(self, morphology_nums, folder_name):
        """Generate swimmer designs one at a time, asking for diversity.

        The first request uses the plain prompt. Every later request also
        carries all earlier designs plus ``morphology_div_prompts``. Designs
        are saved as ``<folder_name>/assets/GPTSwimmer_<i>.xml``.

        Returns:
            ``(xml_files, material_list, parameter_list)`` with one entry per
            generated design.

        Raises:
            KeyError: If a reply has no ``parameters`` entry.
        """
        material_list = []
        xml_files = []
        parameter_list = []

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]

        def record(design, index, label):
            """Store one design in the result lists, the conversation and on disk."""
            parameter = design['parameters']
            parameter_list.append(parameter)
            material_list.append(compute_swimmer_volume(parameter))
            messages.append({"role": "assistant", "content": json.dumps(design)})
            logging.info(f"generate {label}{parameter}")
            xml_files.append(
                self._save_morphology(parameter, f"GPTSwimmer_{index}.xml", folder_name)
            )

        # Generate the initial morphology.
        record(self._request_morphology(messages), 0, "initial_parameter")

        # Generate the remaining, diversified designs.
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            record(self._request_morphology(diverse_messages), i, "diverse_parameter")

        return xml_files, material_list, parameter_list

    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for a better reward function, given earlier results.

        The request contains the source and fitness of every file in
        ``rewardfunc_list`` plus the source of ``best_rewardfunc``. The answer
        is saved as
        ``<folder_name>/env/GPTSwimmer_refine_<rewardfunc_index>_<morphology_index>_<iteration>.py``.

        Returns:
            Path of the written reward-function file.

        Raises:
            ValueError: If the reply contains no python code block (also
                raised by ``max`` when ``fitness_list`` is empty).
        """
        reward_improve_prompts = prompts.reward_improve_prompts

        for reward_filename, fitness in zip(rewardfunc_list, fitness_list):
            with open(reward_filename, 'r') as f:
                reward_content = f.read()
            reward_improve_prompts += f"reward function:\n{reward_content}\nfitness: {fitness}\n"

        with open(best_rewardfunc, 'r') as f:
            best_reward_content = f.read()

        reward_improve_prompts += f"This is best reward function, please carefully review it :\n{best_reward_content}\nbest fitness: {max(fitness_list)}"

        # The fitness history assembled above was previously built and then
        # dropped; it is now sent between the task prompt and the format
        # instructions.
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + reward_improve_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        reward_code = self.extract_code(response.choices[0].message.content)
        if not reward_code:
            raise ValueError("The model reply did not contain a python code block; no reward function was written")

        full_code = self._NUMPY_HEADER + self.indent_code(reward_code) + "\n"
        file_name = f"GPTSwimmer_refine_{rewardfunc_index}_{morphology_index}_{iteration}.py"
        file_path = os.path.join(folder_name, "env", file_name)
        self._write_text(file_path, full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        """Ask the model for a better morphology, given earlier results.

        The request lists every parameter set with its fitness, followed by
        ``best_parameter``. The answer is saved as
        ``<folder_name>/assets/GPTSwimmer_refine_<rewardfunc_index>_<morphology_index>_<iteration>.xml``.

        Returns:
            ``(file_path, parameter)``: the written XML file and the parameter
            list taken from the reply.
        """
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            # One entry per line so consecutive entries do not run together.
            morphology_improve_prompts += f"parameter:{parameter_content} \nfitness:{fitness}\n"
        morphology_improve_prompts += f"best parameter:{best_parameter} \nbest fitness:{max(fitness_list)}\n"

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        parameter = json.loads(responses.choices[0].message.content).get('parameters', [])
        print(parameter)

        filename = f"GPTSwimmer_refine_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = self._save_morphology(parameter, filename, folder_name)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

# Output folder of this run: the log file is written here, and the DGA class
# writes generated files to its env/ and assets/ sub-folders.
folder_name = "results/Random_m25_r5"
# logging.basicConfig opens the log file immediately, so the folder has to
# exist before the handler is created.
os.makedirs(folder_name, exist_ok=True)
log_file = os.path.join(folder_name, "parameter.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

# Best (morphology, reward function) combination seen so far; filled in by the
# optimization loops.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Number of candidate morphologies and reward functions in this run.
morphology_nums = 25
rewardfunc_nums = 5

# Result tables indexed [reward function][morphology]; None marks a pair that
# has not been evaluated yet.
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()


# print configuration info

In [4]:
# Mark the beginning of the run in the log file.
logging.info("start!")

In [17]:
# Ask the LLM for a diversified batch of morphologies; the call returns the
# XML paths, the material value of each design and the raw parameters.
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(
    morphology_nums,
    folder_name,
)

In [22]:
# Ask the LLM for a diversified batch of reward functions; the call returns
# the paths of the reward modules it wrote.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(
    rewardfunc_nums,
    folder_name,
)

initial Saved: results/Div_m50_r10\env\GPTrewardfunc_0.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_1.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_2.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_3.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_4.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_5.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_6.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_7.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_8.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_9.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_10.py


In [5]:
# Display the morphology parameters. parameter_list must already exist in the
# kernel (it is assigned by the generation cell or by the hard-coded list in
# the coarse-stage section); otherwise this cell raises NameError.
parameter_list

NameError: name 'parameter_list' is not defined

# enter coarse optimization stage

In [5]:
# Rebuild the paths of already generated artifacts from the run configuration
# instead of repeating the folder name and the counts. With the values of the
# configuration cell this yields exactly the same strings as before.
morphology_list = [f'{folder_name}/assets/GPTSwimmer_{i}.xml' for i in range(morphology_nums)]
rewardfunc_list = [f'{folder_name}/env/GPTrewardfunc_{i}.py' for i in range(rewardfunc_nums)]

# Morphology parameters entered as a literal: 25 designs with six values each,
# in the same order as morphology_list (entry j belongs to GPTSwimmer_<j>.xml).
parameter_list = [[1.0144399243114082, 1.0233782550263117, 1.0062631678998168, 0.10420200419476251, 0.183054265984641, 0.16234726494920265], [1.0945829331719297, 1.0222600644891968, 0.8802812039034764, 0.09605496479462078, 0.2683230264707348, 0.16179020397908633], [1.1689270786218349, 0.8412526865847495, 1.0546965812075615, 0.09963602225597848, 0.06119670941146188, 0.022747153648571022], [0.9334688664624419, 0.9167617749928556, 0.8474668275507773, 0.026400471523678817, 0.15674548040564024, 0.18647021646389367], [0.920381209267056, 1.2018157040214408, 1.004890643156458, 0.010245880978781804, 0.05477331953436708, 0.042254786913008685], [0.9528733186569758, 0.9487505947226735, 1.1447076361220043, 0.1660659775129264, 0.0007885081242811204, 0.17455172842182476], [1.0018720088639024, 0.8729771796985011, 1.0030680685482543, 0.13043835561374031, 0.029311299692105844, 0.05158943677733588], [1.0075454600152547, 1.0922715637996954, 0.9436036757952628, 0.20393876278196488, 0.07521381540267114, 0.025346200277526806], [0.9239915848510509, 1.1267643473420181, 0.8531113368275242, 0.1593316134945268, 0.21256357998909295, 0.07172557144504633], [0.9240577630335389, 0.996325636238215, 1.0903478414286225, 0.17905058925143785, 0.09740132195715892, 0.07781319927748256], [1.2264413348022578, 0.8959116108289008, 1.0731232502233516, 0.12129047644026206, 0.039532558517465705, 0.07303871741931162], [0.9499589294960672, 1.0443635215900973, 0.8959002259755786, 0.20403373416011142, 0.0018362510980602514, 0.12576967199620268], [0.9116067227516949, 1.1008325693953882, 0.9670844040244482, 0.16000577362239998, 0.048067423820788435, 0.024990005027512446], [1.1435330653125209, 0.9701601239940641, 1.0680206993650994, 0.22978829773292073, 0.00619220284640265, 0.18848830485276405], [1.094319734153812, 1.0412580934693654, 0.8481191886035822, 0.12076631842278668, 0.022131163476071358, 0.09056615092846423], [0.8205926817966938, 1.137441656291318, 0.9724089211312202, 0.04444775314596752, 0.19570067527786442, 
0.1703832543635666], [1.1522554232662365, 1.0566133103477378, 1.0328877152036107, 0.0049864107109263145, 0.24660735622692018, 0.08086625251505411], [1.0472874946660489, 0.9780711160720507, 1.1023302483715605, 0.190922592270712, 0.1616373054036183, 0.08781351073064507], [0.8199628148587682, 1.1439104676348089, 1.2004244342250299, 0.14345727574984315, 0.1666556587702397, 0.15659119482424555], [0.9947790440687821, 0.9952939408070111, 0.9549028547047151, 0.16694855450481483, 0.05272649263264406, 0.11055080633205136], [0.8271383021829753, 0.862642663686985, 1.0146629103380085, 0.16150097457081955, 0.13807823036301176, 0.13106819051726928], [1.1376800727182739, 0.9770175291501374, 1.0111093441721482, 0.08717935958635, 0.141081074528017, 0.12767213566874297], [0.9990334237819416, 1.1047341461379017, 1.0218951787947308, 0.007790125429853925, 0.12743561192374644, 0.15952101598790708], [1.1269958577981947, 1.0195541053819188, 1.0532618235045816, 0.17896279128738274, 0.014357586612157872, 0.1730617998289075], [1.015270811381478, 0.9629183319787823, 0.9372233577607516, 0.21040680872775808, 0.2828345508762671, 0.0829992328360415]]

# One compute_swimmer_volume result per design, in the same order as
# parameter_list; the coarse loop uses it as the material cost.
material_list = [compute_swimmer_volume(parameter) for parameter in parameter_list]

[0.2740134811589153,
 0.4377113600542984,
 0.05322074623669683,
 0.2087476386621088,
 0.018276324365335432,
 0.23358854650898325,
 0.0742716052092857,
 0.19034453537951646,
 0.30614093006505316,
 0.1733921679478914,
 0.08843131662542073,
 0.21268328378730691,
 0.10089859628490508,
 0.38789327284307085,
 0.0841315594891473,
 0.2831166980737967,
 0.28821939171893923,
 0.27659181970173197,
 0.2931389573071657,
 0.15822551961850215,
 0.2123092251158495,
 0.16328938547978852,
 0.16392167020993953,
 0.2588928518456206,
 0.5396692335367926]

In [8]:
# Write the run configuration to the log before the coarse stage begins.
for _label, _value in (
    ("folder_name", folder_name),
    ("morphology_nums", morphology_nums),
    ("rewardfunc_nums", rewardfunc_nums),
    ("parameter_list", parameter_list),
    ("morphology_list", morphology_list),
    ("material_list", material_list),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [9]:
# Coarse stage: train and evaluate every (reward function, morphology) pair
# and remember the pair with the highest fitness.
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):

        print(i, rewardfunc)
        print(j, morphology)
        # Put the candidate files under the fixed names used for training.
        shutil.copy(morphology, "GPTSwimmer.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # GPTrewardfunc.py only exists after the copy above, so the import has
        # to stay inside the loop; reload picks up the new file contents on
        # every later iteration.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        GPTSwimmerEnv._get_rew = GPTrewardfunc._get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Previously never updated, so the stage summary always logged
            # "Reward: None".
            best_reward = reward

0 results/Random_m25_r5/env/GPTrewardfunc_0.py
0 results/Random_m25_r5/assets/GPTSwimmer_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
1 results/Random_m25_r5/assets/GPTSwimmer_1.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
2 results/Random_m25_r5/assets/GPTSwimmer_2.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79


array([[2490.827166969521, 4927.090190898689, 12864.936602454682,
        1324.404200715939, 21931.145365424352, 1580.9066859467548,
        610.2646656733313, 48.97118468739633, 13878.317918336126,
        3143.3351478849804, 1820.7228767113256, 5715.801254726394,
        2090.2517466843497, 3043.2168056297496, 4722.356797231127,
        1492.8134809229687, 235.04655180362352, 878.5020869527466,
        1617.2352618062444, 6978.065356984796, 10254.391114385744,
        3865.52761824445, 4989.111293599648, 1580.9962703747324,
        1867.3783833475068],
       [2530.1733817999525, 5087.527742382322, 14423.892038881017,
        833.5960685932044, 21286.47983375307, 1609.6318542863833,
        564.2726861099007, 48.43284886565554, 13925.193418189261,
        2889.0175399511113, 1839.985890462754, 5384.99995472673,
        2105.6067317404554, 2876.1343322828907, 4780.561422165743,
        1517.805311727424, 228.29442591302646, 879.4602549274284,
        1729.47861344265, 4704.61704840627

In [25]:
# Efficiency values entered as a literal: 5 rows x 25 columns. The coarse loop
# fills this table as [reward function index][morphology index]. No dtype is
# given, so NumPy infers it from the float entries.
efficiency_matrix = np.array([
    [9.349223536678995, 4.29909915652142, 365.8373591600463, 28.659244524947642, 1836.2982977648405, 1.845201284704441,
     105.28435513297111, 39.12737597649926, 16.853122267652985, 10.370812680765804, 24.027893676072296, 4.86465167166655,
     119.57161348303467, 2.122156082074609, 29.92841745813013, 21.257791877661983, 27.897911383201475, 6.266116467512521,
     5.8107278499853825, 27.161404349309162, 3.178392755071865, 14.736761261671484, 44.32340502568701, 1.326316137526951,
     4.8347539751428625],
    [9.449208588698344, 5.0775171994426875, 519.9820048456195, 26.188019260082026, 1843.8368161467386, 2.7013814684045676,
     138.6655575405364, 42.54464628062717, 3.663206985602655, 13.051354613079315, 16.72221790065421, 4.121827906041355,
     124.48933467850117, 2.6792824431175792, 25.514679613562972, 20.007426808853474, 31.59893716776505, 7.668350023426346,
     6.003041729356452, 5.124709177439552, 4.131397247854335, 13.1256010479311, 47.35743193384867, 2.2893433551180085,
     2.5475968444002066],
    [10.314883130386013, 4.164614558789165, 577.5728206857841, 30.259596342933918, 1810.7287848183119, 2.8657670696713953,
     118.02853671874196, 30.06025691434022, 16.697833580358886, 10.272847418671857, 11.219919594495217, 3.1396332101150968,
     128.87661926729245, 1.894370783876984, 7.329067691636401, 20.71234934488688, 21.926664955249926, 5.768770476654584,
     5.984449790806349, 7.241902567888667, 2.854016806300829, 14.356059630290966, 45.04111494425229, 1.843389759746006,
     3.7361197712920906],
    [9.263337251589837, 4.5960928235602285, 614.2637116768304, 29.780968526394226, 1865.0601422421194, 3.0163512310276452,
     120.1516952341844, 40.661427070316336, 16.974466026503865, 27.093301931484262, 7.743023391582513, 3.2398140191794447,
     113.01049368920917, 1.766742338986215, 14.141150823435535, 20.723610085169472, 28.84001332898076, 4.575223565078694,
     6.828652328300931, 11.346153954122096, 4.421779240721017, 10.279530448595457, 46.917321328272074, 1.1894523184968604,
     3.5667107287849555],
    [10.694395360188027, 5.06466858892148, 630.4318368930936, 27.56650118697921, 1844.7936751539266, 3.695209992099368,
     102.196749135151, 44.65820106104261, 18.011980733739502, 24.317710596350114, 15.314897603972492, 4.030420794141582,
     93.71792080990676, 3.240101342724979, 7.429191947467709, 21.991497198819964, 26.706063720595143, 4.144759229949674,
     4.17191948200274, 3.9558595944588393, 3.242045042209685, 29.756220791661303, 45.42747402291198, 1.1976655815091686,
     2.492746561494977]
])


# Mean and standard deviation over every entry of the efficiency table.
mean = efficiency_matrix.mean()
std = efficiency_matrix.std()

for _caption, _statistic in (("平均值：", mean), ("标准差：", std)):
    print(_caption, _statistic)


平均值： 116.43491650885679
标准差： 367.972331009723


In [6]:
# Fitness values entered as a literal: 5 rows x 25 columns stored with
# dtype=object. Later cells index it as [rewardfunc_index][morphology_index].
fitness_matrix = np.array([[41.08211272784495, 48.541862849137985, 42.55845900608606,
        4.709955990323206, 63.203097217867956, 31.110562098761328,
        26.063735587161254, 1.7540667902059868, 66.76253310015595,
        39.19747868947427, 29.454030197474957, 28.057373798793382,
        23.410355129928647, 16.90942178418347, 30.620929014618174,
        25.725181349309633, 9.599466765172394, 26.908983099706994,
        11.069139571181273, 37.99854736040631, 64.36580934439742,
        39.36247054765333, 46.77048607753694, 27.794471717754437,
        18.886354889981657],
       [41.731064069998666, 50.12249915134245, 47.7156349085935,
        2.9645034307940388, 61.34524357674144, 31.675842859081598,
        24.09946850453609, 1.7347844919948756, 66.98803068055051,
        36.0261308858354, 29.765650046923852, 26.433556714610727,
        23.582327550833632, 15.98103968227733, 30.998342193031625,
        26.155857644715525, 9.323705186950223, 26.938332291041107,
        11.837387305164581, 25.618649952525846, 62.821165686566125,
        40.71940122081457, 46.640800404187985, 27.065851627213338,
        20.4593909154512],
       [41.774242540890235, 47.67305339678581, 48.66260124747682,
        7.831618957373732, 62.85958762635973, 28.980167955530575,
        23.477865290181036, 4.213472180350194, 67.92629360462526,
        37.85648805810652, 29.553784477983115, 27.768699471882975,
        23.606947950765875, 16.93595761592918, 30.77084217816421,
        20.904907844543715, 5.77766922099816, 26.877082919317797,
        12.272936256687423, 39.197222009011924, 62.2640334640326,
        41.23114010062252, 46.57555321407236, 27.88767069343342,
        19.363407193306802],
       [35.92815440219731, 49.40067617026897, 42.81902227843056,
        6.002201485044074, 62.36103489963388, 28.891538520445803,
        24.245641980822082, 0.9473135180531514, 66.6518356086019,
        38.339263693211194, 29.413446096063304, 27.568998384498222,
        23.58994358904029, 16.975427312275702, 30.51316754629407,
        19.79278621967225, 9.009560845134617, 26.739856487630632,
        14.430727745471843, 52.96737419090613, 69.76656855345816,
        41.32132634215914, 47.057737039621934, 27.717048692632634,
        23.714887375682675],
       [41.443401773577904, 49.2611018801321, 48.46061123165536,
        6.510686623788556, 62.97493417720282, 30.507490346658628,
        29.074708311898792, 2.858917127040062, 67.05103434823346,
        38.9139396068946, 29.77861726344957, 26.542755505196236,
        23.567857862489696, 14.894435671020679, 29.414718757148815,
        22.78715553275213, 7.362335798947921, 26.962760368117518,
        12.427085868889266, 37.576687631299905, 64.59699529237916,
        41.43119789921135, 46.66176743651749, 27.988435977082354,
        20.509946869369934]], dtype=object)

In [11]:
# Find the (row, column) positions that still hold None, i.e. pairs without a
# result. `== None` is deliberate: on a NumPy array it compares element by
# element, whereas `is None` would test the array object itself.
none_coords = np.argwhere(efficiency_matrix == None)
print(none_coords)

[]


In [21]:
# Table used by the fine stage. This binds a second name to the same array
# object; it is not a copy.
efficiency_matrix_select = efficiency_matrix
efficiency_matrix_select

array([[9.349223536678995, 4.29909915652142, 365.8373591600463,
        28.659244524947642, 1836.2982977648405, 1.845201284704441,
        105.28435513297111, 39.12737597649926, 16.853122267652985,
        10.370812680765804, 24.027893676072296, 4.86465167166655,
        119.57161348303467, 2.122156082074609, 29.92841745813013,
        21.257791877661983, 27.897911383201475, 6.266116467512521,
        5.8107278499853825, 27.161404349309162, 3.178392755071865,
        14.736761261671484, 44.32340502568701, 1.326316137526951,
        4.8347539751428625],
       [9.449208588698344, 5.0775171994426875, 519.9820048456195,
        26.188019260082026, 1843.8368161467386, 2.7013814684045676,
        138.6655575405364, 42.54464628062717, 3.663206985602655,
        13.051354613079315, 16.72221790065421, 4.121827906041355,
        124.48933467850117, 2.6792824431175792, 25.514679613562972,
        20.007426808853474, 31.59893716776505, 7.668350023426346,
        6.003041729356452, 5.1247091774395

# print coarse optimization info

In [13]:
# Close the coarse stage in the log: the best combination found so far,
# followed by the complete result tables.
logging.info('_________________________________end coarse optimization stage_________________________________')
logging.info(
    f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, "
    f"best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, "
    f"Material cost: {best_material}, Reward: {best_reward}"
)
for _label, _value in (
    ("folder_name", folder_name),
    ("parameter_list", parameter_list),
    ("fitness_matrix", fitness_matrix),
    ("efficiency_matrix", efficiency_matrix),
):
    logging.info(f'{_label}:{_value}')
logging.info('_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [14]:
# Collect every non-None entry of the table together with its (row, column)
# position.
all_values_with_coords = [
    ((row, col), efficiency_matrix_select[row][col])
    for row in range(len(efficiency_matrix_select))
    for col in range(len(efficiency_matrix_select[0]))
    if efficiency_matrix_select[row][col] is not None
]

# Order the entries from the largest value to the smallest.
sorted_values = sorted(all_values_with_coords, key=lambda entry: entry[1], reverse=True)

# Keep the top 5 % of the entries, but always at least one.
top_k = max(1, int(len(sorted_values) * 0.05))

efficiency_coarse_best = [position for position, _ in sorted_values[:top_k]]




In [15]:
# Log the selected starting points and their values.
# NOTE(review): the label says "fitness_coarse_best", but the logged entries
# were selected from efficiency_matrix_select, i.e. they are efficiency values.
# Confirm before renaming, in case the log is parsed elsewhere.
logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")

In [16]:
# (rewardfunc_index, morphology_index) pairs the fine stage starts from.
coarse_best = efficiency_coarse_best
coarse_best

[(3, 4), (4, 4), (1, 4), (0, 4), (2, 4), (4, 2)]

# enter fine optimization stage

In [17]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_swimmer_volume(parameter)
    
    
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0

    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()

        # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )

        shutil.copy(improved_morphology, "GPTWalker.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTSwimmerEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_swimmer_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material

        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break
            
        
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTWalker.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTSwimmerEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_swimmer_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material


        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
        
            
    # Store the final optimum reached from the current coarse_best candidate.
    final_optimized_results.append(
        {
            "best_morphology": best_morphology,
            "best_parameter": best_parameter,
            "best_rewardfunc": best_rewardfunc,
            "best_fitness": best_fitness,
            "best_material": best_material,
            "best_efficiency": best_efficiency,
            "best_iteration": iteration,
        }
    )

    # Emit a field-by-field summary of that result, closed by a separator line.
    for _summary_line in (
        f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}",
        f"  Morphology: {best_morphology}",
        f"  Parameter: {best_parameter}",
        f"  Rewardfunc: {best_rewardfunc}",
        f"  Fitness: {best_fitness}",
        f"  Material: {best_material}",
        f"  Efficiency: {best_efficiency}",
        "____________________________________________",
    ):
        logging.info(_summary_line)

[0.95, 1.22, 1.05, 0.01, 0.05, 0.04]
Successfully saved GPTSwimmer_refine_3_4_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[0.93, 1.21, 1.0, 0.01, 0.055, 0.043]
Successfully saved GPTSwimmer_refine_4_4_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
[0.925, 1.215, 1.01, 0.008, 0.05, 0.04]
Successfully saved GPTSwimmer_refine_1_4_0.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85


In [18]:

# Write the full list of per-candidate results to the log in a single record.
logging.info(f"{final_optimized_results}")

# Disabled summary log kept for reference. NOTE(review): it reads
# `improved_material_efficiency`, a name the visible refinement loop never
# assigns (it uses `improved_efficiency`) — fix the name or delete this line.
# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [20]:
# Show the collected results as this cell's output (bare last expression).
final_optimized_results

[{'best_morphology': 'results/Random_m25_r5/assets/GPTSwimmer_4.xml',
  'best_parameter': [0.920381209267056,
   1.2018157040214408,
   1.004890643156458,
   0.010245880978781804,
   0.05477331953436708,
   0.042254786913008685],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 34.08644412047561,
  'best_material': 0.018276324365335432,
  'best_efficiency': 1865.0601422421194,
  'best_iteration': 0},
 {'best_morphology': 'results/Random_m25_r5/assets/GPTSwimmer_4.xml',
  'best_parameter': [0.920381209267056,
   1.2018157040214408,
   1.004890643156458,
   0.010245880978781804,
   0.05477331953436708,
   0.042254786913008685],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_4.py',
  'best_fitness': 33.71604759423241,
  'best_material': 0.018276324365335432,
  'best_efficiency': 1844.7936751539266,
  'best_iteration': 0},
 {'best_morphology': 'results/Random_m25_r5/assets/GPTSwimmer_4.xml',
  'best_parameter': [0.920381209267056,
   1.2