In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTHopper import GPTHopperEnv
import os

In [None]:
import prompts
class DGA:
    def __init__(self):
        self.client = OpenAI(api_key=api_key)
        # self.model = "gpt-3.5-turbo"
        self.model = "gpt-4-turbo"

    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTHopper_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")
        messages.append({"role": "assistant", "content": initial_code})

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]
            # print(diverse_messages)
            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )
            diverse_code = self.extract_code(response.choices[0].message.content)
            messages.append({"role": "assistant", "content": diverse_code})

            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files


    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums                                                                                                                                                                                                                                                                                   
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_hopper_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = hopper_design(parameter)  
            filename = f"GPTHopper_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_hopper_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = hopper_design(initial_parameter['parameters'])  

        filename = f"GPTHopper_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_hopper_volume(diverse_parameter['parameters'])) 
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = hopper_design(diverse_parameter['parameters'])  
            filename = f"GPTHopper_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        reward_improve_prompts = prompts.reward_improve_prompts

        for reward_filename, fitness in zip(rewardfunc_list, fitness_list):
            with open(reward_filename, 'r') as f:
                reward_content = f.read()
            reward_improve_prompts += f"reward function:\n{reward_content}\nfitness: {fitness}\n"
            
        with open(best_rewardfunc, 'r') as f:
            best_reward_content = f.read()

        reward_improve_prompts += f"This is best reward function, please carefully review it :\n{best_reward_content}\nbest fitness: {max(fitness_list)}"
        # print(reward_improve_prompts)
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        # print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTHopper_refine_{rewardfunc_index}_{morphology_index}_{iteration}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}"  

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        # print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = hopper_design(parameter)  
        filename = f"GPTHopper_refine_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:
# Experiment folder.  The generators in DGA write into <folder_name>/env and
# <folder_name>/assets, and logging.basicConfig cannot open its log file inside
# a missing directory, so the tree is created before logging is configured.
folder_name = "results/Random_m25_r5"
for _subdir in ("env", "assets"):
    os.makedirs(os.path.join(folder_name, _subdir), exist_ok=True)

log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# Alternative setup that was used before: folder_name = setup_logging(div_flag=True)

# Best-so-far trackers, updated by the coarse optimization loop.
best_fitness = float('-inf')
best_morphology = None
best_rewardfunc = None
best_reward = None
best_material = None
best_efficiency = None

# Size of the search grid: columns are morphologies, rows are reward functions.
morphology_nums = 25
rewardfunc_nums = 5

# Object-dtype grids pre-filled with None; a cell stays None until the
# corresponding (reward function, morphology) pair has been evaluated.
fitness_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
efficiency_matrix = np.full((rewardfunc_nums, morphology_nums), None, dtype=object)
fitness_list = []
designer = DGA()


# print configuration info

In [4]:
# Mark the beginning of the run in the log file.
logging.info("start!")

In [5]:
# Request `morphology_nums` designs one after another (each request sees the
# earlier replies).  The call returns the saved XML paths, the
# compute_hopper_volume result per design, and the raw parameter lists.
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

In [8]:
# Request `rewardfunc_nums` reward functions one after another; the call
# returns the paths of the reward-function files it managed to save.
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)

initial Saved: results/Div_m50_r10\env\GPTrewardfunc_0.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_1.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_2.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_3.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_4.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_5.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_6.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_7.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_8.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_9.py
Saved: results/Div_m50_r10\env\GPTrewardfunc_10.py


In [5]:
parameter_list = [[1.5271825670144064, 0.9526328520851862, 0.616022778379654, 0.19324518282609637, 0.03846675946914521, 0.15796568764161265, 0.067655052646565, 0.043277145759263384, 0.20400044117876676, 0.03442606703218554], [1.4174745110171612, 1.0384847446836305, 0.5861766973940479, 0.043962413492556356, -0.0859962293988614, 0.25686528826115285, 0.06938149032132647, 0.002801735594339376, 0.12007967805565226, 0.04585860140965338], [1.2481751801227547, 1.0738671881061892, 0.5763173806248475, 0.19622855138136808, -0.07779101543365519, 0.29212141406365055, 0.04367104473167603, 0.03675136419924517, 0.013289051941235655, 0.28942099768316987], [1.4461911333132214, 1.0462469755670856, 0.5273002794001652, 0.06700728742372054, 0.22125991450221516, 0.22301210193568807, 0.015522205740864853, 0.27019598440087084, 0.10497269938550569, 0.09629435465509542], [1.5545150512527486, 1.1331118737412664, 0.6938744046872379, -0.17153466375416124, 0.008857937382087594, 0.26238277839488633, 0.033163257856314604, 0.0814943947937221, 0.026768560327248024, 0.08518106098808217], [1.47104009872269, 0.9004743841555727, 0.5228723056306612, 0.06884478795840356, -0.1809953935926438, 0.3764299744820329, 0.05870528304190065, 0.11963277641838861, 0.1610653270892352, 0.06233169925115834], [1.2988907077482352, 0.774524650618551, 0.7055852989148749, 0.10361782258476164, -0.29398419355827343, 0.28186107899408036, 0.02530117253829688, 0.07253399444756263, 0.0192344549989737, 0.044880584463853276], [1.5885624827271925, 0.9432754653881806, 0.6018294151997702, 0.04456725160913599, -0.15976922401611565, 0.12320552581245978, 0.05587577533975029, 0.14188334902248834, 0.0899899067589248, 0.014914773092226441], [1.7447985826795753, 0.9670749997324908, 0.6148589835146091, -0.03154747976171446, -0.2223833446288399, 0.309904893434219, 0.09203906143947962, 0.12253865787998115, 0.08330325419505111, 0.037205285952354505], [1.215715788126377, 1.2198685645784608, 0.7642151983110639, 0.22015668752595885, 
-0.03310585027517919, 0.2965135620337498, 0.20425582188064967, 0.051352939727060974, 0.059595702656737456, 0.009257296862535846], [1.3950030526011548, 1.0849872037879384, 0.7688612632241335, 0.08728668945957167, -0.08475202636323328, 0.21371817355441258, 0.01845014393341657, 0.20789341737973216, 0.037812479099622716, 0.13710213469457816], [1.4578594040913966, 1.0688716589491816, 0.58241314543185, 0.21906474181088884, -0.03977767916112407, 0.3945631962032309, 0.09161785519597034, 0.037830998003657954, 0.02327792992474648, 0.09047268931040245], [1.4982991169832247, 1.0453796571616947, 0.4315083322087102, 0.15273789865398063, -0.0322860429585196, 0.07059061009167084, 0.03137857538474707, 0.06259918769044713, 0.13883813846254534, 0.04342689292390188], [1.2143621925910955, 1.0715236394581031, 0.582475467937073, 0.056482126314976004, -0.12092157494853624, 0.4362637512459009, 0.006715313460752312, 0.10058090193951326, 0.04665681316307723, 0.13389974871676028], [1.3813038564618851, 1.1004351998977944, 0.4835219058125005, 0.001676006262414903, -0.36043534626581625, 0.4768992508669653, 0.13133618727105287, 0.12904737086957818, 0.07832204718437677, 0.08208156944278389], [1.4287762161823698, 1.3385194989062315, 0.37779940578581284, 0.10684539363621005, 0.012000125737944373, 0.35230976687504506, 0.27764964186834573, 0.1301755381553104, 0.09987641155357971, 0.035469438039892595], [1.3023581461973917, 0.9192571059837862, 0.7186454904902544, 0.11561850851270418, -0.0779787310220604, 0.2966352225313652, 0.06338402726154217, 0.1748434510593924, 0.007816279960050375, 0.08669670362077046], [1.5723355691844012, 1.0894819559358164, 0.8237433978665937, 0.04334843812589682, -0.08616066218822774, 0.26320549855303793, 0.06681812320050902, 0.10836617657116696, 0.016369238176964195, 0.18424304780386297], [1.3577956011199732, 1.0028355202479395, 0.7269386296195454, 0.06414689506174191, -0.12204313585094179, 0.14571470996749097, 0.01589369443296939, 0.02688088193079638, 0.15630218577728666, 
0.011357352409445798], [1.3104121163126903, 1.104930586765291, 0.7114555891602848, 0.07637314251573318, -0.04203022760643926, 0.3835771555828539, 0.12578143757708982, 0.03045042384695839, 0.029756626961873166, 0.11867313628298307], [1.3168346138235174, 0.9908495817550473, 0.5624303901520578, 0.23162832354714324, -0.005919184782258627, 0.23629675839097278, 0.04382031636734961, 0.2104964489086319, 0.22365618464138634, 0.09287085163511745], [1.4972253037463366, 0.9971161867611497, 0.7087558179988409, 0.2194789700398692, -0.15642616210738078, 0.45610529049369397, 0.11930778407727084, 0.057056635559475405, 0.013148459745135413, 0.0570945123584682], [1.4305735282435468, 1.1146287560659305, 0.561291239163021, -0.019312794041788026, -0.16254934700034074, 0.30913517177195965, 0.0037933120247948676, 0.00527958421365575, 0.022633781583714572, 0.008423531383942112], [1.44078850029609, 1.1812654266079294, 0.5837586034937173, 0.09569531348523933, -0.1425596817307759, 0.39599071694212706, 0.0647500415659675, 0.2170537004160374, 0.03439715543792103, 0.210157549689662], [1.4953773042081877, 1.080628698173189, 0.7178157958316235, 0.1525741306107714, 0.0018643370474073773, 0.2784579137689837, 0.06815975329444604, 0.053132980508422564, 0.04387148514608374, 0.10456826572024583]]

# The hard-coded parameter_list must match the configured grid width, otherwise
# material_list[j] and morphology_list[j] would describe different designs.
assert len(parameter_list) == morphology_nums, (
    f"parameter_list has {len(parameter_list)} entries, expected {morphology_nums}"
)

# Rebuild the artefact paths from the configuration cell instead of repeating
# the folder name and the counts, so changing the experiment folder or grid
# size cannot leave these lists pointing at another run.
morphology_list = [f'{folder_name}/assets/GPTHopper_{i}.xml' for i in range(morphology_nums)]
rewardfunc_list = [f'{folder_name}/env/GPTrewardfunc_{i}.py' for i in range(rewardfunc_nums)]

# One compute_hopper_volume result per design, index-aligned with morphology_list.
material_list = [compute_hopper_volume(parameter) for parameter in parameter_list]
material_list

[0.10333100156528172,
 0.04162533259220436,
 0.20282674010291132,
 0.2265398125691865,
 0.023436756998529117,
 0.09349831036936306,
 0.008612441996509717,
 0.0580598844493239,
 0.06733023484071467,
 0.04763110967748263,
 0.1126259581762171,
 0.030835293026513517,
 0.039159641062597624,
 0.06528935278597184,
 0.09732516657394966,
 0.18609663908833657,
 0.05925080850497368,
 0.08728680954301027,
 0.06798525087141495,
 0.0475214976620529,
 0.20978988047293876,
 0.04053277582477646,
 0.0011542248815622845,
 0.251414574592124,
 0.02928829722749169]

# enter coarse optimization stage

In [6]:
# Write the run configuration to the log, one "name:value" line per setting,
# then mark the start of the coarse optimization stage.
for _name, _setting in (
    ("folder_name", folder_name),
    ("morphology_nums", morphology_nums),
    ("rewardfunc_nums", rewardfunc_nums),
    ("parameter_list", parameter_list),
    ("morphology_list", morphology_list),
    ("material_list", material_list),
):
    logging.info(f'{_name}:{_setting}')
logging.info('_________________________________enter coarse optimization stage_________________________________')

In [None]:
# Coarse stage: train and evaluate every (reward function, morphology) pair.
# Rows of the result matrices index reward functions, columns index morphologies.
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        print(i, rewardfunc)
        print(j, morphology)

        # Stage the candidate under fixed file names in the working directory.
        # Presumably the environment loads "GPTHopper.xml" from there -- TODO confirm.
        shutil.copy(morphology, "GPTHopper.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")

        # The import has to stay here: the module file only exists after the
        # copy above, and the reload picks up the file that was just written.
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)
        from GPTrewardfunc import _get_rew
        GPTHopperEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # To re-evaluate a stored model instead of training, point model_path at
        # e.g. f"results/Div_m25_r5/coarse/SAC_morphology{j}_rewardfunc{i}_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency

        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material
            # Was never updated, so the Stage1 summary always logged "Reward: None".
            best_reward = reward

0 results/Random_m25_r5/env/GPTrewardfunc_0.py
0 results/Random_m25_r5/assets/GPTHopper_0.xml




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
1 results/Random_m25_r5/assets/GPTHopper_1.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
2 results/Random_m25_r5/assets/GPTHopper_2.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

In [8]:
# Display the efficiency grid (fitness / material per reward-function row and
# morphology column) filled in by the coarse optimization loop.
efficiency_matrix

array([[-1.3026836451079569, 58.27152097670469, 3.9777515948039515,
        5.047467476710709, 24.190929220769725, 20.339405743469687,
        137.70115017115242, 34.15604804655244, 9.485411806603304,
        66.77366612648956, 15.545310624221452, 75.29998987079539,
        67.00229923583866, 41.607637916343876, 26.241685150759217,
        2.5830424016905584, 9.328819301841792, 34.519060068201846,
        31.395733070217062, 41.25298791121419, 6.495298994697599,
        65.72060682849106, 2135.746406435666, 2.47913359535999,
        -7.41921029073993],
       [-0.8875619402384343, 36.741015472620916, 3.9023539784420125,
        4.888167891056545, 3.7610803711389162, 21.192461102897457,
        132.16348654482107, 25.498080134345198, 27.260829530216693,
        62.74417067517929, 20.841661759825175, 59.15669869740726,
        62.68458903331753, 40.68793218733947, 27.323167071734552,
        3.9112560980508326, 9.330631829855335, 29.10131086595204,
        30.271199231922083, 28.89036832

In [22]:
efficiency_matrix = np.array([[-1.3026836451079569, 58.27152097670469, 3.9777515948039515,
        5.047467476710709, 24.190929220769725, 20.339405743469687,
        137.70115017115242, 34.15604804655244, 9.485411806603304,
        66.77366612648956, 15.545310624221452, 75.29998987079539,
        67.00229923583866, 41.607637916343876, 26.241685150759217,
        2.5830424016905584, 9.328819301841792, 34.519060068201846,
        31.395733070217062, 41.25298791121419, 6.495298994697599,
        65.72060682849106, 2135.746406435666, 2.47913359535999,
        -7.41921029073993],
       [-0.8875619402384343, 36.741015472620916, 3.9023539784420125,
        4.888167891056545, 3.7610803711389162, 21.192461102897457,
        132.16348654482107, 25.498080134345198, 27.260829530216693,
        62.74417067517929, 20.841661759825175, 59.15669869740726,
        62.68458903331753, 40.68793218733947, 27.323167071734552,
        3.9112560980508326, 9.330631829855335, 29.10131086595204,
        30.271199231922083, 28.890368325509694, 5.905623542576491,
        33.81638631811338, 497.3312676559088, 3.0027545286394255,
        96.94807752507384],
       [-0.6662744736137396, 49.31127017321401, 3.8608475043721615,
        4.683469543600267, 25.9546425979141, 19.632206762735642,
        437.00939818904976, 34.0727990197243, 25.349649299973432,
        39.47982304453591, 19.522922297885057, 61.041427100949385,
        58.53392465227802, 41.50482324494038, 25.728167649599857,
        -0.2813065999534791, 19.110638443762042, 33.37578846953706,
        32.805312608901794, 1.0065314593583103, 6.52925914579638,
        42.871093477019684, 2290.9913900357074, 9.430679135803086,
        93.32568199019086],
       [12.150655944692584, 65.52584853973428, 7.7252783445271636,
        4.558255165201852, -1.5545731208039963, 20.566127477919267,
        249.71382569818363, 24.1849037224594, 28.147758602530295,
        58.25967072993117, 20.23797245312459, 24.097827832765283,
        64.3594518929492, 37.586307091984175, 27.33143584942833,
        4.639462662354512, 9.26298758515489, 33.2362813926903,
        30.002142010257774, 22.04085207334053, 5.741512662996715,
        65.29663295318052, 912.381432823406, 9.193998737899722,
        97.76636485205526],
       [16.583691793371173, 63.28903996190426, 3.7583604352385565,
        4.699974493345609, 25.372606636902187, 20.459462288920193,
        367.9947663117034, 23.586707531845573, 24.784464690921695,
        45.71587443176508, 20.052023088165438, 111.23914774023554,
        63.21229934467965, 38.91894372898529, 27.035037765513213,
        2.1735406443906173, 11.296712832496311, 33.44993354141087,
        31.82184616970964, 26.11546587603766, 1.5346657985228427,
        64.33215158195398, 287.69579937126554, 6.2340769840140124,
        98.9353716495589]], dtype=object)


# Restrict the analysis to the configured grid (rows: reward functions,
# columns: morphologies) instead of repeating the sizes as literals.
efficiency_matrix_select = efficiency_matrix[:rewardfunc_nums, :morphology_nums]

# The matrix has dtype=object and may still contain None for pairs that were
# never evaluated; convert the evaluated cells to float before reducing.
efficiency_values = np.array(
    [cell for cell in efficiency_matrix_select.ravel() if cell is not None],
    dtype=float,
)
mean = np.mean(efficiency_values)
std = np.std(efficiency_values)

print("平均值：", mean)
# np.std is the standard deviation; the old label said "variance".
print("标准差：", std)

平均值： 86.39868016406844
方差： 291.6842152039868


In [13]:
fitness_matrix = np.array([[-0.13460760577171718, 2.425571441308946, 0.8067943889132421,
        1.1434523361231088, 0.5669569297157974, 1.9017000709313343,
        1.1859431687017243, 1.98309620282839, 0.6386550044994881,
        3.180503814838431, 1.7508055041998685, 2.3218972525594754,
        2.623785988444201, 2.7165357505111536, 2.553976378478782,
        0.4806955095772779, 0.5527400860309303, 3.0130586217768656,
        2.1344467890706857, 1.9604037685754618, 1.3626479997336085,
        2.6638386236475013, 2.465131643015281, 0.6232903182344748,
        -0.2172960361884561],
       [-0.09171266423606213, 1.5293569888231722, 0.7915017361750201,
        1.1073646378466655, 0.08814752671032049, 1.9814593056893606,
        1.1382503619237638, 1.4804155862796833, 1.8354780542221791,
        2.9885744750521543, 2.347312125684933, 1.8241141388157232,
        2.4547060067011577, 2.6564887587109056, 2.6592317865844217,
        0.7278716144610213, 0.5528474797811707, 2.540160579008291,
        2.0579950739608064, 1.372913570836556, 1.238940057115295,
        1.3706720058361277, 0.5740321235073621, 0.754936252422455,
        2.8394441101882695],
       [-0.06884680867588858, 2.05259802150408, 0.7830831133462662,
        1.060992312580698, 0.6082926515509852, 1.8355781611377653,
        3.7637180938328094, 1.9782627739502294, 1.7067978404969693,
        1.8804677814818964, 2.1987878301991386, 1.8822302914143378,
        2.2921674793683415, 2.709823047158317, 2.503998202139809,
        -0.05235021280470968, 1.132320778839133, 2.9132660914882798,
        2.2302774076313807, 0.04783188239267864, 1.3697724957734647,
        1.7376844212670752, 2.6443192658241776, 2.3710101830427526,
        2.733350313087078],
       [1.255539448440229, 2.727535238852848, 1.56689302300806,
        1.032626270767354, -0.03643415246872831, 1.9228981700263814,
        2.150645839552144, 1.404172715544016, 1.8951951969481113,
        2.774972766311376, 2.2793210390770438, 0.7430635825257906,
        2.5202930351134096, 2.45398566365043, 2.6600365467508316,
        0.8633884086400007, 0.548839503591961, 2.901088963841654,
        2.0397031512471924, 1.047414300272906, 1.204511255303945,
        2.6466537856039776, 1.0530933512402232, 2.311505281489584,
        2.863410352638391],
       [1.7136094826589863, 2.634427337855578, 0.7622959952111954,
        1.0647313408024717, 0.5946516161683436, 1.9129251550797393,
        3.169333579878694, 1.3694415138389515, 1.6687438280411584,
        2.1774978290614264, 2.25837831367626, 3.4300917165897915,
        2.47537095307913, 2.540992647179107, 2.631189553861593,
        0.4044886088429912, 0.6693393687739178, 2.9197379782554815,
        2.163416195039285, 1.241046050571546, 0.32195735443801426,
        2.607560678396879, 0.3320656499552657, 1.5673378129104345,
        2.897648571184636]], dtype=object)

# print coarse optimization info

In [14]:
# Close the coarse stage in the log: a one-line summary of the best pair,
# the full result matrices, then the marker that opens the fine stage.
logging.info('_________________________________end coarse optimization stage_________________________________')
stage1_summary = (
    f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, "
    f"best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, "
    f"Material cost: {best_material}, Reward: {best_reward}"
)
logging.info(stage1_summary)
for _name, _logged in (
    ("folder_name", folder_name),
    ("parameter_list", parameter_list),
    ("fitness_matrix", fitness_matrix),
    ("efficiency_matrix", efficiency_matrix),
):
    logging.info(f'{_name}:{_logged}')
logging.info('_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [15]:
# Collect every evaluated (non-None) efficiency together with its
# (rewardfunc_index, morphology_index) coordinate.
all_values_with_coords = [
    (coord, value)
    for coord, value in np.ndenumerate(efficiency_matrix_select)
    if value is not None
]

# Highest efficiency first.
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# Share of the grid carried into the fine stage.  The old comments said 20 %,
# but the code has always used 10 % (12 of 125 pairs in the recorded run).
TOP_FRACTION = 0.1
top_k = max(1, int(len(sorted_values) * TOP_FRACTION))  # keep at least one pair
efficiency_coarse_best = [coord for coord, _ in sorted_values[:top_k]]

# The ranking is by efficiency, so the log lines are labelled accordingly
# (they used to say "fitness_coarse_best").
logging.info(f"efficiency_coarse_best {efficiency_coarse_best}")
logging.info(f"efficiency_coarse_best values {sorted_values[:top_k]}")


In [16]:
# The fine stage iterates over `coarse_best`; here it is the efficiency-ranked
# list of (rewardfunc_index, morphology_index) pairs.  The bare name displays it.
coarse_best = efficiency_coarse_best
coarse_best

[(2, 22),
 (0, 22),
 (3, 22),
 (1, 22),
 (2, 6),
 (4, 6),
 (4, 22),
 (3, 6),
 (0, 6),
 (1, 6),
 (4, 11),
 (4, 24)]

# enter fine optimization stage

In [17]:
def _train_and_score(morphology_path, rewardfunc_path, parameter,
                     morphology_index, rewardfunc_index, folder_name):
    """Train and evaluate one (morphology, reward function) candidate pair.

    The two candidate files are copied to the fixed names ``GPTHopper.xml`` and
    ``GPTrewardfunc.py`` in the working directory, the reward module is
    re-imported, and its ``_get_rew`` is patched onto ``GPTHopperEnv`` before
    ``Train`` and ``Eva`` are run.

    Args:
        morphology_path: Path of the morphology XML file to evaluate.
        rewardfunc_path: Path of the reward-function Python file to evaluate.
        parameter: Morphology parameters passed to ``compute_hopper_volume``.
        morphology_index: Forwarded unchanged to ``Train``.
        rewardfunc_index: Forwarded unchanged to ``Train``.
        folder_name: Forwarded unchanged to ``Train``.

    Returns:
        Tuple ``(fitness, material, efficiency)`` where ``fitness`` is the first
        value returned by ``Eva``, ``material`` is
        ``compute_hopper_volume(parameter)`` and ``efficiency`` is
        ``fitness / material``.
    """
    shutil.copy(morphology_path, "GPTHopper.xml")
    shutil.copy(rewardfunc_path, "GPTrewardfunc.py")

    # GPTrewardfunc.py was just overwritten on disk, so the module has to be
    # re-executed; a plain `import` would return the cached, stale module.
    import GPTrewardfunc
    importlib.reload(GPTrewardfunc)
    GPTHopperEnv._get_rew = GPTrewardfunc._get_rew

    model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
    fitness, _ = Eva(model_path)
    material = compute_hopper_volume(parameter)
    return fitness, material, fitness / material


final_optimized_results = []  # best result found for each coarse_best candidate

for rewardfunc_index, morphology_index in coarse_best:

    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]

    # Start from the values already measured for this pair in the coarse stage.
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_hopper_volume(parameter)

    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0  # number of accepted improvements; also passed to the designer calls
    print(f"Initial parameter:{parameter}")

    # Alternate between the two stages. The loop ends as soon as a morphology
    # proposal fails to beat the current best efficiency.
    while True:
        # Kept inside the loop: a fresh designer is created for every round.
        designer = DGA()

        # -------- Stage 1: optimize morphology (reward function held fixed) --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration,
        )
        print("improved parameter", improved_parameter)

        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            improved_morphology,
            best_rewardfunc,
            improved_parameter,
            morphology_index,
            rewardfunc_index,
            folder_name,
        )

        # Written as `not (a > b)` so a NaN efficiency is treated as "no improvement".
        if not improved_efficiency > best_efficiency:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break

        best_fitness = improved_fitness
        best_morphology = improved_morphology
        best_parameter = improved_parameter
        best_material = improved_material
        best_efficiency = improved_efficiency
        iteration += 1
        logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- Stage 2: optimize reward function (morphology held fixed) --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index,
            morphology_index,
            iteration,
        )

        improved_fitness, improved_material, improved_efficiency = _train_and_score(
            best_morphology,
            improved_rewardfunc,
            best_parameter,
            morphology_index,
            rewardfunc_index,
            folder_name,
        )
        print("improved_fitness", improved_fitness)

        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            iteration += 1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        else:
            # The previous `if not improved: break` at this point could never fire:
            # the shared flag was already True once the morphology stage had
            # succeeded. The loop therefore keeps going after a failed reward
            # attempt, exactly as before; the failure is now at least logged.
            # NOTE(review): add a `break` here if a failed reward attempt was
            # meant to end the search for this candidate.
            logging.info("Not improved Reward!")

    # Store the final best result for this coarse_best candidate.
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })
    logging.info("____________________________________________")
    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

Initial parameter:[1.4305735282435468, 1.1146287560659305, 0.561291239163021, -0.019312794041788026, -0.16254934700034074, 0.30913517177195965, 0.0037933120247948676, 0.00527958421365575, 0.022633781583714572, 0.008423531383942112]
[1.5, 1.1, 0.6, 0.2, 0.0, -0.2, 0.1, 0.1, 0.1, 0.1]
Successfully saved GPTHopper_refine_2_22_0.xml
improved parameter [1.5, 1.1, 0.6, 0.2, 0.0, -0.2, 0.1, 0.1, 0.1, 0.1]




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Initial parameter:[1.4305735282435468, 1.1146287560659305, 0.561291239163021, -0.019312794041788026, -0.16254934700034074, 0.30913517177195965, 0.0037933120247948676, 0.00527958421365575, 0.022633781583714572, 0.008423531383942112]
[1.475, 1.1, 0.55, 0.02, -0.1, 0.3, 0.005, 0.005, 0.03, 0.01]
Successfully saved GPTHopper_refine_0_22_0.xml
improved parameter [1.475, 1.1, 0.55, 0.02, -0.1, 0.3, 0.005, 0.005, 0.03, 0.01]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
9

In [None]:

# Write the per-candidate results of the fine optimization stage to the log.
logging.info("%s", final_optimized_results)

In [18]:
# Display the list of per-candidate result dicts collected by the fine optimization loop.
final_optimized_results

[{'best_morphology': 'results/Random_m25_r5/assets/GPTHopper_22.xml',
  'best_parameter': [1.4305735282435468,
   1.1146287560659305,
   0.561291239163021,
   -0.019312794041788026,
   -0.16254934700034074,
   0.30913517177195965,
   0.0037933120247948676,
   0.00527958421365575,
   0.022633781583714572,
   0.008423531383942112],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_2.py',
  'best_fitness': 2.6443192658241776,
  'best_material': 0.0011542248815622845,
  'best_efficiency': 2290.9913900357074,
  'best_iteration': 0},
 {'best_morphology': 'results/Random_m25_r5/assets/GPTHopper_22.xml',
  'best_parameter': [1.4305735282435468,
   1.1146287560659305,
   0.561291239163021,
   -0.019312794041788026,
   -0.16254934700034074,
   0.30913517177195965,
   0.0037933120247948676,
   0.00527958421365575,
   0.022633781583714572,
   0.008423531383942112],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitness': 2.465131643015281,
  'best_material