In [1]:
import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTWalker import GPTWalkerEnv
import os

In [None]:
import prompts
class DGA:
    def __init__(self):
        api_key = "<api_key>"
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-4-turbo"

    def extract_code(self, text):
        match = re.search(r'```python\n(.*?)\n```', text, re.DOTALL)
        return match.group(1).strip() if match else None

    def indent_code(self, code):
        return "\n".join(line if line.strip() else line for line in code.split("\n"))

    def generate_rewardfunc(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        responses = self.client.chat.completions.create(
            model=self.model, messages=messages, n=rewardfunc_nums
        )
        files = []
        for i, choice in enumerate(responses.choices):
            reward_code = self.extract_code(choice.message.content)
            if reward_code:
                full_code = self.indent_code(reward_code) + "\n"
                file_name =  f"GPTWalker_{i}.py"
                file_path = os.path.join(folder_name, "env", file_name)
                with open(file_path, "w") as fp:
                    fp.write(full_code)

                with open(file_path, "w") as fp:
                    fp.write(full_code)
                files.append(file_path)
                print(f"Saved: {file_path}")
        return files
    
    def generate_rewardfunc_div(self, rewardfunc_nums, folder_name):

        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + zeroshot_rewardfunc_format}
        ]

        # 生成初始 Reward Function
        response = self.client.chat.completions.create(
            model=self.model, messages=messages, n=1, timeout=10
        )

        rewardfunc_files = []

        initial_code = self.extract_code(response.choices[0].message.content)
        if initial_code:
            reward_code = "import numpy as np\n" + self.indent_code(initial_code) + "\n"

            file_path = os.path.join(folder_name, "env", "GPTrewardfunc_0.py")
            with open(file_path, "w") as fp:
                fp.write(reward_code)
            rewardfunc_files.append(file_path)
            print(f"initial Saved: {file_path}")

        # 生成不同的多样化 Reward Functions
        for i in range(1, rewardfunc_nums):
            diverse_messages = messages + [
                {"role": "user", "content": rewardfunc_div_prompts + zeroshot_rewardfunc_format}
            ]

            response = self.client.chat.completions.create(
                model=self.model, messages=diverse_messages, n=1
            )

            diverse_code = self.extract_code(response.choices[0].message.content)
            if diverse_code:
                reward_code =  "import numpy as np\n" + self.indent_code(diverse_code) + "\n"
                file_path = os.path.join(folder_name, "env", f"GPTrewardfunc_{i}.py")
                with open(file_path, "w") as fp:
                    fp.write(reward_code)
                rewardfunc_files.append(file_path)
                print(f"Saved: {file_path}")

        return rewardfunc_files

    def generate_morphology(self, morphology_nums, folder_name):
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=morphology_nums                                                                                                                                                                                                                                                                                   
        )

        # 解析所有 response 里的参数
        for i, choice in enumerate(responses.choices):
            print(f"Response {i}:")
            print(json.dumps(choice.message.content, indent=4))

        parameter_list = [json.loads(choice.message.content).get('parameters', []) for choice in responses.choices]
        material_list = [compute_walker_volume(parameter) for parameter in parameter_list]

        xml_files = []
        for i, parameter in enumerate(parameter_list):
            if not isinstance(parameter, list):
                print(f"Skipping invalid parameter {i}: {parameter}")
                continue

            xml_file = walker_design(parameter)  
            filename = f"GPTWalker_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            xml_files.append(file_path)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            print(f"Successfully saved {filename}")
            
        return xml_files, material_list, parameter_list
    
    def generate_morphology_div(self, morphology_nums, folder_name):

        material_list = []
        xml_files = []
        parameter_list = []
        
        # 生成初始 morphology
        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_prompts + morphology_format}
        ]
        
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
            n=1
        )
        

        initial_parameter = json.loads(response.choices[0].message.content)
        parameter_list.append(initial_parameter['parameters'])
        material_list.append(compute_walker_volume(initial_parameter['parameters']))
        messages.append({"role": "assistant", "content": json.dumps(initial_parameter)})

        logging.info(f"generate initial_parameter{initial_parameter['parameters']}" )

        xml_file = walker_design(initial_parameter['parameters'])  

        filename = f"GPTWalker_0.xml"
        file_path = os.path.join(folder_name, "assets", filename)
        with open(file_path, "w") as fp:
            fp.write(xml_file)

        xml_files.append(file_path)

        # 生成不同的多样化设计
        for i in range(1, morphology_nums):
            diverse_messages = messages + [
                {"role": "user", "content": morphology_div_prompts + morphology_format}
            ]
            
            response = self.client.chat.completions.create(
                model=self.model,
                messages=diverse_messages,
                response_format={'type': 'json_object'},
                n=1
            )

            diverse_parameter = json.loads(response.choices[0].message.content)
            material_list.append(compute_walker_volume(diverse_parameter['parameters'])) 
            parameter_list.append(diverse_parameter['parameters'])
            messages.append({"role": "assistant", "content": json.dumps(diverse_parameter)})
            logging.info(f"generate diverse_parameter{ diverse_parameter['parameters']}")
            xml_file = walker_design(diverse_parameter['parameters'])  
            filename = f"GPTWalker_{i}.xml"
            file_path = os.path.join(folder_name, "assets", filename)
            with open(file_path, "w") as fp:
                fp.write(xml_file)
            xml_files.append(file_path)

        return xml_files, material_list, parameter_list


    def improve_rewardfunc(self, best_rewardfunc, rewardfunc_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        reward_improve_prompts = prompts.reward_improve_prompts

        for reward_filename, fitness in zip(rewardfunc_list, fitness_list):
            with open(reward_filename, 'r') as f:
                reward_content = f.read()
            reward_improve_prompts += f"reward function:\n{reward_content}\nfitness: {fitness}\n"
            
        with open(best_rewardfunc, 'r') as f:
            best_reward_content = f.read()

        reward_improve_prompts += f"This is best reward function, please carefully review it :\n{best_reward_content}\nbest fitness: {max(fitness_list)}"
        # print(reward_improve_prompts)
        messages = [
            {"role": "system", "content": "You are a reinforcement learning reward function designer"},
            {"role": "user", "content": rewardfunc_prompts + rewardfunc_format}
        ]

        response = self.client.chat.completions.create(
            model=self.model, messages=messages
        )

        # print(response)
        reward_code = self.extract_code(response.choices[0].message.content)

        if reward_code:
            full_code = "import numpy as np \n" + self.indent_code(reward_code) + "\n"
            file_name =  f"GPTWalker_{rewardfunc_index}_{morphology_index}_{iteration}.py"
            file_path = os.path.join(folder_name, "env", file_name)
            with open(file_path, "w") as fp:
                fp.write(full_code)

        return file_path

    def improve_morphology(self, best_parameter, parameter_list, fitness_list, folder_name, rewardfunc_index, morphology_index, iteration):
        morphology_improve_prompts = prompts.morphology_improve_prompts
        for parameter_content, fitness in zip(parameter_list, fitness_list):
            morphology_improve_prompts = morphology_improve_prompts + f"parameter:{parameter_content} \n" + f"fintess:{fitness}"
        morphology_improve_prompts = morphology_improve_prompts + f"best parameter:{best_parameter} \n" + f"best fintess:{max(fitness_list)}" 

        messages = [
            {"role": "system", "content": "You are a helpful mujoco robot designer"},
            {"role": "user", "content": morphology_improve_prompts + morphology_format}
        ]
        
        responses = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            response_format={'type': 'json_object'},
        )
        # print(responses)
        parameter = json.loads(responses.choices[0].message.content).get('parameters', []) 
        print(parameter)
        xml_file = walker_design(parameter)  
        filename = f"GPTWalker_refine_{rewardfunc_index}_{morphology_index}_{iteration}.xml"
        file_path = os.path.join(folder_name, "assets", filename)

        with open(file_path, "w") as fp:
            fp.write(xml_file)

        print(f"Successfully saved {filename}")
        return file_path, parameter


# Configuration

In [3]:

folder_name = "results/Random_m25_r5"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

# folder_name = setup_logging(div_flag=True)

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 25
rewardfunc_nums = 5

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []
designer = DGA()


# print configuration info

In [4]:
logging.info(f"start!")

In [None]:
designer = DGA()
morphology_list, material_list, parameter_list = designer.generate_morphology_div(morphology_nums, folder_name)

In [10]:
morphology_list

NameError: name 'morphology_list' is not defined

In [6]:
designer = DGA()
rewardfunc_list = designer.generate_rewardfunc_div(rewardfunc_nums, folder_name)


KeyboardInterrupt



In [1]:
# Extracting all the parameters from the provided log and storing them in a list

rewardfunc_list

NameError: name 'rewardfunc_list' is not defined

# enter coarse optimization stage

In [20]:
morphology_list = [f'results/Random_m25_r5/assets/GPTWalker_{i}.xml' for i in range(0,25) ]
rewardfunc_list = [f'results/Random_m25_r5/env/GPTrewardfunc_{i}.py' for i in range(0,5)]

parameter_list = [[1.6542839931804645, 1.1035237480730757, 0.5618361738044225, 0.12270854677728196, -0.29159818346698674, 0.16645863469485728, 0.08117228157724517, 0.011406181638938806, 0.05413760643504972, 0.02639985050641873], [1.3384983432472506, 0.9060124471082429, 0.5518854291704354, -0.02751613569122363, -0.0990166876564323, 0.31105954409417347, 0.048917484351349384, 0.20060844412005124, 0.05839369397471091, 0.1205156400143031], [1.469769487034134, 0.8254671963326508, 0.5838015469970552, 0.10168849019888841, -0.1101533599350925, 0.15263234069113113, 0.06506598804759936, 0.0037894950339307024, 0.013911375703935882, 0.13301755864507142], [1.4510279711580665, 1.1112424107442354, 0.6964196730742477, 0.021558746949354807, -0.08156348092128643, 0.1673818710678148, 0.06554416679498086, 0.07455051068547931, 0.023121453051428282, 0.08784569582355808], [1.3537428625535848, 0.8950463338961956, 0.588385028345365, 0.24119152171881897, -0.06606170402388223, 0.32338345580621836, 0.014116346520514661, 0.03392725970162329, 0.02934353614621374, 0.01371937266486075], [1.424012505141681, 1.2764758734481398, 0.7495276183580368, 0.1287589953104567, -0.24172159834358012, 0.37241104932538033, 0.013897390587900536, 0.031237000167488895, 0.07475696985424453, 0.14748271190856396], [1.4767185491232802, 0.9513232357497319, 0.74546051108378, 0.0895268377162888, -0.09527021126871202, 0.3694407163127215, 0.12007590282918353, 0.010363166490070075, 0.05299713720781695, 0.06552955242047798], [1.3708454531204666, 1.3143324595245536, 0.5814456862293728, 0.1780858069946838, -0.19967387122215519, 0.3737770635758953, 0.08174817213542643, 0.027246984049151025, 0.022563463636437898, 0.08847394067703547], [1.4918556562210201, 1.048623870600997, 0.7790846452274685, 0.26021962846882774, -0.001866845264050454, 0.09883391861100344, 0.16950018122497812, 0.05306129609240244, 0.13780418385875065, 0.22774322479865686], [1.4927622737340818, 1.0077627084230905, 0.6129333826034216, 0.18110844807299822, -0.055008471683813745, 0.2354664825719876, 0.0917470618053746, 0.14755744651443878, 0.007549279950716831, 0.011315999750375866], [1.4459348631753688, 0.9720850824025165, 0.5342931624318786, 0.17239175056578832, -0.10774467409988563, 0.13416971281143716, 0.06758932412358507, 0.17400986829812215, 0.046524806383659335, 0.003937625289161695], [1.3583263213981474, 1.37057763103056, 0.6309991307119949, 0.057354281930894084, -0.05529266969383881, 0.2946597186481536, 0.0327365608444269, 0.0701422382858228, 0.04441078203257901, 0.07082337962329954], [1.5631021469128485, 1.0269622956925186, 0.6221941393010916, 0.19725907685715138, -0.26672934079846855, 0.2797180576501939, 0.05649841929524345, 0.03972185074741234, 0.040211883635682684, 0.061746265405334205], [1.4247203315396668, 0.9472563942291545, 0.692301202178359, 0.05377185277889589, -0.14621109678452104, 0.2715160160784335, 0.09634823594912734, 0.05252092491932995, 0.08439252396436646, 0.11990764629337299], [1.3785774230000116, 0.9638202557835104, 0.698510365935389, 0.2146130521317019, -0.03703005874832349, 0.17013476120960364, 0.15452697846319, 0.11124547746720546, 0.09886774192242406, 0.07276293340754604], [1.4339139057134451, 0.9469964797131395, 0.6449607889011596, 0.1465751624591375, -0.10829456886448682, 0.19578019461024002, 0.07717274500039274, 0.07232705276797549, 0.019789432930079558, 0.04642249964794932], [1.3535922122768764, 1.1995868414922306, 0.6997931645051536, 0.11502426776118381, -0.3827645066400518, 0.2830937376389187, 0.121551328471495, 0.11673637372031757, 0.03102871497654942, 0.03651852548642953], [1.5383780941694885, 1.117334755885589, 0.5732176082898461, 0.12323512553622244, -0.05970093223089841, 0.37346098420890067, 0.139179352737133, 0.034890192932852046, 0.023826153014595404, 0.02676973766782667], [1.433117168331749, 1.1770639947111046, 0.6689940870332919, 0.011428789850632934, -0.13485661001983792, 0.2581423337158156, 0.036105287448116294, 0.04712683886735782, 0.08658485144936146, 0.14197491507483778], [1.189161618039773, 0.974987305385768, 0.7033943403030232, -0.03447999677731245, -0.19344984656038966, 0.21740663899267051, 0.1868774696706798, 0.10223035181009749, 0.014322309227809332, 0.1259469358259745], [1.517736107349144, 1.0343188948111885, 0.524405991873785, 0.12562577585129436, -0.2184624431805363, 0.2716972266838262, 0.0448355061014339, 0.2460866262141499, 0.02305234926255611, 0.07193508581626953], [1.5755376025433427, 1.2095331043852864, 0.5835292830912424, 0.058575725399605656, -0.24494518408155752, 0.15456720736098778, 0.04793061129131573, 0.1251369518050076, 0.15451424629873714, 0.05390530638701106], [1.4299248975153611, 1.0518696330500625, 0.6181498012225723, 0.2963396039410072, -0.14880007620913127, 0.25963553685385915, 0.14324084898372036, 0.1891537444644361, 0.08568388997823828, 0.03489985156123112], [1.445719014093105, 0.974812334737031, 0.6345208366077826, 0.12380600968872862, -0.38600198956408344, 0.34998808569709927, 0.00437650934729026, 0.13873581199546675, 0.06180784986980279, 0.01689746318010435], [1.387725075570684, 1.274681239830318, 0.6188833105033241, 0.09025985919614699, -0.1396784291236139, 0.32361267447491904, 0.3073140205382789, 0.0667728787846213, 0.004614327181601423, 0.1629152231739942]]
material_list = [compute_walker_volume(parameter) for parameter in parameter_list]

In [11]:
logging.info(f'folder_name:{folder_name}')
logging.info(f'morphology_nums:{morphology_nums}')
logging.info(f'rewardfunc_nums:{rewardfunc_nums}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'morphology_list:{morphology_list}')
logging.info(f'material_list:{material_list}')
logging.info(f'_________________________________enter coarse optimization stage_________________________________')

In [29]:
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):
        # if i not in [10]:
        #     continue
        # if j not in [50]:
        #     continue
        if i not in [1] and j <9:
            continue

        print(i, rewardfunc)
        print(j, morphology)
        shutil.copy(morphology, "GPTWalker.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTWalkerEnv._get_rew = _get_rew

        model_path = Train(j,  i, folder_name, total_timesteps=5e5)
        # model_path = "results/Div_m50_r10/coarse/SAC_morphology50_rewardfunc0_500000.0steps"
        fitness, reward = Eva(model_path)
        material = material_list[j]
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
        
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

        if fitness > best_fitness:
            best_fitness = fitness
            best_morphology = morphology
            best_efficiency = efficiency
            best_rewardfunc = rewardfunc
            best_material = material

0 results/Random_m25_r5/env/GPTrewardfunc_0.py
9 results/Random_m25_r5/assets/GPTWalker_9.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
10 results/Random_m25_r5/assets/GPTWalker_10.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
0 results/Random_m25_r5/env/GPTrewardfunc_0.py
11 results/Random_m25_r5/assets/GPTWalker_11.xml
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47


In [36]:
efficiency_matrix

array([[87.99867271420294, 10.754144551826124, 39.03539471003291,
        8.719478787412434, 176.89263217557945, 110.96461039629338,
        5.326080045216254, 340.75897179003545, 5.976374362068454,
        21.777336326559993, 9.998054942981014, 27.85178859490261,
        36.246473531626656, 16.711820382020974, 13.467187034483503,
        26.069260950413334, 22.593931123304372, 45.570188289025374,
        11.875070348792654, 12.515168774269334, 1.2885434757726646,
        9.690726694238265, 9.045470390257606, 10.131929540757637,
        8.077321501486557],
       [71.3459423725038, 6.597811474708325, 32.952381330044524,
        67.083922994349, 215.43575206559953, 78.11518782777298,
        43.46206299796901, 83.01060733662823, 7.7098577583662005,
        15.211671150099944, 14.204567115312157, 85.01823947594156,
        82.19520739492373, 18.08608042005328, 15.169118752726273,
        26.43801283527698, 6.514345800031634, 43.8835375116767,
        41.31373138179925, 16.56359223112449,

In [37]:
efficiency_matrix = np.array([[87.99867271420294, 10.754144551826124, 39.03539471003291,
        8.719478787412434, 176.89263217557945, 110.96461039629338,
        5.326080045216254, 340.75897179003545, 5.976374362068454,
        21.777336326559993, 9.998054942981014, 27.85178859490261,
        36.246473531626656, 16.711820382020974, 13.467187034483503,
        26.069260950413334, 22.593931123304372, 45.570188289025374,
        11.875070348792654, 12.515168774269334, 1.2885434757726646,
        9.690726694238265, 9.045470390257606, 10.131929540757637,
        8.077321501486557],
       [71.3459423725038, 6.597811474708325, 32.952381330044524,
        67.083922994349, 215.43575206559953, 78.11518782777298,
        43.46206299796901, 83.01060733662823, 7.7098577583662005,
        15.211671150099944, 14.204567115312157, 85.01823947594156,
        82.19520739492373, 18.08608042005328, 15.169118752726273,
        26.43801283527698, 6.514345800031634, 43.8835375116767,
        41.31373138179925, 16.56359223112449, 6.25408910634512,
        3.0894309437726553, 8.047195558457243, 10.909551178010249,
        4.266093675752428],
       [50.70468525352133, 10.181276273883523, 18.785620407240238,
        59.76172203104406, 300.4882312840252, 103.05444922152091,
        40.137064468987795, 86.1126270293572, 7.522859893355855,
        17.444240443918076, 11.665676627594507, 29.897804133293764,
        73.48165915912544, 9.806835640222081, 11.431014558272507,
        121.38720471588579, 22.001963792021332, 29.388019958661808,
        12.106071709213499, 0.40969106541923755, 0.610809582627209,
        1.2900865712452052, 8.411854960815221, 9.457154751068785,
        8.074240209486096],
       [95.3362964667801, 4.204641738385599, 18.566091646329532,
        2.948077447142652, 361.33866480637755, 68.70831300705977,
        39.20082947170765, 210.61711936096836, 7.116665408229884,
        13.914259029253797, 9.216518998718055, 72.22874777428497,
        64.24528102994576, 15.411852585472154, 10.962536565666682,
        46.48146696799726, 17.11689109065314, 34.86230333175377,
        15.81708483990798, 19.3937386647672, 2.4026414150886333,
        2.6002938230289425, 8.619578293554554, 11.264787029319248,
        8.11384242879852],
       [92.20612995622729, 9.614710014469495, 16.904886059758596,
        14.308329599316233, 99.59551100974463, 120.82709018150408,
        43.43364207820739, 313.3722127209206, 7.661658626879049,
        18.100806449263445, 9.336313499695434, 25.950446276121244,
        79.45337915958494, 23.112446384544384, 12.265396276261004,
        4.9043136767612605, 15.557296584336575, 35.96268560664117,
        15.653595922971471, 0.13842248743043137, 3.8699452621771715,
        6.234173286338546, 8.562263395058796, 10.031181407269989,
        8.097463181708209]], dtype=object)


mean = np.mean(efficiency_matrix)

std = np.std(efficiency_matrix)

print("平均值：", mean)
print("标准差：", std)

平均值： 42.17336246551196
标准差： 65.42764422720262


In [38]:
fitness_matrix = np.array([[2.2591115117302487, 2.4421336617264733, 2.3142535958288044,
        0.382282324991187, 0.9580850773768905, 15.500707972084504,
        0.3137961678529153, 14.466926614839714, 1.6855365777615685,
        2.1209964890120485, 1.4128748297668963, 1.3310279434674062,
        1.1089356115254476, 1.8231363264502736, 1.705863900453882,
        0.7919704507502163, 1.8203305537796763, 2.062639344320297,
        1.4175599012371303, 1.7055999859293243, 0.44170045264381974,
        1.9320436959717089, 1.9400495083022917, 0.8019512081285056,
        2.33866672418469],
       [1.831600804394316, 1.4982816548968867, 1.9536158799126233,
        2.9411159401902522, 1.1668421496642383, 10.91195391376137,
        2.5606503657402153, 3.5242164227806048, 2.1744366188879978,
        1.481535694611938, 2.0073179692864613, 4.062994089636046,
        2.5147051201386263, 1.9730579591660649, 1.9214444720872763,
        0.8031729392682432, 0.5248428276145695, 1.9862966214984643,
        4.931734066171236, 2.2573297400852947, 2.143842284786666,
        0.6159409678331734, 1.7259420586034053, 0.8635006503105687,
        1.2351831383585246],
       [1.301696203156495, 2.3120423375345047, 1.1137248617639004,
        2.6200935400502563, 1.6275030043922019, 14.395733170259176,
        2.3647517334153, 3.655912709504199, 2.121696994101647,
        1.698976044646352, 1.6485347373390153, 1.428806362439951,
        2.24812018097233, 1.0698534267667825, 1.4479456645689712,
        3.687679501808992, 1.7726373828085356, 1.3301873109223976,
        1.4451351707762372, 0.05583377164286123, 0.20937971764114557,
        0.257205027638846, 1.804153280764319, 0.7485421851355202,
        2.3377745825177216],
       [2.4474837880034217, 0.9548223083041937, 1.1007098729800195,
        0.12925060410431027, 1.95708084826652, 9.597902352591728,
        2.3095916622443524, 8.941752563749127, 2.007136623400925,
        1.355174669009362, 1.3024321016058116, 3.451788429373172,
        1.9655396253763473, 1.6813194292525628, 1.388604415821398,
        1.4120825448867362, 1.3790605834810612, 1.5779693081281723,
        1.8881290438670024, 2.6430295100574943, 0.8236026339344649,
        0.5184215226546685, 1.8487052534265587, 0.8916178829642126,
        2.349240808356669],
       [2.367125812371352, 2.183382124065596, 1.0022235881431305,
        0.6273106041456448, 0.5394287579906072, 16.878403243443238,
        2.558975893026261, 13.304221399508485, 2.160842858822771,
        1.7629220741910117, 1.3193608578629723, 1.240163410736525,
        2.430820795000067, 2.521397408156051, 1.553635267621986,
        0.1489904727484606, 1.2534083667038431, 1.627775812318589,
        1.8686129209187754, 0.018864579205599207, 1.3265804422978127,
        1.242909389305854, 1.8364124996119144, 0.7939769040196883,
        2.344498444179354]], dtype=object)

In [39]:
efficiency_matrix_select = efficiency_matrix
# parameter_list[48]
# material_list[48]
efficiency_matrix_select

array([[87.99867271420294, 10.754144551826124, 39.03539471003291,
        8.719478787412434, 176.89263217557945, 110.96461039629338,
        5.326080045216254, 340.75897179003545, 5.976374362068454,
        21.777336326559993, 9.998054942981014, 27.85178859490261,
        36.246473531626656, 16.711820382020974, 13.467187034483503,
        26.069260950413334, 22.593931123304372, 45.570188289025374,
        11.875070348792654, 12.515168774269334, 1.2885434757726646,
        9.690726694238265, 9.045470390257606, 10.131929540757637,
        8.077321501486557],
       [71.3459423725038, 6.597811474708325, 32.952381330044524,
        67.083922994349, 215.43575206559953, 78.11518782777298,
        43.46206299796901, 83.01060733662823, 7.7098577583662005,
        15.211671150099944, 14.204567115312157, 85.01823947594156,
        82.19520739492373, 18.08608042005328, 15.169118752726273,
        26.43801283527698, 6.514345800031634, 43.8835375116767,
        41.31373138179925, 16.56359223112449,

# print coarse optimization info

In [40]:
logging.info(f'_________________________________end coarse optimization stage_________________________________')
logging.info(f"Stage1: Final best morphology: {best_morphology}, Fitness: {best_fitness}, best_efficiency: {best_efficiency}, best reward function: {best_rewardfunc}, Material cost: {best_material}, Reward: {best_reward}")
logging.info(f'folder_name:{folder_name}')
logging.info(f'parameter_list:{parameter_list}')
logging.info(f'fitness_matrix:{fitness_matrix}')
logging.info(f'efficiency_matrix:{efficiency_matrix}')
logging.info(f'_________________________________enter fine optimization stage_________________________________')

# configuration of fine optimization

In [41]:
# 获取矩阵中所有非 None 的值和它们的坐标
all_values_with_coords = []
for i in range(len(efficiency_matrix_select)):
    for j in range(len(efficiency_matrix_select[0])):
        value = efficiency_matrix_select[i][j]
        if value is not None:
            all_values_with_coords.append(((i, j), value))

# 按值降序排序
sorted_values = sorted(all_values_with_coords, key=lambda x: x[1], reverse=True)

# 计算前 20% 的数量（至少选1个）
top_k = max(1, int(len(sorted_values) * 0.1))
# 取前 20% 个坐标
efficiency_coarse_best = [coord for coord, val in sorted_values[:top_k]]

logging.info(f"fitness_coarse_best {efficiency_coarse_best}")
logging.info(f"fitness_coarse_best values {sorted_values[:top_k]}")


In [42]:
coarse_best = efficiency_coarse_best
coarse_best

[(3, 4),
 (0, 7),
 (4, 7),
 (2, 4),
 (1, 4),
 (3, 7),
 (0, 4),
 (2, 15),
 (4, 5),
 (0, 5),
 (2, 5),
 (4, 4)]

# enter fine optimization stage

In [None]:
final_optimized_results = []  # 用来记录每个 coarse_best 的最优结果

for rewardfunc_index, morphology_index in coarse_best:
    
    morphology = morphology_list[morphology_index]
    parameter = parameter_list[morphology_index]
    rewardfunc = rewardfunc_list[rewardfunc_index]
    
    best_efficiency = efficiency_matrix_select[rewardfunc_index][morphology_index]
    best_fitness = fitness_matrix[rewardfunc_index][morphology_index]
    best_morphology = morphology
    best_parameter = parameter
    best_rewardfunc = rewardfunc
    best_material = compute_ant_volume(parameter)
    
    print(f"Initial parameter:{parameter}")
    logging.info(f"Initial morphology:{morphology}")
    logging.info(f"Initial parameter:{parameter}" )
    logging.info(f"Initial rewardfunc:{rewardfunc}" )
    logging.info(f"Initial fitness:{best_fitness}" )
    logging.info(f"Initial efficiency:{best_efficiency}" )
    iteration = 0
    
    while True:
        improved = False  # 标记是否有改进，方便控制循环

        designer = DGA()
        
         # -------- 优化 morphology --------
        improved_morphology, improved_parameter = designer.improve_morphology(
            best_parameter,
            parameter_list,
            efficiency_matrix_select[rewardfunc_index, :],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
            
        )
        print(f"improved_morphology:{improved_parameter}")
        
        shutil.copy(improved_morphology, "GPTWalker.xml")
        shutil.copy(best_rewardfunc, "GPTrewardfunc.py")
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTWalkerEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_walker_volume(improved_parameter)
        improved_efficiency = improved_fitness / improved_material
        print(f"improved_efficiency:{improved_efficiency}")
        
        if improved_efficiency > best_efficiency:

            best_fitness = improved_fitness
            best_morphology = improved_morphology
            best_parameter = improved_parameter
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Morphology optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")

        # -------- 没有进一步改进，跳出循环 --------
        if not improved:
            logging.info("Not improved Morphology!")
            logging.info("____________________________________________")
            break
            
            
        # -------- 优化 reward function --------
        improved_rewardfunc = designer.improve_rewardfunc(
            best_rewardfunc,
            rewardfunc_list,
            efficiency_matrix_select[:, morphology_index],
            folder_name,
            rewardfunc_index, 
            morphology_index,
            iteration
        )

        shutil.copy(best_morphology, "GPTWalker.xml")
        shutil.copy(improved_rewardfunc, "GPTrewardfunc.py")
        
        
        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTWalkerEnv._get_rew = _get_rew
        
        model_path = Train(morphology_index, rewardfunc_index, folder_name, stage='fine', total_timesteps=5e5)
        improved_fitness, _ = Eva(model_path)
        improved_material = compute_walker_volume(best_parameter)
        improved_efficiency = improved_fitness / improved_material


        if improved_efficiency > best_efficiency:
            best_fitness = improved_fitness
            best_rewardfunc = improved_rewardfunc
            best_material = improved_material
            best_efficiency = improved_efficiency
            improved = True
            iteration +=1
            logging.info(f"Reward optimization improved iteration {iteration}: material={improved_material}, fitness={improved_fitness}, efficiency={improved_efficiency}")
        
        if not improved:
            logging.info("Not improved Reward!")
            logging.info("____________________________________________")
            break
            

            
    # 保存当前 coarse_best 的最终最优结果
    final_optimized_results.append({
        "best_morphology": best_morphology,
        "best_parameter": best_parameter,
        "best_rewardfunc": best_rewardfunc,
        "best_fitness": best_fitness,
        "best_material": best_material,
        "best_efficiency": best_efficiency,
        "best_iteration":iteration
    })

    logging.info(f"Final optimized result: rewardfunc_index{rewardfunc_index} morphology_index{morphology_index}")
    logging.info(f"  Morphology: {best_morphology}")
    logging.info(f"  Parameter: {best_parameter}")
    logging.info(f"  Rewardfunc: {best_rewardfunc}")
    logging.info(f"  Fitness: {best_fitness}")
    logging.info(f"  Material: {best_material}")
    logging.info(f"  Efficiency: {best_efficiency}")
    logging.info("____________________________________________")

params: [1.3537428625535848, 0.8950463338961956, 0.588385028345365, 0.24119152171881897, -0.06606170402388223, 0.32338345580621836, 0.014116346520514661, 0.03392725970162329, 0.02934353614621374, 0.01371937266486075]
Initial parameter:[1.3537428625535848, 0.8950463338961956, 0.588385028345365, 0.24119152171881897, -0.06606170402388223, 0.32338345580621836, 0.014116346520514661, 0.03392725970162329, 0.02934353614621374, 0.01371937266486075]
[1.4, 1.0, 0.6, 0.2, 0.35, -0.15, 0.01, 0.03, 0.02, 0.01]
Successfully saved GPTWalker_refine_3_4_0.xml
improved_morphology:[1.4, 1.0, 0.6, 0.2, 0.35, -0.15, 0.01, 0.03, 0.02, 0.01]




0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
improved_efficiency:299.3551453588714
params: [1.3708454531204666, 1.3143324595245536, 0.5814456862293728, 0.1780858069946838, -0.19967387122215519, 0.3737770635758953, 0.08174817213542643, 0.027246984049151025, 0.022563463636437898, 0.08847394067703547]
Initial parameter:[1.3708454531204666, 1.3143324595245536, 0.5814456862293728, 0.1780858069946838, -0.19967387122215519, 0.3737770635758953, 0.08174817213542643, 0.027246984049151025, 0.022563463636437898, 0.08847394067703547]
[1.39, 1.32, 0.58, 0.18, -0.2, 0.38, 0.08, 0.03, 0.02, 0.09]
Successfully saved GPTWalker_refine_0_7_0.xml
improved_morphology:[1.39, 1.32, 0.58, 0.18, -0.2, 0.38, 0.08, 0.03, 0.02, 0.09]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

In [44]:

logging.info(f"{final_optimized_results}")

# logging.info(f"fine optimization end: best material cost: {best_material}  fitness: {improved_fitness} merterial_efficiency: {improved_material_efficiency}")

In [45]:
final_optimized_results

[{'best_morphology': 'results/Random_m25_r5/assets/GPTWalker_4.xml',
  'best_parameter': [1.3537428625535848,
   0.8950463338961956,
   0.588385028345365,
   0.24119152171881897,
   -0.06606170402388223,
   0.32338345580621836,
   0.014116346520514661,
   0.03392725970162329,
   0.02934353614621374,
   0.01371937266486075],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_3.py',
  'best_fitness': 1.95708084826652,
  'best_material': 10.412038209106182,
  'best_efficiency': 361.33866480637755,
  'best_iteration': 0},
 {'best_morphology': 'results/Random_m25_r5/assets/GPTWalker_7.xml',
  'best_parameter': [1.3708454531204666,
   1.3143324595245536,
   0.5814456862293728,
   0.1780858069946838,
   -0.19967387122215519,
   0.3737770635758953,
   0.08174817213542643,
   0.027246984049151025,
   0.022563463636437898,
   0.08847394067703547],
  'best_rewardfunc': 'results/Random_m25_r5/env/GPTrewardfunc_0.py',
  'best_fitness': 14.466926614839714,
  'best_material': 10.8557016918