In [1]:
# xvfb-run -s "-screen 0 1400x900x24" python figure2.py 运行这一行就可以得到最终视频了

import time
from design import *
import importlib
import shutil
from utils import *
from openai import OpenAI
from prompts import *
import json
import numpy as np
from gymnasium.envs.robodesign.GPTAnt import GPTAntEnv

In [2]:
def load_logs_full(log_dir):
    # 找到 .tfevents 文件
    event_file = [f for f in os.listdir(log_dir) if f.startswith("events.out")][0]
    event_path = os.path.join(log_dir, event_file)

    # 加载日志
    event_acc = EventAccumulator(event_path)
    event_acc.Reload()

    # 获取所有 scalar tags
    all_tags = event_acc.Tags()["scalars"]

    # 筛选 reward 分量
    reward_tags = [tag for tag in all_tags if tag.startswith("reward/")]

    # 加载所有 reward 分量数据（完整）
    data_full = {}
    for tag in reward_tags:
        events = event_acc.Scalars(tag)
        values = [e.value for e in events]
        data_full[tag] = values

    # 加载 episode length（完整）
    ep_len_tag = "rollout/ep_len_mean"
    if ep_len_tag in all_tags:
        events = event_acc.Scalars(ep_len_tag)
        values = [e.value for e in events]
        data_full[ep_len_tag] = values

    return data_full


In [3]:
folder_name = "results/terrain"
log_file = os.path.join(folder_name, "parameters.log")
logging.basicConfig(filename=log_file, level=logging.INFO, format="%(asctime)s - %(message)s")

best_fitness = float('-inf')  
best_morphology = None  
best_rewardfunc = None  
best_reward = None
best_material = None
best_efficiency = None

morphology_nums = 4
rewardfunc_nums = 1

fitness_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
efficiency_matrix = np.array([[None for _ in range(morphology_nums)] for _ in range(rewardfunc_nums)])
fitness_list = []

morphology_list = [f'results/terrain/assets/GPTAnt_{i}.xml' for i in range(0,morphology_nums) ]
rewardfunc_list = [f'results/terrain/env/GPTrewardfunc_{i}.py' for i in range(0,rewardfunc_nums)]
parameter = [0.04, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01]
material = compute_ant_volume(parameter)

params: [0.04, 0.2, 0.1, 0.3, 0.05, 0.05, 0.05, 0.01, 0.01, 0.01]


In [10]:
# 0 ground
# 1 desert
# 2 snow
# 3 hills
for i, rewardfunc in enumerate(rewardfunc_list):
    for j, morphology in enumerate(morphology_list):

        print(i, rewardfunc)
        print(j, morphology)
        
        if i not in [0] or j not in [0]:
            continue
        
        shutil.copy(morphology, "GPTAnt.xml")
        shutil.copy(rewardfunc, "GPTrewardfunc.py")         

        import GPTrewardfunc
        importlib.reload(GPTrewardfunc)  # 重新加载模块
        from GPTrewardfunc import _get_rew
        GPTAntEnv._get_rew = _get_rew

        env_name = "GPTAntEnv"
        model_path = folder_name + f"/coarse/SAC_morphology{j}_rewardfunc{i}_1000000.0steps"
        # model_path = Train(j,  i, folder_name, total_timesteps=1e6, callback=True)
        # fitness, reward = Eva(model_path=model_path, run_steps=100, folder_name=folder_name, video=False, rewardfunc_index = i, morphology_index = j)
        fitness, _ = Eva_with_qpos_logging2(model_path, run_steps=100, video = True, rewardfunc_index=i, morphology_index=j)
        efficiency = fitness/material
        fitness_matrix[i][j] = fitness
        efficiency_matrix[i][j] = efficiency
                
        logging.info("___________________finish coarse optimization_____________________")
        logging.info(f"morphology: {j}, rewardfunc: {i}, material cost: {material} reward: {reward} fitness: {fitness} efficiency: {efficiency}")

                

0 results/terrain/env/GPTrewardfunc_0.py
0 results/terrain/assets/GPTAnt_0.xml
Run 0
Run 1
Run 2
Run 3
Run 4
Run 5
Run 6
Run 7
Run 8
Run 9
Run 10
Run 11
Run 12
Run 13
Run 14
Run 15
Run 16
Run 17
Run 18
Run 19
Run 20
Run 21
Run 22
Run 23
Run 24
Run 25
Run 26
Run 27
Run 28
Run 29
Run 30
Run 31
Run 32
Run 33
Run 34
Run 35
Run 36
Run 37
Run 38
Run 39
Run 40
Run 41
Run 42
Run 43
Run 44
Run 45
Run 46
Run 47
Run 48
Run 49
Run 50
Run 51
Run 52
Run 53
Run 54
Run 55
Run 56
Run 57
Run 58
Run 59
Run 60
Run 61
Run 62
Run 63
Run 64
Run 65
Run 66
Run 67
Run 68
Run 69
Run 70
Run 71
Run 72
Run 73
Run 74
Run 75
Run 76
Run 77
Run 78
Run 79
Run 80
Run 81
Run 82
Run 83
Run 84
Run 85
Run 86
Run 87
Run 88
Run 89
Run 90
Run 91
Run 92
Run 93
Run 94
Run 95
Run 96
Run 97
Run 98
Run 99
Saved Origin plot CSV to: /root/autodl-tmp/Ant_desert/qpos0_origin_plot.csv


ValueError: too many values to unpack (expected 2)