找最优模型

In [134]:


# 模型的决定系数的文件
file_paths = [
    r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_TN loss  (%).json",
    r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_CH4-C loss (%).json",
    r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_CO2-C loss  (%).json",
    r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_N2O-N loss  (%).json",
    r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_TC loss  (%).json",
    r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_NH3-N loss  (%).json",
]

# 模型名称列表
model_names = [
    'ctb_model',
    'gp_model',
    'lgb_model',
    'lr_model',
    'mlp_model',
    'rf_model',
    'ri_model',
    'svr_model',
    'xgb_model'
]

# 模型缩写映射
model_mapping = {
    "RandomForest(k)": "rf_model",
    "XGBoost(k)": "xgb_model",
    "Lightgbm(k)": "lgb_model",
    "CatRegression(k)": "ctb_model",
    "RidgeRegression(k)": "ri_model",
    "LinearRegression(k)": "lr_model",
    "MLP(k)": "mlp_model",
    "SVR(k)": "svr_model",
    "GaussR(k)": "gp_model"  # 假设 GaussR 对应 gp_model
}


# 损失类型对应的目录路径
model_paths = {
    'result_r2_TN loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_TN loss  (%)',
    'result_r2_CH4-C loss (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_CH4-C loss (%)',
    'result_r2_CO2-C loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_CO2-C loss  (%)',
    'result_r2_N2O-N loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_N2O-N loss  (%)',
    'result_r2_TC loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_NH3-N loss  (%)',
    'result_r2_NH3-N loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_TC loss  (%)'
}

# CSV 文件路径
csv_files = [
    "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_CH4-C loss (%).csv",
    "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_CO2-C loss  (%).csv",
    "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_N2O-N loss  (%).csv",
    "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_NH3-N loss  (%).csv",
    "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_TC loss  (%).csv",
    "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_TN loss  (%).csv",
]

non_nums  = ['material_0', 'material_1', 'Excipients', 'Additive Species']
labels = [ 'TN loss  (%)','CH4-C loss (%)',  'CO2-C loss  (%)', "N2O-N loss  (%)", 'NH3-N loss  (%)', 'TC loss  (%)', ]

In [135]:
import json
import os
import pickle
import warnings

# 忽略所有警告
warnings.filterwarnings("ignore")


# file_paths = [
#     r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_TN loss  (%).json",
#     r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_CH4-C loss (%).json",
#     r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_CO2-C loss  (%).json",
#     r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_N2O-N loss  (%).json",
#     r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_TC loss  (%).json",
#     r"output\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\result_r2_NH3-N loss  (%).json",
# ]

optimal_models = {}  # 创建一个空字典来存储每个文件的最优模型

# 遍历每个文件
for file_path in file_paths:
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
        # min_value = float('inf')
        min_value = -float('inf')
        min_key = None
        # 查找最大值的键
        for key, value in data.items():
            if value > min_value:
                min_key = key
                min_value = value
        # 更新字典，文件名作为键，最优模型名作为值
        optimal_models[os.path.basename(file_path)] = min_key

# 打印每个文件的最优模型
print("每个文件的最优模型:", optimal_models)


每个文件的最优模型: {'result_r2_TN loss  (%).json': 'Lightgbm(k)', 'result_r2_CH4-C loss (%).json': 'SVR(k)', 'result_r2_CO2-C loss  (%).json': 'CatRegression(k)', 'result_r2_N2O-N loss  (%).json': 'Lightgbm(k)', 'result_r2_TC loss  (%).json': 'Lightgbm(k)', 'result_r2_NH3-N loss  (%).json': 'Lightgbm(k)'}


加载相应的模型

创建模型路径字典

模型名字

In [136]:
# # 模型缩写映射
# model_mapping = {
#     "RandomForest(k)": "rf_model",
#     "XGBoost(k)": "xgb_model",
#     "Lightgbm(k)": "lgb_model",
#     "CatRegression(k)": "ctb_model",
#     "RidgeRegression(k)": "ri_model",
#     "LinearRegression(k)": "lr_model",
#     "MLP(k)": "mlp_model",
#     "SVR(k)": "svr_model",
#     "GaussR(k)": "gp_model"  # 假设 GaussR 对应 gp_model
# }

In [137]:
# # 损失类型对应的目录路径
# model_paths = {
#     'result_r2_TN loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_TN loss  (%)',
#     'result_r2_CH4-C loss (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_CH4-C loss (%)',
#     'result_r2_CO2-C loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_CO2-C loss  (%)',
#     'result_r2_N2O-N loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_N2O-N loss  (%)',
#     'result_r2_TC loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_NH3-N loss  (%)',
#     'result_r2_NH3-N loss  (%).json': 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_TC loss  (%)'
# }

# 存储模型文件路径的列表
model_files = []

# 遍历optimal_models字典
for key, value in optimal_models.items():
    # 获取模型名字
    model_name = model_mapping.get(value)
    model_path_dir = model_paths.get(key)
    # 如果模型名存在
    if model_name:
        # 拼接模型文件路径
        model_file_path = os.path.join(model_path_dir, model_name + ".pkl",)
        
        # 检查文件是否存在
        if os.path.exists(model_file_path):
            model_files.append(model_file_path)
        else:
            print(f"找不到模型文件：{model_file_path}")
    else:
        print(f"找不到与模型键 {key} 相关的模型映射")

# 打印模型文件路径列表
print("模型文件路径列表：", model_files)

模型文件路径列表： ['output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_TN loss  (%)\\lgb_model.pkl', 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_CH4-C loss (%)\\svr_model.pkl', 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_CO2-C loss  (%)\\ctb_model.pkl', 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_N2O-N loss  (%)\\lgb_model.pkl', 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_NH3-N loss  (%)\\lgb_model.pkl', 'output\\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\\model_TC loss  (%)\\lgb_model.pkl']


In [139]:
import joblib

loaded_models = {}  # 用于存储加载的模型

# 遍历每个模型文件
for model_file in model_files:
    # 获取模型名称
    model_name = os.path.basename(os.path.dirname(model_file)) + "_" + os.path.splitext(os.path.basename(model_file))[0]
    print(model_name)
    # 加载模型
    with open(model_file, 'rb') as file:
        loaded_model = joblib.load(file)
    
    # 将加载的模型存储在字典中，以模型名称作为键
    loaded_models[model_name] = loaded_model
print(loaded_models)

model_TN loss  (%)_lgb_model
model_CH4-C loss (%)_svr_model
model_CO2-C loss  (%)_ctb_model
model_N2O-N loss  (%)_lgb_model
model_NH3-N loss  (%)_lgb_model
model_TC loss  (%)_lgb_model
{'model_TN loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_estimators=300, objective='regression', seed=2023, verbose=-1), 'model_CH4-C loss (%)_svr_model': SVR(), 'model_CO2-C loss  (%)_ctb_model': <catboost.core.CatBoostRegressor object at 0x0000020D351C8850>, 'model_N2O-N loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_estimators=300, objective='regression', seed=2023, verbose=-1), 'model_NH3-N loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_estimators=300, objective='regression', seed=2023, verbose=-1), 'model_TC loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_esti

读取每个模型的输入和输出

In [140]:
import pandas as pd
import os

# # CSV 文件路径
# csv_files = [
#     "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_CH4-C loss (%).csv",
#     "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_CO2-C loss  (%).csv",
#     "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_N2O-N loss  (%).csv",
#     "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_NH3-N loss  (%).csv",
#     "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_TC loss  (%).csv",
#     "data\TN_NH3_N2O_TC loss_CH4-C loss_CO2-C loss\data_for_TN loss  (%).csv",
# ]

# 用于存储输入特征和输出特征的列表
input_features_list = []
output_feature_list = []
model_names = []

# 遍历每个 CSV 文件
for csv_file in csv_files:
    # 从文件路径中提取模型名称
    model_name = os.path.splitext(os.path.basename(csv_file))[0]
    model_names.append(model_name)
    
    # 读取 CSV 文件
    df = pd.read_csv(csv_file)
    
    # 获取输入特征（除了最后一列）和输出特征（最后一列）的列名，并添加到列表中
    input_features = list(df.columns[:-1])
    output_feature = df.columns[-1]
    input_features_list.append(input_features)
    output_feature_list.append(output_feature)

# 打印模型名称、输入特征和输出特征的列表
for model_name, input_features, output_feature in zip(model_names, input_features_list, output_feature_list):
    print("模型名称:", model_name)
    print("输入特征:", input_features)
    print("输出特征:", output_feature)



模型名称: data_for_CH4-C loss (%)
输入特征: ['material_0', 'initial TN(%)', 'initial TC(%)', 'initial moisture content(%)', 'initial CN(%)', 'initial pH', 'material_1', 'Excipients_1', 'Additive Species']
输出特征: CH4-C loss (%)
模型名称: data_for_CO2-C loss  (%)
输入特征: ['material_0', 'initial TN(%)', 'initial TC(%)', 'initial moisture content(%)', 'initial CN(%)', 'initial pH', 'material_1', 'Excipients_1', 'Additive Species']
输出特征: CO2-C loss  (%)
模型名称: data_for_N2O-N loss  (%)
输入特征: ['material_0', 'initial TN(%)', 'initial TC(%)', 'initial moisture content(%)', 'initial CN(%)', 'initial pH', 'material_1', 'Excipients_1', 'Additive Species']
输出特征: N2O-N loss  (%)
模型名称: data_for_NH3-N loss  (%)
输入特征: ['material_0', 'initial TN(%)', 'initial TC(%)', 'initial moisture content(%)', 'initial CN(%)', 'initial pH', 'material_1', 'Excipients_1', 'Additive Species']
输出特征: NH3-N loss  (%)
模型名称: data_for_TC loss  (%)
输入特征: ['material_0', 'initial TN(%)', 'initial TC(%)', 'initial moisture content(%)', 'initial

找输入输出的取值范围

In [141]:
# 存储所有输入特征和标签的最小值和最大值的字典
min_max_values = {}

# 遍历每个 CSV 文件
for csv_file in csv_files:
    # 读取 CSV 文件
    df = pd.read_csv(csv_file)
    
    # 获取输入特征的列名（去除最后一列）
    input_features = list(df.columns[:-1])
    # 获取标签的列名
    label_column = df.columns[-1]
    
    # 遍历每个输入特征
    for feature in input_features:
        # 如果特征不存在，创建新的特征项
        if feature not in min_max_values:
            min_max_values[feature] = {'Minimum': None, 'Maximum': None}
        # 计算输入特征的最小值和最大值，并更新字典中的值
        if min_max_values[feature]['Minimum'] is None:
            min_max_values[feature]['Minimum'] = round(df[feature].min(), 2)
        else:
            min_max_values[feature]['Minimum'] = min(round(min_max_values[feature]['Minimum'], 2), round(df[feature].min(), 2))
        if min_max_values[feature]['Maximum'] is None:
            min_max_values[feature]['Maximum'] = round(df[feature].max(), 2)
        else:
            min_max_values[feature]['Maximum'] = max(round(min_max_values[feature]['Maximum'], 2), round(df[feature].max(), 2))
    
    # 如果标签不存在，创建新的标签项
    if label_column not in min_max_values:
        min_max_values[label_column] = {'Minimum': None, 'Maximum': None}
    # 计算标签的最小值和最大值，并更新字典中的值
    if min_max_values[label_column]['Minimum'] is None:
        min_max_values[label_column]['Minimum'] = round(df[label_column].min(), 2)
    else:
        min_max_values[label_column]['Minimum'] = min(round(min_max_values[label_column]['Minimum'], 2), round(df[label_column].min(), 2))
    if min_max_values[label_column]['Maximum'] is None:
        min_max_values[label_column]['Maximum'] = round(df[label_column].max(), 2)
    else:
        min_max_values[label_column]['Maximum'] = max(round(min_max_values[label_column]['Maximum'], 2), round(df[label_column].max(), 2))

# 打印每个输入特征和标签的最小值和最大值
for category, stats in min_max_values.items():
    print(f"{category}: Minimum={stats['Minimum']}, Maximum={stats['Maximum']}")

print(min_max_values)


material_0: Minimum=0, Maximum=6
initial TN(%): Minimum=-1.0, Maximum=11.58
initial TC(%): Minimum=-1.0, Maximum=197.0
initial moisture content(%): Minimum=-1.0, Maximum=89.8
initial CN(%): Minimum=-1.0, Maximum=55.98
initial pH: Minimum=-1.0, Maximum=10.7
material_1: Minimum=0, Maximum=11
Excipients_1: Minimum=0, Maximum=58
Additive Species: Minimum=0, Maximum=4
CH4-C loss (%): Minimum=0.0, Maximum=35.87
CO2-C loss  (%): Minimum=-0.8, Maximum=84.0
N2O-N loss  (%): Minimum=-0.5, Maximum=19.0
NH3-N loss  (%): Minimum=0.02, Maximum=160.6
TC loss  (%): Minimum=5.1, Maximum=92.58
TN loss  (%): Minimum=-1.0, Maximum=85.54
{'material_0': {'Minimum': 0, 'Maximum': 6}, 'initial TN(%)': {'Minimum': -1.0, 'Maximum': 11.58}, 'initial TC(%)': {'Minimum': -1.0, 'Maximum': 197.0}, 'initial moisture content(%)': {'Minimum': -1.0, 'Maximum': 89.8}, 'initial CN(%)': {'Minimum': -1.0, 'Maximum': 55.98}, 'initial pH': {'Minimum': -1.0, 'Maximum': 10.7}, 'material_1': {'Minimum': 0, 'Maximum': 11}, 'Excip

遗传算法优化

model_N2O-N loss (%)_ctb_model优化

In [128]:
import os
import pickle
import pandas as pd
from deap import base, creator, tools, algorithms
import random

# non_nums  = ['material_0', 'material_1', 'Excipients', 'Additive Species']
# labels = [ 'TN loss  (%)','CH4-C loss (%)',  'CO2-C loss  (%)', "N2O-N loss  (%)", 'NH3-N loss  (%)', 'TC loss  (%)', ]

#  SVR = array(['material_0', 'initial TN(%)', 'initial TC(%)','initial moisture content(%)', 'initial CN(%)', 'initial pH',

for no, model in enumerate(loaded_models.values()):
    # if no == 0:
    #     continue
    print(labels[no])
    # 加载模型
    loaded_model_1 = model
    print(model)
    print(loaded_models)
    
    # value_to_find = model # 替换为您要查找的值

    # # 使用字典推导式查找对应的键
    # keys = [key for key, value in loaded_models.items() if value == value_to_find]

    # print(keys)


    # 创建输入范围字典和输出范围字典
    input_ranges_1 = {key: value for key, value in min_max_values.items() if key not in labels}
    output_range_1 = {key: value for key, value in min_max_values.items() if key in labels}

    print("Input Ranges:")
    print(input_ranges_1)
    print("\nOutput Ranges:")
    print(output_range_1)

    # 创建属性名称到数值的映射
    attribute_mapping = {key: idx for idx, key in enumerate(input_ranges_1.keys())}

    # 最小化目标函数
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()

    # 定义生成整数属性的方法
    toolbox.register("attr_int", lambda minimum, maximum: random.randint(round(minimum), round(maximum)))

    # 定义生成浮点数属性的方法
    toolbox.register("attr_float", lambda minimum, maximum: random.uniform(minimum, maximum))

    # 创建个体时根据属性类型选择不同的生成方法
    def create_individual():
        individual = []
        for attr_name, attr_info in input_ranges_1.items():
            if attr_name in non_nums :
                individual.append(toolbox.attr_int(attr_info['Minimum'], attr_info['Maximum']))
            else:
                individual.append(toolbox.attr_float(attr_info['Minimum'], attr_info['Maximum']))
        return creator.Individual(individual)

    # 注册个体生成方法
    toolbox.register("individual", create_individual)

    # 初始化种群
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    def evaluate(individual):
        # 使用模型进行预测
        individual_with_names = {attr_name: value for attr_name, value in zip(input_ranges_1.keys(), individual)}

        # 检查每个特征值是否超出范围，如果超出范围则返回一个非常大的适应度值
        for key, value in individual_with_names.items():
            if key in non_nums:
                if not isinstance(value, int) or value < input_ranges_1[key]['Minimum'] or value > input_ranges_1[key]['Maximum']:
                    return (1e6,)  # 返回一个非常大的适应度值，表示不合法的个体
            else:
                if value < input_ranges_1[key]['Minimum'] or value > input_ranges_1[key]['Maximum']:
                    return (1e6,)  # 返回一个非常大的适应度值，表示不合法的个体

        # 使用模型进行预测
        prediction = loaded_model_1.predict([individual])

        # 计算模型输出并返回其负值作为适应度（因为我们是最小化问题）
        fitness = -prediction
        return fitness,

    toolbox.register("evaluate", evaluate)
    toolbox.register("mate", tools.cxBlend, alpha=0.5)  # 交叉方式使用 Blend 交叉
    # toolbox.register("mate", tools.cxOnePoint)

    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)  # 变异方式使用高斯变异
    toolbox.register("select", tools.selTournament, tournsize=3)

    # 进化算法
    def main(population_size=100, n_generations=50, cxpb=0.5, mutpb=0.2):
        pop = toolbox.population(n=population_size)
        hof = tools.HallOfFame(1)
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("min", min)

        algorithms.eaSimple(pop, toolbox, cxpb=cxpb, mutpb=mutpb, ngen=n_generations, stats=stats, halloffame=hof, verbose=False)

        return pop, stats, hof

    # 调整参数
    population_size = 200
    n_generations = 100
    cxpb = 0.5  # 交叉概率
    mutpb = 0.01  # 变异概率

    num_runs = 100  # 运行次数
    best_individuals = []

    for i in range(num_runs):
        pop, stats, hof = main(population_size, n_generations, cxpb, mutpb)
        best_individual = tools.selBest(pop, 1)[0]
        print(f"Run {i+1}: Best individual with fitness {best_individual.fitness.values[0]}")
        best_individual_with_names = {list(input_ranges_1.keys())[idx]: value for idx, value in enumerate(best_individual)}
        best_individuals.append((best_individual, best_individual_with_names))

    # 保存最优个体到CSV文件
    file_name = f"best_individuals_{labels[no]}.csv"
    data = []
    columns = list(input_ranges_1.keys()) + [labels[no]]
    for best_individual, best_individual_with_names in best_individuals:
        row = [best_individual_with_names[attr] for attr in input_ranges_1.keys()]
        row.append(-best_individual.fitness.values[0])  # 添加适应度值
        data.append(row)

    df = pd.DataFrame(data, columns=columns)
    df.to_csv(file_name, index=False)

    print(f"Best individuals saved to_{labels[no]}:", file_name)


    # 从保存的文件中找到标题为"N2O-N loss (%)"的最小值对应的组合
    min_loss_combinations = []
    min_loss = min(df[labels[no]])
    for index, row in df.iterrows():
        if row[labels[no]] == min_loss:
            min_loss_combinations.append(row)

    print(f"\nCombinations with minimum {labels[no]} ({min_loss}%):")
    for combination in min_loss_combinations:
        print(combination)



CH4-C loss (%)
SVR()
{'model_TN loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_estimators=300, objective='regression', seed=2023, verbose=-1), 'model_CH4-C loss (%)_svr_model': SVR(), 'model_CO2-C loss  (%)_ctb_model': <catboost.core.CatBoostRegressor object at 0x0000020D353A1950>, 'model_N2O-N loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_estimators=300, objective='regression', seed=2023, verbose=-1), 'model_NH3-N loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_estimators=300, objective='regression', seed=2023, verbose=-1), 'model_TC loss  (%)_lgb_model': LGBMRegressor(metric='root_mean_squared_error', min_child_samples=10,
              n_estimators=300, objective='regression', seed=2023, verbose=-1)}
Input Ranges:
{'material_0': {'Minimum': 0, 'Maximum': 6}, 'initial TN(%)': {'Minimum': -1.0, 'Maximum':

KeyboardInterrupt: 

从最优个体找出最小

In [98]:
# 从保存的文件中找到标题为"N2O-N loss (%)"的最小值对应的组合
min_N2O_loss_combinations = []
min_N2O_loss = min(df['N2O-N loss (%)'])
for index, row in df.iterrows():
    if row['N2O-N loss (%)'] == min_N2O_loss:
        min_N2O_loss_combinations.append(row)

print(f"\nCombinations with minimum N2O-N loss ({min_N2O_loss}%):")
for combination in min_N2O_loss_combinations:
    print(combination)


Combinations with minimum N2O-N loss ([40.17083787]%):
material_0                                       3
initial TN(%)                            -0.355847
initial TC(%)                            26.637128
initial moisture content(%)              11.644503
initial CN(%)                            19.048028
initial pH                               10.547442
material_1                                       3
Excipients_1                             15.910138
Additive Species                                 4
N2O-N loss (%)                 [40.17083786807452]
Name: 8, dtype: object


model_NH3-N loss (%)_ctb_model优化

In [186]:
import os
import pickle
import pandas as pd
from deap import base, creator, tools, algorithms
import random

# 加载模型
model_name_1 = 'model_NH3-N loss (%)_ctb_model'
# 假设loaded_models已经定义，用于存储加载的模型
# loaded_models = load_models() # 这里是你加载模型的代码，假设已经定义了
loaded_model_1 = loaded_models[model_name_1]

# 创建输入范围字典和输出范围字典
input_ranges_1 = {key: value for key, value in min_max_values.items() if key not in ['N2O-N loss (%)', 'NH3-N loss (%)', 'TN loss (%)']}
output_range_1 = {key: value for key, value in min_max_values.items() if key in ['NH3-N loss (%)', 'TN loss (%)']}

print("Input Ranges:")
print(input_ranges_1)
print("\nOutput Ranges:")
print(output_range_1)

# 创建属性名称到数值的映射
attribute_mapping = {key: idx for idx, key in enumerate(input_ranges_1.keys())}

# 最小化目标函数
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# 定义生成整数属性的方法
toolbox.register("attr_int", lambda minimum, maximum: random.randint(round(minimum), round(maximum)))

# 定义生成浮点数属性的方法
toolbox.register("attr_float", lambda minimum, maximum: random.uniform(minimum, maximum))

# 创建个体时根据属性类型选择不同的生成方法
def create_individual():
    individual = []
    for attr_name, attr_info in input_ranges_1.items():
        if attr_name in ['material_0', 'material_1', 'Excipients', 'Additive Species']:
            individual.append(toolbox.attr_int(attr_info['Minimum'], attr_info['Maximum']))
        else:
            individual.append(toolbox.attr_float(attr_info['Minimum'], attr_info['Maximum']))
    return creator.Individual(individual)

# 注册个体生成方法
toolbox.register("individual", create_individual)

# 初始化种群
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def evaluate(individual):
    # 使用模型进行预测
    individual_with_names = {attr_name: value for attr_name, value in zip(input_ranges_1.keys(), individual)}

    # 检查每个特征值是否超出范围，如果超出范围则返回一个非常大的适应度值
    for key, value in individual_with_names.items():
        if key in ['material_0', 'material_1', 'Excipients', 'Additive Species']:
            if not isinstance(value, int) or value < input_ranges_1[key]['Minimum'] or value > input_ranges_1[key]['Maximum']:
                return (1e6,)  # 返回一个非常大的适应度值，表示不合法的个体
        else:
            if value < input_ranges_1[key]['Minimum'] or value > input_ranges_1[key]['Maximum']:
                return (1e6,)  # 返回一个非常大的适应度值，表示不合法的个体

    # 使用模型进行预测
    prediction = loaded_model_1.predict([individual])

    # 计算模型输出并返回其负值作为适应度（因为我们是最小化问题）
    fitness = -prediction
    return fitness,

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxBlend, alpha=0.5)  # 交叉方式使用 Blend 交叉
# toolbox.register("mate", tools.cxOnePoint)

toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)  # 变异方式使用高斯变异
toolbox.register("select", tools.selTournament, tournsize=3)

# 进化算法
def main(population_size=100, n_generations=50, cxpb=0.5, mutpb=0.2):
    pop = toolbox.population(n=population_size)
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("min", min)

    algorithms.eaSimple(pop, toolbox, cxpb=cxpb, mutpb=mutpb, ngen=n_generations, stats=stats, halloffame=hof, verbose=False)

    return pop, stats, hof

# 调整参数
population_size = 60
n_generations = 100
cxpb = 0.5  # 交叉概率
mutpb = 0.01  # 变异概率

num_runs = 100  # 运行次数
best_individuals = []

for i in range(num_runs):
    pop, stats, hof = main(population_size, n_generations, cxpb, mutpb)
    best_individual = tools.selBest(pop, 1)[0]
    print(f"Run {i+1}: Best individual with fitness {best_individual.fitness.values[0]}")
    best_individual_with_names = {list(input_ranges_1.keys())[idx]: value for idx, value in enumerate(best_individual)}
    best_individuals.append((best_individual, best_individual_with_names))

# 保存最优个体到CSV文件
file_name = "best_individuals_NH3-N loss (%).csv"
data = []
columns = list(input_ranges_1.keys()) + ["NH3-N loss (%)"]
for best_individual, best_individual_with_names in best_individuals:
    row = [best_individual_with_names[attr] for attr in input_ranges_1.keys()]
    row.append(-best_individual.fitness.values[0])  # 添加适应度值
    data.append(row)

df = pd.DataFrame(data, columns=columns)
df.to_csv(file_name, index=False)

print("Best individuals saved to_NH3-N loss (%):", file_name)


Input Ranges:
{'material_0': {'Minimum': 0, 'Maximum': 6}, 'initial CN(%)': {'Minimum': -1.0, 'Maximum': 53.73}, 'initial moisture content(%)': {'Minimum': -1.0, 'Maximum': 89.8}, 'initial pH': {'Minimum': -1.0, 'Maximum': 10.7}, 'material_1': {'Minimum': 0, 'Maximum': 5}, 'Excipients': {'Minimum': 0, 'Maximum': 78}, 'initial TN(%)': {'Minimum': -1.0, 'Maximum': 14.56}, 'initial TC(%)': {'Minimum': -1.0, 'Maximum': 197.0}, 'Additive Species': {'Minimum': 0, 'Maximum': 4}}

Output Ranges:
{'NH3-N loss (%)': {'Minimum': 0.0, 'Maximum': 84.51}, 'TN loss (%)': {'Minimum': 0.2, 'Maximum': 90.5}}
Run 1: Best individual with fitness [-19.99296867]
Run 2: Best individual with fitness [-21.26771572]
Run 3: Best individual with fitness [-23.0845398]
Run 4: Best individual with fitness [-19.5720922]
Run 5: Best individual with fitness [-21.73998721]
Run 6: Best individual with fitness [-21.4707412]
Run 7: Best individual with fitness [-24.56127793]
Run 8: Best individual with fitness [-20.6328138

从最优个体找出最小

In [187]:
# 从保存的文件中找到标题为"NH3-N loss (%)"的最小值对应的组合
min_NH3_N_loss_combinations = []
min_NH3_N_loss = min(df['NH3-N loss (%)'])
for index, row in df.iterrows():
    if row['NH3-N loss (%)'] == min_NH3_N_loss:
        min_NH3_N_loss_combinations.append(row)

print(f"\nCombinations with minimum N2O-N loss ({min_NH3_N_loss}%):")
for combination in min_NH3_N_loss_combinations:
    print(combination)


Combinations with minimum N2O-N loss ([15.77313954]%):
material_0                                        2
initial CN(%)                              13.96201
initial moisture content(%)               72.026239
initial pH                                  2.11136
material_1                                        5
Excipients                                       39
initial TN(%)                              6.427479
initial TC(%)                            131.827844
Additive Species                                  1
NH3-N loss (%)                 [15.773139544012277]
Name: 55, dtype: object


model_TN loss (%)_ctb_model优化

In [170]:
import os
import pickle
import pandas as pd
from deap import base, creator, tools, algorithms
import random

# 加载模型
model_name_1 = 'model_TN loss (%)_ctb_model'
# 假设loaded_models已经定义，用于存储加载的模型
# loaded_models = load_models() # 这里是你加载模型的代码，假设已经定义了
loaded_model_1 = loaded_models[model_name_1]

# 创建输入范围字典和输出范围字典
input_ranges_1 = {key: value for key, value in min_max_values.items() if key not in ['N2O-N loss (%)', 'NH3-N loss (%)', 'TN loss (%)']}
output_range_1 = {key: value for key, value in min_max_values.items() if key in ['TN loss (%)']}

print("Input Ranges:")
print(input_ranges_1)
print("\nOutput Ranges:")
print(output_range_1)

# 创建属性名称到数值的映射
attribute_mapping = {key: idx for idx, key in enumerate(input_ranges_1.keys())}

# 最小化目标函数
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

toolbox = base.Toolbox()

# 定义生成整数属性的方法
toolbox.register("attr_int", lambda minimum, maximum: random.randint(round(minimum), round(maximum)))

# 定义生成浮点数属性的方法
toolbox.register("attr_float", lambda minimum, maximum: random.uniform(minimum, maximum))

# 创建个体时根据属性类型选择不同的生成方法
def create_individual():
    individual = []
    for attr_name, attr_info in input_ranges_1.items():
        if attr_name in ['material_0', 'material_1', 'Excipients', 'Additive Species']:
            individual.append(toolbox.attr_int(attr_info['Minimum'], attr_info['Maximum']))
        else:
            individual.append(toolbox.attr_float(attr_info['Minimum'], attr_info['Maximum']))
    return creator.Individual(individual)

# 注册个体生成方法
toolbox.register("individual", create_individual)

# 初始化种群
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def evaluate(individual):
    # 使用模型进行预测
    individual_with_names = {attr_name: value for attr_name, value in zip(input_ranges_1.keys(), individual)}

    # 检查每个特征值是否超出范围，如果超出范围则返回一个非常大的适应度值
    for key, value in individual_with_names.items():
        if key in ['material_0', 'material_1', 'Excipients', 'Additive Species']:
            if not isinstance(value, int) or value < input_ranges_1[key]['Minimum'] or value > input_ranges_1[key]['Maximum']:
                return (1e6,)  # 返回一个非常大的适应度值，表示不合法的个体
        else:
            if value < input_ranges_1[key]['Minimum'] or value > input_ranges_1[key]['Maximum']:
                return (1e6,)  # 返回一个非常大的适应度值，表示不合法的个体

    # 使用模型进行预测
    prediction = loaded_model_1.predict([individual])

    # 计算模型输出并返回其负值作为适应度（因为我们是最小化问题）
    fitness = -prediction
    return fitness,

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxBlend, alpha=0.5)  # 交叉方式使用 Blend 交叉
# toolbox.register("mate", tools.cxOnePoint)

toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=1, indpb=0.1)  # 变异方式使用高斯变异
toolbox.register("select", tools.selTournament, tournsize=3)

# 进化算法
def main(population_size=100, n_generations=50, cxpb=0.5, mutpb=0.2):
    pop = toolbox.population(n=population_size)
    hof = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("min", min)

    algorithms.eaSimple(pop, toolbox, cxpb=cxpb, mutpb=mutpb, ngen=n_generations, stats=stats, halloffame=hof, verbose=False)

    return pop, stats, hof

# 调整参数
population_size = 100
n_generations = 50
cxpb = 0.3  # 交叉概率
mutpb = 0.02  # 变异概率

num_runs = 100  # 运行次数
best_individuals = []

for i in range(num_runs):
    pop, stats, hof = main(population_size, n_generations, cxpb, mutpb)
    best_individual = tools.selBest(pop, 1)[0]
    print(f"Run {i+1}: Best individual with fitness {best_individual.fitness.values[0]}")
    best_individual_with_names = {list(input_ranges_1.keys())[idx]: value for idx, value in enumerate(best_individual)}
    best_individuals.append((best_individual, best_individual_with_names))

# 保存最优个体到CSV文件
file_name = "best_individuals_TN loss (%).csv"
data = []
columns = list(input_ranges_1.keys()) + ["TN loss (%)"]
for best_individual, best_individual_with_names in best_individuals:
    row = [best_individual_with_names[attr] for attr in input_ranges_1.keys()]
    row.append(-best_individual.fitness.values[0])  # 添加适应度值
    data.append(row)

df = pd.DataFrame(data, columns=columns)
df.to_csv(file_name, index=False)

print("Best individuals saved to_TN loss (%):", file_name)


Input Ranges:
{'material_0': {'Minimum': 0, 'Maximum': 6}, 'initial CN(%)': {'Minimum': -1.0, 'Maximum': 53.73}, 'initial moisture content(%)': {'Minimum': -1.0, 'Maximum': 89.8}, 'initial pH': {'Minimum': -1.0, 'Maximum': 10.7}, 'material_1': {'Minimum': 0, 'Maximum': 5}, 'Excipients': {'Minimum': 0, 'Maximum': 78}, 'initial TN(%)': {'Minimum': -1.0, 'Maximum': 14.56}, 'initial TC(%)': {'Minimum': -1.0, 'Maximum': 197.0}, 'Additive Species': {'Minimum': 0, 'Maximum': 4}}

Output Ranges:
{'TN loss (%)': {'Minimum': 0.2, 'Maximum': 90.5}}
Run 1: Best individual with fitness [-37.97885911]
Run 2: Best individual with fitness [-45.91499866]
Run 3: Best individual with fitness [-40.86607202]
Run 4: Best individual with fitness [-35.54169814]
Run 5: Best individual with fitness [-46.21301536]
Run 6: Best individual with fitness [-39.5328515]
Run 7: Best individual with fitness [-39.05719107]
Run 8: Best individual with fitness [-36.84531856]
Run 9: Best individual with fitness [-43.6608655]

从最优个体找出最小

In [172]:
# 从保存的文件中找到标题为"TN loss (%)"的最小值对应的组合
min_TN_loss_combinations = []
min_TN_loss = min(df['TN loss (%)'])
for index, row in df.iterrows():
    if row['TN loss (%)'] == min_TN_loss:
        min_TN_loss_combinations.append(row)

print(f"\nCombinations with minimum N2O-N loss ({min_TN_loss}%):")
for combination in min_TN_loss_combinations:
    print(combination)


Combinations with minimum N2O-N loss ([32.24972531]%):
material_0                                       1
initial CN(%)                              4.42864
initial moisture content(%)               33.91675
initial pH                                9.506107
material_1                                       3
Excipients                                      52
initial TN(%)                            11.699848
initial TC(%)                            79.116854
Additive Species                                 4
TN loss (%)                    [32.24972530675092]
Name: 58, dtype: object
