In [11]:
import numpy as np
import pandas as pd

# Every element column of the output table. Elements that are not listed in
# `selected_chemical_elements` keep a content of 0 in every row.
all_chemical_elements = ['Fe', 'Co', 'Mn', 'Al', 'Ni', 'B', 'Hf', 'Si', 'Cu', 'P', 'Mo', 'Cr',
                         'La', 'Ti', 'Sn', 'V', 'Ga', 'Nb', 'Zr', 'C', 'Gd', 'Y', 'Ta']
# Elements that receive a sampled content in this dataset.
selected_chemical_elements = ['Fe', 'B', 'Al', 'Si', 'Mn', 'Cr']
crystal_structures = ['DO3', 'BCC', 'FCC', 'HCP', 'Orthorhombic', 'hexagonal', 'Tetragonal', 'Primitive Cubic']
other_properties = ['Ms', 'Hc_log']

# --- Generation settings -------------------------------------------------
N_SAMPLES = 100000        # number of rows to generate
RANDOM_SEED = 42          # fixed seed so that re-running the cell reproduces the same file
TENTHS_TOTAL = 1000       # 100 percent expressed in tenths of a percent
# (mean, standard deviation) of the normal distribution each major element is drawn from.
major_element_stats = {'Fe': (74.0, 8), 'Si': (16.3, 4), 'Al': (9.7, 4)}
# The remaining selected elements share whatever the major elements leave over.
minor_elements = [el for el in selected_chemical_elements if el not in major_element_stats]

# A local Generator keeps the cell reproducible without touching NumPy's global RNG state.
rng = np.random.default_rng(RANDOM_SEED)

# --- Major elements (Fe, Si, Al) -----------------------------------------
# Contents are handled as integer tenths of a percent: this is the same
# one-decimal resolution as before, but sums and differences are exact, so the
# "total <= 100" test and the remainder are free of floating-point drift.
# Rejection sampling: draw a batch, keep the rows whose total does not exceed
# 100 percent, and redraw only the number of rows that is still missing.
major_batches = [np.empty((0, len(major_element_stats)), dtype=np.int64)]
n_accepted = 0
while n_accepted < N_SAMPLES:
    n_draw = N_SAMPLES - n_accepted
    draws = np.column_stack([rng.normal(mean, std, n_draw)
                             for mean, std in major_element_stats.values()])
    # Negative draws are clipped to 0 before rounding to one decimal.
    tenths = np.rint(np.clip(draws, 0, None) * 10).astype(np.int64)
    accepted = tenths[tenths.sum(axis=1) <= TENTHS_TOTAL]
    major_batches.append(accepted)
    n_accepted += len(accepted)
major_tenths = np.concatenate(major_batches)

# --- Minor elements --------------------------------------------------------
# Split the remainder between the minor elements using random weights.
# Rounding each share independently (as the previous version did) lets a row
# end up at 99.9 or 100.1; rounding the *cumulative* shares and differencing
# them keeps every share non-negative and makes each row sum to exactly 100.
remaining_tenths = TENTHS_TOTAL - major_tenths.sum(axis=1)
weights = rng.random((N_SAMPLES, len(minor_elements)))
cumulative_share = np.cumsum(weights, axis=1)
cumulative_share = cumulative_share / cumulative_share[:, -1:]   # last column becomes exactly 1
cut_points = np.rint(cumulative_share * remaining_tenths[:, None]).astype(np.int64)
minor_tenths = np.diff(cut_points, axis=1, prepend=0)

# --- Assemble the element table (all other elements stay at 0) -------------
content_tenths = np.zeros((N_SAMPLES, len(all_chemical_elements)), dtype=np.int64)
for j, element in enumerate(major_element_stats):
    content_tenths[:, all_chemical_elements.index(element)] = major_tenths[:, j]
for j, element in enumerate(minor_elements):
    content_tenths[:, all_chemical_elements.index(element)] = minor_tenths[:, j]
element_ratios = content_tenths / 10

# Independent random 0/1 flag for every crystal structure.
structure_flags = rng.integers(0, 2, size=(N_SAMPLES, len(crystal_structures)))

# --- Build the DataFrame ---------------------------------------------------
# Stacking with the float contents stores the flags as 0.0 / 1.0, which is the
# format the previous version of this cell wrote to disk.
columns = all_chemical_elements + crystal_structures + other_properties
df = pd.DataFrame(np.hstack([element_ratios, structure_flags]),
                  columns=all_chemical_elements + crystal_structures)
# Ms and Hc_log are left empty; they are written as blank fields in the CSV.
for prop in other_properties:
    df[prop] = None

# Keep numeric columns at one decimal (the values are already on the 0.1 grid).
numeric_columns = df.select_dtypes(include=[np.number]).columns
df[numeric_columns] = df[numeric_columns].round(1)

# Save as a CSV file.
df.to_csv('test_data_SHAP.csv', index=False)

print("数据已保存为 test_data_SHAP.csv 文件。")

数据已保存为 test_data_SHAP.csv 文件。


In [12]:
# Preview the first five rows of `df`.
df.head(n=5)

Unnamed: 0,Fe,Co,Mn,Al,Ni,B,Hf,Si,Cu,P,...,DO3,BCC,FCC,HCP,Orthorhombic,hexagonal,Tetragonal,Primitive Cubic,Ms,Hc_log
0,76.2,0.0,3.1,3.5,0.0,2.8,0.0,12.7,0.0,0.0,...,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,,
1,73.1,0.0,3.1,7.7,0.0,1.2,0.0,14.0,0.0,0.0,...,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,,
2,70.8,0.0,3.6,9.8,0.0,2.7,0.0,11.2,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,,
3,59.8,0.0,7.2,8.6,0.0,6.1,0.0,16.2,0.0,0.0,...,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,,
4,66.1,0.0,1.0,11.5,0.0,4.7,0.0,13.9,0.0,0.0,...,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,,


In [13]:
# Every element column of the output table. Elements that are not listed in
# `selected_chemical_elements` keep a content of 0 in every row.
all_chemical_elements = ['Fe', 'Co', 'Mn', 'Al', 'Ni', 'B', 'Hf', 'Si', 'Cu', 'P', 'Mo', 'Cr',
                         'La', 'Ti', 'Sn', 'V', 'Ga', 'Nb', 'Zr', 'C', 'Gd', 'Y', 'Ta']
# Elements that receive a sampled content in this dataset.
selected_chemical_elements = ['Fe', 'Si', 'Al', 'B', 'Nb']
crystal_structures = ['DO3', 'BCC', 'FCC', 'HCP', 'Orthorhombic', 'hexagonal', 'Tetragonal', 'Primitive Cubic']
other_properties = ['Ms', 'Hc_log']

# --- Generation settings -------------------------------------------------
N_SAMPLES = 100000        # number of rows to generate
RANDOM_SEED = 43          # fixed seed so that re-running the cell reproduces the same file
TENTHS_TOTAL = 1000       # 100 percent expressed in tenths of a percent
# (mean, standard deviation) of the normal distribution each major element is drawn from.
major_element_stats = {'Fe': (74.0, 8), 'Si': (16.3, 4), 'Al': (9.7, 4)}
# The remaining selected elements share whatever the major elements leave over.
minor_elements = [el for el in selected_chemical_elements if el not in major_element_stats]

# A local Generator keeps the cell reproducible without touching NumPy's global RNG state.
rng = np.random.default_rng(RANDOM_SEED)

# --- Major elements (Fe, Si, Al) -----------------------------------------
# Contents are handled as integer tenths of a percent: this is the same
# one-decimal resolution as before, but sums and differences are exact, so the
# "total <= 100" test and the remainder are free of floating-point drift.
# Rejection sampling: draw a batch, keep the rows whose total does not exceed
# 100 percent, and redraw only the number of rows that is still missing.
major_batches = [np.empty((0, len(major_element_stats)), dtype=np.int64)]
n_accepted = 0
while n_accepted < N_SAMPLES:
    n_draw = N_SAMPLES - n_accepted
    draws = np.column_stack([rng.normal(mean, std, n_draw)
                             for mean, std in major_element_stats.values()])
    # Negative draws are clipped to 0 before rounding to one decimal.
    tenths = np.rint(np.clip(draws, 0, None) * 10).astype(np.int64)
    accepted = tenths[tenths.sum(axis=1) <= TENTHS_TOTAL]
    major_batches.append(accepted)
    n_accepted += len(accepted)
major_tenths = np.concatenate(major_batches)

# --- Minor elements --------------------------------------------------------
# Split the remainder between the minor elements using random weights.
# Rounding each share independently (as the previous version did) lets a row
# end up at 99.9 or 100.1; rounding the *cumulative* shares and differencing
# them keeps every share non-negative and makes each row sum to exactly 100.
remaining_tenths = TENTHS_TOTAL - major_tenths.sum(axis=1)
weights = rng.random((N_SAMPLES, len(minor_elements)))
cumulative_share = np.cumsum(weights, axis=1)
cumulative_share = cumulative_share / cumulative_share[:, -1:]   # last column becomes exactly 1
cut_points = np.rint(cumulative_share * remaining_tenths[:, None]).astype(np.int64)
minor_tenths = np.diff(cut_points, axis=1, prepend=0)

# --- Assemble the element table (all other elements stay at 0) -------------
content_tenths = np.zeros((N_SAMPLES, len(all_chemical_elements)), dtype=np.int64)
for j, element in enumerate(major_element_stats):
    content_tenths[:, all_chemical_elements.index(element)] = major_tenths[:, j]
for j, element in enumerate(minor_elements):
    content_tenths[:, all_chemical_elements.index(element)] = minor_tenths[:, j]
element_ratios = content_tenths / 10

# Independent random 0/1 flag for every crystal structure.
structure_flags = rng.integers(0, 2, size=(N_SAMPLES, len(crystal_structures)))

# --- Build the DataFrame ---------------------------------------------------
# Stacking with the float contents stores the flags as 0.0 / 1.0, which is the
# format the previous version of this cell wrote to disk.
columns = all_chemical_elements + crystal_structures + other_properties
df = pd.DataFrame(np.hstack([element_ratios, structure_flags]),
                  columns=all_chemical_elements + crystal_structures)
# Ms and Hc_log are left empty; they are written as blank fields in the CSV.
for prop in other_properties:
    df[prop] = None

# Keep numeric columns at one decimal (the values are already on the 0.1 grid).
numeric_columns = df.select_dtypes(include=[np.number]).columns
df[numeric_columns] = df[numeric_columns].round(1)

# Save as a CSV file.
df.to_csv('test_data_MIC.csv', index=False)

print("数据已保存为 test_data_MIC.csv 文件。")
    

数据已保存为 test_data_MIC.csv 文件。


In [14]:
# Preview the first five rows of `df`.
df.head(n=5)

Unnamed: 0,Fe,Co,Mn,Al,Ni,B,Hf,Si,Cu,P,...,DO3,BCC,FCC,HCP,Orthorhombic,hexagonal,Tetragonal,Primitive Cubic,Ms,Hc_log
0,67.9,0.0,0.0,5.2,0.0,6.0,0.0,16.7,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,,
1,58.6,0.0,0.0,5.4,0.0,7.6,0.0,14.7,0.0,0.0,...,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,,
2,74.1,0.0,0.0,5.0,0.0,1.6,0.0,19.1,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,,
3,63.1,0.0,0.0,3.8,0.0,14.1,0.0,18.7,0.0,0.0,...,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,,
4,73.5,0.0,0.0,11.4,0.0,4.1,0.0,8.2,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,,
