In [None]:
import pandas as pd
import random
import itertools
from pymatgen.core import Structure

#################################
# Script that generates one file summarising all substitution information
#################################

# === Inputs ===
CIF_PATH = "/Users/andotakuto/Desktop/MI講習会/internship/CIF_file/LaNi5_2x2x2.cif"      # source CIF
OUTPUT_CSV = "exploration_space_test.csv"
N_SAMPLE = 1                  # number of random samples drawn for each case

def get_ni_labels(structure):
    """
    Collect one label per Ni site of ``structure``, in site order.

    A site's ``properties["label"]`` entry is used when it is present and
    not ``None``; otherwise the label falls back to ``"Ni<k>"`` where ``k``
    is the 1-based position of the site within ``structure.sites``.
    """
    def label_for(position, site):
        stored = site.properties.get("label")
        # No stored label: substitute the 1-based site position.
        return f"Ni{position}" if stored is None else stored

    return [
        label_for(position, site)
        for position, site in enumerate(structure.sites, start=1)
        if site.specie.symbol == "Ni"
    ]

def main():
    """
    Build the substitution search space and write it to ``OUTPUT_CSV``.

    Reads the structure at ``CIF_PATH``, collects the Ni site labels, and
    enumerates every combination of per-element substitution counts for
    (Al, Sn, Zn, Ga, Si) whose total is at most 4. For each combination,
    ``N_SAMPLE`` random placements on the Ni sites are drawn; the case with
    no substitution yields a single row with ``pattern_id`` 0.

    Each CSV row holds ``case_id``, ``pattern_id``, one count column per
    element (including Si) and ``replaced_sites``, a ``;``-joined list of
    ``<site label>:<element>`` entries.
    """
    # === Load the CIF ===
    structure = Structure.from_file(CIF_PATH)
    ni_labels = get_ni_labels(structure)
    n_ni = len(ni_labels)
    print(f"[INFO] Niサイト数: {n_ni}")

    # === All combinations of substitution counts for (Al, Sn, Zn, Ga, Si) ===
    # With each count in 0..4 and a total of at most 4 this gives 126 cases.
    element_types = ["Al", "Sn", "Zn", "Ga", "Si"]
    max_total = 4
    substitution_cases = [
        dict(zip(element_types, counts))
        for counts in itertools.product(
            range(max_total + 1), repeat=len(element_types)
        )
        if sum(counts) <= max_total
    ]

    print(f"[INFO] 置換ケース数: {len(substitution_cases)}")

    # === Build the search space ===
    records = []
    for case_id, case in enumerate(substitution_cases, start=1):
        # Every element, Si included, counts towards the number of sites
        # to replace.
        n_replace = sum(case.values())
        if n_replace == 0:
            # Nothing is substituted in this case.
            records.append({
                "case_id": case_id,
                "pattern_id": 0,
                **case,
                "replaced_sites": "",
            })
            continue

        for pattern_id in range(1, N_SAMPLE + 1):
            # random.sample returns the selected sites in random order, so
            # handing out consecutive slices assigns sites to elements at
            # random without removing labels by value (which would
            # misbehave if two sites shared a label).
            sites = random.sample(ni_labels, n_replace)
            replaced_sites = []
            offset = 0
            for elem in element_types:
                n_elem = case[elem]
                chosen = sites[offset:offset + n_elem]
                offset += n_elem
                replaced_sites.extend(f"{s}:{elem}" for s in chosen)

            records.append({
                "case_id": case_id,
                "pattern_id": pattern_id,
                **case,
                "replaced_sites": ";".join(replaced_sites),
            })

    # === Write the CSV ===
    df = pd.DataFrame(records)
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"[OK] {OUTPUT_CSV} を出力: {len(df)}行")

# Run main() only when this code is executed directly (not when imported).
if __name__ == "__main__":
    main()


[INFO] Niサイト数: 40
[INFO] 置換ケース数: 126
[OK] exploration_space_test.csv を出力: 126行




In [11]:
# -*- coding: utf-8 -*-
"""
Substitute up to 4 of the Ni sites of LaNi5 (40 Ni sites) with {Al, Sn, Zn, Ga, Si}.
For each composition (the set of per-element substitution counts), generate at
most N_SAMPLE random placements and write only a summary CSV per composition.

Requirements:
- pymatgen is required (pip install pymatgen)
- CIF_PATH points to the unsubstituted supercell (the one with 40 Ni sites)
"""

import os
import random
import itertools
import json
import pandas as pd
from pymatgen.core import Structure

# ===================== User settings =====================
CIF_PATH   = "LaNi5_2x2x2.cif"     # unsubstituted CIF (expected to contain 40 Ni sites)
OUTPUT_DIR = "LaNi5_dope_50random" # folder that receives the per-composition CSV files of substitution info
N_SAMPLE   = 50                    # maximum number of samples generated per composition
GLOBAL_SEED = 42                   # fixed random seed (reproducibility)
# =====================================================

ELEMENTS = ["Al", "Sn", "Zn", "Ga", "Si"]
# Per-element upper bounds on the substitution count, read by
# enumerate_substitution_cases().
max_dope_al = 4
max_dope_sn = 4
max_dope_zn = 4
max_dope_ga = 4
max_dope_si = 4

def enumerate_substitution_cases(max_total=4):
    """
    Enumerate every composition of per-element substitution counts.

    Each element's count ranges from 0 to its module-level ``max_dope_*``
    bound, and only combinations whose counts sum to at most ``max_total``
    are kept. The all-zero (unsubstituted) composition is included.

    Args:
        max_total: Largest allowed total number of substituted sites.
            Defaults to 4.

    Returns:
        A list of dicts with keys "Al", "Sn", "Zn", "Ga", "Si", ordered as
        nested loops over Al (outermost) through Si (innermost) would
        produce them.
    """
    symbols = ("Al", "Sn", "Zn", "Ga", "Si")
    count_ranges = (
        range(max_dope_al + 1),
        range(max_dope_sn + 1),
        range(max_dope_zn + 1),
        range(max_dope_ga + 1),
        range(max_dope_si + 1),
    )
    # itertools.product varies the last range fastest, matching the order
    # of the equivalent nested for-loops.
    return [
        dict(zip(symbols, counts))
        for counts in itertools.product(*count_ranges)
        if sum(counts) <= max_total
    ]

def composition_folder_name(comp):
    """
    Return the folder name for a composition.

    The name is the ``_``-joined sequence of ``<element><count>`` tokens,
    one per entry of ``ELEMENTS`` and in that order.
    """
    tokens = ("{}{}".format(el, comp[el]) for el in ELEMENTS)
    return "_".join(tokens)

def random_selection_for_composition(ni_indices, comp):
    """
    Randomly assign Ni sites to elements according to per-element counts.

    A copy of ``ni_indices`` is shuffled and consecutive slices of it are
    handed to each element of ``ELEMENTS`` in order, so no site is assigned
    to more than one element.

    Args:
        ni_indices: Iterable of candidate site indices.
        comp: Mapping from each element in ``ELEMENTS`` to the number of
            sites it should receive. Counts of zero or less receive none.

    Returns:
        A dict mapping every element in ``ELEMENTS`` to the list of site
        indices assigned to it (an empty list when its count is not
        positive).

    Raises:
        ValueError: If the composition asks for more sites than
            ``ni_indices`` provides. Slicing would otherwise silently hand
            out fewer sites than requested.
    """
    available = list(ni_indices)
    requested = sum(comp[el] for el in ELEMENTS if comp[el] > 0)
    if requested > len(available):
        raise ValueError(
            f"composition requests {requested} sites but only "
            f"{len(available)} are available"
        )

    random.shuffle(available)
    sel_map = {el: [] for el in ELEMENTS}
    ptr = 0
    for el in ELEMENTS:
        n = comp[el]
        if n <= 0:
            continue
        sel_map[el] = available[ptr:ptr + n]
        ptr += n
    return sel_map

def unique_key_from_selection(sel_map):
    """
    Build a hashable key identifying a selection.

    For each element of ``ELEMENTS``, in order, the key holds a pair of the
    element and the sorted tuple of its selected indices, so the order of
    indices within an element does not affect the key.
    """
    pairs = []
    for el in ELEMENTS:
        ordered = sorted(sel_map[el])
        pairs.append((el, tuple(ordered)))
    return tuple(pairs)

def main():
    """
    Generate one summary CSV of random placements per composition.

    Seeds ``random`` with ``GLOBAL_SEED``, reads the structure at
    ``CIF_PATH`` and collects the indices of its Ni sites. For every
    composition from ``enumerate_substitution_cases()`` a sub-folder of
    ``OUTPUT_DIR`` is created, and up to ``N_SAMPLE`` distinct random
    placements are drawn within a bounded number of attempts and written to
    ``<folder>__summary.csv``.
    """
    random.seed(GLOBAL_SEED)

    structure = Structure.from_file(CIF_PATH)
    ni_indices = []
    for idx, site in enumerate(structure):
        if site.specie.symbol == "Ni":
            ni_indices.append(idx)
    print(f"[INFO] CIF中のNiサイト数: {len(ni_indices)}")

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    compositions = enumerate_substitution_cases()
    print(f"[INFO] 組成ケース数: {len(compositions)}")

    # Cap on draws per composition, so compositions with fewer distinct
    # placements than N_SAMPLE still terminate.
    attempt_budget = max(5 * N_SAMPLE, 500)

    for comp in compositions:
        folder = composition_folder_name(comp)
        target_dir = os.path.join(OUTPUT_DIR, folder)
        os.makedirs(target_dir, exist_ok=True)

        seen = set()
        records = []

        for _ in range(attempt_budget):
            if len(records) >= N_SAMPLE:
                break

            assignment = random_selection_for_composition(ni_indices, comp)
            signature = unique_key_from_selection(assignment)
            if signature in seen:
                # Placement already recorded; draw again.
                continue
            seen.add(signature)

            row = {
                "sample_id": len(records) + 1,
                "n_sub_total": sum(comp.values()),
            }
            for el in ELEMENTS:
                row[f"n_{el}"] = comp[el]
            for el in ELEMENTS:
                ordered = sorted(assignment[el])
                row[f"indices_{el}"] = ",".join(str(i) for i in ordered)
            row["substitution_map_json"] = json.dumps(assignment)
            records.append(row)

        summary_path = os.path.join(target_dir, f"{folder}__summary.csv")
        pd.DataFrame(records).to_csv(summary_path, index=False)

        if len(records) >= N_SAMPLE:
            print(f"[INFO] {folder}: {len(records)} 件 生成")
        else:
            print(f"[INFO] {folder}: {len(records)}/{N_SAMPLE} 件（ユニーク制約で頭打ち）")

    print("\n[DONE] CSV生成が完了しました。出力先:", os.path.abspath(OUTPUT_DIR))

# Run main() only when this code is executed directly (not when imported).
if __name__ == "__main__":
    main()


[INFO] CIF中のNiサイト数: 40
[INFO] 組成ケース数: 126
[INFO] Al0_Sn0_Zn0_Ga0_Si0: 1/50 件（ユニーク制約で頭打ち）
[INFO] Al0_Sn0_Zn0_Ga0_Si1: 40/50 件（ユニーク制約で頭打ち）
[INFO] Al0_Sn0_Zn0_Ga0_Si2: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga0_Si3: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga0_Si4: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga1_Si0: 40/50 件（ユニーク制約で頭打ち）
[INFO] Al0_Sn0_Zn0_Ga1_Si1: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga1_Si2: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga1_Si3: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga2_Si0: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga2_Si1: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga2_Si2: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga3_Si0: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga3_Si1: 50 件 生成
[INFO] Al0_Sn0_Zn0_Ga4_Si0: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga0_Si0: 40/50 件（ユニーク制約で頭打ち）
[INFO] Al0_Sn0_Zn1_Ga0_Si1: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga0_Si2: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga0_Si3: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga1_Si0: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga1_Si1: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga1_Si2: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga2_Si0: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga2_Si1: 50 件 生成
[INFO] Al0_Sn0_Zn1_Ga3_Si0: 50 件 生成
[INFO] Al0_