In [54]:
# %load_ext autoreload
%reload_ext autoreload
%autoreload 2
import itertools
import os
from multiprocessing import Pool, cpu_count
from pathlib import Path

import numpy as np
import pandas as pd
from tqdm import tqdm

from package_bond_search_algorithm.algolithm_bond_search_for_trigonal_pyramidal_planar_shape import concat_filter
from package_file_conversion.df2poscar import df2poscar
from package_file_conversion.nnlist2df import nnlist2df
from package_file_conversion.poscar2df import poscar2df

In [55]:
### List of crystal-structure paths that contain carbonate ions
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load .npy files from a trusted source.
import numpy as np
np.load('/mnt/ssd_elecom_c2c_960gb/scripts/get_some_ion_contained_pos_folder_p_list/CO3_contained_poscar_folder_path_list_ver2.npy', allow_pickle=True)

array([PosixPath('/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000033'),
       PosixPath('/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000052'),
       PosixPath('/mnt/ssd_elecom_c2c_960gb/cif/1/00/03/1000320'), ...,
       PosixPath('/mnt/ssd_elecom_c2c_960gb/cif/9/01/77/9017727'),
       PosixPath('/mnt/ssd_elecom_c2c_960gb/cif/9/01/78/9017813'),
       PosixPath('/mnt/ssd_elecom_c2c_960gb/cif/9/01/78/9017837')],
      dtype=object)

In [56]:
def mk_polyatomic_ion_replaced_point_poscar(poscar_path,
                                            nnlist_path,
                                            central_atom_symbol='C', 
                                            neighboring_atom_symbol='O', 
                                            bond_length_lower_end=0.99, 
                                            bond_length_upper_end=1.66, 
                                            generated_poscar_path='./ion_replaced_point/POSCAR'):
    """Write a POSCAR in which each detected polyatomic ion is replaced by a single point.

    The POSCAR and its neighbour list are loaded as DataFrames, ``concat_filter``
    decides whether the target ion is present and which atoms are ion centres,
    and every ion is then collapsed onto its central atom, whose coordinates are
    shifted by the mean relative offset of the ion's members.

    Args:
        poscar_path: Path of the source POSCAR (also forwarded to ``df2poscar``
            as ``original_poscar_path``).
        nnlist_path: Path of the matching ``POSCAR.nnlist`` file.
        central_atom_symbol: Symbol of the ion's central atom (forwarded to
            ``concat_filter``).
        neighboring_atom_symbol: Symbol of the ion's neighbouring atoms
            (forwarded to ``concat_filter``).
        bond_length_lower_end: Lower bond-length bound (forwarded to
            ``concat_filter`` only).
        bond_length_upper_end: Upper bond-length bound; forwarded to
            ``concat_filter`` and also used here to select the atoms of each ion.
        generated_poscar_path: Destination passed to ``df2poscar``.

    Returns:
        None. When ``concat_filter`` reports no ion, nothing is written.
    """
    abs_cols = ['x', 'y', 'z']
    rel_cols = ['rel_x', 'rel_y', 'rel_z']

    # 0-1. Convert POSCAR and POSCAR.nnlist into DataFrames.
    df_poscar = poscar2df(poscar_path=poscar_path)
    df_nnlist = nnlist2df(nnlist_path=nnlist_path)

    # 0-2. Run the filter that decides whether the polyatomic ion is present.
    bool_, ion_central_atom_ids = concat_filter(df_nnlist=df_nnlist,
                                                central_atom_symbol=central_atom_symbol,
                                                neighboring_atom_symbol=neighboring_atom_symbol,
                                                bond_length_lower_end=bond_length_lower_end,
                                                bond_length_upper_end=bond_length_upper_end)
    if not bool_:
        return None

    # Accept any iterable of ids and allow it to be traversed more than once.
    ion_central_atom_ids = list(ion_central_atom_ids)

    # 1. + 2. One pass per ion: collect the member atom ids and the mean relative
    #    offset of those members as seen from the central atom.
    member_atom_ids = set()
    offset_records = []
    for ion_central_atom_id in ion_central_atom_ids:
        # Every neighbour-list row of this centre that is closer than the upper
        # bound counts as a member; the neighbour's symbol is not re-checked here.
        is_ion_row = ((df_nnlist['central_atom_id'] == ion_central_atom_id)
                      & (df_nnlist['rel_distance'] < bond_length_upper_end))
        df_ion = df_nnlist[is_ion_row]
        member_atom_ids.update(df_ion['neighboring_atom_id'].tolist())

        # Keep the means as numbers. Round-tripping them through the DataFrame's
        # printed form loses precision and breaks on the alignment padding that
        # pandas inserts in front of non-negative values.
        mean_offset = df_ion[rel_cols].astype(float).mean()
        offset_records.append({'central_atom_id': ion_central_atom_id,
                               'rel_x': mean_offset['rel_x'],
                               'rel_y': mean_offset['rel_y'],
                               'rel_z': mean_offset['rel_z']})

    # Keep atoms that are outside every ion, plus the ion centres themselves.
    is_member = df_poscar['atom_id'].isin(list(member_atom_ids))
    is_center = df_poscar['atom_id'].isin(ion_central_atom_ids)
    df_poscar_abs_coords = df_poscar[~is_member | is_center]

    # 3. Attach the offsets to the kept atoms and add them to the absolute
    #    coordinates; atoms that are not ion centres get a zero offset.
    df_offsets = pd.DataFrame(offset_records, columns=['central_atom_id'] + rel_cols)
    df_merged = pd.merge(df_poscar_abs_coords, df_offsets,
                         left_on='atom_id', right_on='central_atom_id', how='left')
    df_merged[rel_cols] = df_merged[rel_cols].fillna(0)
    for abs_col, rel_col in zip(abs_cols, rel_cols):
        df_merged[abs_col] = df_merged[abs_col] + df_merged[rel_col]
    # Drop the helper columns that were only needed for the addition.
    df_poscar_ion_replaced_point = df_merged.drop(columns=['central_atom_id'] + rel_cols)

    # 4. Write the result out as a POSCAR file.
    df2poscar(df_poscar_ion_replaced_point, original_poscar_path=poscar_path, generated_poscar_path=generated_poscar_path)
    return None

In [4]:
# poscar_path = '/mnt/ssd_elecom_c2c_960gb/cif/1/00/03/1000320/POSCAR'
# nnlist_path = '/mnt/ssd_elecom_c2c_960gb/cif/1/00/03/1000320/nnlist_5/POSCAR.nnlist'
# # 0. Receive the per-polyatomic-ion parameters from the CLI
# central_atom_symbol = 'C'
# neighboring_atom_symbol = 'O'
# bond_length_lower_end = 0.99
# bond_length_upper_end = 1.66
# generated_poscar_path='./ion_replaced_point/POSCAR'
# mk_polyatomic_ion_replaced_point_poscar(poscar_path=poscar_path,
#                                         nnlist_path=nnlist_path,
#                                         central_atom_symbol=central_atom_symbol,
#                                         neighboring_atom_symbol=neighboring_atom_symbol,
#                                         bond_length_lower_end=bond_length_lower_end, 
#                                         bond_length_upper_end=bond_length_upper_end, 
#                                         generated_poscar_path=generated_poscar_path)

./gen_data/POSCAR に多原子イオンを点置換した情報がPOSCARに書き込まれました．


In [8]:
# # Receive the command-line arguments
# parser = argparse.ArgumentParser(description='This script takes five arguments: arg1, arg2, arg3, arg4, arg5 and arg6.',
#                                  usage='%(prog)s <arg1> <arg2> <arg3> <arg4> <arg5> <arg6> \
#                                  \nexample: python3 %(prog)s CO3 C O 0.99 1.66 ../get_some_ion_contained_pos_folder_p_list/CO3_contained_poscar_folder_path_list_ver2.npy)
# parser.add_argument('arg1', help='target_ion_name: CO3')
# parser.add_argument('arg2', help='central_atom_symbol: C')
# parser.add_argument('arg3', help='neighboring_atom_symbol: O')
# parser.add_argument('arg4', help='bond_length_lower_end: 0.99')
# parser.add_argument('arg5', help='bond_length_upper_end: 1.66')
# parser.add_argument('arg6', help='npy_file_path: ../get_some_ion_contained_pos_folder_p_list/CO3_contained_poscar_folder_path_list_ver2.npy')
# args = parser.parse_args()
# target_ion_name = args.arg1
# central_atom_symbol = args.arg2
# neighboring_atom_symbol = args.arg3
# bond_length_lower_end = args.arg4
# bond_length_upper_end = args.arg5
# target_npy_p = args.arg6

# Hard-coded run parameters, used in place of the commented-out command-line arguments above.
target_ion_name = 'CO3'
central_atom_symbol = 'C'
neighboring_atom_symbol = 'O'
bond_length_lower_end = 0.99
bond_length_upper_end = 1.66
# Relative path: it is resolved against the kernel's current working directory.
target_npy_p = '../get_some_ion_contained_pos_folder_p_list/CO3_contained_poscar_folder_path_list_ver2.npy'
# Echo the parameters so the run is self-describing in the saved output.
print(f'target_ion_name: {target_ion_name}')
print(f'central_atom_symbol: {central_atom_symbol}')
print(f'neighboring_atom_symbol: {neighboring_atom_symbol}')
print(f'bond_length_lower_end: {bond_length_lower_end}')
print(f'bond_length_upper_end: {bond_length_upper_end}')
print(f'target_npy_p: {target_npy_p}')
print(f'os.path.exists(target_npy_p): {os.path.exists(target_npy_p)}')

# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load .npy files from a trusted source.
some_species_existed_poscar_folder_path_list = np.load(target_npy_p, allow_pickle=True)
print(f"len(some_species_existed_poscar_folder_path_list): {len(some_species_existed_poscar_folder_path_list)}")

target_ion_name: CO3
central_atom_symbol: C
neighboring_atom_symbol: O
bond_length_lower_end: 0.99
bond_length_upper_end: 1.66
target_npy_p: ../get_some_ion_contained_pos_folder_p_list/CO3_contained_poscar_folder_path_list_ver2.npy
os.path.exists(target_npy_p): True
len(some_species_existed_poscar_folder_path_list): 1143


In [57]:
# Load the array stored at target_npy_p; it is indexed and iterated as a list of structure folders further down.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects; only load .npy files from a trusted source.
ion_contained_poscar_folder_p_list = np.load(target_npy_p, allow_pickle=True)
# ion_contained_poscar_folder_p_list[0]
# print(ion_contained_nnlist_path_list[0])
# import os; os.path.exists(ion_contained_nnlist_path_list[0])

In [58]:
def wrap_mk_polyatomic_ion_replaced_point_poscar(args):
    """Single-argument adapter around ``mk_polyatomic_ion_replaced_point_poscar``.

    Args:
        args: Iterable of positional arguments, in the order expected by
            ``mk_polyatomic_ion_replaced_point_poscar``.

    Returns:
        Whatever ``mk_polyatomic_ion_replaced_point_poscar`` returns.
    """
    positional_args = tuple(args)
    return mk_polyatomic_ion_replaced_point_poscar(*positional_args)


def mk_job_args(ion_contained_poscar_folder_p_list, target_ion_name, central_atom_symbol, neighboring_atom_symbol, bond_length_lower_end, bond_length_upper_end):
    """Build one positional-argument tuple per structure folder.

    Each tuple has the form
    ``(poscar_path, nnlist_path, central_atom_symbol, neighboring_atom_symbol,
    bond_length_lower_end, bond_length_upper_end, generated_poscar_path)``,
    matching the parameter order of ``mk_polyatomic_ion_replaced_point_poscar``.

    Args:
        ion_contained_poscar_folder_p_list: Iterable of folder paths (``str`` or
            ``Path``); each folder is expected to hold ``POSCAR`` and
            ``nnlist_5/POSCAR.nnlist``.
        target_ion_name: Ion label used to name the output sub-folder
            ``<target_ion_name>_ion_replaced_point``.
        central_atom_symbol: Copied unchanged into every tuple.
        neighboring_atom_symbol: Copied unchanged into every tuple.
        bond_length_lower_end: Copied unchanged into every tuple.
        bond_length_upper_end: Copied unchanged into every tuple.

    Returns:
        A list of tuples. A list (rather than a one-shot ``zip`` iterator) is
        returned so the jobs can be inspected and then iterated again without
        being silently exhausted.
    """
    generated_dir_name = f'{target_ion_name}_ion_replaced_point'
    job_args = []
    for folder_p in ion_contained_poscar_folder_p_list:
        folder = Path(str(folder_p))
        job_args.append((folder / 'POSCAR',
                         folder / 'nnlist_5' / 'POSCAR.nnlist',
                         central_atom_symbol,
                         neighboring_atom_symbol,
                         bond_length_lower_end,
                         bond_length_upper_end,
                         folder / generated_dir_name / 'POSCAR'))

    return job_args


# Build the per-folder argument tuples from the run parameters set earlier in the notebook.
job_args = mk_job_args(ion_contained_poscar_folder_p_list,
                       target_ion_name=target_ion_name,
                       central_atom_symbol=central_atom_symbol,
                       neighboring_atom_symbol=neighboring_atom_symbol,
                       bond_length_lower_end=bond_length_lower_end,
                       bond_length_upper_end=bond_length_upper_end)

In [59]:
# Display the first loaded entry as a quick check of what the list holds.
ion_contained_poscar_folder_p_list[0]

PosixPath('/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000033')

In [None]:
# os.path.split() needs a path argument (calling it with none raises TypeError).
# Split the first structure folder into (parent directory, folder name).
os.path.split(ion_contained_poscar_folder_p_list[0])

In [60]:
# Smoke-test the worker on the first job only.
# `job_args` may be a one-shot iterator (e.g. a zip object); materialise it once and
# rebind the name so this check does not drain the jobs needed by the parallel run.
job_args = list(job_args)
# NOTE(review): the saved run of this cell failed with IsADirectoryError on the generated
# POSCAR path; presumably a directory already exists at that path — check df2poscar.
wrap_mk_polyatomic_ion_replaced_point_poscar(job_args[0])

IsADirectoryError: [Errno 21] Is a directory: '/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000033/CO3_ion_replaced_point/POSCAR'

In [None]:
# Parallel run: apply the worker to every job, with a progress bar.
# Keep at least one worker; cpu_count() - 1 is 0 on a single-core host and Pool(0) raises ValueError.
pp = Pool(max(1, cpu_count() - 1))
total = len(ion_contained_poscar_folder_p_list)
try:
    # list(...) drains the lazy imap iterator so that all jobs actually run.
    list(tqdm(pp.imap(wrap_mk_polyatomic_ion_replaced_point_poscar, job_args), total=total))
finally:
    # Always release the worker processes, even if a job raised.
    pp.close()
    pp.join()