In [4]:
import pandas as pd
import numpy as np
from package_file_conversion.nnlist2df import nnlist2df

In [21]:
# Load the neighbor list of one sample structure; nnlist2df presumably returns a
# pandas DataFrame (the filter functions in this notebook treat it as one).
# NOTE(review): the path contains a doubled slash ('1000459//nnlist_5') — confirm
# that it is intentional.
nnlist_path = 'sample_test_files/1000459//nnlist_5/POSCAR.nnlist'
df_nnlist = nnlist2df(nnlist_path=nnlist_path)

In [25]:
def filter_2(df_nnlist):
    """
    Step 2: find central carbon atoms that list at least three oxygen neighbors.

    Rows whose 'central_atom_symbol' is 'C' are grouped by 'central_atom_id';
    a carbon is kept when 'O' occurs three or more times in the
    'neighboring_atom_symbol' column of its rows.
    No distance threshold is applied here; any C-O bond-length cutoff is
    assumed to have been applied when the neighbor list was generated
    (TODO confirm).

    Usage:
    ------
    bool_2, dict_2 = filter_2(df_nnlist=df_nnlist)

    Parameters:
    -----------
    df_nnlist: pd.DataFrame
        Neighbor list with at least the columns 'central_atom_symbol',
        'central_atom_id' and 'neighboring_atom_symbol'.

    Returns:
    --------
    bool_2: bool
        True when at least one carbon passes the check.
    dict_2: dict
        {central_atom_id: index labels of that carbon's rows in df_nnlist}
        for every carbon that passes; empty when none does.
    """
    carbon_rows = df_nnlist[df_nnlist['central_atom_symbol'] == 'C']
    carbon_groups = carbon_rows.groupby('central_atom_id').groups

    filtered = {}
    for c_id, row_labels in carbon_groups.items():
        # `.groups` yields index *labels*, so the rows must be fetched with
        # `.loc`; `.iloc` is only correct for a default 0..n-1 index.
        neighbor_symbols = df_nnlist.loc[row_labels, 'neighboring_atom_symbol']
        if (neighbor_symbols == 'O').sum() >= 3:
            filtered[c_id] = row_labels

    return len(filtered) >= 1, filtered


def filter_3(df_nnlist, dict_2):
    """
    Step 3: keep the carbons whose three nearest neighbors are all oxygen.

    For every carbon in dict_2 its rows are sorted by 'rel_distance' and the
    entries at sorted positions 1..3 must all have 'neighboring_atom_symbol'
    equal to 'O'. Position 0 is skipped; presumably it is the central atom
    itself at distance 0 (TODO confirm against the nnlist format).

    Usage:
    ------
    bool_3, dict_3 = filter_3(df_nnlist=df_nnlist, dict_2=dict_2)

    Parameters:
    -----------
    df_nnlist: pd.DataFrame
        Neighbor list with the columns 'rel_distance' and
        'neighboring_atom_symbol'.
    dict_2: dict
        {central_atom_id: index labels of that carbon's rows in df_nnlist},
        as returned by filter_2.

    Returns:
    --------
    bool_3: bool
        True when at least one carbon passes the check.
    dict_3: dict
        Sub-dictionary of dict_2 containing only the carbons that pass;
        empty when none does.
    """
    filtered = {}
    for c_id, row_labels in dict_2.items():
        # dict_2 stores index labels, hence `.loc` rather than `.iloc`.
        ordered = df_nnlist.loc[row_labels].sort_values(by='rel_distance')
        nearest_three = ordered['neighboring_atom_symbol'].tolist()[1:4]
        # Require three actual neighbors so a shorter list cannot pass.
        if len(nearest_three) == 3 and all(symbol == 'O' for symbol in nearest_three):
            filtered[c_id] = row_labels

    return len(filtered) >= 1, filtered


def filter_4(df_nnlist, dict_3):
    """
    Step 4: keep the carbons that have no fourth-nearest neighbor.

    A carbon passes when it has exactly four rows in df_nnlist. Presumably
    these are the carbon itself plus its three oxygen neighbors (TODO confirm
    that the nnlist lists the central atom as its own first entry).

    Usage:
    ------
    bool_4, dict_4 = filter_4(df_nnlist=df_nnlist, dict_3=dict_3)

    Parameters:
    -----------
    df_nnlist: pd.DataFrame
        Neighbor list indexed by the labels stored in dict_3.
    dict_3: dict
        {central_atom_id: index labels of that carbon's rows in df_nnlist},
        as returned by filter_3.

    Returns:
    --------
    bool_4: bool
        True when at least one carbon passes the check.
    dict_4: dict
        Sub-dictionary of dict_3 containing only the carbons that pass;
        empty when none does.
    """
    # dict_3 stores index labels, hence `.loc`; sorting is unnecessary because
    # only the number of rows matters.
    filtered = {
        c_id: row_labels
        for c_id, row_labels in dict_3.items()
        if len(df_nnlist.loc[row_labels]) == 4
    }

    return len(filtered) >= 1, filtered


def filter_5(df_nnlist, dict_3):
    """
    Step 5: keep the carbons whose fourth neighbor is strictly farther away
    than their third neighbor.

    For every carbon in dict_3 the 'rel_distance' values of its rows are
    sorted; the value at sorted position 3 (third neighbor, position 0
    presumably being the carbon itself — TODO confirm) must be strictly
    smaller than the value at position 4. Carbons with fewer than five rows
    have no fourth neighbor to compare against and are not kept (that case
    is what filter_4 tests for).

    Usage:
    ------
    bool_5, dict_5 = filter_5(df_nnlist=df_nnlist, dict_3=dict_3)

    Parameters:
    -----------
    df_nnlist: pd.DataFrame
        Neighbor list with the column 'rel_distance'.
    dict_3: dict
        {central_atom_id: index labels of that carbon's rows in df_nnlist},
        as returned by filter_3.

    Returns:
    --------
    bool_5: bool
        True when at least one carbon passes the check.
    dict_5: dict
        Sub-dictionary of dict_3 containing only the carbons that pass;
        empty when none does.
    """
    filtered = {}
    for c_id, row_labels in dict_3.items():
        # dict_3 stores index labels, hence `.loc`; sort once and reuse.
        distances = df_nnlist.loc[row_labels, 'rel_distance'].sort_values().tolist()
        if len(distances) < 5:
            # No fourth neighbor: nothing to compare, so skip instead of
            # raising IndexError.
            continue
        third_CO_bond_dist = distances[3]
        fourth_dist = distances[4]
        if third_CO_bond_dist < fourth_dist:
            filtered[c_id] = row_labels

    return len(filtered) >= 1, filtered


def filter_6(df_nnlist, dict_3):
    """
    Step 6: keep the carbons whose three nearest oxygens each have a carbon
    as their own nearest neighbor.

    For every carbon in dict_3 the 'neighboring_atom_id' values at sorted
    positions 1..3 (by 'rel_distance') are taken as its three oxygens. For
    each oxygen, the rows with that 'central_atom_id' are sorted by
    'rel_distance' and the entry at position 1 must have
    'neighboring_atom_symbol' equal to 'C'. Position 0 is presumably the atom
    itself (TODO confirm against the nnlist format). A carbon passes only
    when all three of its oxygens satisfy this.

    NOTE(review): only the element symbol of the oxygen's nearest neighbor is
    compared, not its id, so a different carbon atom would also be accepted —
    confirm whether the id should be compared with the central carbon.

    Usage:
    ------
    bool_6, C_ids = filter_6(df_nnlist=df_nnlist, dict_3=dict_3)

    Parameters:
    -----------
    df_nnlist: pd.DataFrame
        Neighbor list with the columns 'central_atom_id',
        'neighboring_atom_id', 'neighboring_atom_symbol' and 'rel_distance'.
    dict_3: dict
        {central_atom_id: index labels of that carbon's rows in df_nnlist},
        as returned by filter_3.

    Returns:
    --------
    bool_6: bool
        True when at least one carbon passes the check.
    C_ids: ndarray
        'central_atom_id' values of the carbons that pass; empty when none
        does.
    """
    passed = []
    for c_id, row_labels in dict_3.items():
        # dict_3 stores index labels, hence `.loc` rather than `.iloc`.
        ordered = df_nnlist.loc[row_labels].sort_values(by='rel_distance')
        # Ids of the three oxygens closest to this carbon.
        O_ids = ordered['neighboring_atom_id'].tolist()[1:4]

        # Collect one verdict per oxygen; the list is created once per carbon
        # so every oxygen is taken into account, not just the last one.
        per_oxygen = []
        for O_id in O_ids:
            o_rows = df_nnlist[df_nnlist['central_atom_id'] == O_id].sort_values('rel_distance')
            o_symbols = o_rows['neighboring_atom_symbol'].tolist()
            # An oxygen without any neighbor entry cannot satisfy the check.
            per_oxygen.append(len(o_symbols) > 1 and o_symbols[1] == 'C')

        # Record a verdict for every carbon (True or False) so the mask stays
        # aligned with the keys of dict_3.
        passed.append(len(per_oxygen) == 3 and all(per_oxygen))

    candidate_ids = np.array(list(dict_3.keys()))
    C_ids = candidate_ids[np.array(passed, dtype=bool)]
    bool_filter_6 = len(C_ids) >= 1

    return bool_filter_6, C_ids


def concat_filter(df_nnlist):
    """
    Decide whether a structure contains a carbonate ion by chaining
    filter_2() through filter_6() on its POSCAR.nnlist data.

    Decision flow:
      * filter_2 and filter_3 must both succeed, otherwise False.
      * If filter_4 succeeds, the result is True.
      * Otherwise filter_5 must succeed (else False) and the result is the
        verdict of filter_6.

    NOTE(review): filter_6 is called with dict_3, not with the carbons that
    passed filter_5, so the two checks may be satisfied by different carbons
    — confirm that this is intended.

    Usage:
    ------
    concat_filter(df_nnlist=df_nnlist)

    Parameters:
    -----------
    df_nnlist: pd.DataFrame
        Neighbor list as expected by filter_2() ... filter_6().

    Returns:
    --------
    bool: True when a carbonate ion is detected, False otherwise.
    """
    bool_2, dict_2 = filter_2(df_nnlist=df_nnlist)
    if not bool_2:
        return False

    bool_3, dict_3 = filter_3(df_nnlist=df_nnlist, dict_2=dict_2)
    if not bool_3:
        return False

    bool_4, _dict_4 = filter_4(df_nnlist=df_nnlist, dict_3=dict_3)
    if bool_4:
        return True

    bool_5, _dict_5 = filter_5(df_nnlist=df_nnlist, dict_3=dict_3)
    if not bool_5:
        return False

    bool_6, _C_ids = filter_6(df_nnlist=df_nnlist, dict_3=dict_3)
    # Always return an explicit bool; a failed filter_6 must yield False
    # rather than falling off the end of the function and returning None.
    return bool(bool_6)

In [26]:
# Run the chained carbonate-ion check on the loaded neighbor list; the cell
# output is the verdict returned by concat_filter.
concat_filter(df_nnlist=df_nnlist)

True