# CaCO3

### Ca^2+ (https://sakaki.issp.u-tokyo.ac.jp/expdata/periodic/Ca)
### CO3^2-

### python3のバージョンを確認

In [1]:
!which python3

/usr/bin/python3


### packageからVESTAを読みだす

In [2]:
from my_package import visualize

### VESTAでPOSCARファイルを可視化

In [38]:
POSCAR = './POSCAR'
visualize.vesta(POSCAR)

### ASEでPOSCARファイルを可視化

In [41]:
from ase.io import read, write
# POSCARファイルの読み込み
atoms = read('POSCAR')

# POSCARファイルの描画
from ase.visualize import view, ngl
ngl.view_ngl(atoms)

HBox(children=(NGLWidget(), VBox(children=(Dropdown(description='Show', options=('All', 'Ag', 'O', 'C'), value…

### Converting a POSCAR file to a DataFrame

In [5]:
from my_package.textfile2df import poscar2df_coords 

df_coords = poscar2df_coords(filename='./POSCAR')
df_coords

Unnamed: 0,central atom,x,y,z,Species
0,1,0.982,0.25,0.889,O
1,2,0.018,0.75,0.111,O
2,3,0.39,0.6342,0.334,O
3,4,0.39,0.8658,0.334,O
4,5,0.61,0.3658,0.666,O
5,6,0.61,0.1342,0.666,O
6,7,0.2109,0.0781,0.2191,Ag
7,8,0.2109,0.4219,0.2191,Ag
8,9,0.7891,0.9219,0.7809,Ag
9,10,0.7891,0.5781,0.7809,Ag


### converting POSCAR.nnlist to df_nnlist

In [6]:
from my_package.textfile2df import nnlist2df
import pandas as pd

#行の表示数の上限を撤廃
pd.set_option('display.max_rows', None)

df_nnlist = nnlist2df(POSCAR_nnlist='POSCAR.nnlist')
df_nnlist

Unnamed: 0,central atom,neighboring atom,distance,X,Y,Z,unitcell_x,unitcell_y,unitcell_z,central species,neighboring species
0,1,1,0.0,0.0,0.0,0.0,0,0,0,O,O
1,1,12,1.300983,-1.206005,0.0,-0.487964,0,0,0,O,C
2,2,2,0.0,0.0,0.0,0.0,0,0,0,O,O
3,2,11,1.300983,1.206005,0.0,0.487964,0,0,0,O,C
4,3,3,0.0,0.0,0.0,0.0,0,0,0,O,O
5,3,11,1.268764,-0.574113,1.106237,-0.237476,0,0,0,O,C
6,4,4,0.0,0.0,0.0,0.0,0,0,0,O,O
7,4,11,1.268764,-0.574113,-1.106237,-0.237476,0,0,0,O,C
8,5,5,0.0,0.0,0.0,0.0,0,0,0,O,O
9,5,12,1.268764,0.574113,-1.106237,0.237476,0,0,0,O,C


#### df_nnlist.groupby('central atom').count()['neighboring atom']で最も要素数の多いもののcentral atomをクラスタとして得る．
#### → クラスタ化されたcentral atomを重複削除する　
#### → 新しいcentral atomのリストを得る

In [7]:
print(df_nnlist.groupby('central atom').count()['neighboring atom'])
# これをフィルター化したものがget_elelem_max_num_filter_list

central atom
1     2
2     2
3     2
4     2
5     2
6     2
7     1
8     1
9     1
10    1
11    4
12    4
Name: neighboring atom, dtype: int64


#### df_nnlist.groupby('central atom').count()['neighboring atom']で最も要素数の多いもののcentral atomをクラスタとして得る．

In [8]:
def get_elem_max_filter(df_nnlist=df_nnlist):
    """
    To get cluster center abs coords from df_coords, Please use this filter.
    
    Input: df_nnlist 
 -> Output: The max number of element 
            in neighboring column of df_nnlist, 
            when df_nnlist groupbyed neighboring column and .count() 
    
    param1: df_nnlist: It should be created by nnlist2df()
    """
    
    elem_max_num = df_nnlist.groupby('central atom').count()['neighboring atom'].max()
    elem_max_num_filter = df_nnlist.groupby('central atom').count()['neighboring atom'] == elem_max_num
    elem_max_num_filter_list = elem_max_num_filter.to_list()
    return elem_max_num_filter_list

#### メモ：df_coords[elem_max_num_filter_list] によりクラスタ中心の絶対座標を得る

In [9]:
# df_coords[elem_max_num_filter_list]

#### df_nnlistで入力：central atom → 出力：neighboring atom を返す関数get_right_valueを作成．

In [10]:
import pandas as pd

# 入力値が左側の数値と同じ場合、対応する右側の数値を返す関数
def get_right_value(input_value):
    """
    To get all central atoms of a cluster(:neighbors), Input a number of cluster center element number(:central atom)
    
    Input: central atom column element In df_nnlist
 -> Output: All neighboring atom column element that Input(:elemnt) match central atom column element
    
    param1: Input: central atom column element In df_nnlist
    """
    
    # 左側の列から対応する行を選択し、右側の数値を取得
    # result = df_nnlist[df_nnlist['central atom'] == input_value]['neighboring atom'].values
    result = df_nnlist[df_nnlist['central atom'] == input_value]['neighboring atom'].tolist()
    return result

# 関数をテスト
get_right_value(1)

[1, 12]

#### クラスタ中心のcentral atom(id的な番号)の一覧を得る

### クラスタに選ばれなかった残りのcentral atom (≒id)の一覧を得る関数

In [11]:
def get_all_non_cluster_atom(df_nnlist=df_nnlist, df_coords=df_coords):
    """
    dependency: get_elem_max_filter(), get_right_value()
    
    To get non-clusterd central atom list, Use this func.
    
    
    Input: DataFrames
 -> Output: a list 
    
    param1: df_nnlist=df_nnlist
    param2: df_coords=df_coords
    """
    elem_max_num_filter_list = get_elem_max_filter(df_nnlist=df_nnlist)
    cluster_central_atom_list = df_coords[elem_max_num_filter_list]['central atom'].tolist()
    cluster_all_atom_list_dubled = [get_right_value(elem) for elem in cluster_central_atom_list]
    # 2重リストを1重リストにflated
    flat_list = [item for sublist in cluster_all_atom_list_dubled for item in sublist]
    # flat_listの重複削除
    cluster_all_atom_set = set(flat_list)
    
    # ここからdf_coordsを使う
    all_central_atom_set = set(df_coords['central atom'].tolist())
    
    # 差分を取得
    diff_central_atom_list = list(all_central_atom_set.difference(cluster_all_atom_set))
    
    return diff_central_atom_list


In [12]:
get_all_non_cluster_atom(df_nnlist=df_nnlist, df_coords=df_coords)

[8, 9, 10, 7]

### diff_central_atom_listをフィルター化する関数

In [13]:
# diff_central_atom_filter = df_coords['central atom'].apply(lambda row: row in diff_central_atom_list)
# これをget_diff_central_atom_filterに関数化

In [14]:
def get_diff_central_atom_filter(df_nnlist=df_nnlist, df_coords=df_coords):
    """
    dependency: get_all_non_cluster_atom()
    
    To convert list to filter, Use thie func.
    
    param1: df_nnlist=df_nnlist
    param2: df_coords=df_coords
    
    """
    
    diff_central_atom_list = get_all_non_cluster_atom(df_nnlist=df_nnlist, df_coords=df_coords)
    diff_central_atom_filter = df_coords['central atom'].apply(lambda row: row in diff_central_atom_list)
    return diff_central_atom_filter

### get_right_valueとcluster_central_atom_listを用いて，クラスタに入っている全原子のid(central atom)を得る．

In [32]:
# # 1重リストに変換
# flat_list = [item for sublist in cluster_all_atom_list_dubled for item in sublist]
# # 重複削除
# cluster_all_atom_set = set(flat_list)
# # cluster_all_atom_list_fix = list(set(flat_list))
# # print(cluster_all_atom_list_fix)

In [33]:
# cluster_all_atom_set

{1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}

#### cluster_all_atom_list_fixに含まれないcentral atomを抽出して、さらにdf_coord[]から得る．

In [34]:
# all_central_atom_set = set(df_coords['central atom'].tolist())

In [35]:
# all_central_atom_set

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}

In [36]:
# # 差分を取得
# diff_central_atom_list = list(all_central_atom_set.difference(cluster_all_atom_set))
# diff_central_atom_list

[8, 5, 6, 7]

### diff_central_atom_listをフィルター化する → df_coords[]に代入する

##### ある値がリストに含まれるかを判定する  ->  ex) 20 in diff_central_atom_list

In [37]:
# 20 in diff_central_atom_list

False

In [38]:
# diff_central_atom_filter = df_coords['central atom'].apply(lambda row: row in diff_central_atom_list)
# # print(diff_central_atom_filter)

In [15]:
# df_coords[get_diff_central_atom_filter(df_nnlist=df_nnlist, df_coords=df_coords)]

<!-- ### 2つのフィルターをorで結合する関数 -->

In [None]:
# def get_union_filter(filter1=diff_central_atom_filter, filter2=):


#     return central_atom_filter_fix

### df_coords[]で、クラスタの中心の絶対座標のフィルターと重複削除された残りの絶対座標のフィルターを結合してフィルターする

#### 過不足のないcentral atomのfilterが完成

In [67]:
# diff_central_atom_filter

In [68]:
# get_elem_max_filter(df_nnlist=df_nnlist)

In [19]:
diff_central_atom_filter = get_diff_central_atom_filter()
central_atom_filter_fix = diff_central_atom_filter | get_elem_max_filter(df_nnlist=df_nnlist)
central_atom_filter_fix

0     False
1     False
2     False
3     False
4     False
5     False
6      True
7      True
8      True
9      True
10     True
11     True
Name: central atom, dtype: bool

#### クラスタ後の新しい絶対座標を得る

In [20]:
df_coords_abs_center = df_coords[central_atom_filter_fix]
df_coords_abs_center

Unnamed: 0,central atom,x,y,z,Species
6,7,0.2109,0.0781,0.2191,Ag
7,8,0.2109,0.4219,0.2191,Ag
8,9,0.7891,0.9219,0.7809,Ag
9,10,0.7891,0.5781,0.7809,Ag
10,11,0.27,0.75,0.261,C
11,12,0.73,0.25,0.739,C


#### クラスタ後の新しい絶対座標を文字列→数値に変換 する関数

In [21]:
def df_elem_str2num(df_coords_abs_center=df_coords_abs_center):
    
    # 文字列を数値化する
    df_coords_abs_center['x'] = pd.to_numeric(df_coords_abs_center['x'], errors='coerce')
    df_coords_abs_center['y'] = pd.to_numeric(df_coords_abs_center['y'], errors='coerce')
    df_coords_abs_center['z'] = pd.to_numeric(df_coords_abs_center['z'], errors='coerce')
    
    return df_coords_abs_center

In [22]:
df_coords_abs_center = df_elem_str2num(df_coords_abs_center=df_coords_abs_center)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_coords_abs_center['x'] = pd.to_numeric(df_coords_abs_center['x'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_coords_abs_center['y'] = pd.to_numeric(df_coords_abs_center['y'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_coords_abs_center['z'] = pd.to

In [23]:
# 数値化したクラスタ後の絶対座標
df_coords_abs_center

Unnamed: 0,central atom,x,y,z,Species
6,7,0.2109,0.0781,0.2191,Ag
7,8,0.2109,0.4219,0.2191,Ag
8,9,0.7891,0.9219,0.7809,Ag
9,10,0.7891,0.5781,0.7809,Ag
10,11,0.27,0.75,0.261,C
11,12,0.73,0.25,0.739,C


#### クラスタの相対中心座標を計算

In [24]:
df_nnlist_grouped = df_nnlist.groupby('central atom').mean()
# central atomカラムでgroupby.mean()した後、index列(central atom)をカラムにする   
df_nnlist_grouped = df_nnlist_grouped.reset_index()   

  df_nnlist_grouped = df_nnlist.groupby('central atom').mean()


In [25]:
df_cluster_relative_center = df_nnlist_grouped[central_atom_filter_fix]
df_cluster_relative_center

Unnamed: 0,central atom,neighboring atom,distance,X,Y,Z,unitcell_x,unitcell_y,unitcell_z
6,7,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,8,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,9,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,10,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,11,5.0,0.959628,-0.014445,0.0,-0.003253,0.0,0.0,0.0
11,12,6.0,0.959628,0.014445,0.0,0.003253,0.0,0.0,0.0


### 絶対座標 + 相対座標の計算

In [26]:
def get_clusterd_coords(df_abs=df_coords_abs_center, df_relative=df_cluster_relative_center):
    df_coords_x = df_abs['x'] + df_relative['X']
    df_coords_y = df_abs['y'] + df_relative['Y']
    df_coords_z = df_abs['z'] + df_relative['Z']
    df_coords_species = df_abs['Species']

    # カラム名を指定してデータフレームを作成
    df_coords_fix = pd.DataFrame({
        'X': df_coords_x,
        'Y': df_coords_y,
        'Z': df_coords_z,
        'Species': df_coords_species,
    })

    return df_coords_fix

In [27]:
df_coords_fix = get_clusterd_coords(df_abs=df_coords_abs_center, df_relative=df_cluster_relative_center)
df_coords_fix

Unnamed: 0,X,Y,Z,Species
6,0.2109,0.0781,0.2191,Ag
7,0.2109,0.4219,0.2191,Ag
8,0.7891,0.9219,0.7809,Ag
9,0.7891,0.5781,0.7809,Ag
10,0.255555,0.75,0.257747,C
11,0.744445,0.25,0.742253,C


In [28]:
### 元のPOSCARファイルから5行目までを抽出して、新しいPOSCARファイルに書き込む関数

In [29]:
def df2poscar(df=df_coords_fix, original_file="./POSCAR", output_file="gen_data/POSCAR"):
    """
    Writing the DataFrame(:df_coords_fix) to a POSCAR file.
    param1: DataFrame that has 'X', 'Y', 'Z' columns about coords.
    param2: original POSCAR file
    param3: generated POSCAR file
    """
    
    # df_coords_fixを文字列に変換
    def df2str(df):
        df_coords_fix_str = df[['X', 'Y', 'Z']].to_string(header=False, index=False, index_names=False)
        return df_coords_fix_str
    
    # df_coords_fixから元素種を文字列として抽出する関数
    def return_species(df):
        speies_0 = df['Species'].unique()[0]
        speies_1 = df['Species'].unique()[1]
        num_C = len(df[df['Species'] == speies_0])
        num_Ba = len(df[df['Species'] == speies_1])

        species_line = f"""   {speies_0}   {speies_1}
       {num_C}   {num_Ba}"""

       #  species_line = f"""   {speies_0}
       # {num_C}   """
        
        return species_line

    
    # 元のPOSCARファイルの5行目までを抽出し，新しいファイルに書き込む
    def wirte_header2poscar():
        # 最初の5行を抽出
        with open(original_file, 'r') as infile:
            lines = infile.readlines()[:5]
        # 新しいPOSCARファイルに書き込む
        with open(output_file, 'w') as outfile:
            outfile.writelines(lines)
    
    
    # 新しいPOSCARファイルに書き込んでいく
    def write_species2poscar():
        with open(output_file, 'a') as file:
            # すでに存在するテキストファイルに元素種を追記
            file.write(return_species(df) + '\n')
            # 元素種まで書かれたファイルにDirectという文字をを追記
            file.write('Direct\n' )
            # 直交座標を追記
            file.write(df2str(df))

            
    # 関数をcall
    df2str(df)
    wirte_header2poscar()
    write_species2poscar()
    
    print(f"{output_file} にクラスタ化後の内容が書き込まれました。")

In [31]:
df2poscar()

gen_data/POSCAR にクラスタ化後の内容が書き込まれました。


#### 編集後のposcarファイルをASEで可視化

In [42]:
from ase.io import read, write

output_file="gen_data/POSCAR"
# POSCARファイルの読み込み
atoms = read(output_file)

# POSCARファイルの描画
from ase.visualize import view, ngl
ngl.view_ngl(atoms)

HBox(children=(NGLWidget(), VBox(children=(Dropdown(description='Show', options=('All', 'Ag', 'C'), value='All…

In [40]:
from ase.io import read, write
# POSCARファイルの読み込み
atoms = read('POSCAR')

# POSCARファイルの描画
from ase.visualize import view, ngl
ngl.view_ngl(atoms)

HBox(children=(NGLWidget(), VBox(children=(Dropdown(description='Show', options=('All', 'Ag', 'O', 'C'), value…

#### 編集後のposcarファイルをVESTAで可視化

In [35]:
from my_package import visualize
output_file="gen_data/POSCAR"
visualize.vesta(output_file)






(VESTA-gui:27941): Gtk-CRITICAL **: 16:45:24.234: gtk_box_gadget_distribute: assertion 'size >= 0' failed in GtkNotebook



(VESTA-gui:27941): Gtk-CRITICAL **: 16:45:24.397: gtk_box_gadget_distribute: assertion 'size >= 0' failed in GtkNotebook





(VESTA-gui:27941): Gtk-CRITICAL **: 16:45:24.712: gtk_box_gadget_distribute: assertion 'size >= 0' failed in GtkNotebook



(VESTA-gui:27941): Gtk-CRITICAL **: 16:45:24.917: gtk_box_gadget_distribute: assertion 'size >= 0' failed in GtkNotebook


type=0 format=0 nitems=0 atom=276 252
workspace= 0 : -1, x=114, y=31; Screen_W=2026
