In [5]:
import pandas as pd
# 表示オプションを設定
pd.set_option('display.float_format', '{:.6f}'.format)

In [8]:
def poscar2df(poscar_path='POSCAR'):
    """
    This func converts POSCAR to pd.DataFrame.

    Usage:
    -------
    df_nnlist = nnlist2df(nnlist_path='POSCAR.nnlist')

    Parameter:
    ------------
    nnlist_path: str or pathlib.Path

    Return:
    -------
    pd.DataFrame
    """
    
    
    # データを読み込みます。列名を指定し、区切り文字として空白を指定します。
    df = pd.read_csv(poscar_path, sep='\\s+', header=None)
    # 列名を設定します。
    df.columns = ["x", "y", "z"]
    # # cast int64 to str for central_atom_id column
    # df['central_atom_id'] = df['central_atom_id'].astype(str)
    # # cast int64 to str for neighboring_atom_id column
    # df['neighboring_atom_id'] = df['neighboring_atom_id'].astype(str)

    return df


In [10]:
poscar2df(poscar_path='sample_test_files/POSCAR')

ValueError: Length mismatch: Expected axis has 18 elements, new values have 3 elements

### poscar2df()を作成

In [1]:
import pandas as pd
# 表示オプションを設定
pd.set_option('display.float_format', '{:.6f}'.format)

In [5]:
# poscar_path='sample_test_files/POSCAR'
poscar_path='/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000033/POSCAR'
# POSCARファイルを読み込む
with open(poscar_path, 'r') as file:
    lines = file.readlines()
# print(lines[5])
# 原子種とその数を含む行を抽出
species_names = lines[5].split()
# print(species_names)
ions_per_species = [int(s) for s in lines[6].split()]
# print(ions_per_species)
# 原子種のリストと対応する数のリストを抽出
species_list = [species_name for species_name, count in zip(species_names, ions_per_species) for _ in range(count)]
# 原子種とその数からPandasのSeriesを作成
df_species = pd.DataFrame(species_list, columns=['atom_symbol'])
print(df_species)

   atom_symbol
0           Ba
1           Ba
2           Ba
3           Ba
4            C
5            C
6            C
7            C
8            O
9            O
10           O
11           O
12           O
13           O
14           O
15           O
16           O
17           O
18           O
19           O


In [25]:
with open(poscar_path, 'r') as file:
    lines = file.readlines()
# 構造情報が始まる行を特定
for i, line in enumerate(lines):
    if ('Direct' in line) or ('Cartesian' in line):
        start_line = i + 1
# 原子座標データを取得
ion_positions_list = lines[start_line:]
df_xyz = pd.DataFrame([ion_position_line.split() for ion_position_line in ion_positions_list], columns=['x', 'y', 'z'])

In [26]:
df_xyz

Unnamed: 0,x,y,z
0,0.25,0.41631,0.7549
1,0.25,0.08369,0.2549
2,0.75,0.58369,0.2451
3,0.75,0.91631,0.7451
4,0.25,0.757,0.919
5,0.25,0.743,0.419
6,0.75,0.243,0.081
7,0.75,0.257,0.581
8,0.25,0.9011,0.9122
9,0.25,0.5989,0.4122


In [39]:
df_poscar = pd.concat([df_xyz, df_species], axis=1)
df_poscar

Unnamed: 0,x,y,z,atom_symbol
0,0.25,0.41631,0.7549,Ba
1,0.25,0.08369,0.2549,Ba
2,0.75,0.58369,0.2451,Ba
3,0.75,0.91631,0.7451,Ba
4,0.25,0.757,0.919,C
5,0.25,0.743,0.419,C
6,0.75,0.243,0.081,C
7,0.75,0.257,0.581,C
8,0.25,0.9011,0.9122,O
9,0.25,0.5989,0.4122,O


### 関数化

In [31]:
import pandas as pd
# 表示オプションを設定
pd.set_option('display.float_format', '{:.6f}'.format)


def poscar2df(poscar_path='./POSCAR'):
    """
    This func converts POSCAR to pd.DataFrame.

    Usage:
    -------
    df_poscar = poscar2df(poscar_path=poscar_path)

    Parameter:
    ------------
    poscar_path: str or pathlib.Path

    Return:
    -------
    pd.DataFrame
    """
    # POSCARファイルを読み込む
    with open(poscar_path, 'r') as file:
        lines = file.readlines()

    # 原子種とその数を含む行を抽出
    species_names = lines[5].split()
    ions_per_species = [int(s) for s in lines[6].split()]
    # 原子種のリストと対応する数のリストを抽出
    species_list = [species_name for species_name, count in zip(species_names, ions_per_species) for _ in range(count)]
    # 原子種とその数からDataFrameを作成
    df_species = pd.DataFrame(species_list, columns=['atom_symbol'])
    
    # 原子ごと（行ごと）に，idを割り振る
    df_atom_ids = pd.DataFrame(list(range(1, len(df_species) + 1)), columns=['atom_id'])
    
    # 構造情報が始まる行を特定
    for i, line in enumerate(lines):
        if ('Direct' in line) or ('Cartesian' in line):
            start_line = i + 1
    # 原子座標データを取得
    ion_positions_list = lines[start_line:]
    df_xyz = pd.DataFrame([ion_position_line.split() for ion_position_line in ion_positions_list], columns=['x', 'y', 'z'])

    # 原子idのDataFrameと構造情報のDataFrameと原子集のDataFrameを結合
    df_poscar = pd.concat([df_atom_ids, df_xyz, df_species], axis=1)
    
    # 列ごとにデータ型を変更
    df_poscar['atom_id'] = df_poscar['atom_id'].astype(str)
    df_poscar[['x', 'y', 'z']] = df_poscar[['x', 'y', 'z']].astype(float)

    return df_poscar


if __name__ == '__main__':
    # poscar_path='sample_test_files/POSCAR'
    poscar_path='/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000033/POSCAR'
    df_poscar = poscar2df(poscar_path=poscar_path)

In [32]:
df_poscar = poscar2df(poscar_path=poscar_path)

In [33]:
df_poscar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   atom_id      20 non-null     object 
 1   x            20 non-null     float64
 2   y            20 non-null     float64
 3   z            20 non-null     float64
 4   atom_symbol  20 non-null     object 
dtypes: float64(3), object(2)
memory usage: 928.0+ bytes


In [34]:
df_poscar

Unnamed: 0,atom_id,x,y,z,atom_symbol
0,1,0.25,0.41631,0.7549,Ba
1,2,0.25,0.08369,0.2549,Ba
2,3,0.75,0.58369,0.2451,Ba
3,4,0.75,0.91631,0.7451,Ba
4,5,0.25,0.757,0.919,C
5,6,0.25,0.743,0.419,C
6,7,0.75,0.243,0.081,C
7,8,0.75,0.257,0.581,C
8,9,0.25,0.9011,0.9122,O
9,10,0.25,0.5989,0.4122,O


In [36]:
def poscar2df_xyz(poscar_path):
    
    
    with open(poscar_path, 'r') as file:
        lines = file.readlines()
        
    # 構造情報が始まる行を特定
    for i, line in enumerate(lines):
        if 'Direct' in line or 'Cartesian' in line:
            start_line = i + 1
        break

    # 原子座標データを取得
    ion_positions_list = lines[start_line:]
    df_xyz = pd.DataFrame([ion_position_line.split() for ion_position_line in ion_positions_list], columns=['x', 'y', 'z'])

    
    
    
    
    
    return df_xyz

In [37]:
poscar2df_xyz(poscar_path='sample_test_files/POSCAR')

['  0.250000000000000   0.757000000000000   0.919000000000000 \n', '  0.250000000000000   0.743000000000000   0.419000000000000 \n', '  0.750000000000000   0.243000000000000   0.081000000000000 \n', '  0.750000000000000   0.257000000000000   0.581000000000000 \n', '  0.250000000000000   0.416310000000000   0.754900000000000 \n', '  0.250000000000000   0.083690000000000   0.254900000000000 \n', '  0.750000000000000   0.583690000000000   0.245100000000000 \n', '  0.750000000000000   0.916310000000000   0.745100000000000 \n', '  0.250000000000000   0.901100000000000   0.912200000000000 \n', '  0.250000000000000   0.598900000000000   0.412200000000000 \n', '  0.750000000000000   0.098900000000000   0.087800000000000 \n', '  0.750000000000000   0.401100000000000   0.587800000000000 \n', '  0.459500000000000   0.683900000000000   0.921000000000000 \n', '  0.040500000000000   0.816100000000000   0.421000000000000 \n', '  0.540500000000000   0.316100000000000   0.079000000000000 \n', '  0.0405

Unnamed: 0,x,y,z
0,0.25,0.757,0.919
1,0.25,0.743,0.419
2,0.75,0.243,0.081
3,0.75,0.257,0.581
4,0.25,0.41631,0.7549
5,0.25,0.08369,0.2549
6,0.75,0.58369,0.2451
7,0.75,0.91631,0.7451
8,0.25,0.9011,0.9122
9,0.25,0.5989,0.4122


In [15]:
def poscar2df_species(poscar_path='sample_test_files/POSCAR'):    
        # POSCARファイルを読み込む
        with open(poscar_path, 'r') as file:
            lines = file.readlines()

        # 原子種とその数を含む行を抽出
        species_names = lines[5].split()
        ions_per_species = [int(s) for s in lines[6].split()]

        # # 原子種のリストと対応する数のリストを抽出
        species_list = [elem for elem, count in zip(species_names, ions_per_species) for _ in range(count)]

        # # 原子種とその数からPandasのSeriesを作成
        df_species = pd.DataFrame(species_list, columns=['atom_symbol'])

        return df_species

In [16]:
poscar2df_species(poscar_path='sample_test_files/POSCAR')

Unnamed: 0,atom_symbol
0,C
1,C
2,C
3,C
4,Ba
5,Ba
6,Ba
7,Ba
8,O
9,O


In [26]:
# poscar_path='sample_test_files/POSCAR'
poscar_path='/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000033/POSCAR'
# POSCARファイルを読み込む
with open(poscar_path, 'r') as file:
    lines = file.readlines()
print(lines[5])
# 原子種とその数を含む行を抽出
species_names = lines[5].split()
print(species_names)
ions_per_species = [int(s) for s in lines[6].split()]
print(ions_per_species)
# 原子種のリストと対応する数のリストを抽出
species_list = [species_name for species_name, count in zip(species_names, ions_per_species) for _ in range(count)]
# 原子種とその数からPandasのSeriesを作成
df_species = pd.DataFrame(species_list, columns=['atom_symbol'])
print(df_species)

  Ba   C   O

['Ba', 'C', 'O']
[4, 4, 12]
   atom_symbol
0           Ba
1           Ba
2           Ba
3           Ba
4            C
5            C
6            C
7            C
8            O
9            O
10           O
11           O
12           O
13           O
14           O
15           O
16           O
17           O
18           O
19           O


In [27]:
def poscar2df_species(poscar_path='/mnt/ssd_elecom_c2c_960gb/cif/1/00/00/1000033/POSCAR'):
    """
    This func converts POSCAR to pd.DataFrame.

    Usage:
    -------
    df_nnlist = nnlist2df(nnlist_path='POSCAR.nnlist')

    Parameter:
    ------------
    nnlist_path: str or pathlib.Path

    Return:
    -------
    pd.DataFrame
    """
    # POSCARファイルを読み込む
    with open(poscar_path, 'r') as file:
        lines = file.readlines()
    # 原子種とその数を含む行（：5行目）を抽出
    species_names = lines[5].split()
    ions_per_species = [int(s) for s in lines[6].split()]
    # 原子種のリストと対応する数のリストを抽出
    species_list = [species_name for species_name, count in zip(species_names, ions_per_species) for _ in range(count)]
    # 原子種とその数からPandasのSeriesを作成
    df_species = pd.DataFrame(species_list, columns=['atom_symbol'])
    
    return df_species