In [12]:
import sys
import os

# Add the local PyL3dMD source directory to the module search path
sys.path.append(os.path.abspath('./PyL3dMD/pyl3dmd'))

import getaway
import descriptors3set
import descriptors4set
import descriptors5set
import descriptors6set
from getatomicproperties import getatomicproperties

import numpy as np
from scipy import sparse
from ase.io import Trajectory,read,write
from ase.neighborlist import NeighborList, natural_cutoffs
import pandas as pd

In [21]:
###############################################################################
# (A) Extract molecule information from an ASE trajectory frame: eachMolsIdx and eachMolsBonds
###############################################################################
def build_molecule_dicts(atoms):
    """
    Split an ASE ``Atoms`` object into molecules and list each molecule's bonds.

    Two atoms count as bonded when they are neighbours under
    ``natural_cutoffs(atoms)``; a molecule is one connected component of the
    resulting connectivity graph.

    Parameters
    ----------
    atoms : ase.Atoms
        Structure to analyse.

    Returns
    -------
    eachMolsIdx : dict
        Molecule id (0-based) -> list of global atom indices (0-based,
        ascending).
    eachMolsBonds : dict
        Molecule id -> integer array of shape ``(n_bonds, 2)``. Each row holds
        1-based atom indices local to the molecule; every bond appears once,
        with the atom of lower index first. A molecule without bonds gets an
        empty ``(0, 2)`` array so the column layout is always the same.
    """
    # Element-based default cutoffs decide which atom pairs are neighbours.
    cutoffs = natural_cutoffs(atoms)

    nl = NeighborList(cutoffs, self_interaction=False, bothways=True)
    nl.update(atoms)

    # CSR gives cheap access to the neighbour indices of a single row.
    connectivity_matrix = nl.get_connectivity_matrix(sparse=True).tocsr()

    # One connected component per molecule; labels[i] is the component of atom i.
    n_components, labels = sparse.csgraph.connected_components(connectivity_matrix)

    eachMolsIdx = {i: [] for i in range(n_components)}
    for atom_index, component_id in enumerate(labels):
        eachMolsIdx[int(component_id)].append(atom_index)

    eachMolsBonds = {}
    for mol_id, atom_indices in eachMolsIdx.items():
        # Global index -> 1-based local index. The dict also serves as an O(1)
        # membership test, replacing a linear scan of the atom list per neighbour.
        local_map = {g_idx: i_local
                     for i_local, g_idx in enumerate(atom_indices, start=1)}
        bond_list = []
        for g_i in atom_indices:
            li = local_map[g_i]
            for g_j in connectivity_matrix[g_i].indices:
                lj = local_map.get(int(g_j))
                # g_j > g_i keeps each bond once (the matrix lists both directions).
                if lj is not None and g_j > g_i:
                    bond_list.append([li, lj])
        # reshape keeps the (n, 2) layout even when the molecule has no bonds.
        eachMolsBonds[mol_id] = np.array(bond_list, dtype=int).reshape(-1, 2)

    print(f"系内の分子数: {len(eachMolsIdx)}")
    return eachMolsIdx, eachMolsBonds


def calgeometricdistancematrix(xyz):
    """
    Build the pairwise Euclidean distance matrix of a set of atoms.

    ``xyz`` is indexed as ``xyz[:, 0]``, ``xyz[:, 1]`` and ``xyz[:, 2]`` for
    the x, y and z coordinates. The result is a square array with one row and
    one column per atom, where entry ``[i, j]`` is the distance between atoms
    ``i`` and ``j``.
    """
    n_atoms = len(xyz)
    squared_dist = np.zeros((n_atoms, n_atoms))
    for axis in range(3):
        # Multiplying by a float vector of ones promotes the coordinates the
        # same way a float ones-matrix would before the subtraction.
        coord = np.ones(n_atoms) * xyz[:, axis]
        delta = coord[np.newaxis, :] - coord[:, np.newaxis]
        squared_dist = squared_dist + delta ** 2

    # Geometric distance matrix
    return np.sqrt(squared_dist)

###############################################################################
# (B) Main function: process the last N frames of a .traj and compute the
#     selected descriptor sets (set3-set6), returned in dictionary form
###############################################################################


def _frame_descriptors(atoms, frame_idx, selected):
    # Compute the requested descriptor sets for every molecule of one frame.
    # Returns {mol_id: descriptor dict}; `selected` is a tuple of set names
    # evaluated (and merged) in the order given.

    # (1) Molecule membership and per-molecule bond lists
    eachMolsIdx, eachMolsBonds = build_molecule_dicts(atoms)

    # (2) Masses and charges
    all_masses = atoms.get_masses()
    try:
        charges = atoms.get_charges()
    except (RuntimeError, NotImplementedError):
        # ASE raises RuntimeError when no calculator is attached and
        # PropertyNotImplementedError (a NotImplementedError) when the
        # calculator has no charges; it does not return None in those cases.
        charges = None
    if charges is None:
        print("Warning: No partial charges found. Set charges to 0.")
        charges = np.zeros_like(all_masses)

    # Per-molecule atomic properties; only the ones consumed below are kept.
    (_eachMolsZ,
     _eachMolsL,
     _eachMolsZv,
     _eachMolsRv,
     eachMolsRc,
     _eachMolsm,
     eachMolsV,
     eachMolsEn,
     eachMolsalapha,
     eachMolsIP,
     eachMolsEA) = getatomicproperties(all_masses, eachMolsIdx)

    positions = atoms.get_positions()  # (N, 3)

    molDescriptors = {}
    for mol_id in range(len(eachMolsIdx)):
        g_indices = eachMolsIdx[mol_id]

        xyz_local = positions[g_indices, :]
        m_local = all_masses[g_indices]
        c_local = charges[g_indices]
        bonds_local = eachMolsBonds[mol_id]  # 1-based local indices

        # Weighting properties shared by set3, set5 and set6.
        weights = (c_local, m_local,
                   eachMolsV[mol_id], eachMolsEn[mol_id],
                   eachMolsalapha[mol_id], eachMolsIP[mol_id],
                   eachMolsEA[mol_id])

        # Bookkeeping columns come first, then the descriptor values.
        res = {
            'molecule': mol_id + 1,
            'Timeframe': frame_idx,
        }

        for set_name in selected:
            if set_name == 'set6':
                # RDF, ATS, GATS, MATS, MoRSE (distance-matrix based)
                G_local = calgeometricdistancematrix(xyz_local)
                RDF, ATS, GATS, MATS, MoRSE = descriptors6set.getragmmdescriptors(
                    G_local, *weights
                )
                for block in (RDF, ATS, GATS, MATS, MoRSE):
                    res.update(block)
            elif set_name == 'set5':
                # WHIM
                res.update(descriptors5set.getwhimdescriptors(xyz_local, *weights))
            elif set_name == 'set4':
                # CPSA
                res.update(descriptors4set.getcpsadescriptors(
                    xyz_local, c_local, eachMolsRc[mol_id]
                ))
            elif set_name == 'set3':
                # GETAWAY
                res.update(descriptors3set.getgetawayhatsindexes(
                    xyz_local, m_local, bonds_local, *weights
                ))

        molDescriptors[mol_id] = res

    return molDescriptors


def Calculate_descriptors(trajfile, last_n=10, whichdescriptors='all'):
    """
    Compute molecular descriptors for the last ``last_n`` frames of an ASE
    trajectory file.

    Parameters
    ----------
    trajfile : str
        Path of the ASE ``.traj`` file.
    last_n : int
        Number of frames, counted from the end of the trajectory, to process.
    whichdescriptors : str
        One of ``'all'``, ``'set3'`` (GETAWAY), ``'set4'`` (CPSA),
        ``'set5'`` (WHIM) or ``'set6'`` (RDF, ATS, GATS, MATS, MoRSE).
        ``'all'`` computes set6, set5, set4 and set3, in that order.
        Any other value (e.g. set1 / set2, which need angles and dihedrals
        that are not available here) prints a warning once and yields only the
        bookkeeping entries ``'molecule'`` and ``'Timeframe'``.

    Returns
    -------
    list of dict
        One entry per processed frame::

            [
              {
                'frame': frame_index,
                'molDescriptors': {
                   mol_id_0: {...},  # descriptor dictionary
                   mol_id_1: {...},
                   ...
                }
              },
              ...
            ]
    """
    # The evaluation order also fixes the key order of each result dictionary.
    if whichdescriptors == 'all':
        selected = ('set6', 'set5', 'set4', 'set3')
    elif whichdescriptors in ('set3', 'set4', 'set5', 'set6'):
        selected = (whichdescriptors,)
    else:
        # set1 / set2 need angles and dihedrals, so they are skipped.
        print(f"Warning: '{whichdescriptors}' not implemented (need angle/dihedral). Skipped.")
        selected = ()

    results = []

    # The context manager closes the trajectory file even if a frame fails.
    with Trajectory(trajfile, mode='r') as traj:
        n_frames = len(traj)
        start_frame = max(0, n_frames - last_n)

        for frame_idx in range(start_frame, n_frames):
            atoms = traj[frame_idx]
            results.append({
                'frame': frame_idx,
                'molDescriptors': _frame_descriptors(atoms, frame_idx, selected),
            })

    return results

def Run_pyl3dmd_asetraj(trajfile, last_n=10, output_csv=None, whichdescriptors='all'):
    """
    Compute descriptors for the last frames of a trajectory and tabulate them.

    1) Run ``Calculate_descriptors(trajfile, last_n, whichdescriptors)``.
    2) Flatten the result into a pandas DataFrame with one row per
       (frame, molecule) pair.
    3) Optionally save the DataFrame to CSV if ``output_csv`` is given.
    4) Return the DataFrame.

    Parameters
    ----------
    trajfile : str
        Path of the ASE ``.traj`` file.
    last_n : int
        Number of trailing frames to process.
    output_csv : str or None
        Destination CSV path; nothing is written when ``None``.
    whichdescriptors : str
        Descriptor selection forwarded to ``Calculate_descriptors``.

    Returns
    -------
    pandas.DataFrame
        Columns ``frame`` and ``molID`` followed by the descriptor columns.
    """
    # Step 1: per-frame descriptor dictionaries
    results_all = Calculate_descriptors(trajfile, last_n, whichdescriptors)

    # Step 2: one row per (frame, molID); descriptor keys follow the two id columns
    rows = [
        {'frame': frame_result['frame'], 'molID': mol_id, **desc_dict}
        for frame_result in results_all
        for mol_id, desc_dict in frame_result['molDescriptors'].items()
    ]
    df = pd.DataFrame(rows)

    # Step 3: optionally write to CSV
    if output_csv is not None:
        df.to_csv(output_csv, index=False)
        print(f"Saved results to {output_csv}")

    # Step 4: hand the table back to the caller (previously it was dropped)
    return df


In [None]:
# Compute every supported descriptor set for the last 10 frames of the
# hexadecane trajectory and write the table to output.csv.
Run_pyl3dmd_asetraj(
    "./data/hexadecane_NVE.traj",
    last_n=10,
    output_csv="output.csv",
    whichdescriptors="all",
)

系内の分子数: 10


  result = np.zeros([size,size])*inf
  Ginv = 1/G
  Ginv2 = 1/(G**2)
  R = np.sqrt((onesMat*leverage)*(np.transpose(onesMat*leverage)))/G


系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
Saved results to output.csv


In [25]:
# Load the descriptor table written by Run_pyl3dmd_asetraj and display it.
df = pd.read_csv("output.csv")
df

Unnamed: 0,frame,molID,molecule,Timeframe,RDFu1,RDFu2,RDFu3,RDFu4,RDFu5,RDFu6,...,getawayREA17,getawayRmaxEA17,getawayREA18,getawayRmaxEA18,getawayREA19,getawayRmaxEA19,getawayREA20,getawayRmaxEA20,getawayRTEA,getawayRTmaxEA
0,41,0,1,41,3.224357e-13,11.847505,12.971947,10.207647,33.812725,38.509139,...,0.049007,0.006947,0.0,0,0.0,0,0.0,0,186058.701990,0.057527
1,41,1,2,41,8.175495e-14,11.731579,13.413324,13.285887,39.871686,37.360803,...,0.026245,0.003296,0.0,0,0.0,0,0.0,0,170194.499747,0.067645
2,41,2,3,41,1.067800e-13,12.998137,13.231375,13.070553,36.764061,34.241656,...,0.028582,0.003485,0.0,0,0.0,0,0.0,0,179303.805178,0.062914
3,41,3,4,41,4.990955e-13,13.209748,11.762092,11.903472,27.004921,36.443555,...,0.025295,0.003952,0.0,0,0.0,0,0.0,0,160438.966812,0.065198
4,41,4,5,41,8.799427e-13,12.561309,10.447940,8.952172,31.165212,34.039087,...,0.017038,0.002299,0.0,0,0.0,0,0.0,0,147102.669670,0.053005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,50,5,6,50,1.761575e-13,13.277602,12.678418,10.911981,36.337078,39.141426,...,0.020242,0.002481,0.0,0,0.0,0,0.0,0,153161.520248,0.062488
96,50,6,7,50,5.081226e-14,11.016067,12.169567,7.392851,37.349312,36.445851,...,0.022342,0.003131,0.0,0,0.0,0,0.0,0,169266.984461,0.059240
97,50,7,8,50,1.293049e-13,10.792834,12.682429,12.155721,30.464292,30.333082,...,0.013565,0.002391,0.0,0,0.0,0,0.0,0,150627.673993,0.049198
98,50,8,9,50,1.641733e-13,12.360033,13.029033,7.947055,29.911393,36.525588,...,0.022694,0.002825,0.0,0,0.0,0,0.0,0,157899.171956,0.073926
