In [25]:
import sys
import os

# モジュールのパスを追加
sys.path.append(os.path.abspath('./PyL3dMD/pyl3dmd'))
import math
import numpy as np
np.math = math

import getaway
import descriptors1set
import descriptors2set
import descriptors3set
import descriptors4set
import descriptors5set
import descriptors6set
from getatomicproperties import getatomicproperties
from getinfofromlammpsdatafile import caladjacencymatrix,buildangleslist,builddihedralslist
import getadjacencyanddistancematrices
import numpy as np
from scipy import sparse
from ase.io import Trajectory,read,write
from ase.neighborlist import NeighborList, natural_cutoffs
import pandas as pd

In [26]:
###############################################################################
# (A) ASEのtrajから分子情報を抜き出す: eachMolsIdx and eachMolsBonds
###############################################################################
def build_molecule_dicts(atoms):
    """
    1) NeighborList を使って分子(connected component)を特定し、
       eachMolsIdx (分子ID -> [グローバル原子インデックス]) と
       eachMolsBonds (分子ID -> (nBonds,2)形状の局所1-based結合リスト) を作成
    2) さらに angles, dihedrals を計算し、
       eachMolsAngles, eachMolsDihedrals 辞書に格納して返す
    3) 各分子の原子質量配列 eachMolsMass も取得して返す
    """

    # (A) 全原子の質量を取得 (shape=(N,))
    all_masses = atoms.get_masses()

    # (B) 原子ごとの自然カットオフを計算して NeighborList を構築
    cutoffs = natural_cutoffs(atoms)
    nl = NeighborList(cutoffs, self_interaction=False, bothways=True)
    nl.update(atoms)

    # (C) スパースな接続行列(CSR形式)を取得 -> connected_components
    connectivity_matrix = nl.get_connectivity_matrix(sparse=True).tocsr()
    n_components, labels = sparse.csgraph.connected_components(connectivity_matrix)

    # (D) 各分子ID -> グローバル原子インデックス
    eachMolsIdx = {i: [] for i in range(n_components)}
    for atom_index, component_id in enumerate(labels):
        eachMolsIdx[component_id].append(atom_index)

    # (E) 分子ごとの結合リスト(bonds), 角度(angles), ジヒドラル(dihedrals), 質量 を構築
    eachMolsBonds = {}
    eachMolsAngles = {}
    eachMolsDihedrals = {}
    eachMolsMass = {}

    for mol_id, atom_indices in eachMolsIdx.items():
        # グローバル -> ローカルのマッピングを作成
        local_map = {g_idx: i_local for i_local, g_idx in enumerate(atom_indices)}

        # (E1) bondsを抽出（1-basedの局所インデックスに変換）
        bond_list = []
        for g_i in atom_indices:
            for g_j in connectivity_matrix[g_i].indices:
                if g_j in atom_indices and g_j > g_i:
                    li = local_map[g_i] + 1
                    lj = local_map[g_j] + 1
                    bond_list.append([li, lj])
        bonds_local = np.array(bond_list, dtype=int)
        eachMolsBonds[mol_id] = bonds_local

        # (E2) 角度(angles), ジヒドラル(dihedrals) を計算するために
        #      まずは 0-based の adjacency を作成
        adjMatrix, adjList = caladjacencymatrix(bonds_local)

        angles_list = buildangleslist(adjList)       # 0-based shape (nAngles, 3)
        dihedrals_list = builddihedralslist(adjList) # 0-based shape (nDihedrals, 4)

        # 1-based に変換
        eachMolsAngles[mol_id] = angles_list + 1
        eachMolsDihedrals[mol_id] = dihedrals_list + 1

        # (E3) 質量配列 (分子内原子の質量)
        # 全体の all_masses を atom_indices でスライス
        eachMolsMass[mol_id] = all_masses[atom_indices]

    print(f"系内の分子数: {len(eachMolsIdx)}")

    # (F) まとめて返す
    return eachMolsIdx,eachMolsBonds,eachMolsAngles,eachMolsDihedrals,eachMolsMass


def calgeometricdistancematrix(xyz):
        """
        Calculate Euclidean Distance of atoms in a molecule - (numAtoms x numAtoms)
        """
        onesMat = np.ones([len(xyz), len(xyz)])
        Gx = onesMat * xyz[:, 0] - np.transpose(onesMat * xyz[:, 0])
        Gy = onesMat * xyz[:, 1] - np.transpose(onesMat * xyz[:, 1])
        Gz = onesMat * xyz[:, 2] - np.transpose(onesMat * xyz[:, 2])

        # Geometric Distance Matrix
        G = np.sqrt(Gx ** 2 + Gy ** 2 + Gz ** 2)
        return G

def caldensity(massBox, volBox):
        """
        simulation calculated density of the fluid [g/cc]
        """
        NA = 6.0221408e+23  # Avogadro's number [1/mol]
        volBoxcc = volBox * 1.0E-24  # volume of the simulation box [cc]

        # Simulation-calculated density [g/cc]
        rho = (massBox / volBoxcc) * (1.0 / NA)
        return rho

###############################################################################
# (B) Main function to process the last N frames of a .traj, 
#     compute GETAWAY descriptors in dictionary form
###############################################################################


def Calculate_descriptors(trajfile, last_n=10, whichdescriptors='all'):
    """
    trajファイルの末尾から `last_n` フレーム分を読み込み、
    whichdescriptors の指定に応じた記述子を計算して返す。

    whichdescriptors は以下のいずれか:
      - 'all'
      - 'set3'
      - 'set4'
      - 'set5'
      - 'set6'
      (set1, set2 は angles / dihedrals が計算できないので現状スキップ)

    戻り値は次のようなリスト:
    [
      {
        'frame': frame_index,
        'molDescriptors': {
           mol_id_0: {...},  # 記述子辞書
           mol_id_1: {...},
           ...
        }
      },
      ...
    ]
    """

    traj = Trajectory(trajfile, mode='r')
    n_frames = len(traj)
    start_frame = max(0, n_frames - last_n)

    results = []

    for frame_idx in range(start_frame, n_frames):
        atoms = traj[frame_idx]

        # (1) 分子情報を取得 -> eeachMolsIdx,eachMolsBonds,eachMolsAngles,eachMolsDihedrals,eachMolsMass
        eachMolsIdx,eachMolsBonds,eachMolsAngles,eachMolsDihedrals,eachMolsMass = build_molecule_dicts(atoms)
        eachMolsAdjMat, eachMolsDisMat = getadjacencyanddistancematrices.getadjANDdismatrices(eachMolsMass, eachMolsBonds)
        charges = atoms.get_charges()
        if charges is None:
            print("Warning: No partial charges found. Set charges to 0.")
            charges = np.zeros_like(eachMolsMass)
        all_masses = atoms.get_masses()
        # atomic properties
        (eachMolsZ,
         eachMolsL,
         eachMolsZv,
         eachMolsRv,
         eachMolsRc,
         eachMolsm,
         eachMolsV,
         eachMolsEn,
         eachMolsalapha,
         eachMolsIP,
         eachMolsEA) = getatomicproperties(all_masses, eachMolsIdx)

        positions = atoms.get_positions()  # (N,3)

        molDescriptors = {}
        numMols = len(eachMolsIdx)

        mass_per_atom = atoms.get_masses()  # shape (N,)
        total_mass_amu = np.sum(mass_per_atom)

        rho = []
        for i in range(n_frames):
            atoms_i = traj[i]
            volume_A3 = atoms_i.get_volume()  # ASE が計算するセル体積 (Å^3)
            rho_i = caldensity(total_mass_amu, volume_A3)  # g/cc
            rho.append(rho_i)
        
        for mol_id in range(numMols):
            # ---- 分子ごとの下準備 ----
            g_indices = eachMolsIdx[mol_id]
            mass_local = eachMolsMass[mol_id]
            angles_local = eachMolsAngles[mol_id]
            dihedrals_local = eachMolsDihedrals[mol_id]
            density_local = rho[mol_id]
            disMat_local = eachMolsDisMat[mol_id]
            adjMat_local = eachMolsAdjMat[mol_id]

            xyz_local = positions[g_indices, :]
            c_local    = charges[g_indices]

            # getatomicproperties の結果から取り出したプロパティ
            m_local  = mass_local
            V_local  = eachMolsV[mol_id]
            En_local = eachMolsEn[mol_id]
            P_local  = eachMolsalapha[mol_id]
            IP_local = eachMolsIP[mol_id]
            EA_local = eachMolsEA[mol_id]

            bonds_local = eachMolsBonds[mol_id]  # 1-based
            G_local = calgeometricdistancematrix(xyz_local)
            
            # 追加情報（例: 分子番号やタイムフレームなど）
            others = {
                'molecule': mol_id + 1,
                'Timeframe': frame_idx,
            
            }

            # ---- whichdescriptors で分岐して計算 ----

            if whichdescriptors == 'all':
                # 例: set6 と set5, set4, set3 のすべてを計算
                #   (実際には descriptors1set / 2set も含めるなら書く)
                #   ここでは例として set6 -> RDF,..., set3 -> GETAWAY, set4 -> CPSA, set5 -> WHIM のように呼ぶ

                # set6
                RDF, ATS, GATS, MATS, MoRSE = descriptors6set.getragmmdescriptors(
                    G_local, c_local, m_local, V_local, En_local, P_local, IP_local, EA_local
                )
                # set5
                WHIM = descriptors5set.getwhimdescriptors(
                    xyz_local, c_local, m_local, V_local, En_local, P_local, IP_local, EA_local
                )
                # set4
                CPSA = descriptors4set.getcpsadescriptors(
                    xyz_local, c_local, eachMolsRc[mol_id]
                )
                # set3
                GETAWAY = descriptors3set.getgetawayhatsindexes(
                    xyz_local, mass_local, bonds_local,
                    c_local, m_local, V_local, En_local, P_local, IP_local, EA_local
                )
                # set2
                GMdes, DDMdes = descriptors2set.calgeometricdescriptors(
                    xyz_local, mass_local, c_local, bonds_local, angles_local, 
                    dihedrals_local, density_local, disMat_local
                    )
                # set1
                TCdes = descriptors1set.caltopologyconnectivitydescriptors(
                    xyz_local, mass_local, bonds_local, angles_local, 
                    dihedrals_local, adjMat_local, disMat_local
                    )

                # まとめる
                res = {**others, **RDF, **ATS, **GATS, **MATS, **MoRSE,
                       **WHIM, **CPSA, **GETAWAY, **GMdes, **DDMdes, **TCdes}
                
            elif whichdescriptors == 'set1':
                # set1 のみ
                TCdes = descriptors1set.caltopologyconnectivitydescriptors(
                    xyz_local, mass_local, bonds_local, angles_local, 
                    dihedrals_local, adjMat_local, disMat_local
                    )
                res = {**TCdes}
            
            elif whichdescriptors == 'set2':
                # set2
                GMdes, DDMdes = descriptors2set.calgeometricdescriptors(
                    xyz_local, mass_local, c_local, bonds_local, angles_local, 
                    dihedrals_local, density_local, disMat_local
                    )
                res = {**GMdes, **DDMdes}

            elif whichdescriptors == 'set3':
                # set3 のみ
                GETAWAY = descriptors3set.getgetawayhatsindexes(
                    xyz_local, mass_local, bonds_local,
                    c_local, m_local, V_local, En_local, P_local, IP_local, EA_local
                )
                res = {**others, **GETAWAY}

            elif whichdescriptors == 'set4':
                # set4 => CPSA
                CPSA = descriptors4set.getcpsadescriptors(
                    xyz_local, c_local, eachMolsRc[mol_id]
                )
                res = {**others, **CPSA}

            elif whichdescriptors == 'set5':
                # set5 => WHIM
                WHIM = descriptors5set.getwhimdescriptors(
                    xyz_local, c_local, m_local, V_local, En_local, P_local, IP_local, EA_local
                )
                res = {**others, **WHIM}

            elif whichdescriptors == 'set6':
                # set6 => getragmmdescriptors
                RDF, ATS, GATS, MATS, MoRSE = descriptors6set.getragmmdescriptors(
                    G_local, c_local, m_local, V_local, En_local, P_local, IP_local, EA_local
                )
                res = {**others, **RDF, **ATS, **GATS, **MATS, **MoRSE}

            else:
                print(f"Warning: '{whichdescriptors}' not implemented. Skipped.")
                res = {**others} 

            molDescriptors[mol_id] = res

        # フレームごとにまとめる
        frame_res = {
            'frame': frame_idx,
            'molDescriptors': molDescriptors
        }
        results.append(frame_res)

    return results

def Run_pyl3dmd_asetraj(trajfile, last_n=10, output_csv=None, whichdescriptors='all'):
    """
    1) Run process_traj_getaway(trajfile, last_n).
    2) Convert the results to a pandas DataFrame.
    3) Optionally save the DataFrame to CSV if output_csv is given.
    4) Return the DataFrame.
    """
    # Step 1: Get the results list
    results_all = Calculate_descriptors(trajfile, last_n,whichdescriptors)

    # Step 2: Flatten results into a list of row-dicts
    # Each row => one (frame, molID) pair with all descriptor key/value
    rows = []
    for frame_result in results_all:
        frame_idx = frame_result['frame']
        for mol_id, desc_dict in frame_result['molDescriptors'].items():
            # Create one row with columns: frame, molID, plus the descriptor items
            row = {
                'frame': frame_idx,
                'molID': mol_id
            }
            # Merge the descriptor dictionary
            for k, v in desc_dict.items():
                row[k] = v
            rows.append(row)

    # Create a DataFrame
    df = pd.DataFrame(rows)

    # Step 3: Optionally write to CSV
    if output_csv is not None:
        df.to_csv(output_csv, index=False)
        print(f"Saved results to {output_csv}")



In [27]:
Run_pyl3dmd_asetraj("./data/hexadecane_NVE.traj", 10,"output.csv","set2")

系内の分子数: 10
系内の分子数: 10
系内の分子数: 10


  result = np.zeros([size,size])*inf
  Ghinv = 1/Gh
  Ghinv2 = 1/Gh2
  disdisMat = disMat3D/disMat


系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
系内の分子数: 10
Saved results to output.csv


In [25]:
df = pd.read_csv("output.csv")
df

Unnamed: 0,frame,molID,molecule,Timeframe,RDFu1,RDFu2,RDFu3,RDFu4,RDFu5,RDFu6,...,getawayREA17,getawayRmaxEA17,getawayREA18,getawayRmaxEA18,getawayREA19,getawayRmaxEA19,getawayREA20,getawayRmaxEA20,getawayRTEA,getawayRTmaxEA
0,41,0,1,41,3.224357e-13,11.847505,12.971947,10.207647,33.812725,38.509139,...,0.049007,0.006947,0.0,0,0.0,0,0.0,0,186058.701990,0.057527
1,41,1,2,41,8.175495e-14,11.731579,13.413324,13.285887,39.871686,37.360803,...,0.026245,0.003296,0.0,0,0.0,0,0.0,0,170194.499747,0.067645
2,41,2,3,41,1.067800e-13,12.998137,13.231375,13.070553,36.764061,34.241656,...,0.028582,0.003485,0.0,0,0.0,0,0.0,0,179303.805178,0.062914
3,41,3,4,41,4.990955e-13,13.209748,11.762092,11.903472,27.004921,36.443555,...,0.025295,0.003952,0.0,0,0.0,0,0.0,0,160438.966812,0.065198
4,41,4,5,41,8.799427e-13,12.561309,10.447940,8.952172,31.165212,34.039087,...,0.017038,0.002299,0.0,0,0.0,0,0.0,0,147102.669670,0.053005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,50,5,6,50,1.761575e-13,13.277602,12.678418,10.911981,36.337078,39.141426,...,0.020242,0.002481,0.0,0,0.0,0,0.0,0,153161.520248,0.062488
96,50,6,7,50,5.081226e-14,11.016067,12.169567,7.392851,37.349312,36.445851,...,0.022342,0.003131,0.0,0,0.0,0,0.0,0,169266.984461,0.059240
97,50,7,8,50,1.293049e-13,10.792834,12.682429,12.155721,30.464292,30.333082,...,0.013565,0.002391,0.0,0,0.0,0,0.0,0,150627.673993,0.049198
98,50,8,9,50,1.641733e-13,12.360033,13.029033,7.947055,29.911393,36.525588,...,0.022694,0.002825,0.0,0,0.0,0,0.0,0,157899.171956,0.073926


720

In [None]:
import