In [6]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import re

In [43]:
# The molecule dump lives in btxt.txt (short for batch_txt.txt).
filename = 'btxt.txt'

with open(filename, 'r') as openedfile:
    # Keep every line verbatim (trailing newlines included) and finish with
    # one bare newline, so the final block is closed by a blank line just
    # like the blank-line test in end_pattern expects.
    contents = list(openedfile) + ['\n']

In [175]:
# Whole-line markers of the dump format. Every line is matched with its
# trailing "\n" still attached.
pre_step_pat = re.compile(r"^ITEM: TIMESTEP\n$")         # header before the timestep value
end_pattern = re.compile(r"^\n$")                        # blank line that ends a block
pre_box = re.compile(r"^ITEM: BOX BOUNDS pp pp pp\n$")   # header before the bounds lines
pre_atom = re.compile(r"^ITEM: ATOMS element x y z\n$")  # header before the atom lines

# One decimal number: optional sign, one or more integer digits, optional
# fraction, optional exponent. The decimal point is escaped -- a bare "."
# matches any character -- and the integer part may have several digits, so
# values such as 10.5 are captured whole instead of being cut to "10".
_NUMBER = r"-?\d+(?:\.\d*)?(?:[eE][-+]?\d+)?"

bounds_pat = re.compile(r" (" + _NUMBER + r")\n")  # last number on a bounds line
position_pat = re.compile(r" (" + _NUMBER + r")")  # every space-preceded number on an atom line
atom_pat = re.compile(r"([A-Za-z]{1,2})")          # one- or two-letter atom type

In [226]:
def create_boxes(lines=None):
    '''Parse dump lines into one flat row of numbers per box, plus column labels.

    Parameters
    ----------
    lines : list of str, optional
        The raw lines to parse, each still carrying its trailing newline.
        When omitted, the notebook-level ``contents`` list is used, so the
        existing ``create_boxes()`` call keeps working.

    Returns
    -------
    boxes : list of list of float
        One list per ``ITEM: TIMESTEP`` header found. In order, each holds:
        1: the timestep (the line right after the header, as a float)
        2: the number captured by ``bounds_pat`` on the line right after the
           box-bounds header
        3: every number captured by ``position_pat`` on each atom line
    Labels : list of str
        ``"timestep"``, ``"boundry"`` and then ``<atom><n>x``, ``<atom><n>y``,
        ``<atom><n>z`` for each atom line read before the first block-ending
        blank line; ``n`` counts atom lines from 0.
    '''
    if lines is None:
        lines = contents

    boxes = []                        # one inner list per box
    Labels = ["timestep", "boundry"]  # column labels for the pandas DataFrame
    next_timestep_idx = None          # line index holding the timestep value
    next_bounds_idx = None            # line index holding the bounds value
    next_atom_idx = None              # line index of the next expected atom line
    cb = None                         # index of the current box (len(boxes) - 1)
    this_is_atom = False              # True while inside an atom section
    Atoms_labled = False              # becomes True once the atom labels are complete
    atom_counter = 0

    # Every line still has its newline attached at this point.
    for i, c in enumerate(lines):

        if pre_step_pat.search(c):    # a timestep header opens a new box
            boxes.append([])
            cb = len(boxes) - 1
            next_timestep_idx = i + 1

        if end_pattern.search(c) and cb is not None:  # blank line: this box is done
            this_is_atom = False
            next_atom_idx = None      # stop collecting atom data
            Atoms_labled = True       # labels are only built for the first block

        if pre_atom.search(c):        # atom lines start on the next line
            this_is_atom = True
            next_atom_idx = i + 1

        if pre_box.search(c):         # the bounds value is on the next line
            next_bounds_idx = i + 1

        if i == next_timestep_idx and cb is not None:
            # First entry of a box: the timestep, newline stripped, as a float.
            boxes[cb].append(float(c[:-1]))

        if i == next_bounds_idx:
            # Second entry of a box: the captured bounds value.
            boxes[cb].append(float(bounds_pat.findall(c)[0]))

        if i == next_atom_idx and this_is_atom:
            # Remaining entries: the coordinates found on this atom line.
            positions = [float(num) for num in position_pat.findall(c)]
            boxes[cb].extend(positions)
            next_atom_idx += 1        # expect another atom line right after

            if not Atoms_labled:      # first block only: name this atom's columns
                atom = atom_pat.findall(c)[0]
                prefix = atom + str(atom_counter)
                Labels.extend(prefix + axis for axis in "xyz")
                atom_counter += 1

    return boxes, Labels

In [227]:
# Parse the loaded lines: `boxes` holds one flat row of numbers per box and
# `Labels` holds the matching column names.
boxes, Labels = create_boxes()

In [230]:
# Build the pandas DataFrame and name its columns in a single step. Handing
# the labels to the constructor also gives an empty, correctly labelled frame
# when no boxes were parsed, where assigning `.columns` afterwards would fail
# with a length mismatch.
df = pd.DataFrame(boxes, columns=Labels)

In [232]:
# Keep only the rows whose timestep is 38445 or later, i.e. the part of the
# run after things have 'stabilized'.
goodtimes = df.loc[df['timestep'] >= 38445]

In [237]:
def boundset(bound, num):
    '''Return ``bound % num``, used to fold a position back by the bounds.

    For plain numbers the result of ``%`` takes the sign of ``num``, so a
    positive ``num`` always gives a value in ``[0, num)``.

    NOTE(review): the example call below passes 4 as ``bound`` and 2.2 as
    ``num`` -- confirm which argument is meant to be the position and which
    the box bound.
    '''
    return bound % num

boundset(4, 2.2)  # quick check: 4 % 2.2 is about 1.8

1.7999999999999998

In [258]:
# Display the rows kept by the timestep filter.
goodtimes

Unnamed: 0,timestep,boundry,F0x,F0y,F0z,Be1x,Be1y,Be1z,F2x,F2y,...,Li98z,F99x,F99y,F99z,Li100x,Li100y,Li100z,Be101x,Be101y,Be101z
1089,38445.0,5.35854,0.855397,-2.673989,-8.334906,-0.582539,-3.128704,-7.921900,-2.502117,-2.620931,...,0.642221,-0.162514,0.634153,3.292919,6.640273,1.618582,3.263494,-1.338923,0.037007,2.792048
1090,38445.5,5.35838,0.852697,-2.669997,-8.336902,-0.581856,-3.130544,-7.921926,-2.503063,-2.620059,...,0.634288,-0.164857,0.635619,3.291999,6.637580,1.612785,3.252708,-1.345660,0.038591,2.788705
1091,38446.0,5.35840,0.849922,-2.666259,-8.339177,-0.581383,-3.132250,-7.922512,-2.504028,-2.619330,...,0.626574,-0.166751,0.637211,3.291238,6.635322,1.607200,3.242773,-1.352253,0.039787,2.785198
1092,38446.5,5.35843,0.846982,-2.662590,-8.341516,-0.581073,-3.133764,-7.923408,-2.504963,-2.618634,...,0.618874,-0.168267,0.638943,3.290501,6.633228,1.601631,3.233314,-1.358780,0.040630,2.781395
1093,38447.0,5.35834,0.843784,-2.658835,-8.343819,-0.580895,-3.135059,-7.924482,-2.505853,-2.617896,...,0.610982,-0.169459,0.640846,3.289698,6.631115,1.595896,3.224010,-1.365337,0.041138,2.777182
1094,38447.5,5.35844,0.840552,-2.655336,-8.346392,-0.580879,-3.136226,-7.926076,-2.506788,-2.617286,...,0.603374,-0.170289,0.642844,3.289035,6.629444,1.590390,3.215532,-1.371588,0.041282,2.772988
1095,38448.0,5.35827,0.836919,-2.651544,-8.348777,-0.580966,-3.137105,-7.927675,-2.507653,-2.616524,...,0.595302,-0.170837,0.645065,3.288167,6.627509,1.584467,3.206822,-1.377944,0.041058,2.768246
1096,38448.5,5.35808,0.833355,-2.648018,-8.351045,-0.581159,-3.137724,-7.929411,-2.508461,-2.615805,...,0.587756,-0.171064,0.647271,3.287330,6.625823,1.578896,3.199044,-1.383659,0.040487,2.763697
1097,38449.0,5.35806,0.829601,-2.644565,-8.353718,-0.581486,-3.138264,-7.931729,-2.509387,-2.615168,...,0.580133,-0.171020,0.649664,3.286574,6.624503,1.573267,3.191656,-1.389230,0.039518,2.759133
1098,38449.5,5.35805,0.825726,-2.641182,-8.356453,-0.581910,-3.138602,-7.934297,-2.510333,-2.614537,...,0.572609,-0.170716,0.652162,3.285803,6.623365,1.567666,3.184736,-1.394421,0.038152,2.754643


In [257]:
# Work on a copy so `goodtimes` itself is never modified.
goodbounds = goodtimes.copy()

# Scratch loop: prints every coordinate of the FIRST row only (note the
# `break`). The previous body also evaluated `goodbounds.iloc[i][j]`, which
# used a coordinate value as a key into a string-labelled row and threw the
# result away; that lookup raises KeyError when keys are resolved by label,
# so it has been dropped.
# NOTE(review): `goodbounds` is still an unmodified copy and `bound` is read
# but not used -- presumably the coordinates are meant to be wrapped with
# `boundset` here; confirm the intended formula before filling this in.
for i in range(len(goodtimes)):
    bound = goodtimes.iloc[i]['boundry']
    for j in goodtimes.iloc[i].iloc[2:]:  # skip the timestep and boundry columns
        print(j)
    break

0.8553969496
-2.673989126
-8.3349055679
-0.5825388844
-3.1287037904
-7.9219003529
-2.5021172019
-2.6209313604
-5.0744055891
-6.0097412759
-3.9088333888
-8.9156901798
-1.1487284663
-2.6908351027
-4.3681477693
-5.9789970864
-7.6736036304
-2.775661216
-1.5990267761
-2.4313147222
-2.504610895
-0.0564565199
-1.82092062
-4.762094105
-4.1022380309
-6.0618722603
1.2402348026
-3.7005766005
-3.944481661
0.0574505574
1.585485985
-1.8632155793
5.5671273955
-0.6385326299
-3.5943542605
-6.5027423128
-4.0951248017
-2.1335233957
-8.6882149888
-0.8622561807
-4.2566141797
-3.9550336455
-4.9712039312
-5.1559660886
-7.7564443528
8.3849487105
6.4571216574
-5.8682339858
-3.5718788135
-5.9075048346
-1.2783401668
-1.3460312362
-1.7075048704
2.2509307248
-1.2344525714
-4.250768003
1.8208165448
1.4270984625
-1.2529608157
4.188108677
-3.1206111509
-0.4427073973
-3.9766708889
1.975488474
1.0904484625
-2.8793322658
-3.308963252
1.2462159976
-0.5387501141
-8.7780334562
8.2996471285
-2.6499754651
-3.5809663177
3.150

In [252]:
# Coordinate columns only (everything after the first two columns,
# 'timestep' and 'boundry'), as a NumPy array.
goodtimes.iloc[:, 2:].values

array([[ 0.85539695, -2.67398913, -8.33490557, ..., -1.33892315,
         0.03700735,  2.79204848],
       [ 0.85269667, -2.66999651, -8.33690245, ..., -1.34565961,
         0.03859121,  2.78870453],
       [ 0.84992216, -2.66625923, -8.33917732, ..., -1.35225329,
         0.03978686,  2.78519808],
       ...,
       [ 0.58700198, -4.37473227, -8.57700336, ..., -1.54085411,
        -1.60835288,  2.39356528],
       [ 0.58558595, -4.37656396, -8.57330962, ..., -1.53770728,
        -1.60247471,  2.40087478],
       [ 0.58410071, -4.37851833, -8.56965389, ..., -1.53470278,
        -1.59670465,  2.40815032]])