In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import sportran as st

import lammps_logfile as ll

from ase.io import read


# From LAMMPS/GPUMD to archive

## LAMMPS input

In [76]:
root = './data_manager/lammps'

### LAMMPS printed table

A simple line of pandas is sufficient to read a table file printed by lammps using the command:

`fix 		thermo_print all print 5 "$(step) $(time) $(econserve) $(pe) $(etotal) $(temp) $(press) $(density) $(enthalpy) $(v_Jx) $(v_Jy) $(v_Jz) $(v_vcm11) $(v_vcm12) $(v_vcm13) $(v_vcm21) $(v_vcm22) $(v_vcm23)" append sample.lammps screen no title "step time econserve etotal T P density enthalpy j[1] j[2] j[3] v1[1] v1[2] v1[3] v2[1] v2[2] v2[3]"`

The column names of the `pandas.DataFrame` will be the ones defined in by the `title` variable of the LAMMPS `fix print` command. We should be careful and choose a set of keywords we consistently use.

In [84]:
timeseries = pd.read_csv(f'{root}/sample.lammps', sep = '\s+')

# We split thermodynamic data from the timeseries of the currents (for consistency with GPUMD)
thermo = timeseries.loc[:, ['step', 'time', 'econserve', 'etotal', 'T', 'P', 'density', 'enthalpy']]
timeseries = timeseries.loc[:, ['j[1]', 'j[2]', 'j[3]', 'v1[1]', 'v1[2]', 'v1[3]', 'v2[1]', 'v2[2]', 'v2[3]']] 

# We should also load a lammps data file to get the volume, since it's not present in the timeseries.
structure = read(f'{root}/config.lammpsdata', format = 'lammps-data') 

Save the timeseries to a feather file

In [85]:
timeseries.to_feather(f'{root}/sample.lammps.ft')

Read it from file

In [86]:
timeseries = pd.read_feather(f'{root}/sample.lammps.ft')

In [87]:
timeseries

Unnamed: 0,j[1],j[2],j[3],v1[1],v1[2],v1[3],v2[1],v2[2],v2[3]
0,98.860585,451.412579,401.256234,-0.024594,0.243474,0.100957,0.015949,-0.157896,-0.065472
5,86.835391,431.087326,414.761925,-0.032700,0.263837,0.091336,0.021207,-0.171101,-0.059232
10,111.486685,386.972491,440.930754,-0.036475,0.283372,0.081802,0.023654,-0.183770,-0.053049
15,175.084996,326.531957,411.807814,-0.035150,0.302389,0.071609,0.022795,-0.196103,-0.046439
20,178.459243,269.363316,440.358015,-0.029682,0.323938,0.061421,0.019249,-0.210078,-0.039833
...,...,...,...,...,...,...,...,...,...
499975,-258.913911,36.169839,-553.521873,0.445324,-0.307098,0.387822,-0.288798,0.199157,-0.251507
499980,-283.186906,40.685236,-500.809352,0.460423,-0.355443,0.366227,-0.298590,0.230509,-0.237503
499985,-302.987081,23.344070,-450.029797,0.472723,-0.399537,0.333738,-0.306567,0.259105,-0.216434
499990,-329.423859,16.555491,-400.434345,0.482686,-0.440561,0.293111,-0.313028,0.285709,-0.190086


In [88]:
thermo

Unnamed: 0,step,time,econserve,etotal,T,P,density,enthalpy
0,0.0000,-2810.806235,-3083.548118,-2823.832117,2011.261684,-392.441199,1.171598,-2833.976211
5,0.0025,-2810.806294,-3083.112614,-2821.114775,2028.932428,-235.167801,1.171598,-2827.193556
10,0.0050,-2810.805897,-3082.615031,-2823.061548,2010.003130,-239.968940,1.171598,-2829.264433
15,0.0075,-2810.806471,-3082.154176,-2826.750674,1977.865345,-267.817847,1.171598,-2833.673417
20,0.0100,-2810.806200,-3081.739821,-2826.784147,1974.397324,-224.982999,1.171598,-2832.599665
...,...,...,...,...,...,...,...,...
499975,249.9875,-2810.794373,-3082.953261,-2825.205822,1996.016989,-688.964326,1.171598,-2843.014653
499980,249.9900,-2810.794285,-3082.957909,-2822.482228,2017.144709,-640.411150,1.171598,-2839.036023
499985,249.9925,-2810.794678,-3082.987220,-2822.597515,2016.478906,-646.861749,1.171598,-2839.318050
499990,249.9950,-2810.794972,-3082.977744,-2822.821297,2014.672535,-660.713589,1.171598,-2839.899884


In [89]:
print(structure)
print(f'Temperature = {thermo["T"].mean():.2f} K')
print(f'Volume = {structure.get_volume():.2f} Ang^3')

Atoms(symbols='Cl500Na500', pbc=True, cell=[34.59789548758798, 34.59789548758798, 34.59789548758798], id=..., masses=..., momenta=..., type=...)
Temperature = 1999.05 K
Volume = 41414.18 Ang^3


## GPUMD input

The output format of GPUMD is much less flexible than LAMMPS', so here we know what to expect when we read a file. The only thing that  can change is the number of columns of the `compute.out` file, which depend on what's in the `run.in` file.

There are packages (e.g. `gpyumd`) that supposedly can read GPUMD outputs, but they don't seem to be updated anymore. Since we just need to read a couple of tables, I think we can do it from scratch.

In [90]:
root = './data_manager/gpumd'

In [91]:
structure = read(f'{root}/model.xyz')

In [92]:
try:
    timeseries = pd.read_feather(f'{root}/timeseries.ft')
except FileNotFoundError:
    timeseries = {}
    timeseries_ = pd.read_csv(f'{root}/compute.out',
                             sep = '\s+',
                             names = [
                                 'jv1[1]', 'jv2[1]', 'jv1[2]', 'jv2[2]', 'jv1[3]', 'jv2[3]',
                                 'jk1[1]', 'jk2[1]', 'jk1[2]', 'jk2[2]', 'jk1[3]', 'jk2[3]',
                                 'v1[1]', 'v2[1]', 'v1[2]', 'v2[2]', 'v1[3]', 'v2[3]'
                                 ]
                                 )
    for ii in [1,2,3]:
        timeseries[f'j[{ii}]'] = timeseries_[f'jk1[{ii}]'] + timeseries_[f'jv1[{ii}]'] + timeseries_[f'jk2[{ii}]'] + timeseries_[f'jv2[{ii}]']
        timeseries[f'v1[{ii}]'] = timeseries_[f'v1[{ii}]'] + timeseries_[f'v1[{ii}]'] + timeseries_[f'v1[{ii}]'] + timeseries_[f'v1[{ii}]']
        timeseries[f'v2[{ii}]'] = timeseries_[f'v2[{ii}]'] + timeseries_[f'v2[{ii}]'] + timeseries_[f'v2[{ii}]'] + timeseries_[f'v2[{ii}]']
    timeseries = pd.DataFrame(timeseries)
    timeseries.to_feather(f'{root}/timeseries.ft')
except Exception as e:
    print(f'Some other exception: {e}')

In [93]:
timeseries

Unnamed: 0,j[1],v1[1],v2[1],j[2],v1[2],v2[2],j[3],v1[3],v2[3]
0,5.307160,-50.682440,50.684640,8.807900,52.03160,-52.02848,-9.734899,146.46300,-146.46056
1,6.197453,-57.130560,57.132720,8.981368,48.65744,-48.65432,-9.020685,139.95560,-139.95312
2,6.776381,-57.573560,57.575760,8.692182,45.97476,-45.97164,-8.215225,132.22656,-132.22412
3,7.068126,-52.376000,52.378200,7.965965,44.11236,-44.10928,-7.356712,123.72356,-123.72112
4,7.104579,-41.995240,41.997440,6.861634,43.06760,-43.06448,-6.456287,114.46780,-114.46536
...,...,...,...,...,...,...,...,...,...
99995,-11.275199,151.886520,-151.883200,-2.732424,133.54872,-133.54416,8.721313,-255.50924,255.51268
99996,-8.661137,114.979760,-114.976480,-1.979022,123.92116,-123.91660,9.341784,-276.86204,276.86548
99997,-5.984299,76.342480,-76.339160,-1.205753,111.79908,-111.79452,9.964473,-297.18272,297.18620
99998,-3.333353,36.762828,-36.759532,-0.367569,97.48352,-97.47896,10.649682,-316.32052,316.32400


GPUMD saves global thermodynamic quantities to a different file (`thermo.out`), and it usually happens that `compute.out` and `thermo.out` are printed at different rates. So in this case it makes sense to read another table with the thermodynamic quantities.

In [94]:
try:
    thermo = pd.read_feather(f'{root}/thermo.ft')
except FileNotFoundError:
    thermo = pd.read_csv(f'{root}/thermo.out', sep = '\s+', names = ['T', 'K', 'U', 'Px', 'Py', 'Pz', 'Pyz', 'Pxz', 'Pxy', 'Lx', 'Ly', 'Lz'])
except Exception as e:
    print(f'Some other exception: {e}')

In [95]:
thermo

Unnamed: 0,T,K,U,Px,Py,Pz,Pyz,Pxz,Pxy,Lx,Ly,Lz
0,395.125378,3603.778879,-289042.41148,-1.167067,-1.215918,-1.248022,0.001475,0.009405,0.009959,96.9385,109.056,121.173
1,378.045897,3448.003832,-289780.76109,-1.291412,-1.292198,-1.327587,-0.002286,-0.007696,0.001941,96.9385,109.056,121.173
2,352.709011,3216.916335,-290148.59903,-1.361265,-1.381595,-1.374045,-0.004406,-0.011146,0.016371,96.9385,109.056,121.173
3,334.223087,3048.313696,-290353.04438,-1.373173,-1.409258,-1.400900,-0.017500,0.007733,-0.001820,96.9385,109.056,121.173
4,321.288342,2930.341110,-290513.09910,-1.414994,-1.409434,-1.421372,-0.000430,-0.015094,-0.012644,96.9385,109.056,121.173
...,...,...,...,...,...,...,...,...,...,...,...,...
8995,300.752425,2743.041310,-293261.94191,-1.633069,-1.643559,-1.656173,-0.001073,0.015467,-0.002861,96.9385,109.056,121.173
8996,300.716094,2742.709958,-293262.24822,-1.643525,-1.631987,-1.667063,-0.011431,0.014692,-0.001572,96.9385,109.056,121.173
8997,298.985521,2726.926099,-293263.35138,-1.653012,-1.658252,-1.667866,-0.008714,0.008375,-0.006385,96.9385,109.056,121.173
8998,301.947746,2753.943349,-293247.86899,-1.641305,-1.640563,-1.664131,-0.011629,0.025223,0.004753,96.9385,109.056,121.173


In [96]:
print(structure)
print(f'Temperature = {thermo["T"].mean():.2f} K')
print(f'Volume = {structure.get_volume():.2f} Ang^3')

Atoms(symbols='Li24480Si46080', pbc=True, cell=[96.9385, 109.056, 121.173], group=..., mass=..., vel=...)
Temperature = 300.14 K
Volume = 1281007.64 Ang^3
